In [37]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [38]:
# Root of the ESPNcricinfo site; relative hrefs scraped from pages are appended to it.
base_url = 'https://www.espncricinfo.com'
# Sample listing page (judging by its path: Afghanistan players, letter "a").
url = base_url + '/cricketers/team/afghanistan-40/alpha-a'

In [39]:
def get_content_from_url(url, timeout=30):
    """Download a page with an HTTP GET and return its raw body.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled connection would block the scrape forever.

    Returns
    -------
    bytes or str
        ``response.content`` when the status code is 200, otherwise the
        sentinel string ``"Error finding page"``.

    Raises
    ------
    requests.exceptions.RequestException
        Propagated unchanged from ``requests.get`` (connection failures,
        timeouts, malformed URLs).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.content
    # The scraper functions in this notebook compare against this exact string.
    return "Error finding page"


In [137]:
def get_teams_url(url):
    """Collect the name and player-listing URL of every team linked from ``url``.

    Parameters
    ----------
    url : str
        Page whose first ``<div class="ds-grid">`` holds the team links.

    Returns
    -------
    list of dict or str
        One ``{'team_name': ..., 'team_url': ...}`` dict per link, in page
        order. Returns ``[]`` when the page has no ``ds-grid`` div, and the
        sentinel string ``"Error finding page"`` when the fetch failed (the
        same convention as the other scraper functions in this notebook).
    """
    content = get_content_from_url(url)
    if content == "Error finding page":
        return content
    soup = BeautifulSoup(content, 'html.parser')
    grids = soup.find_all('div', {'class': 'ds-grid'})
    if not grids:
        return []
    teams_url = []
    # Only direct children that carry an href are team links; this also skips
    # whitespace/text nodes, which cannot be indexed by attribute name.
    for team_link in grids[0].find_all(href=True, recursive=False):
        teams_url.append({
            'team_name': team_link.text,
            'team_url': base_url + '/cricketers' + team_link['href']
        })
    return teams_url

In [144]:
def get_team_aplhas(team_url):
    """Return the absolute URLs of a team's alphabetical player-index pages.

    Every ``<a class="ds-h-10">`` found inside a ``<div class="ds-px-4">``
    whose href contains ``/alpha-`` is kept and prefixed with ``base_url``.

    Returns the sentinel string ``"Error finding page"`` instead of a list
    when the team page could not be fetched.
    """
    page = get_content_from_url(team_url)
    if page == "Error finding page":
        return page
    parsed = BeautifulSoup(page, 'html.parser')
    return [
        base_url + anchor['href']
        for container in parsed.find_all('div', {'class': 'ds-px-4'})
        for anchor in container.find_all('a', {'class': 'ds-h-10'})
        if '/alpha-' in anchor['href']
    ]
    
    

In [145]:
def get_players_by_alpha(url):
    """Return the absolute profile URLs listed on one alphabetical index page.

    Every ``<a class="ds-flex">`` inside a ``<div class="ds-grid">`` is
    treated as a player link; its href is prefixed with ``base_url``.

    Returns the sentinel string ``"Error finding page"`` instead of a list
    when the page could not be fetched.
    """
    page = get_content_from_url(url)
    if page == "Error finding page":
        return page
    parsed = BeautifulSoup(page, 'html.parser')
    return [
        base_url + anchor['href']
        for container in parsed.find_all('div', {'class': 'ds-grid'})
        for anchor in container.find_all('a', {'class': 'ds-flex'})
    ]

In [189]:
def get_player_data(player_url):
    """Scrape one player's profile page into a dictionary.

    Parameters
    ----------
    player_url : str
        Absolute URL of the player's profile page.

    Returns
    -------
    dict or str
        On success, a dict holding:

        * one entry per ``<p class="ds-uppercase">`` label found inside the
          first ``<div class="ds-p-4">``; the key is the label text and the
          value is the text of the node that immediately follows it (``''``
          when nothing follows);
        * whatever ``get_player_stats`` returns for the same URL;
        * ``'records_url'`` as returned by ``get_player_records``.

        When the page cannot be fetched the sentinel is printed and returned
        as-is (``"Error finding page"``), matching the other scraper
        functions in this notebook.
    """
    content = get_content_from_url(player_url)
    if content == "Error finding page":
        print(content)
        # Stop here: parsing the sentinel text would find no profile block.
        return content
    soup = BeautifulSoup(content, 'html.parser')
    player_data = {}
    profile = soup.find('div', {'class': 'ds-p-4'})
    if profile is not None:
        # Searching from the profile block itself avoids calling find_all on
        # bare text children, which are not tags.
        for label in profile.find_all('p', {'class': 'ds-uppercase'}):
            value_node = label.next_sibling
            player_data[label.text] = value_node.text if value_node is not None else ''
    # NOTE: get_player_stats fetches player_url a second time on its own.
    player_data.update(get_player_stats(player_url))
    player_data['records_url'] = get_player_records(soup)
    # Currently only prints the image container; nothing is stored from it.
    get_player_image(soup)
    return player_data

def get_player_stats(player_url):
    """Read the first two HTML tables of a player page as DataFrames.

    Parameters
    ----------
    player_url : str
        Anything ``pandas.read_html`` accepts; the notebook passes the
        player's profile URL.

    Returns
    -------
    dict
        ``{'batting_fielding': tables[0], 'bowling': tables[1]}`` when at
        least two tables are present, otherwise an empty dict.
    """
    try:
        tables = pd.read_html(player_url)
    except ValueError:
        # read_html raises instead of returning an empty list when the page
        # has no <table>; treat that like "fewer than two tables".
        return {}
    if len(tables) < 2:
        return {}
    batting_fielding, bowling = tables[0], tables[1]
    return {
        'batting_fielding': batting_fielding,
        'bowling': bowling
    }

def get_player_records(player_soup):
    """Return the absolute URL behind the page's "View more records" link.

    ``player_soup`` is the parsed player page. The href of the first ``<a>``
    whose string is exactly ``'View more records'`` is prefixed with
    ``base_url``; an empty string is returned when no such link exists.
    """
    matches = player_soup.find_all('a', string='View more records')
    if not matches:
        return ''
    first_link = matches[0]
    return base_url + first_link['href']

def get_player_image(player_soup):
    """Print the first ``<div class="ds-ml-auto">`` of a parsed player page.

    Nothing is returned; the element (or ``None`` when absent) is only
    written to standard output.
    """
    print(player_soup.find('div', {'class': 'ds-ml-auto'}))

In [190]:
# Try the scraper on a single profile page; the returned value is shown as the cell output.
get_player_data('https://www.espncricinfo.com/cricketers/virat-kohli-253802')

<div class="ds-ml-auto ds-w-48 ds-h-48"><div style="position:relative;padding-bottom:100%;height:0;overflow:hidden"><img alt="Virat Kohli" class="" src="https://wassets.hscicdn.com/static/images/lazyimage-transparent.png" style="position:absolute;top:0;left:0;width:100%;height:100%"/></div></div>


{'Full Name': 'Virat Kohli',
 'Born': 'November 05, 1988, Delhi',
 'Age': '34y 289d',
 'Batting Style': 'Right hand Bat',
 'Bowling Style': 'Right arm Medium',
 'Playing Role': 'Top order Batter',
 'batting_fielding':    Format  Mat  Inns  NO   Runs    HS    Ave     BF      SR  100s  50s    4s  \
 0    Test  111   187  11   8676  254*  49.29  15708   55.23    29   29   966   
 1     ODI  275   265  40  12898   183  57.32  13776   93.62    46   65  1211   
 2    T20I  115   107  31   4008  122*  52.73   2905  137.96     1   37   356   
 3      FC  143   235  18  10925  254*  50.34  19611   55.70    36   37  1279   
 4  List A  309   298  43  14340   183  56.23  15312   93.65    50   73  1375   
 5     T20  374   357  68  11965  122*  41.40   8972  133.35     8   91  1069   
 
     6s   Ct  St  
 0   24  110   0  
 1  138  142   0  
 2  117   50   0  
 3   39  141   0  
 4  162  160   0  
 5  371  170   0  ,
 'bowling':    Format  Mat  Inns  Balls  Runs  Wkts   BBI   BBM     Ave  Econ   

In [167]:
import time

In [171]:
# Walk the chain: team list -> alphabetical index pages of the team at
# index 1 -> player links on the first index page.
teams = get_teams_url(base_url + '/team')
teams_alphas = get_team_aplhas(teams[1]['team_url'])
players = get_players_by_alpha(teams_alphas[0])

# Fetch each profile in turn, echoing the URL being scraped and pausing one
# second between players.
player_data = []
for player in players:
    print(player)
    profile = get_player_data(player)
    player_data.append(profile)
    time.sleep(1)

https://www.espncricinfo.com/cricketers/ted-a-beckett-3931
https://www.espncricinfo.com/cricketers/sean-abbott-398666
https://www.espncricinfo.com/cricketers/warwick-adlam-3940
https://www.espncricinfo.com/cricketers/ashton-agar-505120
https://www.espncricinfo.com/cricketers/wes-agar-959833
https://www.espncricinfo.com/cricketers/lachlan-aitken-1356600
https://www.espncricinfo.com/cricketers/lee-albon-53590
https://www.espncricinfo.com/cricketers/terry-alderman-3943
https://www.espncricinfo.com/cricketers/george-alexander-3944
https://www.espncricinfo.com/cricketers/harry-alexander-3945
https://www.espncricinfo.com/cricketers/sarah-aley-53670
https://www.espncricinfo.com/cricketers/frank-allan-3950
https://www.espncricinfo.com/cricketers/peter-allan-3953
https://www.espncricinfo.com/cricketers/richard-allen-323798
https://www.espncricinfo.com/cricketers/reginald-allen-3958
https://www.espncricinfo.com/cricketers/phil-alley-3961
https://www.espncricinfo.com/cricketers/jeremy-allison-428

In [172]:
# Inspect the first scraped record.
player_data[0]

{'Full Name': "Edward Lambert a'Beckett",
 'Born': 'August 11, 1907, East St Kilda, Melbourne, Victoria',
 'Died': 'June 02, 1989, Terang, Victoria, (aged 81y 295d)',
 'Batting Style': 'Right hand Bat',
 'Bowling Style': 'Right arm Fast medium',
 'batting_fielding':   Format  Mat  Inns  NO  Runs   HS    Ave  100s  50s 6s  Ct  St
 0   Test    4     7   0   143   41  20.42     0    0  0   4   0
 1     FC   47    64   8  1636  152  29.21     2    7  -  35   0,
 'bowling':   Format  Mat Inns  Balls  Runs  Wkts    BBI   BBM     Ave  Econ     SR 4w  \
 0   Test    4    8   1062   317     3   1/41  1/66  105.66  1.79  354.0  0   
 1     FC   47    -   9196  3062   105  6/119     -   29.16  1.99   87.5  -   
 
    5w  10w  
 0   0    0  
 1   3    0  ,
 'records_url': ''}

In [174]:
# One full name per line, in the order the records were scraped.
for record in player_data:
    full_name = record['Full Name']
    print(full_name)

Edward Lambert a'Beckett
Sean Anthony Abbott
Warwick James Adlam
Ashton Charles Agar
Wesley Austin Agar
Lachlan Aitken
Leanne Margaret Albon
Terence Michael Alderman
George Alexander
Harry Houston Alexander
Sarah Elizabeth Aley
Francis Erskine Allan
Peter John Allan
Richard Allen
Reginald Charles Allen
Phillip John Sydney Alley
Jeremy Hammond Allison
Mary Allitt
Elizabeth Amos
Charlie Anderson
Matthew Allan Anderson
Timothy Laurence Anderson
Sarah Joy Andrews
Thomas David Andrews
Thomas James Edwin Andrews
Jo Angel
Austin Anlezark
Denise Audrey Annetts
Peggy Antonio
Monty Archdale
Kenneth Alan Archer
Ronald Graham Archer
Glenarvon Huntley Armstrong
Timothy John Armstrong
Warwick Windridge Armstrong
Ben Matthew Ashkenazi
Nathan William Ashley
Shaun Nicholas Austin
Clinton Auty
Riley R Ayre


In [170]:
# Dump the value returned by get_teams_url (a list of team name/URL dicts).
print(teams)

[{'team_name': 'Afghanistan', 'team_url': 'https://www.espncricinfo.com/cricketers/team/afghanistan-40'}, {'team_name': 'Australia', 'team_url': 'https://www.espncricinfo.com/cricketers/team/australia-2'}, {'team_name': 'Bangladesh', 'team_url': 'https://www.espncricinfo.com/cricketers/team/bangladesh-25'}, {'team_name': 'England', 'team_url': 'https://www.espncricinfo.com/cricketers/team/england-1'}, {'team_name': 'India', 'team_url': 'https://www.espncricinfo.com/cricketers/team/india-6'}, {'team_name': 'Ireland', 'team_url': 'https://www.espncricinfo.com/cricketers/team/ireland-29'}, {'team_name': 'New Zealand', 'team_url': 'https://www.espncricinfo.com/cricketers/team/new-zealand-5'}, {'team_name': 'Pakistan', 'team_url': 'https://www.espncricinfo.com/cricketers/team/pakistan-7'}, {'team_name': 'South Africa', 'team_url': 'https://www.espncricinfo.com/cricketers/team/south-africa-3'}, {'team_name': 'Sri Lanka', 'team_url': 'https://www.espncricinfo.com/cricketers/team/sri-lanka-8'}

In [210]:
import sys
from PyQt5.Qt import *
from PyQt5.QtWebEngineWidgets import *
from PyQt5.QtWidgets import QApplication

ModuleNotFoundError: No module named 'PyQt5.QtWebEngineWidgets'

In [None]:
# Install the package expected to provide PyQt5.QtWebEngineWidgets, which the import cell failed to find.
# NOTE(review): `%pip install` would target the running kernel's environment; consider it and a pinned version.
! pip install pyqtwebengine

Collecting pyqtwebengine
  Using cached PyQtWebEngine-5.15.6.tar.gz (48 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25l|

In [205]:
# Fetch a profile page with the notebook's fetch helper and list the image
# URLs embedded in the inline style of each <div class="ds-bg-cover">.
c = get_content_from_url('https://www.espncricinfo.com/cricketers/virat-kohli-253802')
soup = BeautifulSoup(c, 'html.parser')
images = soup.find_all('div', {'class': 'ds-bg-cover'})
for image in images:
    # Keep what follows the first "(" and drop the final character
    # (presumably the closing ")" of a CSS url(...) value).
    img = image['style'].split('(')[1][:-1]
    print(img)

https://img1.hscicdn.com/image/upload/f_auto,t_ds_square_w_960/lsci/db/PICTURES/CMS/240800/240853.jpg


[31mERROR: Could not find a version that satisfies the requirement PyQt4 (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for PyQt4[0m[31m
[0m