In [171]:
import requests
from datetime import datetime
import json
import time

In [101]:
# Endpoint configuration shared by the cells below.

# Site prefix for player pages (not referenced by any cell visible in this notebook).
player_base_url = "https://www.espncricinfo.com/cricketers/"
# API page queried by get_teams_data().
teams_base_api = "https://hs-consumer-api.espncricinfo.com/v1/pages/team"
# API page for one player; PlayerShort.get_url() appends "?playerId=<id>" to it.
player_url_by_id = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/home"
# Prefix that Team.parse_flag_url() puts in front of the relative image paths.
images_base_url = "https://img1.hscicdn.com/image/upload/lsci"

In [269]:
class PlayerProfile:
    """Biographical fields of a single player.

    Instances are normally built with :meth:`from_json` from the dict stored
    under the ``player`` key of a player payload.
    """

    def __init__(
        self,
        slug = None,
        id = None,
        long_name = None,
        gender = None,
        image_url = None,
        headshot_image_url=None,
        dob=None,
        dod=None,
        country_team_id=None) -> None:
        self.slug = slug
        self.id = id
        self.long_name = long_name
        self.gender = gender
        self.image_url =  image_url
        self.headshot_image_url = headshot_image_url
        self.dob = dob
        self.dod = dod
        self.country_team_id = country_team_id

    @staticmethod
    def from_json(json):
        """Build a PlayerProfile from a player dict.

        Args:
            json: dict with the keys ``slug``, ``objectId``, ``longName``,
                ``gender``, ``imageUrl``, ``headshotImage``, ``dateOfBirth``,
                ``dateOfDeath`` and ``countryTeamId``; missing keys become
                ``None`` on the instance.

        Returns:
            A PlayerProfile, or ``None`` when ``json`` is empty/``None``
            (same convention as ``Team.from_json`` / ``PlayerShort.from_json``).
        """
        if not json:
            return None
        # 'headshotImage' is either a nested dict or None.
        headshot_image = json.get('headshotImage')
        headshot_image_url = headshot_image.get('url') if headshot_image else None
        # The original returned ``Player(...)``, a name that is not defined
        # anywhere in the notebook; the class being built is PlayerProfile.
        return PlayerProfile(
            slug=json.get('slug'),
            id=json.get('objectId'),
            long_name=json.get('longName'),
            gender=json.get('gender'),
            image_url=json.get('imageUrl'),
            headshot_image_url=headshot_image_url,
            dob=PlayerProfile.parse_date(json.get('dateOfBirth')),
            dod=PlayerProfile.parse_date(json.get('dateOfDeath')),
            country_team_id=json.get('countryTeamId')
        )

    def get_player_url(self):
        """Return ``"<slug>-<id>"`` (a path fragment, not a full URL), or None if either part is missing."""
        if self.slug and self.id:
            return f"{self.slug}-{self.id}"
        return None

    @staticmethod
    def parse_date(json):
        """Convert a ``{'year', 'month', 'date'}`` dict into a ``datetime``.

        Returns ``None`` when the dict is empty/``None`` or when any of the
        three components is missing or zero, instead of letting ``datetime``
        raise on an incomplete date.
        """
        if not json:
            return None
        year = json.get('year')
        month = json.get('month')
        day = json.get('date')
        if not (year and month and day):
            return None
        return datetime(year=year, month=month, day=day)

In [255]:
class PlayerBowlingStats:
    """Bowling figures of one player in one match format."""

    def __init__(
        self,
        match_format = '',
        matches = 0,
        innings = 0,
        balls = 0,
        runs = 0,
        wickets = 0,
        best_bowling_innings='',
        best_bowling_match ='',
        average = 0,
        economy = 0,
        strike_rate =0,
        four_wicket_hauls =0,
        five_wicket_hauls = 0,
        ten_wicket_hauls = 0
    ) -> None:
        self.match_format = match_format
        self.matches = matches
        self.innings = innings
        self.balls = balls
        self.runs = runs
        self.wickets = wickets
        self.best_bowling_innings = best_bowling_innings
        self.best_bowling_match = best_bowling_match
        self.average = average
        self.economy = economy
        self.strike_rate = strike_rate
        self.four_wicket_hauls = four_wicket_hauls
        self.five_wicket_hauls = five_wicket_hauls
        self.ten_wicket_hauls  = ten_wicket_hauls

    @staticmethod
    def from_json(json):
        """Build a PlayerBowlingStats from one career-stats entry.

        Keys read: ``cl`` (format id, translated with ``get_format_by_id``),
        ``mt``, ``in``, ``bl``, ``rn``, ``wk``, ``bbi``, ``bbm``, ``avg``,
        ``bwe``, ``sr``, ``fwk``, ``fw``, ``tw``.

        Values are passed through as found, so a key that is absent ends up
        as ``None`` on the instance rather than the constructor default.

        Returns ``None`` when ``json`` is ``None``.

        Declared as a static method so it works the same whether it is
        called on the class or on an instance.
        """
        if json is None:
            return None
        return PlayerBowlingStats(
            match_format=get_format_by_id(json.get('cl')),
            matches=json.get('mt'),
            innings=json.get('in'),
            balls=json.get('bl'),
            runs=json.get('rn'),
            wickets=json.get('wk'),
            best_bowling_innings=json.get('bbi'),
            best_bowling_match=json.get('bbm'),
            average=json.get('avg'),
            economy=json.get('bwe'),
            strike_rate=json.get('sr'),
            four_wicket_hauls=json.get('fwk'),
            five_wicket_hauls=json.get('fw'),
            ten_wicket_hauls=json.get('tw')
        )

In [256]:
class PlayerBattingFieldingStats:
    """Batting and fielding figures of one player in one match format."""

    def __init__(
        self,
        match_format = '',
        matches = 0,
        innings = 0,
        notouts = 0,
        runs = 0,
        hi_score = 0,
        average = 0,
        balls_faced = 0,
        strike_rate = 0,
        hundreds = 0,
        fifties = 0,
        fours = 0,
        sixes = 0,
        catches = 0 ,
        stumps = 0
    ) -> None:
        self.match_format = match_format
        self.matches = matches
        self.innings = innings
        self.notouts = notouts
        self.runs = runs
        self.hi_score = hi_score
        self.average = average
        self.balls_faced = balls_faced
        self.strike_rate = strike_rate
        self.hundreds = hundreds
        self.fifties = fifties
        self.fours = fours
        self.sixes = sixes
        self.catches = catches
        self.stumps = stumps

    @staticmethod
    def from_json(json):
        """Build a PlayerBattingFieldingStats from one career-stats entry.

        Keys read: ``cl`` (format id, translated with ``get_format_by_id``),
        ``mt``, ``in``, ``no``, ``rn``, ``hs``, ``avg``, ``bl``, ``sr``,
        ``hn``, ``ft``, ``fo``, ``si``, ``ct``, ``st``.

        Values are passed through as found, so a key that is absent ends up
        as ``None`` on the instance rather than the constructor default.

        Returns ``None`` when ``json`` is ``None``.

        Declared as a static method so it works the same whether it is
        called on the class or on an instance.
        """
        if json is None:
            return None
        return PlayerBattingFieldingStats(
            match_format=get_format_by_id(json.get('cl')),
            matches=json.get('mt'),
            innings=json.get('in'),
            notouts=json.get('no'),
            runs=json.get('rn'),
            hi_score=json.get('hs'),
            average=json.get('avg'),
            balls_faced=json.get('bl'),
            strike_rate=json.get('sr'),
            hundreds=json.get('hn'),
            fifties=json.get('ft'),
            fours=json.get('fo'),
            sixes=json.get('si'),
            catches=json.get('ct'),
            stumps=json.get('st')
        )

In [268]:
def get_format_by_id(format_id):
    """Translate a numeric format id into its display name.

    Known ids are 1, 2, 4, 5 and 6; any other id yields ``None``.
    """
    # NOTE(review): id 3 has no entry here — confirm whether that is intentional.
    known_formats = (
        (1, "Tests"),
        (2, "ODI"),
        (4, "First-Class"),
        (5, "List A"),
        (6, "T20"),
    )
    name_by_id = dict(known_formats)
    if format_id in name_by_id:
        return name_by_id[format_id]
    return None

In [58]:
def get_teams_data(teams_base_api, group_title="POPULAR MEN'S INTERNATIONAL TEAMS", timeout=30):
    """Download the teams page and return the team dicts of one featured group.

    Args:
        teams_base_api: URL of the teams endpoint.
        group_title: title of the featured group to extract; defaults to the
            group this notebook has always used.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The list stored under ``teams`` in the first group whose title
        matches, or ``[]`` when no such group (or no group list) is present.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(teams_base_api, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()
    data = json.loads(response.content)
    # Each level may be absent or null; fall back to an empty container so a
    # changed payload yields [] rather than an AttributeError on None.
    content = data.get('content') or {}
    featured = content.get('featuredTeamsGroups') or {}
    for group in featured.get('groups') or []:
        if str(group.get('title')) == group_title:
            return group.get('teams') or []
    return []

In [61]:
# Fetch the featured team records once; later cells reuse `teams`.
teams = get_teams_data(teams_base_api)

In [63]:
# Displays the whole list of team dicts (the saved output below is cut off mid-record).
teams

[{'id': 40,
  'objectId': 40,
  'scribeId': 40,
  'slug': 'afghanistan',
  'name': 'Afghanistan',
  'longName': 'Afghanistan',
  'abbreviation': 'AFG',
  'unofficialName': None,
  'imageUrl': '/db/PICTURES/CMS/321000/321005.png',
  'isCountry': True,
  'primaryColor': None,
  'image': {'id': 321005,
   'objectId': 1262396,
   'slug': 'afghanistan-team-logo',
   'url': '/db/PICTURES/CMS/321000/321005.png',
   'width': 500,
   'height': 500,
   'caption': 'Afghanistan team logo',
   'longCaption': 'Afghanistan team logo',
   'credit': 'Unknown',
   'photographer': None,
   'peerUrls': None}},
 {'id': 2,
  'objectId': 2,
  'scribeId': 2,
  'slug': 'australia',
  'name': 'Australia',
  'longName': 'Australia',
  'abbreviation': 'AUS',
  'unofficialName': None,
  'imageUrl': '/db/PICTURES/CMS/340400/340493.png',
  'isCountry': True,
  'primaryColor': '#ffdd00',
  'image': {'id': 340493,
   'objectId': 1318684,
   'slug': 'australia-team-flag-latest',
   'url': '/db/PICTURES/CMS/340400/34049

In [62]:
# Inspect a single record to see which keys a team dict carries.
teams[0]

{'id': 40,
 'objectId': 40,
 'scribeId': 40,
 'slug': 'afghanistan',
 'name': 'Afghanistan',
 'longName': 'Afghanistan',
 'abbreviation': 'AFG',
 'unofficialName': None,
 'imageUrl': '/db/PICTURES/CMS/321000/321005.png',
 'isCountry': True,
 'primaryColor': None,
 'image': {'id': 321005,
  'objectId': 1262396,
  'slug': 'afghanistan-team-logo',
  'url': '/db/PICTURES/CMS/321000/321005.png',
  'width': 500,
  'height': 500,
  'caption': 'Afghanistan team logo',
  'longCaption': 'Afghanistan team logo',
  'credit': 'Unknown',
  'photographer': None,
  'peerUrls': None}}

In [185]:
class Team:
    """A cricket team: identifiers, names, flag image URL and an optional player database."""

    def __init__(
        self,
        id = None,
        slug = None,
        name = None,
        abbr = None,
        flag_url = None,
        players_db = None
    ):
        self.id = id
        self.name =name
        self.slug= slug
        self.abbr = abbr
        # `flag_url` arrives as a path relative to the image host; store the absolute URL.
        self.flag_url = Team.parse_flag_url(flag_url)
        self.players_db = players_db

    @staticmethod
    def parse_flag_url(flag_url):
        """Prefix a relative image path with ``images_base_url``.

        Returns ``None`` for an empty/``None`` path, so ``Team()`` with its
        default arguments no longer raises ``TypeError`` on ``str + None``.
        """
        if not flag_url:
            return None
        return images_base_url+flag_url

    @staticmethod
    def from_json(json):
        """Build a Team from a team dict (keys ``objectId``, ``slug``, ``longName``, ``abbreviation``, ``imageUrl``).

        Returns ``None`` when ``json`` is empty/``None``.
        """
        if not json:
            return None
        return Team(
            id = json.get('objectId'),
            slug = json.get('slug'),
            name = json.get('longName'),
            abbr= json.get('abbreviation'),
            flag_url = json.get('imageUrl')
        )

    def __repr__(self):
        return f"Team(id={self.id!r}, name={self.name!r}, slug={self.slug!r}, abbr={self.abbr!r}, flag_url={self.flag_url!r})"

    def __str__(self):
        return self.__repr__()

In [183]:
# Same inspection as the earlier `teams[0]` cell (repeated).
teams[0]

{'id': 40,
 'objectId': 40,
 'scribeId': 40,
 'slug': 'afghanistan',
 'name': 'Afghanistan',
 'longName': 'Afghanistan',
 'abbreviation': 'AFG',
 'unofficialName': None,
 'imageUrl': '/db/PICTURES/CMS/321000/321005.png',
 'isCountry': True,
 'primaryColor': None,
 'image': {'id': 321005,
  'objectId': 1262396,
  'slug': 'afghanistan-team-logo',
  'url': '/db/PICTURES/CMS/321000/321005.png',
  'width': 500,
  'height': 500,
  'caption': 'Afghanistan team logo',
  'longCaption': 'Afghanistan team logo',
  'credit': 'Unknown',
  'photographer': None,
  'peerUrls': None}}

In [98]:
# Build one Team object per fetched team record.
teams_database = [Team.from_json(team) for team in teams]

In [100]:
# Print each Team; print() uses Team.__str__, which returns the repr.
for team in teams_database:
    print(team)

Team(id=40, name='Afghanistan', slug='afghanistan', abbr='AFG', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/321000/321005.png')
Team(id=2, name='Australia', slug='australia', abbr='AUS', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/340400/340493.png')
Team(id=25, name='Bangladesh', slug='bangladesh', abbr='BAN', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/341400/341456.png')
Team(id=1, name='England', slug='england', abbr='ENG', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/313100/313114.logo.png')
Team(id=6, name='India', slug='india', abbr='IND', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/313100/313128.logo.png')
Team(id=29, name='Ireland', slug='ireland', abbr='IRE', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/313100/313149.logo.png')
Team(id=5, name='New Zealand', slug='new-zealand', abbr='NZ', flag_url='https://img1.hscicdn.com/image/u

In [208]:
class PlayerShort:
    """Minimal player record holding only a slug and an object id."""

    def __init__(self,slug,id):
        self.slug, self.id = slug, id

    def get_url(self):
        """Return the player endpoint URL with this player's id as the ``playerId`` query value."""
        return "{}?playerId={}".format(player_url_by_id, self.id)

    @staticmethod
    def from_json(json):
        """Build a PlayerShort from a dict's ``slug`` and ``objectId``; ``None`` for empty input."""
        if json:
            return PlayerShort(id=json.get('objectId'), slug=json.get('slug'))
        return None

In [115]:
# Player search endpoint. players_by_team() and PlayersShortDatabase.get_page_records()
# both read this module-level name; PlayersShortDatabase also declares the same URL
# as a class attribute.
player_query_url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/search"

In [116]:
def players_by_team(team_id:int, page_num:int = 1, records:int = 50, timeout:float = 30):
    """Fetch one page of the player search for a team and return the raw response body.

    Args:
        team_id: value sent as ``filterTeamId``.
        page_num: result page to request (defaults to the first page, as before).
        records: page size to request (defaults to 50, as before).
        timeout: seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``; callers decode it with ``json.loads``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    params = {
        'mode':'BOTH',
        'page':page_num,
        'records':records,
        'filterActive':'false',
        'filterTeamId': team_id,
        'filterFormatLevel':"INTERNATIONAL",
        'sort':'ALPHA_ASC'
    }

    response = requests.get(player_query_url, params=params, timeout=timeout)
    # Surface HTTP errors here rather than handing an error body to the caller.
    response.raise_for_status()
    return response.content

In [229]:
class PlayersShortDatabase:
    """Collects a PlayerShort for every player the search endpoint lists for one team.

    Attributes:
        team_id: value sent as ``filterTeamId``.
        players_short_db: list of PlayerShort objects gathered so far.
        total_players: the ``total`` reported by the endpoint (0 until fetched).
        records_per_page: page size requested from the endpoint.
    """

    player_query_url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/search"
    # Seconds to wait for the server on each request.
    request_timeout = 30

    def __init__(self,team_id):
        self.team_id = team_id
        self.players_short_db = []
        self.total_players = 0
        self.records_per_page = 40

    def get_page_records(self,page_num):
        """Request one result page and return the decoded JSON payload.

        Raises:
            requests.HTTPError: if the server answers with a 4xx/5xx status
                (previously a non-200 reply surfaced as an UnboundLocalError).
        """
        params = {
            'mode':'BOTH',
            'page':page_num,
            'records':self.records_per_page,
            'filterActive':'false',
            'filterTeamId': self.team_id,
            'filterFormatLevel':"INTERNATIONAL",
            'sort':'ALPHA_ASC'
        }

        # Use the class attribute so the class does not depend on a
        # module-level variable of the same name having been defined.
        response = requests.get(
            self.player_query_url,
            params=params,
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return json.loads(response.content)

    def get_total_players(self):
        """Fetch page 1, store its ``total`` in ``total_players`` (0 if absent) and return it."""
        content = self.get_page_records(page_num=1)
        self.total_players = content.get('total') or 0
        return self.total_players

    def get_total_page_nums(self):
        """Refresh ``total_players`` and return how many pages are needed to cover it."""
        self.get_total_players()
        return self._page_count()

    def _page_count(self):
        """Ceiling of total_players / records_per_page, without a trailing empty page."""
        # -(-a // b) is integer ceiling division; `a // b + 1` over-counts by
        # one page whenever the total is an exact multiple of the page size.
        return -(-self.total_players // self.records_per_page)

    def parse_player_data_from_records(self,records:list):
        """Append a PlayerShort for every entry in ``records`` and print progress.

        ``None`` (no ``results`` key in the page) is treated as an empty page,
        and entries that PlayerShort.from_json rejects are skipped.
        """
        for player in records or []:
            player_short = PlayerShort.from_json(player)
            if player_short is None:
                continue
            self.players_short_db.append(player_short)
            print(f"[{len(self.players_short_db)} / {self.total_players} ]  {player_short.slug} added to database",end="\r",flush=True)

    def insert_records(self):
        """Download every result page for the team into ``players_short_db``.

        The list is rebuilt from scratch on each call, so re-running the cell
        does not duplicate players. Page 1 is fetched once and serves both
        the total and its own records.
        """
        first_page = self.get_page_records(page_num=1)
        self.total_players = first_page.get('total') or 0
        self.players_short_db = []
        self.parse_player_data_from_records(first_page.get('results'))
        for page_num in range(2, self._page_count() + 1):
            player_data = self.get_page_records(page_num=page_num)
            self.parse_player_data_from_records(player_data.get('results'))

In [206]:
# Scratch check of floor division — presumably a sanity check for the page-count arithmetic.
11//2

5

In [209]:
# Sample run: first page of the player search for team id 7, parsed into PlayerShort objects.
content = players_by_team(team_id=7)
data = json.loads(content)
# A payload without 'results' yields an empty list instead of failing the loop on None.
players = data.get('results') or []
players_short_data = [PlayerShort.from_json(player) for player in players]

In [210]:
# List the slugs from the sample page (50 records, sorted by the ALPHA_ASC request parameter).
for player in players_short_data:
    print(player.slug)

aaley-haider
aamer-ali
aamer-hameed
aamer-hanif
aamer-iqbal
aamer-jamal
aamer-malik
aamer-nazir
aamer-sohail
aamer-yamin
aamer-yousuf
aamir-ali
aaqib-javed
aarish-ali-khan
aariz-kamal
abbas-afridi
abbas-ali
abdul-ameer
abdul-bangalzai
abdul-basit
abdul-faseeh
abdul-kadir
abdul-majeed
abdul-mateen
abdul-qadir
abdul-rauf
abdul-razzaq
abdullah-khan
abdullah-shafique
abdur-rauf
abdur-rehman
abid-ali
abida-khan
abrar-ahmed
adeel-malik
adil-nisar
adil-raza
adnan-akmal
adnan-mehmood
adnan-naeem
adnan-raza
adnan-zaheer
afaq-hussain
afaq-raheem
aftab-baloch
aftab-gul
aftab-ibrahim
agha-javed
agha-saadat-ali
agha-salman


In [168]:
# Team id 6 is India (see the Team listing printed earlier).
india = PlayersShortDatabase(team_id=6)

In [169]:
# Downloads every result page for India.
# NOTE(review): the saved output below ("scraping page 1", "created player ... with id: ...")
# does not match what the current PlayersShortDatabase prints, so it appears to come from
# an earlier version of the class — re-run to refresh.
india.insert_records()

200
24
scraping page 1
200
created player aaqib-khan with id: 1207680
created player aaradhya-yadav with id: 1292521
created player varun-aaron with id: 360911
created player abbas-ali-khan with id: 26511
created player abhishek-goswami with id: 1081436
created player abhishek-sharma with id: 1070183
created player syed-abid-ali with id: 26169
created player abid-nabi with id: 26504
created player hemu-adhikari with id: 26179
created player ajit-agarkar with id: 26184
created player monish-agarwal with id: 26304
created player mayank-agarwal with id: 398438
created player sandhya-agarwal with id: 53913
created player nitin-aggarwal with id: 26375
created player abu-nechim with id: 228430
created player khaleel-ahmed with id: 942645
created player suraj-ahuja with id: 1081218
created player akash-singh with id: 1175458
created player siddharth-akre with id: 1081504
created player nooshin-al-khadeer with id: 54286
created player amanpreet-singh with id: 253804
created player amar-singh w

In [170]:
# Total reported by the search endpoint for this team, as stored by insert_records().
india.total_players

951

In [174]:
# Show the per-player API URLs for the first 20 collected players.
for player in india.players_short_db[:20]:
    print(player.get_url())

https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1207680
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1292521
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=360911
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26511
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1081436
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1070183
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26169
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26504
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26179
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26184
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26304
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=398438
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?play

In [186]:
# Builds the same list of Team objects as the earlier `teams_database` cell.
# NOTE: `teams_db` is a plain list here; a later cell rebinds the name to a TeamsDatabase.
teams_db = []
for team in teams:
    teams_db.append(Team.from_json(team))

In [189]:
# Works only while `teams_db` is still the plain list built in the previous cell.
teams_db[0]

Team(id=40, name='Afghanistan', slug='afghanistan', abbr='AFG', flag_url='https://img1.hscicdn.com/image/upload/lsci/db/PICTURES/CMS/321000/321005.png')

In [190]:
class TeamsDatabase:
    """In-memory list of Team objects with insert and remove-by-id operations."""

    def __init__(self) -> None:
        self.teams = []

    def insert_team(self, team:"Team"):
        """Append ``team`` to the database."""
        self.teams.append(team)

    def remove_team(self, team_id:int):
        """Remove every team whose ``id`` equals ``team_id``.

        The list is filtered in one pass and updated in place: removing items
        from a list while iterating over it skips the element that follows
        each removal, which left adjacent matches behind.
        """
        self.teams[:] = [team for team in self.teams if team.id != team_id]

In [233]:
# Rebinds `teams_db` from the plain list used earlier to a TeamsDatabase instance.
teams_db = TeamsDatabase()
for team in teams:
    teams_db.insert_team(Team.from_json(team))

In [234]:
# Attach an empty player database to each team; nothing is downloaded in this cell.
for team in teams_db.teams:
    team.players_db = PlayersShortDatabase(team.id)

In [235]:
# Long-running: insert_records() issues one request per result page (plus one for the
# total) for every team. The one-second sleep spaces out the requests between teams.
for team in teams_db.teams:
    team.players_db.insert_records()
    time.sleep(1)

[169 / 169 ]  joy-zinto added to databaseseasesebaseeeesebasee

In [236]:
# Per-team totals as reported by the endpoint and stored by insert_records().
for team in teams_db.teams:
    print(team.name, team.players_db.total_players)

Afghanistan 152
Australia 1142
Bangladesh 395
England 1464
India 951
Ireland 292
New Zealand 751
Pakistan 782
South Africa 791
Sri Lanka 668
West Indies 738
Zimbabwe 348
Namibia 165
Nepal 65
Netherlands 244
Oman 70
Papua New Guinea 193
Scotland 220
United Arab Emirates 194
United States of America 169


In [241]:
# Index 4 is India in the team order printed above.
for player in teams_db.teams[4].players_db.players_short_db[:20]:
    print(player.get_url())

https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1207680
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1292521
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=360911
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26511
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1081436
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1070183
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26169
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26504
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26179
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26184
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=26304
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=398438
https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?play

In [242]:
# Hard-coded copy of the first URL printed above (playerId=1207680).
url = "https://hs-consumer-api.espncricinfo.com/v1/pages/player/home?playerId=1207680"

In [243]:
# Fetch one player's payload.
# NOTE: rebinds `data`, which an earlier cell used for the player-search response.
response = requests.get(url)
data = json.loads(response.content)

In [247]:
# The two parts of the payload that PlayerDetailed.from_json reads:
# the 'player' dict and the list at content -> careerAverages -> stats.
player_profile = data.get('player')
career_stats = data.get('content').get('careerAverages').get('stats')

In [248]:
# Ad-hoc split of the career stats by their 'type' field; the same logic lives in
# PlayerDetailed.split_career_stats. Entries of any other type are ignored.
bowling_stat,batting_stat = [],[]
for stat in career_stats:
    stat_type = str(stat.get('type'))
    if stat_type == 'BATTING':
        batting_stat.append(stat)
    elif stat_type == 'BOWLING':
        bowling_stat.append(stat)

In [277]:
class PlayerDetailed:
    """A player's profile together with per-format bowling and batting/fielding stats."""

    def __init__(
            self,
            player_profile: "PlayerProfile",
            player_bowling_stats: "list[PlayerBowlingStats]",
            player_batting_fielding_stats: "list[PlayerBattingFieldingStats]"
        ):
        # Annotations are written as strings so that defining this class does
        # not require the three referenced classes to exist yet.
        self.player_profile = player_profile
        self.player_bowling_stats = player_bowling_stats
        self.player_batting_fielding_stats = player_batting_fielding_stats

    @staticmethod
    def split_career_stats(career_stats_json):
        """Partition career-stat entries by their ``type`` field.

        Returns a dict with the keys ``'batting'`` and ``'bowling'``; entries
        of any other type are dropped. ``None`` is treated as an empty list.
        """
        bowling_stats_json, batting_stats_json = [], []
        for stat in career_stats_json or []:
            stat_type = str(stat.get('type'))
            if stat_type == 'BATTING':
                batting_stats_json.append(stat)
            elif stat_type == 'BOWLING':
                bowling_stats_json.append(stat)
        return {
            'batting': batting_stats_json,
            'bowling': bowling_stats_json
        }

    @staticmethod
    def parse_bowling_stats(bowling_stats_json):
        """Convert bowling entries into PlayerBowlingStats objects (``[]`` for empty input)."""
        if not bowling_stats_json:
            return []
        return [PlayerBowlingStats.from_json(stat) for stat in bowling_stats_json]

    @staticmethod
    def parse_batting_stats(batting_stats_json) :
        """Convert batting entries into PlayerBattingFieldingStats objects (``[]`` for empty input)."""
        if not batting_stats_json:
            return []
        return [PlayerBattingFieldingStats.from_json(stat) for stat in batting_stats_json]

    @staticmethod
    def from_json(player_json):
        """Build a PlayerDetailed from a full player payload.

        Args:
            player_json: dict with a ``player`` entry and, optionally, a stats
                list at ``content -> careerAverages -> stats``.

        Returns:
            A PlayerDetailed, or ``None`` when ``player_json`` is
            empty/``None``. A payload without career stats produces empty
            stats lists; a payload without ``player`` produces a ``None``
            profile.
        """
        if not player_json:
            return None

        player_profile_json = player_json.get('player')
        # Any level of the stats path may be absent or null.
        content = player_json.get('content') or {}
        career_averages = content.get('careerAverages') or {}
        career_stats_json = career_averages.get('stats')

        player_profile = (
            PlayerProfile.from_json(player_profile_json)
            if player_profile_json else None
        )

        stats = PlayerDetailed.split_career_stats(career_stats_json)

        return PlayerDetailed(
            player_profile=player_profile,
            player_batting_fielding_stats=PlayerDetailed.parse_batting_stats(stats.get('batting')),
            player_bowling_stats=PlayerDetailed.parse_bowling_stats(stats.get('bowling'))
            )
        

In [271]:
# Displays the full player payload (the saved output below is cut off).
data

{'player': {'id': 106923,
  'objectId': 1207680,
  'name': 'Aaqib Khan',
  'longName': 'Aaqib Khan',
  'mobileName': 'Aaquib Khan',
  'indexName': 'Aaqib Khan',
  'battingName': 'Aaqib Khan',
  'fieldingName': 'Aaqib Khan',
  'slug': 'aaqib-khan',
  'imageUrl': None,
  'dateOfBirth': {'year': 2003, 'month': 12, 'date': 25},
  'dateOfDeath': None,
  'gender': 'M',
  'battingStyles': ['rhb'],
  'bowlingStyles': ['rm'],
  'longBattingStyles': ['right-hand bat'],
  'longBowlingStyles': ['right-arm medium'],
  'image': None,
  'countryTeamId': 6,
  'playerRoleTypeIds': [],
  'playingRoles': [],
  'headshotImage': None,
  'fullName': 'Aaqib Khan',
  'nickNames': '',
  'alsoKnownAs': '',
  'height': 0,
  'heightUnit': None,
  'education': '',
  'country': {'id': 6,
   'objectId': 6,
   'name': 'India',
   'shortName': 'India',
   'abbreviation': 'IND',
   'slug': 'india',
   'image': {'id': 313128,
    'objectId': 1241587,
    'slug': 'india-logo',
    'url': '/db/PICTURES/CMS/313100/313128.l

In [278]:
# Parse the payload fetched above.
# NOTE: rebinds `player`, which earlier cells used as a loop variable.
player = PlayerDetailed.from_json(player_json=data)

In [288]:
# One line per format: format name, runs, highest score.
# hi_score is printed as received from the payload (e.g. '2*' in the output below).
for stats in player.player_batting_fielding_stats:
    print(stats.match_format,stats.runs,stats.hi_score)

First-Class 54 12
List A 3 3
T20 2 2*
