In [1]:
import pandas as pd
from requests import get
from bs4 import BeautifulSoup

# Find Performance Data

1. Batting: `https://stats.espncricinfo.com/ci/engine/player/<:playerID>.html?class=2;template=results;type=batting;view=innings`
2. Bowling: `https://stats.espncricinfo.com/ci/engine/player/<:playerID>.html?class=2;template=results;type=bowling;view=innings`
3. AllRound: `https://stats.espncricinfo.com/ci/engine/player/<:playerID>.html?class=2;template=results;type=allround;view=innings`

In [2]:
# Squad roster: one row per player (team, name, ESPNcricinfo player id, role).
# The braces in the file name are literal characters -- this is not an f-string.
player_data = pd.read_csv(filepath_or_buffer='player_{team_name_id_role}.csv')
player_data

Unnamed: 0,team_name,player_name,player_id,player_role
0,Afghanistan,Hashmatullah Shahidi,440970,Top order Batter
1,Afghanistan,Rahmanullah Gurbaz,974087,Wicketkeeper Batter
2,Afghanistan,Ibrahim Zadran,921509,Opening Batter
3,Afghanistan,Reyaz Hassan,1061090,Batter
4,Afghanistan,Rahmat Shah Zurmati,533956,Allrounder
...,...,...,...,...
145,Sri Lanka,Dunith Wellalage,1152427,Bowler
146,Sri Lanka,Kasun Rajitha,499594,Bowler
147,Sri Lanka,Matheesha Pathirana,1194795,Bowler
148,Sri Lanka,Lahiru Kumara,784375,Bowler


## Bowling Data Extraction

In [7]:
# Sample player id for trying fetch_performance_data on a single player
# (used by the commented-out call that follows the function definition).
player_id = "26421"

def fetch_performance_data(player_id, stat_type="bowling", timeout=30):
    """Scrape one player's innings-by-innings records from ESPNcricinfo Statsguru.

    Downloads the player's results page, locates the table captioned
    "Innings by innings list" and flattens every ``<tr class="data1">`` row
    into a list of cell strings prefixed with ``player_id``.

    Parameters
    ----------
    player_id : str or int
        ESPNcricinfo player id, substituted into the URL and prepended to
        every returned row.
    stat_type : str, default "bowling"
        Value of the ``type=`` query parameter ("batting", "bowling" or
        "allround"). The cells present in each row depend on this value.
    timeout : float, default 30
        Seconds to wait for the HTTP response, so one stalled request
        cannot hang the whole scrape.

    Returns
    -------
    list of list
        One inner list per innings: ``[player_id, <non-empty cell texts...>]``.
        An empty list is returned when the page cannot be fetched or parsed,
        so callers can safely ``extend`` an accumulator with the result
        without injecting placeholder entries.
    """
    url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html?class=2;template=results;type={stat_type};view=innings"

    try:
        response = get(url, timeout=timeout)
        # Fail fast on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Locate the table whose caption is "Innings by innings list".
        # If several captions match, the last one wins (as before).
        table = None
        for caption in soup.find_all('caption'):
            if caption.text.strip() == "Innings by innings list":
                table = caption.find_parent('table')

        if table is None:
            print(f"Skipping player {player_id}: no 'Innings by innings list' table found")
            return []

        raw_data = []
        # Data rows carry the CSS class "data1".
        for row in table.find_all('tr', class_='data1'):
            row_data = [player_id]
            for col in row.find_all('td'):
                links = col.find_all('a')
                if links:
                    # If the cell has links, keep the text of the last link.
                    row_data.append(links[-1].text)
                elif col.text != '':
                    # Empty cells are dropped rather than stored.
                    row_data.append(col.text)
            raw_data.append(row_data)
    except Exception as exc:
        # Best-effort scraping: report why the player was skipped and carry on.
        # `Exception` (not a bare `except`) lets KeyboardInterrupt propagate.
        print(f"Skipping player {player_id}: {exc!r}")
        return []

    return raw_data

# raw_data = fetch_performance_data(player_id)
# test_df = pd.DataFrame(raw_data, columns=['player_id', 'overs', 'maidens', 'runs', 'wickets', 'economy', 'position', 'innings', 'opposition', 'ground', 'start_date', 'odi_number'])


116

In [8]:
# For every player in player_data, call fetch_performance_data(player_id) and
# collect the per-innings rows into one flat list (turned into a DataFrame in a later cell).
# NOTE: the variable name `performane_raw_data` (sic) is referenced by later cells, so it is kept.

performane_raw_data = []
for player_id in player_data['player_id']:
    try:
        player_rows = fetch_performance_data(player_id)
    except Exception as exc:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # actually stops this long-running scrape instead of skipping a player.
        print(f"Skipping player {player_id}: {exc!r}")
        continue
    # Keep only real rows: a failed fetch may hand back None placeholders,
    # which must not end up as entries in the final table.
    performane_raw_data.extend(row for row in player_rows if row is not None)

len(performane_raw_data)


9331

In [17]:
# Labels for the scraped bowling rows: the player id that
# fetch_performance_data prepends, followed by the cell values of each table row.
column_names = [
    'player_id',
    'overs',
    'maidens',
    'runs',
    'wickets',
    'economy',
    'position',
    'innings',
    'opposition',
    'ground',
    'start_date',
    'odi_number',
]

bowling_df = pd.DataFrame(data=performane_raw_data, columns=column_names)
bowling_df

Unnamed: 0,player_id,overs,maidens,runs,wickets,economy,position,innings,opposition,ground,start_date,odi_number
0,440970,DNB,-,-,-,-,-,1,Kenya,Sharjah,2 Oct 2013,ODI # 3417
1,440970,DNB,-,-,-,-,-,1,Kenya,Sharjah,4 Oct 2013,ODI # 3418
2,440970,DNB,-,-,-,-,-,1,Hong Kong,Kuala Lumpur,1 May 2014,ODI # 3487
3,440970,2.0,0,17,0,8.50,6,2,U.A.E.,Kuala Lumpur,2 May 2014,ODI # 3488
4,440970,DNB,-,-,-,-,-,2,Zimbabwe,Bulawayo,18 Jul 2014,ODI # 3503
...,...,...,...,...,...,...,...,...,...,...,...,...
9326,793007,10.0,0,86,2,8.60,2,1,South Africa,Delhi,7 Oct 2023,ODI # 4661
9327,793007,9.2,0,60,2,6.42,2,2,Pakistan,Hyderabad,10 Oct 2023,ODI # 4665
9328,793007,9.0,2,38,3,4.22,2,2,Australia,Lucknow,16 Oct 2023,ODI # 4671
9329,793007,9.4,1,49,4,5.06,1,1,Netherlands,Lucknow,21 Oct 2023,ODI # 4676


In [18]:
# Persist the scraped bowling innings; index=False leaves the row index out of the file.
bowling_df.to_csv(path_or_buf='bowling_data.csv', index=False)

## Batting Data Extraction