### Importing Required Libraries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy

import warnings
warnings.filterwarnings('ignore')

### Batting Record Extraction

In [2]:
def get_batting_records(pages):
    """Scrape batting records from the ESPNcricinfo Statsguru results pages.

    One HTTP request is made per result page (``class=3``, ``type=batting``,
    span capped at 25 Sep 2023), and every player row found in the selected
    ``<tbody>`` elements becomes one row of the returned frame.

    Parameters
    ----------
    pages : int
        Number of result pages to fetch, starting from page 1. A value of 0
        (or less) performs no requests and yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        One row per player with the columns listed in ``columns`` below.
        All values are the stripped cell texts, i.e. strings.

    Raises
    ------
    requests.HTTPError
        If a page responds with an HTTP error status.
    requests.Timeout
        If a page does not respond within the timeout.
    """
    columns = ["name", "country", "span", "matches", "innings", "not_out", "runs", "highest_score",
               "average", "balls_faced", "strike_rate", "100s", "50s", "duck", "4s", "6s"]
    # The first cell holds "Name (COUNTRY)"; the remaining statistic columns
    # map one-to-one onto cells 1..14, so a usable row needs 15 cells.
    cells_needed = len(columns) - 1

    # Collect plain lists and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    records = []
    for page_no in range(1, pages + 1):
        # Parameters are ';'-separated; the separator after the page number
        # was missing, which fused the page value with the spanmax1 filter.
        url = ("https://stats.espncricinfo.com/ci/engine/stats/index.html?class=3;page="
               + str(page_no)
               + ";spanmax1=25+Sep+2023;spanval1=span;template=results;type=batting")
        response = requests.get(url, timeout=30)
        # Fail loudly instead of silently parsing an error page into zero rows.
        response.raise_for_status()
        bs = BeautifulSoup(response.content, 'lxml')

        # Only the <tbody> elements at positions 0 and 2 are read.
        # NOTE(review): presumably these are the ones holding player rows -
        # confirm against the live page markup.
        for table in bs.find_all('tbody')[0:4:2]:
            for row in table.find_all('tr'):
                cells = [cell.text.strip() for cell in row.find_all('td')]
                if len(cells) < cells_needed:
                    # Header, spacer or "no records" rows carry too few cells.
                    continue

                name_parts = cells[0].split("(")
                name = name_parts[0].strip()
                # Drop the trailing ")" of the country code; tolerate a
                # name cell that has no "(COUNTRY)" suffix at all.
                country = name_parts[1].strip()[:-1] if len(name_parts) > 1 else ""
                records.append([name, country] + cells[1:cells_needed])

    return pd.DataFrame(records, columns=columns)

In [3]:
# Scrape 70 result pages of batting records; get_batting_records issues one
# HTTP request per page, so this cell needs network access.
batting_records = get_batting_records(70)

In [4]:
# Preview the first 10 scraped batting rows.
batting_records.head(10)

Unnamed: 0,name,country,span,matches,innings,not_out,runs,highest_score,average,balls_faced,strike_rate,100s,50s,duck,4s,6s
0,V Kohli,IND,2010-2022,115,107,31,4008,122*,52.73,2905,137.96,1,37,4,356,117
1,RG Sharma,IND,2007-2022,148,140,17,3853,118,31.32,2767,139.24,4,29,10,348,182
2,MJ Guptill,NZ,2009-2022,122,118,7,3531,105,31.81,2602,135.7,2,20,3,309,173
3,Babar Azam,PAK,2016-2023,104,98,14,3485,122,41.48,2714,128.4,3,30,5,371,53
4,PR Stirling,IRE,2009-2023,131,130,11,3408,115*,28.63,2509,135.83,1,23,13,394,123
5,AJ Finch,AUS,2011-2022,103,103,12,3120,172,34.28,2189,142.53,2,19,8,309,125
6,DA Warner,AUS,2009-2022,99,99,11,2894,100*,32.88,2048,141.3,1,24,6,295,105
7,Mohammad Rizwan,PAK,2015-2023,85,73,16,2797,104*,49.07,2197,127.3,1,25,3,243,74
8,JC Buttler,ENG,2011-2023,109,100,21,2766,101*,35.01,1912,144.66,1,20,6,244,117
9,Mohammad Hafeez,PAK,2006-2021,119,108,13,2514,99*,26.46,2060,122.03,0,14,7,251,76


## Bowling Record Extraction

In [5]:
def get_bowling_records(pages):
    """Scrape bowling records from the ESPNcricinfo Statsguru results pages.

    One HTTP request is made per result page (``class=3``, ``type=bowling``,
    span capped at 25 Sep 2023), and every player row found in the selected
    ``<tbody>`` elements becomes one row of the returned frame.

    Parameters
    ----------
    pages : int
        Number of result pages to fetch, starting from page 1. A value of 0
        (or less) performs no requests and yields an empty frame.

    Returns
    -------
    pandas.DataFrame
        One row per player with the columns listed in ``columns`` below.
        All values are the stripped cell texts, i.e. strings.

    Raises
    ------
    requests.HTTPError
        If a page responds with an HTTP error status.
    requests.Timeout
        If a page does not respond within the timeout.
    """
    # NOTE: the "madiens" spelling is kept on purpose - it is part of the
    # returned schema and of the exported CSV header.
    columns = ["name", "country", "span", "matches", "innings", "overs", "madiens",
               "runs", "wickets", "best_bowling_figure", "average",
               "economy", "strike_rate", "4-fer", "5-fer"]
    # The first cell holds "Name (COUNTRY)"; the remaining statistic columns
    # map one-to-one onto cells 1..13, so a usable row needs 14 cells.
    cells_needed = len(columns) - 1

    # Collect plain lists and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    records = []
    for page_no in range(1, pages + 1):
        # Parameters are ';'-separated; the separator after the page number
        # was missing, which fused the page value with the spanmax1 filter.
        url = ("https://stats.espncricinfo.com/ci/engine/stats/index.html?class=3;page="
               + str(page_no)
               + ";spanmax1=25+Sep+2023;spanval1=span;template=results;type=bowling")
        response = requests.get(url, timeout=30)
        # Fail loudly instead of silently parsing an error page into zero rows.
        response.raise_for_status()
        bs = BeautifulSoup(response.content, 'lxml')

        # Only the <tbody> elements at positions 0 and 2 are read.
        # NOTE(review): presumably these are the ones holding player rows -
        # confirm against the live page markup.
        for table in bs.find_all('tbody')[0:4:2]:
            for row in table.find_all('tr'):
                cells = [cell.text.strip() for cell in row.find_all('td')]
                if len(cells) < cells_needed:
                    # Header, spacer or "no records" rows carry too few cells.
                    continue

                name_parts = cells[0].split("(")
                name = name_parts[0].strip()
                # Drop the trailing ")" of the country code; tolerate a
                # name cell that has no "(COUNTRY)" suffix at all.
                country = name_parts[1].strip()[:-1] if len(name_parts) > 1 else ""
                records.append([name, country] + cells[1:cells_needed])

    return pd.DataFrame(records, columns=columns)

In [6]:
# Scrape 70 result pages of bowling records; get_bowling_records issues one
# HTTP request per page, so this cell needs network access.
bowling_records = get_bowling_records(70)

In [7]:
# Preview the first rows (pandas' default head size) of the scraped bowling data.
bowling_records.head()

Unnamed: 0,name,country,span,matches,innings,overs,madiens,runs,wickets,best_bowling_figure,average,economy,strike_rate,4-fer,5-fer
0,TG Southee,NZ,2008-2023,114,112,415.2,5,3402,144,5/18,23.62,8.19,17.3,1,2
1,Shakib Al Hasan,BAN,2006-2023,117,115,422.3,3,2869,140,5/20,20.49,6.79,18.1,5,2
2,Rashid Khan,AFG/ICC,2015-2023,82,82,312.2,1,1925,130,5/3,14.8,6.16,14.4,4,2
3,IS Sodhi,NZ,2014-2023,102,99,353.1,0,2822,126,4/28,22.39,7.99,16.8,3,0
4,SL Malinga,SL,2006-2020,84,83,299.5,1,2225,107,5/6,20.79,7.42,16.8,1,2


## Exporting Scraped Data to CSV Files



In [8]:
# Write both scraped frames to CSV files in the current working directory.
# index = False keeps the DataFrame row index out of the files.
batting_records.to_csv("Batting Records.csv",index = False)
bowling_records.to_csv("Bowling Records.csv",index = False)