In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# List of URLs to scrape
urls = [
        "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2024-15940",
       "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2023-15129",
       "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2022-14452",
       "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2021-13840"
]

# Function to scrape data from a URL and parse the table
def scrape_data(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.content, "html.parser")
    
    # Locate the table
    table = soup.select_one("table")  # Adjust the selector as needed
    if not table:
        return None  # Return None if no table is found

    rows = table.find_all("tr")
    table_data = []
    for row in rows:
        cols = row.find_all("td")
        cols = [col.get_text(strip=True) for col in cols]
        table_data.append(cols)

    return table_data

# Consolidate data from all URLs
all_data = []
for url in urls:
    table_data = scrape_data(url)
    if table_data:
        all_data.extend(table_data)

# Create a DataFrame and dynamically handle columns
df = pd.DataFrame(all_data)
df.columns = df.iloc[0]  # Use the first row as column headers
df = df[1:]  # Drop the header row from the data

# Save the data to a CSV file
#df.to_csv("ipl_most_runs.csv", index=False)

print("Data saved to ipl_most_runs.csv")


Data saved to ipl_most_runs.csv


In [3]:
df

Unnamed: 0,Player,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
1,HV Patel (PBKS),2024-2024,14,14,294,49.0,-,477,24,3/15,19.87,9.73,12.25,-,-
2,CV Varun (KKR),2024-2024,15,14,300,50.0,-,402,21,3/16,19.14,8.04,14.28,-,-
3,JJ Bumrah (MI),2024-2024,13,13,311,51.5,-,336,20,5/21,16.80,6.48,15.55,-,1
4,AD Russell (KKR),2024-2024,15,14,176,29.2,-,295,19,3/19,15.52,10.05,9.26,-,-
5,Harshit Rana (KKR),2024-2024,13,11,253,42.1,1,383,19,3/24,20.15,9.08,13.31,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,R Parag (RR),2021-2021,11,6,37,6.1,-,73,1,1/7,73.00,11.83,37.00,-,-
390,FA Allen (PBKS),2021-2021,4,3,66,11.0,1,90,1,1/22,90.00,8.18,66.00,-,-
391,NT Ellis (PBKS),2021-2021,3,3,66,11.0,-,90,1,1/12,90.00,8.18,66.00,-,-
392,M Ashwin (PBKS),2021-2021,3,3,66,11.0,-,97,1,1/32,97.00,8.81,66.00,-,-


In [5]:
df['Span'] = df['Span'].str.split('-').str[0]

In [7]:
# Separate 'Player' and 'Team' into two columns
df[['Player Name', 'Team']] = df['Player'].str.extract(r'^(.*?)\s\((.*?)\)$')

# Drop the original 'Player' column if needed
df = df.drop(columns=['Player'])

In [9]:
df

Unnamed: 0,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5,Player Name,Team
1,2024,14,14,294,49.0,-,477,24,3/15,19.87,9.73,12.25,-,-,HV Patel,PBKS
2,2024,15,14,300,50.0,-,402,21,3/16,19.14,8.04,14.28,-,-,CV Varun,KKR
3,2024,13,13,311,51.5,-,336,20,5/21,16.80,6.48,15.55,-,1,JJ Bumrah,MI
4,2024,15,14,176,29.2,-,295,19,3/19,15.52,10.05,9.26,-,-,AD Russell,KKR
5,2024,13,11,253,42.1,1,383,19,3/24,20.15,9.08,13.31,-,-,Harshit Rana,KKR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,2021,11,6,37,6.1,-,73,1,1/7,73.00,11.83,37.00,-,-,R Parag,RR
390,2021,4,3,66,11.0,1,90,1,1/22,90.00,8.18,66.00,-,-,FA Allen,PBKS
391,2021,3,3,66,11.0,-,90,1,1/12,90.00,8.18,66.00,-,-,NT Ellis,PBKS
392,2021,3,3,66,11.0,-,97,1,1/32,97.00,8.81,66.00,-,-,M Ashwin,PBKS


In [11]:
df.dtypes

0
Span           object
Mat            object
Inns           object
Balls          object
Overs          object
Mdns           object
Runs           object
Wkts           object
BBI            object
Ave            object
Econ           object
SR             object
4              object
5              object
Player Name    object
Team           object
dtype: object

In [13]:
df.columns

Index(['Span', 'Mat', 'Inns', 'Balls', 'Overs', 'Mdns', 'Runs', 'Wkts', 'BBI',
       'Ave', 'Econ', 'SR', '4', '5', 'Player Name', 'Team'],
      dtype='object', name=0)

In [15]:
df = df[['Player Name', 'Team','Span', 'Mat', 'Inns', 'Balls', 'Overs', 'Mdns', 'Runs', 'Wkts', 'BBI',
       'Ave', 'Econ', 'SR', '4', '5']]

In [17]:
df

Unnamed: 0,Player Name,Team,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
1,HV Patel,PBKS,2024,14,14,294,49.0,-,477,24,3/15,19.87,9.73,12.25,-,-
2,CV Varun,KKR,2024,15,14,300,50.0,-,402,21,3/16,19.14,8.04,14.28,-,-
3,JJ Bumrah,MI,2024,13,13,311,51.5,-,336,20,5/21,16.80,6.48,15.55,-,1
4,AD Russell,KKR,2024,15,14,176,29.2,-,295,19,3/19,15.52,10.05,9.26,-,-
5,Harshit Rana,KKR,2024,13,11,253,42.1,1,383,19,3/24,20.15,9.08,13.31,-,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,R Parag,RR,2021,11,6,37,6.1,-,73,1,1/7,73.00,11.83,37.00,-,-
390,FA Allen,PBKS,2021,4,3,66,11.0,1,90,1,1/22,90.00,8.18,66.00,-,-
391,NT Ellis,PBKS,2021,3,3,66,11.0,-,90,1,1/12,90.00,8.18,66.00,-,-
392,M Ashwin,PBKS,2021,3,3,66,11.0,-,97,1,1/32,97.00,8.81,66.00,-,-


In [19]:
# Loop through columns excluding the first two (i.e., "Player" and "Team")
for col in df.columns[2:10]:
        df[col] = pd.to_numeric(df[col], errors='coerce')


In [27]:
df.dtypes

0
Player Name     object
Team            object
Span             int32
Mat              int32
Inns             int32
Balls            int32
Overs            int32
Mdns             int32
Runs             int32
Wkts             int32
BBI             object
Ave            float64
Econ           float64
SR             float64
4              float64
5              float64
dtype: object

In [21]:
# Loop through columns excluding the first two (i.e., "Player" and "Team")
for col in df.columns[11:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')

In [91]:
df.dtypes

0
Player Name     object
Team            object
Span           float64
Mat            float64
Inns           float64
Balls          float64
Overs          float64
Mdns           float64
Runs           float64
Wkts           float64
BBI             object
Ave            float64
Econ           float64
SR             float64
4              float64
5              float64
dtype: object

In [23]:
for col in df.columns[2:10]:
    df[col] = df[col].fillna(0).astype(int)

In [25]:
df.dtypes

0
Player Name     object
Team            object
Span             int32
Mat              int32
Inns             int32
Balls            int32
Overs            int32
Mdns             int32
Runs             int32
Wkts             int32
BBI             object
Ave            float64
Econ           float64
SR             float64
4              float64
5              float64
dtype: object

In [29]:
# Loop through columns excluding the first two (i.e., "Player" and "Team")
for col in df.columns[14:]:
        df[col] = df[col].fillna(0).astype(int)

In [31]:

df.dtypes

0
Player Name     object
Team            object
Span             int32
Mat              int32
Inns             int32
Balls            int32
Overs            int32
Mdns             int32
Runs             int32
Wkts             int32
BBI             object
Ave            float64
Econ           float64
SR             float64
4                int32
5                int32
dtype: object

In [33]:
df

Unnamed: 0,Player Name,Team,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
1,HV Patel,PBKS,2024,14,14,294,49,0,477,24,3/15,19.87,9.73,12.25,0,0
2,CV Varun,KKR,2024,15,14,300,50,0,402,21,3/16,19.14,8.04,14.28,0,0
3,JJ Bumrah,MI,2024,13,13,311,51,0,336,20,5/21,16.80,6.48,15.55,0,1
4,AD Russell,KKR,2024,15,14,176,29,0,295,19,3/19,15.52,10.05,9.26,0,0
5,Harshit Rana,KKR,2024,13,11,253,42,1,383,19,3/24,20.15,9.08,13.31,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,R Parag,RR,2021,11,6,37,6,0,73,1,1/7,73.00,11.83,37.00,0,0
390,FA Allen,PBKS,2021,4,3,66,11,1,90,1,1/22,90.00,8.18,66.00,0,0
391,NT Ellis,PBKS,2021,3,3,66,11,0,90,1,1/12,90.00,8.18,66.00,0,0
392,M Ashwin,PBKS,2021,3,3,66,11,0,97,1,1/32,97.00,8.81,66.00,0,0


In [35]:
df = df.rename(columns={'Span': 'Year'})

In [37]:
df

Unnamed: 0,Player Name,Team,Year,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
1,HV Patel,PBKS,2024,14,14,294,49,0,477,24,3/15,19.87,9.73,12.25,0,0
2,CV Varun,KKR,2024,15,14,300,50,0,402,21,3/16,19.14,8.04,14.28,0,0
3,JJ Bumrah,MI,2024,13,13,311,51,0,336,20,5/21,16.80,6.48,15.55,0,1
4,AD Russell,KKR,2024,15,14,176,29,0,295,19,3/19,15.52,10.05,9.26,0,0
5,Harshit Rana,KKR,2024,13,11,253,42,1,383,19,3/24,20.15,9.08,13.31,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389,R Parag,RR,2021,11,6,37,6,0,73,1,1/7,73.00,11.83,37.00,0,0
390,FA Allen,PBKS,2021,4,3,66,11,1,90,1,1/22,90.00,8.18,66.00,0,0
391,NT Ellis,PBKS,2021,3,3,66,11,0,90,1,1/12,90.00,8.18,66.00,0,0
392,M Ashwin,PBKS,2021,3,3,66,11,0,97,1,1/32,97.00,8.81,66.00,0,0


In [39]:
df.to_csv("last_4years_bowlers.csv")