In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [None]:
# ESPNcricinfo tournament-record pages (HTML pages, not a JSON API) for the
# ICC Cricket World Cup 2023-24: most runs and most wickets.
batting_url = "https://www.espncricinfo.com/records/tournament/batting-most-runs-career/icc-cricket-world-cup-2023-24-15338"
bowling_url = 'https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/icc-cricket-world-cup-2023-24-15338'

# requests has no default timeout; without one a stalled connection hangs the cell forever.
REQUEST_TIMEOUT_SECONDS = 30

# Download both pages. raise_for_status() converts an HTTP error (4xx/5xx) into an
# exception right here, instead of an obscure parsing failure in a later cell.
batting_response = requests.get(batting_url, timeout=REQUEST_TIMEOUT_SECONDS)
batting_response.raise_for_status()
bowling_response = requests.get(bowling_url, timeout=REQUEST_TIMEOUT_SECONDS)
bowling_response.raise_for_status()

In [None]:
# Parse the downloaded batting page with Beautiful Soup.
batting_soup = BeautifulSoup(batting_response.text, "html.parser")

# Locate the statistics table. The lookup matches the full class string, so any
# change to the site's CSS classes makes find() return None.
table = batting_soup.find("table", class_="ds-w-full ds-table ds-table-xs ds-table-auto ds-w-full ds-overflow-scroll ds-scrollbar-hide")
if table is None:
    # Fail with an explicit message rather than "'NoneType' object has no attribute 'find'".
    raise ValueError(
        "Batting statistics table not found in the response "
        f"(HTTP status {batting_response.status_code}); the page markup may have "
        "changed or the request may have been blocked."
    )

# Column names are the texts of the <span> elements inside the table header.
headers = table.find("thead").find_all("span")
column_headers = [header.text.strip() for header in headers]

# One record per <tr> in the table body.
rows = table.find("tbody").find_all("tr")

data = []
for row in rows:
    row_data = row.find_all("span")
    # The first <span> of every row is skipped.
    # NOTE(review): presumably an extra wrapper span around the player cell - confirm against the page markup.
    row_values = [cell.text.strip() for cell in row_data[1:]]
    data.append(row_values)

# Every value in this frame is still text at this point.
batting_df = pd.DataFrame(data, columns=column_headers)

batting_df.head()

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s
0,V Kohli (IND),2023-2023,9,9,3,594,103*,99.0,671,88.52,2,5,1,55,7
1,Q de Kock (SA),2023-2023,9,9,0,591,174,65.66,541,109.24,4,-,-,57,21
2,R Ravindra (NZ),2023-2023,9,9,1,565,123*,70.62,521,108.44,3,2,-,52,17
3,RG Sharma (IND),2023-2023,9,9,0,503,131,55.88,414,121.49,1,3,1,58,24
4,DA Warner (AUS),2023-2023,9,9,0,499,163,55.44,473,105.49,2,2,-,48,20


In [None]:
# Parse the downloaded bowling page with Beautiful Soup.
bowling_soup = BeautifulSoup(bowling_response.text, "html.parser")

# Locate the statistics table. The lookup matches the full class string, so any
# change to the site's CSS classes makes find() return None.
table = bowling_soup.find("table", class_="ds-w-full ds-table ds-table-xs ds-table-auto ds-w-full ds-overflow-scroll ds-scrollbar-hide")
if table is None:
    # Fail with an explicit message rather than "'NoneType' object has no attribute 'find'".
    raise ValueError(
        "Bowling statistics table not found in the response "
        f"(HTTP status {bowling_response.status_code}); the page markup may have "
        "changed or the request may have been blocked."
    )

# Column names are the texts of the <span> elements inside the table header.
headers = table.find("thead").find_all("span")
column_headers = [header.text.strip() for header in headers]

# One record per <tr> in the table body.
rows = table.find("tbody").find_all("tr")

data = []
for row in rows:
    row_data = row.find_all("span")
    # The first <span> of every row is skipped.
    # NOTE(review): presumably an extra wrapper span around the player cell - confirm against the page markup.
    row_values = [cell.text.strip() for cell in row_data[1:]]
    data.append(row_values)

# Every value in this frame is still text at this point.
bowling_df = pd.DataFrame(data, columns=column_headers)

bowling_df.head()

Unnamed: 0,Player,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,A Zampa (AUS),2023-2023,9,9,474,79.0,1,416,22,4/8,18.9,5.26,21.54,3,-
1,D Madushanka (SL),2023-2023,9,9,470,78.2,4,525,21,5/80,25.0,6.7,22.38,1,1
2,G Coetzee (SA),2023-2023,7,7,327,54.3,1,349,18,4/44,19.38,6.4,18.16,1,-
3,Shaheen Shah Afridi (PAK),2023-2023,9,9,486,81.0,3,481,18,5/54,26.72,5.93,27.0,-,1
4,JJ Bumrah (IND),2023-2023,9,9,437,72.5,6,266,17,4/39,15.64,3.65,25.7,1,-


In [None]:
# Inspect the bowling frame exactly as scraped, before any cleaning is applied.
bowling_df

Unnamed: 0,Player,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,A Zampa (AUS),2023-2023,9,9,474,79.0,1,416,22,4/8,18.90,5.26,21.54,3,-
1,D Madushanka (SL),2023-2023,9,9,470,78.2,4,525,21,5/80,25.00,6.70,22.38,1,1
2,G Coetzee (SA),2023-2023,7,7,327,54.3,1,349,18,4/44,19.38,6.40,18.16,1,-
3,Shaheen Shah Afridi (PAK),2023-2023,9,9,486,81.0,3,481,18,5/54,26.72,5.93,27.00,-,1
4,JJ Bumrah (IND),2023-2023,9,9,437,72.5,6,266,17,4/39,15.64,3.65,25.70,1,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,V Kohli (IND),2023-2023,9,2,21,3.3,-,15,1,1/13,15.00,4.28,21.00,-,-
78,R Ashwin (IND),2023-2023,1,1,60,10.0,1,34,1,1/34,34.00,3.40,60.00,-,-
79,AL Phehlukwayo (SA),2023-2023,1,1,42,7.0,-,36,1,1/36,36.00,5.14,42.00,-,-
80,Saqib Zulfiqar (NED),2023-2023,2,2,30,5.0,-,40,1,1/25,40.00,8.00,30.00,-,-


In [None]:
# The scraped table shows "-" where a player has no value for a statistic; use 0 instead.
bat_df_update = batting_df.replace("-", 0)

# Every scraped cell is text. Convert the statistic columns to real numbers so that
# sorting, arithmetic and model fitting operate on values rather than strings.
# Player, Span and HS stay as text: HS can carry a "*" suffix (e.g. "103*").
# pd.to_numeric raises on an unparseable value, which surfaces scraping surprises early.
bat_numeric_cols = [col for col in bat_df_update.columns if col not in ("Player", "Span", "HS")]
bat_df_update[bat_numeric_cols] = bat_df_update[bat_numeric_cols].apply(pd.to_numeric)
bat_df_update

Unnamed: 0,Player,Span,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s
0,V Kohli (IND),2023-2023,9,9,3,594,103*,99.00,671,88.52,2,5,1,55,7
1,Q de Kock (SA),2023-2023,9,9,0,591,174,65.66,541,109.24,4,0,0,57,21
2,R Ravindra (NZ),2023-2023,9,9,1,565,123*,70.62,521,108.44,3,2,0,52,17
3,RG Sharma (IND),2023-2023,9,9,0,503,131,55.88,414,121.49,1,3,1,58,24
4,DA Warner (AUS),2023-2023,9,9,0,499,163,55.44,473,105.49,2,2,0,48,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,JR Hazlewood (AUS),2023-2023,9,5,3,4,2,2.00,5,80.00,0,0,1,0,0
141,Fazalhaq Farooqi (AFG),2023-2023,6,3,2,2,2*,2.00,9,22.22,0,0,1,0,0
142,LH Ferguson (NZ),2023-2023,6,2,1,1,1,1.00,6,16.66,0,0,0,0,0
143,AT Carey (AUS),2023-2023,1,1,0,0,0,0.00,2,0.00,0,0,1,0,0


In [None]:
# The scraped table shows "-" where a player has no value for a statistic; use 0 instead.
bowl_df_update = bowling_df.replace("-", 0)

# Every scraped cell is text. Convert the statistic columns to real numbers so that
# sorting, arithmetic and model fitting operate on values rather than strings.
# Player, Span and BBI stay as text: BBI is a "wickets/runs" pair (e.g. "4/8").
# pd.to_numeric raises on an unparseable value, which surfaces scraping surprises early.
bowl_numeric_cols = [col for col in bowl_df_update.columns if col not in ("Player", "Span", "BBI")]
bowl_df_update[bowl_numeric_cols] = bowl_df_update[bowl_numeric_cols].apply(pd.to_numeric)
bowl_df_update


Unnamed: 0,Player,Span,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,A Zampa (AUS),2023-2023,9,9,474,79.0,1,416,22,4/8,18.90,5.26,21.54,3,0
1,D Madushanka (SL),2023-2023,9,9,470,78.2,4,525,21,5/80,25.00,6.70,22.38,1,1
2,G Coetzee (SA),2023-2023,7,7,327,54.3,1,349,18,4/44,19.38,6.40,18.16,1,0
3,Shaheen Shah Afridi (PAK),2023-2023,9,9,486,81.0,3,481,18,5/54,26.72,5.93,27.00,0,1
4,JJ Bumrah (IND),2023-2023,9,9,437,72.5,6,266,17,4/39,15.64,3.65,25.70,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,V Kohli (IND),2023-2023,9,2,21,3.3,0,15,1,1/13,15.00,4.28,21.00,0,0
78,R Ashwin (IND),2023-2023,1,1,60,10.0,1,34,1,1/34,34.00,3.40,60.00,0,0
79,AL Phehlukwayo (SA),2023-2023,1,1,42,7.0,0,36,1,1/36,36.00,5.14,42.00,0,0
80,Saqib Zulfiqar (NED),2023-2023,2,2,30,5.0,0,40,1,1/25,40.00,8.00,30.00,0,0


In [None]:
# Add a 1-based running number as the third column, following the current row order.
# DataFrame.insert mutates the frame in place and raises ValueError when the column
# already exists, so the guard keeps this cell safe to run more than once.
if 'ID' not in bowl_df_update.columns:
    bowl_df_update.insert(2, 'ID', range(1, 1 + len(bowl_df_update)))

In [None]:
# Show the bowling frame now that the 'ID' column sits at position 2.
bowl_df_update

Unnamed: 0,Player,Span,ID,Mat,Inns,Balls,Overs,Mdns,Runs,Wkts,BBI,Ave,Econ,SR,4,5
0,A Zampa (AUS),2023-2023,1,9,9,474,79.0,1,416,22,4/8,18.90,5.26,21.54,3,0
1,D Madushanka (SL),2023-2023,2,9,9,470,78.2,4,525,21,5/80,25.00,6.70,22.38,1,1
2,G Coetzee (SA),2023-2023,3,7,7,327,54.3,1,349,18,4/44,19.38,6.40,18.16,1,0
3,Shaheen Shah Afridi (PAK),2023-2023,4,9,9,486,81.0,3,481,18,5/54,26.72,5.93,27.00,0,1
4,JJ Bumrah (IND),2023-2023,5,9,9,437,72.5,6,266,17,4/39,15.64,3.65,25.70,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,V Kohli (IND),2023-2023,78,9,2,21,3.3,0,15,1,1/13,15.00,4.28,21.00,0,0
78,R Ashwin (IND),2023-2023,79,1,1,60,10.0,1,34,1,1/34,34.00,3.40,60.00,0,0
79,AL Phehlukwayo (SA),2023-2023,80,1,1,42,7.0,0,36,1,1/36,36.00,5.14,42.00,0,0
80,Saqib Zulfiqar (NED),2023-2023,81,2,2,30,5.0,0,40,1,1/25,40.00,8.00,30.00,0,0


In [None]:
# Add a 1-based running number as the third column, following the current row order.
# DataFrame.insert mutates the frame in place and raises ValueError when the column
# already exists, so the guard keeps this cell safe to run more than once.
if 'ID' not in bat_df_update.columns:
    bat_df_update.insert(2, 'ID', range(1, 1 + len(bat_df_update)))

In [None]:
# Show the batting frame now that the 'ID' column sits at position 2.
bat_df_update

Unnamed: 0,Player,Span,ID,Mat,Inns,NO,Runs,HS,Ave,BF,SR,100,50,0,4s,6s
0,V Kohli (IND),2023-2023,1,9,9,3,594,103*,99.00,671,88.52,2,5,1,55,7
1,Q de Kock (SA),2023-2023,2,9,9,0,591,174,65.66,541,109.24,4,0,0,57,21
2,R Ravindra (NZ),2023-2023,3,9,9,1,565,123*,70.62,521,108.44,3,2,0,52,17
3,RG Sharma (IND),2023-2023,4,9,9,0,503,131,55.88,414,121.49,1,3,1,58,24
4,DA Warner (AUS),2023-2023,5,9,9,0,499,163,55.44,473,105.49,2,2,0,48,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,JR Hazlewood (AUS),2023-2023,141,9,5,3,4,2,2.00,5,80.00,0,0,1,0,0
141,Fazalhaq Farooqi (AFG),2023-2023,142,6,3,2,2,2*,2.00,9,22.22,0,0,1,0,0
142,LH Ferguson (NZ),2023-2023,143,6,2,1,1,1,1.00,6,16.66,0,0,0,0,0
143,AT Carey (AUS),2023-2023,144,1,1,0,0,0,0.00,2,0.00,0,0,1,0,0


In [None]:
# List the batting column labels; the next cell reorders them.
bat_df_update.columns

Index(['Player', 'Span', 'ID', 'Mat', 'Inns', 'NO', 'Runs', 'HS', 'Ave', 'BF',
       'SR', '100', '50', '0', '4s', '6s'],
      dtype='object')

In [None]:
# Same columns as before, with 'Runs' (used later as the target) moved to the end.
new_order = [
    'Player', 'Span', 'ID', 'Mat', 'Inns', 'NO', 'HS', 'Ave',
    'BF', 'SR', '100', '50', '0', '4s', '6s', 'Runs',
]

# Rearrange the batting frame's columns to follow new_order.
bat_df_update = bat_df_update.reindex(new_order, axis='columns')
bat_df_update

Unnamed: 0,Player,Span,ID,Mat,Inns,NO,HS,Ave,BF,SR,100,50,0,4s,6s,Runs
0,V Kohli (IND),2023-2023,1,9,9,3,103*,99.00,671,88.52,2,5,1,55,7,594
1,Q de Kock (SA),2023-2023,2,9,9,0,174,65.66,541,109.24,4,0,0,57,21,591
2,R Ravindra (NZ),2023-2023,3,9,9,1,123*,70.62,521,108.44,3,2,0,52,17,565
3,RG Sharma (IND),2023-2023,4,9,9,0,131,55.88,414,121.49,1,3,1,58,24,503
4,DA Warner (AUS),2023-2023,5,9,9,0,163,55.44,473,105.49,2,2,0,48,20,499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,JR Hazlewood (AUS),2023-2023,141,9,5,3,2,2.00,5,80.00,0,0,1,0,0,4
141,Fazalhaq Farooqi (AFG),2023-2023,142,6,3,2,2*,2.00,9,22.22,0,0,1,0,0,2
142,LH Ferguson (NZ),2023-2023,143,6,2,1,1,1.00,6,16.66,0,0,0,0,0,1
143,AT Carey (AUS),2023-2023,144,1,1,0,0,0.00,2,0.00,0,0,1,0,0,0


In [None]:
# List the bowling column labels; the next cell reorders them.
bowl_df_update.columns

Index(['Player', 'Span', 'ID', 'Mat', 'Inns', 'Balls', 'Overs', 'Mdns', 'Runs',
       'Wkts', 'BBI', 'Ave', 'Econ', 'SR', '4', '5'],
      dtype='object')

In [None]:
# Same columns as before, with 'Wkts' (used later as the target) moved to the end.
order = [
    'Player', 'Span', 'ID', 'Mat', 'Inns', 'Balls', 'Overs', 'Mdns',
    'Runs', 'BBI', 'Ave', 'Econ', 'SR', '4', '5', 'Wkts',
]

# Rearrange the bowling frame's columns to follow order.
bowl_df_update = bowl_df_update.reindex(order, axis='columns')
bowl_df_update

Unnamed: 0,Player,Span,ID,Mat,Inns,Balls,Overs,Mdns,Runs,BBI,Ave,Econ,SR,4,5,Wkts
0,A Zampa (AUS),2023-2023,1,9,9,474,79.0,1,416,4/8,18.90,5.26,21.54,3,0,22
1,D Madushanka (SL),2023-2023,2,9,9,470,78.2,4,525,5/80,25.00,6.70,22.38,1,1,21
2,G Coetzee (SA),2023-2023,3,7,7,327,54.3,1,349,4/44,19.38,6.40,18.16,1,0,18
3,Shaheen Shah Afridi (PAK),2023-2023,4,9,9,486,81.0,3,481,5/54,26.72,5.93,27.00,0,1,18
4,JJ Bumrah (IND),2023-2023,5,9,9,437,72.5,6,266,4/39,15.64,3.65,25.70,1,0,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,V Kohli (IND),2023-2023,78,9,2,21,3.3,0,15,1/13,15.00,4.28,21.00,0,0,1
78,R Ashwin (IND),2023-2023,79,1,1,60,10.0,1,34,1/34,34.00,3.40,60.00,0,0,1
79,AL Phehlukwayo (SA),2023-2023,80,1,1,42,7.0,0,36,1/36,36.00,5.14,42.00,0,0,1
80,Saqib Zulfiqar (NED),2023-2023,81,2,2,30,5.0,0,40,1/25,40.00,8.00,30.00,0,0,1


In [None]:
# Preview of the bowling frame without 'BBI'. The result is not assigned, so
# bowl_df_update itself still contains the column after this cell runs.
bowl_df_update.drop('BBI', axis='columns')

In [None]:
# Batting features and target, selected by label rather than by position
# (previously iloc[:, 2:15] and iloc[:, 15:]). With the current column layout this
# yields the same columns ('ID' through '6s', and 'Runs'), but it can no longer
# pick the wrong columns silently if the layout changes.
# NOTE(review): 'ID' is only the 1-based row number of the scraped table, which
# appears to be sorted by runs - it may leak the target; consider excluding it.
x = bat_df_update.drop(columns=['Player', 'Span', 'Runs'])
# Double brackets keep y as a one-column DataFrame, as before.
y = bat_df_update[['Runs']]

In [None]:
# Inspect the batting feature columns ('HS' is still included at this point).
x

In [None]:
# Remove 'HS' from the batting features.
# NOTE(review): presumably because it holds text such as "103*" that cannot be used as a number.
x_up = x.drop(columns='HS')
x_up


In [None]:
# Inspect the batting target: a one-column frame holding 'Runs'.
y

In [None]:
# Fit a linear regression of 'Runs' on the batting features.
# All imports live in the notebook's first cell, so none are repeated here.
# NOTE(review): the predictions are made on the same rows the model was fitted on,
# so they show in-sample fit only, not how the model generalises.
linreg = LinearRegression()
linreg.fit(x_up, y)
y_pred = linreg.predict(x_up)
y_pred

In [None]:
# Bowling features and target, selected by label rather than by position
# (previously iloc[:, 2:15] and iloc[:, 15:]). With the current column layout this
# yields the same columns ('ID' through '5', and 'Wkts'), but it can no longer
# pick the wrong columns silently if the layout changes.
# NOTE(review): 'ID' is only the 1-based row number of the scraped table, which
# appears to be sorted by wickets - it may leak the target; consider excluding it.
x = bowl_df_update.drop(columns=['Player', 'Span', 'Wkts'])
# Double brackets keep y as a one-column DataFrame, as before.
y = bowl_df_update[['Wkts']]

In [None]:
# Inspect the bowling feature columns ('BBI' is still included at this point).
x

Unnamed: 0,ID,Mat,Inns,Balls,Overs,Mdns,Runs,BBI,Ave,Econ,SR,4,5
0,1,9,9,474,79.0,1,416,4/8,18.90,5.26,21.54,3,0
1,2,9,9,470,78.2,4,525,5/80,25.00,6.70,22.38,1,1
2,3,7,7,327,54.3,1,349,4/44,19.38,6.40,18.16,1,0
3,4,9,9,486,81.0,3,481,5/54,26.72,5.93,27.00,0,1
4,5,9,9,437,72.5,6,266,4/39,15.64,3.65,25.70,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,78,9,2,21,3.3,0,15,1/13,15.00,4.28,21.00,0,0
78,79,1,1,60,10.0,1,34,1/34,34.00,3.40,60.00,0,0
79,80,1,1,42,7.0,0,36,1/36,36.00,5.14,42.00,0,0
80,81,2,2,30,5.0,0,40,1/25,40.00,8.00,30.00,0,0


In [None]:
# Remove 'BBI' from the bowling features.
# NOTE(review): presumably because it holds text such as "4/8" that cannot be used as a number.
x_up = x.drop(columns='BBI')
x_up

Unnamed: 0,ID,Mat,Inns,Balls,Overs,Mdns,Runs,Ave,Econ,SR,4,5
0,1,9,9,474,79.0,1,416,18.90,5.26,21.54,3,0
1,2,9,9,470,78.2,4,525,25.00,6.70,22.38,1,1
2,3,7,7,327,54.3,1,349,19.38,6.40,18.16,1,0
3,4,9,9,486,81.0,3,481,26.72,5.93,27.00,0,1
4,5,9,9,437,72.5,6,266,15.64,3.65,25.70,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
77,78,9,2,21,3.3,0,15,15.00,4.28,21.00,0,0
78,79,1,1,60,10.0,1,34,34.00,3.40,60.00,0,0
79,80,1,1,42,7.0,0,36,36.00,5.14,42.00,0,0
80,81,2,2,30,5.0,0,40,40.00,8.00,30.00,0,0


In [None]:
# Inspect the bowling target: a one-column frame holding 'Wkts'.
y

Unnamed: 0,Wkts
0,22
1,21
2,18
3,18
4,17
...,...
77,1
78,1
79,1
80,1


In [None]:
# Fit a linear regression of 'Wkts' on the bowling features.
# All imports live in the notebook's first cell, so none are repeated here.
# NOTE(review): the predictions are made on the same rows the model was fitted on,
# so they show in-sample fit only, not how the model generalises.
linreg = LinearRegression()
linreg.fit(x_up, y)
y_pred = linreg.predict(x_up)
y_pred