In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
import math

In [2]:
df = pd.read_csv("BabarAzam_ODI_data.csv")
print(df.info())
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Runs        122 non-null    object
 1   Mins        122 non-null    object
 2   BF          122 non-null    object
 3   4s          122 non-null    object
 4   6s          122 non-null    object
 5   SR          122 non-null    object
 6   Pos         122 non-null    object
 7   Dismissal   122 non-null    object
 8   Inns        122 non-null    object
 9   Opposition  122 non-null    object
 10  Ground      122 non-null    object
 11  Start Date  122 non-null    object
 12  Match Type  122 non-null    object
dtypes: object(13)
memory usage: 12.5+ KB
None


Unnamed: 0,Runs,Mins,BF,4s,6s,SR,Pos,Dismissal,Inns,Opposition,Ground,Start Date,Match Type
0,54,78,60,4,0,90.00,4,bowled,1,Zimbabwe,Lahore,31 May 2015,ODI # 3653
1,25,48,30,3,0,83.33,4,lbw,2,Sri Lanka,Dambulla,11 Jul 2015,ODI # 3664
2,12,23,19,1,0,63.15,4,bowled,1,Sri Lanka,Pallekele,15 Jul 2015,ODI # 3669
3,62*,79,62,2,4,100.00,6,not out,2,England,Abu Dhabi,11 Nov 2015,ODI # 3707
4,4,8,6,0,0,66.66,2,lbw,2,England,Abu Dhabi,13 Nov 2015,ODI # 3708
...,...,...,...,...,...,...,...,...,...,...,...,...,...
117,37,66,44,4,0,84.09,3,bowled,1,Australia,Melbourne,4 Nov 2024,ODI # 4795
118,15*,22,20,0,1,75.00,3,not out,2,Australia,Adelaide,8 Nov 2024,ODI # 4801
119,28*,41,30,4,0,93.33,3,not out,2,Australia,Perth,10 Nov 2024,ODI # 4804
120,23,51,38,3,0,60.52,3,caught,2,South Africa,Paarl,17 Dec 2024,ODI # 4817


In [20]:
dfr = df[['Runs','BF','Pos', 'Inns', 'Opposition', 'Ground']].copy()
dfr.head()

Unnamed: 0,Runs,BF,Pos,Inns,Opposition,Ground
0,54,60,4,1,Zimbabwe,Lahore
1,25,30,4,2,Sri Lanka,Dambulla
2,12,19,4,1,Sri Lanka,Pallekele
3,62*,62,6,2,England,Abu Dhabi
4,4,6,2,2,England,Abu Dhabi


In [23]:
print(dfr['Opposition'].unique())
print(dfr['Ground'].unique())

['Zimbabwe' 'Sri Lanka' 'England' 'New Zealand' 'Ireland' 'West Indies'
 'Australia' 'India' 'South Africa' 'Hong Kong' 'Afghanistan' 'Bangladesh'
 'Netherlands' 'Nepal']
['Lahore' 'Dambulla' 'Pallekele' 'Abu Dhabi' 'Sharjah' 'Dubai (DICS)'
 'Wellington' 'Auckland' 'Dublin (Malahide)' 'Southampton' "Lord's"
 'Nottingham' 'Leeds' 'Cardiff' 'Brisbane' 'Melbourne' 'W.A.C.A' 'Sydney'
 'Adelaide' 'Providence' 'Birmingham' 'The Oval' 'Nelson' 'Dunedin'
 'Hamilton' 'Bulawayo' 'Gqeberha' 'Durban' 'Centurion' 'Johannesburg'
 'Cape Town' 'Bristol' 'Taunton' 'Manchester' 'Karachi' 'Rawalpindi'
 'Multan' 'Rotterdam' 'Hambantota' 'Colombo (RPS)' 'Hyderabad' 'Ahmedabad'
 'Bengaluru' 'Chennai' 'Eden Gardens' 'Perth' 'Paarl']


In [24]:
team_ratings = {
    'Zimbabwe': 0.55,
    'Sri Lanka': 0.65,
    'England': 0.8,
    'New Zealand': 0.83,
    'Ireland': 0.4,
    'West Indies': 0.7,
    'Australia': 0.78,
    'India': 0.9,
    'South Africa': 0.85,
    'Hong Kong': 0.3,
    'Afghanistan': 0.75,
    'Bangladesh': 0.6,
    'Netherlands': 0.45,
    'Nepal': 0.5
}

dfr['Opposition'] = dfr['Opposition'].replace(team_ratings)
dfr

Unnamed: 0,Runs,BF,Pos,Inns,Opposition,Ground
0,54,60,4,1,0.55,Lahore
1,25,30,4,2,0.65,Dambulla
2,12,19,4,1,0.65,Pallekele
3,62*,62,6,2,0.80,Abu Dhabi
4,4,6,2,2,0.80,Abu Dhabi
...,...,...,...,...,...,...
117,37,44,3,1,0.78,Melbourne
118,15*,20,3,2,0.78,Adelaide
119,28*,30,3,2,0.78,Perth
120,23,38,3,2,0.85,Paarl


In [25]:
pitch_assistance = ground_ratings = {
    'Lahore': 0.75,
    'Dambulla': 0.65,
    'Pallekele': 0.7,
    'Abu Dhabi': 0.75,
    'Sharjah': 0.8,
    'Dubai (DICS)': 0.7,
    'Wellington': 0.85,
    'Auckland': 0.85,
    'Dublin (Malahide)': 0.8,
    'Southampton': 0.9,
    'Lord\'s': 0.95,
    'Nottingham': 0.85,
    'Leeds': 0.85,
    'Cardiff': 0.8,
    'Brisbane': 0.8,
    'Melbourne': 0.9,
    'W.A.C.A': 0.7,
    'Sydney': 0.85,
    'Adelaide': 0.9,
    'Providence': 0.75,
    'Birmingham': 0.85,
    'The Oval': 0.9,
    'Nelson': 0.8,
    'Dunedin': 0.75,
    'Hamilton': 0.8,
    'Bulawayo': 0.65,
    'Gqeberha': 0.75,
    'Durban': 0.7,
    'Centurion': 0.75,
    'Johannesburg': 0.8,
    'Cape Town': 0.9,
    'Bristol': 0.8,
    'Taunton': 0.85,
    'Manchester': 0.85,
    'Karachi': 0.7,
    'Rawalpindi': 0.75,
    'Multan': 0.8,
    'Rotterdam': 0.65,
    'Hambantota': 0.7,
    'Colombo (RPS)': 0.75,
    'Hyderabad': 0.85,
    'Ahmedabad': 0.9,
    'Bengaluru': 0.85,
    'Chennai': 0.9,
    'Eden Gardens': 0.8,
    'Perth': 0.75,
    'Paarl': 0.8
}

dfr['Ground'] = dfr['Ground'].replace(pitch_assistance)
dfr

Unnamed: 0,Runs,BF,Pos,Inns,Opposition,Ground
0,54,60,4,1,0.55,0.75
1,25,30,4,2,0.65,0.65
2,12,19,4,1,0.65,0.70
3,62*,62,6,2,0.80,0.75
4,4,6,2,2,0.80,0.75
...,...,...,...,...,...,...
117,37,44,3,1,0.78,0.90
118,15*,20,3,2,0.78,0.90
119,28*,30,3,2,0.78,0.75
120,23,38,3,2,0.85,0.80


In [26]:
dfr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Runs        122 non-null    object 
 1   BF          122 non-null    object 
 2   Pos         122 non-null    object 
 3   Inns        122 non-null    object 
 4   Opposition  122 non-null    float64
 5   Ground      122 non-null    float64
dtypes: float64(2), object(4)
memory usage: 5.8+ KB


In [27]:
dfr = dfr[~dfr['Runs'].isin(['DNB', 'TDNB'])].copy()

In [28]:
dfr['Pos'] = dfr['Pos'].astype(int)
dfr['Inns'] = dfr['Inns'].astype(int)
dfr['Runs'] = dfr['Runs'].str.replace('*', '').astype(int)

In [29]:
dfr['BF'] = dfr['BF'].astype(int)

In [30]:
dfr['Inns'].unique()

array([1, 2])

In [31]:
dfr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 119 entries, 0 to 121
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Runs        119 non-null    int32  
 1   BF          119 non-null    int32  
 2   Pos         119 non-null    int32  
 3   Inns        119 non-null    int32  
 4   Opposition  119 non-null    float64
 5   Ground      119 non-null    float64
dtypes: float64(2), int32(4)
memory usage: 4.6 KB


In [33]:
dfr.corr()['BF']

Runs          0.960610
BF            1.000000
Pos          -0.056978
Inns         -0.187334
Opposition   -0.088539
Ground       -0.071671
Name: BF, dtype: float64

In [13]:
X = dfr[["Pos", "Inns","Opposition","Ground"]] 
y = dfr["Runs"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


In [14]:
model = LinearRegression()
model.fit(X_train, y_train)

LinearRegression()

In [15]:
y_pred = model.predict(X_test)

In [16]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [17]:
print("SQRT(Mean Squared Error):", math.sqrt(mse))
print("R-squared:", r2)

SQRT(Mean Squared Error): 27.70860239005388
R-squared: 0.1364446789891638


In [18]:
print(model.predict([[3,1,0.85,0.7]])[0])

59.649179305765614


  "X does not have valid feature names, but"


### StrikeRate Pred

In [34]:
X2 = dfr[["Pos", "Inns","Opposition","Ground"]] 
y2 = dfr["BF"]
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.1, random_state=42)


In [35]:
model2 = LinearRegression()
model2.fit(X2_train, y2_train)

LinearRegression()

In [36]:
y2_pred = model.predict(X2_test)

In [37]:
mse = mean_squared_error(y2_test, y2_pred)
r2 = r2_score(y2_test, y2_pred)
print("SQRT(Mean Squared Error):", math.sqrt(mse))
print("R-squared:", r2)

SQRT(Mean Squared Error): 31.706843109236615
R-squared: 0.11470165301921231


In [39]:
print(model2.predict([[3,1,0.85,0.7]])[0])

66.62335684503226


  "X does not have valid feature names, but"


In [41]:
print("Predicted Results: 59(67) \nAfter Match Results: 52(71)")

Predicted Results: 59(67) 
After Match Results: 52(71)
