In [1]:
#Python Library
import warnings
warnings.filterwarnings("ignore")
import math
import numpy as np
import pandas as pd
import pickle

# ML Library

In [2]:
#ML Library
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, confusion_matrix, classification_report

from sklearn.model_selection import train_test_split

# Load Dataset

In [15]:
#load data
# Read the pickled second-innings dataset. The file is opened in a `with`
# block so the handle is closed as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
with open('ipl_mens_second_innings_win.pkl', 'rb') as dataset_file:
    second_innings_win_portability_df = pickle.load(dataset_file)
second_innings_win_portability_df

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr,result
0,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,1,208,207,119,10,6.00,10.44,0
1,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,1,208,207,118,10,3.00,10.53,0
2,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,1,208,207,117,10,2.00,10.62,0
3,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,3,208,205,116,10,4.50,10.60,0
4,Royal Challengers Bangalore,Sunrisers Hyderabad,Hyderabad,7,208,201,115,10,8.40,10.49,0
...,...,...,...,...,...,...,...,...,...,...,...
112560,Royal Challengers Bangalore,Sunrisers Hyderabad,Bangalore,192,209,17,5,4,10.02,20.40,0
112561,Royal Challengers Bangalore,Sunrisers Hyderabad,Bangalore,194,209,15,4,4,10.03,22.50,0
112562,Royal Challengers Bangalore,Sunrisers Hyderabad,Bangalore,194,209,15,3,3,9.95,30.00,0
112563,Royal Challengers Bangalore,Sunrisers Hyderabad,Bangalore,195,209,14,2,3,9.92,42.00,0


# Test and Train Split

In [16]:
#Split
# The label is the `result` column (kept as a NumPy array); the features are
# every remaining column of the frame.
y2 = second_innings_win_portability_df['result'].values
X2 = second_innings_win_portability_df.drop(columns=['result'])

# Hold out 10% of the rows for evaluation; random_state fixes the shuffle.
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y2,
    test_size=0.1,
    random_state=1,
)

In [17]:
#Display
# Show the training feature frame (the `result` label column was dropped in the split cell)
X2_train

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr
41728,Royal Challengers Bangalore,Mumbai Indians,Bengaluru,12,172,160,109,10,6.55,8.81
62231,Kolkata Knight Riders,Chennai Super Kings,Kolkata,61,165,104,66,5,6.78,9.45
34847,Kolkata Knight Riders,Delhi Capitals,Mumbai,62,216,154,71,8,7.59,13.01
20634,Punjab Kings,Mumbai Indians,Abu Dhabi,105,192,87,36,6,7.50,14.50
1009,Mumbai Indians,Sunrisers Hyderabad,Mumbai,17,159,142,109,10,9.27,7.82
...,...,...,...,...,...,...,...,...,...,...
50294,Rajasthan Royals,Punjab Kings,Jaipur,159,167,8,14,6,9.00,3.43
98490,Royal Challengers Bangalore,Kolkata Knight Riders,Kolkata,67,196,129,66,9,7.44,11.73
5210,Kolkata Knight Riders,Punjab Kings,Chandigarh,76,168,92,65,9,8.29,8.49
78066,Delhi Capitals,Mumbai Indians,Mumbai,30,93,63,91,10,6.21,4.15


In [18]:
#Column Transformation
# One-hot encode the three categorical columns (dropping the first level of
# each) and pass every other column through unchanged.
#
# `OneHotEncoder(sparse=False)` was deprecated in scikit-learn 1.2 and removed
# in 1.4, while its replacement `sparse_output` does not exist in older
# releases. Asking the ColumnTransformer itself for dense output
# (`sparse_threshold=0`) produces the same dense matrix on both old and new
# versions without naming either keyword.
trf2 = ColumnTransformer(
    [
        ('trf2', OneHotEncoder(drop='first'), ['batting_team', 'bowling_team', 'city'])
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

In [19]:
#Pipeline
# Two-stage pipeline: the column transformer prepares the features, then a
# logistic-regression classifier (default settings) is fitted on its output.
pipe2 = Pipeline(
    steps=[
        ('step1', trf2),
        ('step2', LogisticRegression()),
    ]
)

In [20]:
#Train Model
# Fit both pipeline steps (column transformer, then classifier) on the training split
pipe2.fit(X2_train,y2_train)

In [21]:
# Predicted class probabilities for the test row at position 10
# (columns follow the order of pipe2.classes_)
pipe2.predict_proba(X2_test)[10]

array([0.59633878, 0.40366122])

In [22]:
#Predicted labels on test data
# (no accuracy is computed in this cell; y2_pred is consumed by the
# confusion-matrix and classification-report cells that follow)
y2_pred = pipe2.predict(X2_test)

In [23]:
#Confusion Matrix
# Counts of test rows by (true label, predicted label); per scikit-learn's
# convention rows are the true classes and columns the predicted classes.
confusion_matrix(y2_test, y2_pred)

array([[5501, 1119],
       [1245, 3342]])

In [24]:
#Classification Report
# Per-class precision, recall and F1 on the held-out test split
print(classification_report(y2_test, y2_pred))

              precision    recall  f1-score   support

           0       0.82      0.83      0.82      6620
           1       0.75      0.73      0.74      4587

    accuracy                           0.79     11207
   macro avg       0.78      0.78      0.78     11207
weighted avg       0.79      0.79      0.79     11207



# Result Prediction

In [26]:
#Prediction
# Describe the chase by its primary inputs only. The derived features are
# computed below with the same formulas the dataset rows follow:
#   runs_left = target_runs - current_score
#   crr       = current_score * 6 / balls bowled   (120-ball innings)
#   rrr       = runs_left * 6 / balls_left
# both rounded to two decimals. The previously hand-typed crr/rrr
# (7.80 / 9.29) did not agree with the other inputs, so the model was being
# asked about a match state that cannot occur in the training data.
INNINGS_BALLS = 120

current_score = 32
target_runs = 176
balls_left = 95
wickets_left = 10

runs_left = target_runs - current_score
balls_bowled = INNINGS_BALLS - balls_left
crr = round(current_score * 6 / balls_bowled, 2)
rrr = round(runs_left * 6 / balls_left, 2)

# Column order is kept identical to the training frame.
input_df = pd.DataFrame({
    'batting_team': ['Royal Challengers Bangalore'],
    'bowling_team': ['Chennai Super Kings'],
    'city': ['Kolkata'],
    'current_score': [current_score],
    'target_runs': [target_runs],
    'runs_left': [runs_left],
    'balls_left': [balls_left],
    'wickets_left': [wickets_left],
    'crr': [crr],
    'rrr': [rrr],
})
result = pipe2.predict_proba(input_df)

# One input row: column 0 is the probability of result == 0 (loss),
# column 1 the probability of result == 1 (win).
loss = result[0][0]
win = result[0][1]

print('win: '+str(round(win*100))+'%')
print('loss: '+str(round(loss*100))+'%')

win: 64%
loss: 36%


# Model Export

In [27]:
#Model Export
# Serialize the fitted pipeline (encoder + classifier) to disk. The `with`
# block guarantees the file is flushed and closed once the dump completes,
# instead of relying on garbage collection of an anonymous file handle.
with open('ipl_mens_second_innings_final_model.pkl', 'wb') as model_file:
    pickle.dump(pipe2, model_file)