In [38]:
#Python Library
import warnings
warnings.filterwarnings("ignore")
import math
import numpy as np
import pandas as pd
import pickle

# ML Library

In [39]:
#ML Library
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, confusion_matrix, classification_report

from sklearn.model_selection import train_test_split

# Load Dataset

In [40]:
# Load data
# Read the second-innings DataFrame from its pickle file. The context manager
# closes the file handle as soon as loading finishes (the previous bare open()
# left it open until garbage collection).
# NOTE(review): pickle.load can execute arbitrary code during deserialization;
# only load this file if it comes from a trusted source.
with open('cpl_mens_second_innings_win.pkl', 'rb') as dataset_file:
    second_innings_win_portability_df = pickle.load(dataset_file)
second_innings_win_portability_df

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr,result
0,St Kitts and Nevis Patriots,St Lucia Kings,St Lucia,0,175,175,119,10,0.00,8.82,0
1,St Kitts and Nevis Patriots,St Lucia Kings,St Lucia,3,175,172,118,10,9.00,8.75,0
2,St Kitts and Nevis Patriots,St Lucia Kings,St Lucia,4,175,171,117,10,8.00,8.77,0
3,St Kitts and Nevis Patriots,St Lucia Kings,St Lucia,5,175,170,116,10,7.50,8.79,0
4,St Kitts and Nevis Patriots,St Lucia Kings,St Lucia,6,175,169,115,10,7.20,8.82,0
...,...,...,...,...,...,...,...,...,...,...,...
32028,Jamaica Tallawahs,St Lucia Kings,Lauderhill,169,195,26,5,4,8.82,31.20,0
32029,Jamaica Tallawahs,St Lucia Kings,Lauderhill,169,195,26,4,4,8.74,39.00,0
32030,Jamaica Tallawahs,St Lucia Kings,Lauderhill,170,195,25,3,4,8.72,50.00,0
32031,Jamaica Tallawahs,St Lucia Kings,Lauderhill,171,195,24,2,4,8.69,72.00,0


# Test and Train Split

In [41]:
# Split
# Separate the 'result' target from the feature columns, then hold out 10% of
# the rows for evaluation, using a fixed seed so the split is repeatable.
# NOTE(review): the rows look like successive states of the same matches; a
# random row-level split may place states from one match in both train and
# test sets — confirm this is intended.
y2 = second_innings_win_portability_df['result'].values
X2 = second_innings_win_portability_df.drop(columns=['result'])

X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y2,
    test_size=0.1,
    random_state=1,
)

In [42]:
# Display the training feature frame (the 'result' column was dropped from X2
# before splitting, so only predictor columns appear here).
X2_train

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr
10267,St Lucia Kings,Jamaica Tallawahs,Jamaica,27,171,144,104,10,10.12,8.31
21265,Jamaica Tallawahs,Guyana Amazon Warriors,Providence,21,179,158,102,10,7.00,9.29
26033,Jamaica Tallawahs,Trinbago Knight Riders,Jamaica,37,90,53,77,10,5.16,4.13
4867,St Lucia Kings,Barbados Royals,Barbados,151,196,45,11,5,8.31,24.55
12935,Trinbago Knight Riders,Guyana Amazon Warriors,Trinidad,66,145,79,70,8,7.92,6.77
...,...,...,...,...,...,...,...,...,...,...
17361,St Kitts and Nevis Patriots,Jamaica Tallawahs,Basseterre,86,167,81,58,7,8.32,8.38
5210,Trinbago Knight Riders,St Kitts and Nevis Patriots,Trinidad,84,150,66,27,3,5.42,14.67
12219,Jamaica Tallawahs,Guyana Amazon Warriors,Guyana,37,157,120,78,8,5.29,9.23
236,Trinbago Knight Riders,St Lucia Kings,St Lucia,5,168,163,108,9,2.50,9.06


In [43]:
# Column Transformation
# One-hot encode the categorical columns (dropping the first level of each)
# and pass every remaining column through unchanged.
# The dense-output flag was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so try the new
# spelling first and fall back for older installations.
try:
    categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:  # scikit-learn < 1.2 only accepts `sparse`
    categorical_encoder = OneHotEncoder(sparse=False, drop='first')

trf2 = ColumnTransformer(
    [('trf2', categorical_encoder, ['batting_team', 'bowling_team', 'city'])],
    remainder='passthrough',
)

In [44]:
# Pipeline
# Chain the column transformer with a default-configured logistic regression.
# NOTE(review): the passthrough numeric columns reach the classifier unscaled
# and warnings are globally silenced at the top of the notebook, so a
# ConvergenceWarning would go unseen — confirm the fit actually converges.
pipeline_steps = [
    ('step1', trf2),
    ('step2', LogisticRegression()),
]
pipe2 = Pipeline(steps=pipeline_steps)

In [45]:
# Train Model
# Fit the whole pipeline (column transformer + classifier) on the training split.
pipe2.fit(X2_train,y2_train)

In [46]:
# Spot check: predicted class probabilities for the test row at position 10.
pipe2.predict_proba(X2_test)[10]

array([0.08845385, 0.91154615])

In [47]:
# Predicted labels for the test split; the evaluation cells that follow
# compare these against y2_test. (No accuracy is computed in this cell.)
y2_pred = pipe2.predict(X2_test)

In [48]:
# Confusion Matrix
# Called as (y_true, y_pred): rows are the true labels, columns the predicted labels.
confusion_matrix(y2_test, y2_pred)

array([[1553,  330],
       [ 310,  999]])

In [49]:
# Classification Report
# Per-class precision, recall and F1 on the test split.
print(classification_report(y2_test, y2_pred))

              precision    recall  f1-score   support

           0       0.83      0.82      0.83      1883
           1       0.75      0.76      0.76      1309

    accuracy                           0.80      3192
   macro avg       0.79      0.79      0.79      3192
weighted avg       0.80      0.80      0.80      3192



# Result Prediction

In [51]:
# Prediction
# Score a single hand-specified match situation with the fitted pipeline.
# The derived features (runs_left, crr, rrr) are computed from the base inputs
# rather than typed in, so they always agree with each other. The previously
# hard-coded crr=7.80 / rrr=9.29 did not match 32 runs with 95 balls left.
# The formulas mirror the dataset rows displayed after loading, e.g. 3 runs
# with 118 balls left -> crr 9.00, and 175 runs off 119 balls -> rrr 8.82.
TOTAL_BALLS = 120  # 118 balls left corresponds to 2 balls bowled in the dataset

current_score = 32
target_runs = 176
balls_left = 95
wickets_left = 10

runs_left = target_runs - current_score
balls_bowled = TOTAL_BALLS - balls_left
crr = round(current_score * 6 / balls_bowled, 2)  # current run rate per over
rrr = round(runs_left * 6 / balls_left, 2)        # required run rate per over

input_df = pd.DataFrame({
    'batting_team': ['Trinbago Knight Riders'],
    'bowling_team': ['Jamaica Tallawahs'],
    'city': ['Providence'],
    'current_score': [current_score],
    'target_runs': [target_runs],
    'runs_left': [runs_left],
    'balls_left': [balls_left],
    'wickets_left': [wickets_left],
    'crr': [crr],
    'rrr': [rrr],
})
result = pipe2.predict_proba(input_df)

# The notebook reads probability column 0 as a loss and column 1 as a win
# for the batting (chasing) team.
loss = result[0][0]
win = result[0][1]

print('win: '+str(round(win*100))+'%')
print('loss: '+str(round(loss*100))+'%')

win: 22%
loss: 78%


# Model Export

In [52]:
# Model Export
# Serialize the fitted pipeline to disk. The context manager guarantees the
# file is flushed and closed once the dump completes (the previous bare open()
# left the handle open until garbage collection).
with open('cpl_mens_second_innings_final_model.pkl', 'wb') as model_file:
    pickle.dump(pipe2, model_file)