In [14]:
#Python Library
import warnings
warnings.filterwarnings("ignore")
import math
import numpy as np
import pandas as pd
import pickle

# ML Library

In [15]:
#ML Library
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, confusion_matrix, classification_report

from sklearn.model_selection import train_test_split

# Load Dataset

In [16]:
#load data
# Read the pickled second-innings dataset. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
with open('wbl_mens_second_innings_win.pkl', 'rb') as dataset_file:
    second_innings_win_portability_df = pickle.load(dataset_file)
second_innings_win_portability_df

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr,result
0,Melbourne Renegades,Adelaide Strikers,Sydney,0,117,117,119,10,0.00,5.90,0
1,Melbourne Renegades,Adelaide Strikers,Sydney,1,117,116,118,10,3.00,5.90,0
2,Melbourne Renegades,Adelaide Strikers,Sydney,1,117,116,117,10,2.00,5.95,0
3,Melbourne Renegades,Adelaide Strikers,Sydney,1,117,116,116,9,1.50,6.00,0
4,Melbourne Renegades,Adelaide Strikers,Sydney,1,117,116,115,9,1.20,6.05,0
...,...,...,...,...,...,...,...,...,...,...,...
36112,Sydney Sixers,Hobart Hurricanes,Melbourne,46,87,41,86,10,8.12,2.86,1
36113,Sydney Sixers,Hobart Hurricanes,Melbourne,46,87,41,85,10,7.89,2.89,1
36114,Sydney Sixers,Hobart Hurricanes,Melbourne,50,87,37,84,10,8.33,2.64,1
36115,Sydney Sixers,Hobart Hurricanes,Melbourne,54,87,33,83,10,8.76,2.39,1


# Test and Train Split

In [17]:
#Split
# Features are every column except the target; the target is the 'result'
# column taken as an array.
X2 = second_innings_win_portability_df.drop('result', axis=1)
y2 = second_innings_win_portability_df['result'].values

# Hold out 10% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the rows look like consecutive ball-by-ball snapshots of the same
# matches, so a random row-level split may place one match in both sets and
# inflate the test scores — confirm, and consider a per-match split.
split_parts = train_test_split(X2, y2, random_state=1, test_size=0.1)
X2_train, X2_test, y2_train, y2_test = split_parts

In [18]:
#Display
# Show the training feature frame (every column except 'result') as a quick
# sanity check of the split.
X2_train

Unnamed: 0,batting_team,bowling_team,city,current_score,target_runs,runs_left,balls_left,wickets_left,crr,rrr
35039,Sydney Sixers,Adelaide Strikers,Sydney,39,148,109,66,6,4.33,9.91
3840,Hobart Hurricanes,Melbourne Stars,Hobart,13,136,123,115,10,15.60,6.42
28444,Sydney Thunder,Sydney Sixers,Mackay,48,95,47,68,8,5.54,4.15
13711,Sydney Thunder,Brisbane Heat,Sydney,6,141,135,108,9,3.00,7.50
34892,Adelaide Strikers,Brisbane Heat,Adelaide,27,155,128,88,9,5.06,8.73
...,...,...,...,...,...,...,...,...,...,...
7844,Sydney Sixers,Melbourne Stars,Melbourne,103,107,4,13,5,5.78,1.85
32627,Sydney Thunder,Brisbane Heat,Hobart,148,164,16,2,2,7.53,48.00
5215,Hobart Hurricanes,Sydney Sixers,Hobart,70,153,83,32,4,4.77,15.56
12212,Sydney Thunder,Perth Scorchers,Perth,0,128,128,117,9,0.00,6.56


In [19]:
#Column Transformation
# One-hot encode the three categorical columns (dropping the first level of
# each); every other column is passed through unchanged.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so prefer the new name and fall back to the old one only on
# releases that do not know it yet.
try:
    categorical_encoder = OneHotEncoder(sparse_output=False, drop='first')
except TypeError:
    categorical_encoder = OneHotEncoder(sparse=False, drop='first')

trf2 = ColumnTransformer([
    ('trf2', categorical_encoder, ['batting_team', 'bowling_team', 'city'])
]
,remainder='passthrough')

In [20]:
#Pipeline
# Two stages: the column transformer (categorical encoding + passthrough)
# followed by a logistic regression classifier with default settings.
pipeline_steps = [
    ('step1', trf2),
    ('step2', LogisticRegression()),
]
pipe2 = Pipeline(steps=pipeline_steps)

In [21]:
#Train Model
# Fit the encoder and the classifier together on the training portion only.
pipe2.fit(X=X2_train, y=y2_train)

In [22]:
# Class probabilities for every test row; display the row at position 10 as a spot check.
test_probabilities = pipe2.predict_proba(X2_test)
test_probabilities[10]

array([0.03361393, 0.96638607])

In [23]:
#Accuracy on test data
# Predict the class label for every held-out row, then report the fraction of
# labels that match the true results (the cell previously stopped at the
# predictions and never computed the accuracy it is labelled with).
y2_pred = pipe2.predict(X2_test)
accuracy_score(y2_test, y2_pred)

In [24]:
#Confusion Matrix
# Rows are the true labels and columns the predicted labels.
confusion_matrix(y_true=y2_test, y_pred=y2_pred)

array([[1409,  296],
       [ 275, 1619]])

In [25]:
#Classification Report
# The report is returned as preformatted text, so print it to keep the table
# layout instead of showing the escaped string repr.
report_text = classification_report(y_true=y2_test, y_pred=y2_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.84      0.83      0.83      1705
           1       0.85      0.85      0.85      1894

    accuracy                           0.84      3599
   macro avg       0.84      0.84      0.84      3599
weighted avg       0.84      0.84      0.84      3599



# Result Prediction

In [26]:
#Prediction
# Scoreboard state of the chase to evaluate. Only the raw values are entered by
# hand; the derived features are computed from them so they cannot drift out of
# sync with the score (hand-typed run rates are easy to get wrong).
current_score = 32
target_runs = 176
balls_left = 95
# Innings length in balls, inferred from the training rows (balls_left is 119
# after the first ball) — TODO confirm for shortened matches.
total_balls = 120

runs_left = target_runs - current_score
balls_bowled = total_balls - balls_left
# Run rates are runs per six-ball over, rounded to 2 decimals like the training
# rows shown when the dataset is displayed.
# NOTE(review): the 0.0 fallbacks only avoid ZeroDivisionError before the first
# ball / after the last ball; confirm how the dataset encodes those states.
crr = round(current_score * 6 / balls_bowled, 2) if balls_bowled > 0 else 0.0
rrr = round(runs_left * 6 / balls_left, 2) if balls_left > 0 else 0.0

input_df = pd.DataFrame({
    'batting_team': ['Perth Scorchers'],
    'bowling_team': ['Brisbane Heat'],
    'city': ['Perth'],
    'current_score': [current_score],
    'target_runs': [target_runs],
    'runs_left': [runs_left],
    'balls_left': [balls_left],
    'wickets_left': [10],
    'crr': [crr],
    'rrr': [rrr],
})
result = pipe2.predict_proba(input_df)

# predict_proba returns one row per input row; column 0 is class 0 and column 1
# is class 1 (presumably 1 = the batting side wins the chase — verify against
# how 'result' was built).
loss = result[0][0]
win = result[0][1]

print(f'win: {round(win*100)}%')
print(f'loss: {round(loss*100)}%')

win: 14%
loss: 86%


# Model Export

In [27]:
#Save model
# Serialize the fitted pipeline to disk. The context manager flushes and closes
# the file even if pickling fails, so the written model file is never left
# half-flushed behind a dangling handle.
with open('wbl_mens_second_innings_final_model.pkl', 'wb') as model_file:
    pickle.dump(pipe2, model_file)