In [1]:
#Python Library
import warnings
warnings.filterwarnings("ignore")

import math
import numpy as np
import pandas as pd
import pickle

# ML Library

In [2]:
#ML Library
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import r2_score, mean_absolute_error, accuracy_score, confusion_matrix, classification_report

from sklearn.model_selection import train_test_split

# Load Dataset

In [4]:
#load data
# Read the pickled second-innings dataset. The context manager guarantees the
# file handle is closed as soon as the object has been deserialised.
# NOTE: pickle can execute arbitrary code while loading; only open files from a trusted source.
with open('t20_international_mens_second_innings_win.pkl', 'rb') as dataset_file:
    second_innings_win_portability_df = pickle.load(dataset_file)

# Preview the first rows as the cell output
second_innings_win_portability_df.head()

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,target_runs,crr,rrr,result
0,Sri Lanka,Australia,Melbourne,168,119,10,169,6.0,8.47,1
1,Sri Lanka,Australia,Melbourne,167,118,10,169,6.0,8.49,1
2,Sri Lanka,Australia,Melbourne,167,117,10,169,4.0,8.56,1
3,Sri Lanka,Australia,Melbourne,167,116,10,169,3.0,8.64,1
4,Sri Lanka,Australia,Melbourne,164,115,10,169,6.0,8.56,1


# Test and Train Split

In [6]:
#Split
# Target: the `result` column as a plain array; features: every remaining column.
y2 = second_innings_win_portability_df['result'].values
X2 = second_innings_win_portability_df.drop(columns=['result'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the head() preview shows consecutive snapshots of the same chase, so a
# row-level random split may place near-identical rows of one match in both sets —
# consider a per-match (grouped) split if a match identifier is available.
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y2,
    test_size=0.2,
    random_state=1,
)

In [7]:
#Display the training feature frame (the index keeps the original row labels,
#which are shuffled by the random split; pandas truncates the rendered rows)
X2_train

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets_left,target_runs,crr,rrr
7613,Afghanistan,Zimbabwe,Sharjah,79,87,8,121,7.64,5.45
4913,New Zealand,Pakistan,Mount Maunganui,129,71,9,182,6.49,10.90
1775,United Arab Emirates,Afghanistan,Dubai,41,21,5,162,7.33,11.71
27127,West Indies,South Africa,St George's,85,49,7,169,7.10,10.41
6468,Scotland,Pakistan,Edinburgh,130,77,6,167,5.16,10.13
...,...,...,...,...,...,...,...,...,...
50248,England,Pakistan,Karachi,144,108,10,159,7.50,8.00
98418,Zimbabwe,Afghanistan,Sharjah,92,18,2,216,7.29,30.67
5213,India,England,Manchester,16,21,8,160,8.73,4.57
77997,New Zealand,West Indies,Lauderhill,107,44,4,178,5.61,14.59


In [8]:
#Column Transformation
# One-hot encode the three categorical columns (dropping the first level of each);
# every other column is passed through unchanged via remainder='passthrough'.
#
# The encoder's `sparse=False` argument was deprecated in scikit-learn 1.2 and removed
# in 1.4, so it is no longer passed. Dense output is requested on the ColumnTransformer
# instead: sparse_threshold=0 always returns a dense array, which works on both old and
# new scikit-learn releases and yields the same matrix as before.
trf2 = ColumnTransformer(
    transformers=[
        ('trf2', OneHotEncoder(drop='first'), ['batting_team', 'bowling_team', 'city']),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

In [9]:
#Pipeline
# Chain the column transformer with a liblinear logistic regression so the raw
# feature frame can be handed straight to fit / predict / predict_proba.
pipe2 = Pipeline([
    ('step1', trf2),
    ('step2', LogisticRegression(solver='liblinear')),
])

In [10]:
#Train Model
# Fit the encoder and the classifier on the training split; fit() returns the
# pipeline itself, which is rendered as this cell's output.
pipe2.fit(X=X2_train, y=y2_train)

In [18]:
#Class probabilities for a single held-out row
# Only the row at position 10 is scored instead of the whole test set; the double
# brackets keep it a one-row DataFrame (so column names are preserved for the
# transformer) and [0] unwraps the single [P(class 0), P(class 1)] pair.
pipe2.predict_proba(X2_test.iloc[[10]])[0]

array([0.32274732, 0.67725268])

In [11]:
#Accuracy on test data
# Predicted class labels for the held-out rows (reused by the cells below)
y2_pred = pipe2.predict(X2_test)

# Fraction of test rows whose predicted label equals the true label
accuracy_score(y2_test, y2_pred)

In [13]:
#Confusion Matrix
# Rows are the true classes, columns the predicted classes (class 0 first, then class 1)
confusion_matrix(y_true=y2_test, y_pred=y2_pred)

array([[9306, 1418],
       [1399, 7891]], dtype=int64)

In [15]:
#Classification Report
# Per-class precision / recall / F1 on the test split; print() is needed so the
# multi-line report string is rendered with its line breaks.
print(classification_report(y_true=y2_test, y_pred=y2_pred))

              precision    recall  f1-score   support

           0       0.87      0.87      0.87     10724
           1       0.85      0.85      0.85      9290

    accuracy                           0.86     20014
   macro avg       0.86      0.86      0.86     20014
weighted avg       0.86      0.86      0.86     20014



# Result Prediction

In [20]:
#Prediction
# One hypothetical chase situation, keyed by the feature columns used for training
match_state = {
    'batting_team': 'Australia',
    'bowling_team': 'Sri Lanka',
    'city': 'Melbourne',
    'runs_left': 100,
    'balls_left': 69,
    'wickets_left': 7,
    'target_runs': 169,
    'crr': 8.11,
    'rrr': 8.69,
}
result = pipe2.predict_proba(pd.DataFrame([match_state]))

# First column is the probability of class 0 (loss), second of class 1 (win)
loss, win = result[0]

# Report both as whole-number percentages
print(f'win: {round(win*100)}%')
print(f'loss: {round(loss*100)}%')

win: 39%
loss: 61%


# Model Extraction

In [None]:
#Model Extraction
# Serialise the fitted pipeline (encoder + classifier) to disk. The context manager
# closes the file deterministically, so the pickle is fully flushed before the cell
# finishes instead of whenever the dangling handle happens to be garbage-collected.
# NOTE(review): 'tournament_name' in the file name looks like an unfilled template
# placeholder — confirm the intended output name before relying on this artefact.
with open('tournament_name_second_innings_final_model.pkl', 'wb') as model_file:
    pickle.dump(pipe2, model_file)