<h1>T20 Cricket Win Predictor</h1>
This notebook develops a model that predicts the winner of a T20 cricket match. Only data from the second innings (the run chase) is used.

In [415]:
# Load the packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Seed every random number generator the notebook relies on.
# NumPy's seed alone does not cover TensorFlow, whose generator drives the
# layer weight initialisation and the Dropout masks.
np.random.seed(42)
tf.random.set_seed(42)

In [416]:
# Load the required data.
# The first CSV column is the row index written when the file was saved;
# reading it as the index keeps it from appearing as a spurious
# "Unnamed: 0" data column.
win_pred_data = pd.read_csv(r"../Resources/win_pred_data.csv", index_col=0)
win_pred_data.head()

Unnamed: 0,match code,country,over,ball,score,total wickets,win,target,other rating,rating,balls
0,1381452,Germany,0,0,2,0,1,122,58,96,0
1,1381452,Germany,0,1,3,0,1,122,58,96,1
2,1381452,Germany,0,2,4,0,1,122,58,96,2
3,1381452,Germany,0,3,4,0,1,122,58,96,3
4,1381452,Germany,0,4,6,0,1,122,58,96,4


In [417]:
# Turn the ball-by-ball table into fixed-length numpy sequences, one per match
innings = win_pred_data.groupby('match code')
feature_columns = ['balls', 'score', 'target', 'total wickets','rating', 'other rating']

# One (120, 6) feature matrix and one (120, 1) label column per match
input_sequences = []
target_outputs = []

for _, match_rows in innings:
    # Keep at most the first 120 rows; shorter innings are zero-padded at the end
    ball_features = match_rows[feature_columns].values[:120]
    rows_short = 120 - len(ball_features)
    if rows_short > 0:
        ball_features = np.pad(ball_features, ((0, rows_short), (0, 0)), mode='constant')

    # The 'win' value of the match's last row is repeated for every time step
    final_win = match_rows['win'].values[-1]

    input_sequences.append(ball_features)
    target_outputs.append(np.full((120, 1), final_win))

X = np.array(input_sequences)
print(X.shape)
Y = np.array(target_outputs)
print(Y.shape)

(1622, 120, 6)
(1622, 120, 1)


In [418]:
# Split into train/dev and test sets (80% / 10% / 10% of the matches).
#
# Each match's label is the same at all 120 time steps (it was built with
# np.full), so the label of the first step is enough to stratify on.
# Stratifying on this 1-D vector lets the 3-D arrays be split directly along
# their first axis, so X and Y are no longer overwritten with flattened
# copies and the cell can be re-run safely.
match_outcome = Y[:, 0, 0]

X_train, X_temp, Y_train, Y_temp = train_test_split(
    X, Y, test_size=0.2, stratify=match_outcome, random_state=42
)
# Halve the held-out 20% into dev and test, again balanced on the outcome
X_dev, X_test, Y_dev, Y_test = train_test_split(
    X_temp, Y_temp, test_size=0.5, stratify=Y_temp[:, 0, 0], random_state=42
)

print(X_train.shape, X_dev.shape, X_test.shape)
print(Y_train.shape, Y_dev.shape, Y_test.shape)

(1297, 120, 6) (162, 120, 6) (163, 120, 6)
(1297, 120, 1) (162, 120, 1) (163, 120, 1)


In [262]:
# Create the model: two stacked SimpleRNN layers that emit one output per
# time step, followed by a per-step sigmoid unit giving a win probability
# for each of the 120 positions in the sequence.
rnn_layers = [
    tf.keras.layers.SimpleRNN(128, activation='relu', return_sequences=True, input_shape=(120, 6)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.SimpleRNN(64, activation='relu', return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1, activation='sigmoid')),
]
win_pred_rnn = tf.keras.Sequential(rnn_layers)

# Compile the model with Adam; the optimizer is kept in its own variable
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
win_pred_rnn.compile(optimizer=optimizer, loss='BinaryCrossentropy', metrics=['accuracy'])

# Print model summary
win_pred_rnn.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_8 (SimpleRNN)    (None, 120, 128)          17152     
                                                                 
 batch_normalization_4 (Bat  (None, 120, 128)          512       
 chNormalization)                                                
                                                                 
 dropout_8 (Dropout)         (None, 120, 128)          0         
                                                                 
 simple_rnn_9 (SimpleRNN)    (None, 120, 64)           12352     
                                                                 
 dropout_9 (Dropout)         (None, 120, 64)           0         
                                                                 
 time_distributed_4 (TimeDi  (None, 120, 1)            65        
 stributed)                                           

In [419]:
# Load the model
from tensorflow.keras.models import load_model

# Location of the previously saved model; loading it rebinds `win_pred_rnn`,
# replacing the freshly built model from the cell above.
MODEL_PATH = r"../Models/win_pred_rnn_test.h5"
win_pred_rnn = load_model(MODEL_PATH)

In [355]:
# Change learning rate here.
# Go through the optimizer attached to the current model. After load_model()
# the module-level `optimizer` variable still points at the optimizer of the
# model that was replaced, so assigning to it would not affect training.
win_pred_rnn.optimizer.learning_rate.assign(0.0001)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=1e-04>

In [410]:
# Train the model for one more epoch, validating on the dev split
win_pred_rnn.fit(
    X_train,
    Y_train,
    validation_data=(X_dev, Y_dev),
    batch_size=8,
    epochs=1,
)



<keras.src.callbacks.History at 0x1691f897a90>

In [420]:
# Accuracy measured only from time step `threshold` onwards; with 119 that is
# just the final position of each 120-step sequence.
threshold = 119
Y_test_acc = Y_test[:, threshold:, :].reshape(-1, 1)

# Probabilities at the same positions, turned into win/lose calls at 0.5
predicted_probs = win_pred_rnn.predict(X_test)[:, threshold:, :].reshape(-1, 1)
predictions = predicted_probs >= 0.5

# Fraction of positions where the call matches the label
test_acc = np.mean(Y_test_acc == predictions)
print(f"Accuracy: {test_acc * 100:.2f}")


Accuracy: 95.09


In [421]:
# Evaluate on the full test sequences (labels at every one of the 120 steps).
# NOTE(review): the two values shown are presumably [loss, accuracy] averaged
# over all time steps, which would explain why this accuracy is lower than the
# last-step accuracy computed above - confirm against the saved model's
# compile settings.
win_pred_rnn.evaluate(X_test,Y_test)



[0.36622539162635803, 0.8397239446640015]

In [422]:
# Collect the indices of the test examples whose prediction at the final time
# step (index 119), thresholded at 0.5, disagrees with the label
preds = win_pred_rnn.predict(X_test, verbose = False)
final_labels = Y_test[:, 119, 0]
final_calls = preds[:, 119, 0] >= 0.5
mis_pred = np.flatnonzero(final_labels != final_calls).tolist()
print(mis_pred)

[12, 25, 45, 84, 112, 122, 134, 159]


In [423]:
# Look at a single test example: its input rows, its labels, and the model's
# predicted probability at every time step
num = 12
example_inputs, example_labels = X_test[num], Y_test[num]
print(example_inputs, example_labels)
win_pred_rnn.predict(X_test)[num]

[[  0   0 187   0 242 264]
 [  1   1 187   0 242 264]
 [  2   1 187   0 242 264]
 [  3   6 187   0 242 264]
 [  4   7 187   0 242 264]
 [  5   7 187   0 242 264]
 [  6   8 187   0 242 264]
 [  7   8 187   0 242 264]
 [  8   9 187   0 242 264]
 [  9   9 187   0 242 264]
 [ 10  10 187   0 242 264]
 [ 11  14 187   0 242 264]
 [ 12  15 187   0 242 264]
 [ 13  15 187   0 242 264]
 [ 14  17 187   0 242 264]
 [ 15  17 187   0 242 264]
 [ 16  18 187   0 242 264]
 [ 17  22 187   0 242 264]
 [ 18  23 187   0 242 264]
 [ 19  24 187   0 242 264]
 [ 20  25 187   0 242 264]
 [ 21  26 187   0 242 264]
 [ 22  27 187   0 242 264]
 [ 23  27 187   0 242 264]
 [ 24  28 187   0 242 264]
 [ 25  30 187   0 242 264]
 [ 26  34 187   0 242 264]
 [ 27  34 187   0 242 264]
 [ 28  34 187   0 242 264]
 [ 29  34 187   0 242 264]
 [ 30  34 187   1 242 264]
 [ 31  38 187   1 242 264]
 [ 32  39 187   1 242 264]
 [ 33  40 187   1 242 264]
 [ 34  41 187   1 242 264]
 [ 35  41 187   1 242 264]
 [ 36  47 187   1 242 264]
 

array([[0.3598092 ],
       [0.27351868],
       [0.17138234],
       [0.16211498],
       [0.17856689],
       [0.1744372 ],
       [0.15639506],
       [0.14102532],
       [0.12709607],
       [0.11240356],
       [0.10424384],
       [0.10288802],
       [0.10641194],
       [0.10946681],
       [0.11179678],
       [0.11304229],
       [0.11406986],
       [0.11880958],
       [0.12558642],
       [0.1314778 ],
       [0.13488178],
       [0.13686529],
       [0.13895227],
       [0.13875094],
       [0.13751924],
       [0.1383145 ],
       [0.14563726],
       [0.1541794 ],
       [0.15795791],
       [0.15625969],
       [0.14541565],
       [0.13857119],
       [0.1353137 ],
       [0.13162254],
       [0.12810111],
       [0.12435809],
       [0.1278653 ],
       [0.13808104],
       [0.14846848],
       [0.15470426],
       [0.15615417],
       [0.15694253],
       [0.15780129],
       [0.15627824],
       [0.15386875],
       [0.14998177],
       [0.14659256],
       [0.143

In [258]:
#Save the model
#win_pred_rnn.save(r"../Models/win_pred_rnn_test.h5")

## Using the model to make predictions
This section shows how the model is used during a match: after every ball the latest score and wickets are entered, and the win prediction is updated.


In [424]:
# Load the model
import numpy as np
from tensorflow.keras.models import load_model

# Load the saved model from disk and show its layer structure
MODEL_PATH = r"../Models/win_pred_rnn_test.h5"
win_pred_rnn = load_model(MODEL_PATH)
win_pred_rnn.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_20 (SimpleRNN)   (None, 120, 128)          17280     
                                                                 
 batch_normalization_4 (Bat  (None, 120, 128)          512       
 chNormalization)                                                
                                                                 
 dropout_20 (Dropout)        (None, 120, 128)          0         
                                                                 
 simple_rnn_21 (SimpleRNN)   (None, 120, 64)           12352     
                                                                 
 dropout_21 (Dropout)        (None, 120, 64)           0         
                                                                 
 time_distributed_10 (TimeD  (None, 120, 1)            65        
 istributed)                                         

In [428]:
# Now take input at every ball and return the updated prediction.
#
# After each ball the user types the cumulative score and wickets; the row for
# that ball is filled in and the model's output at that position is printed.
# Pressing Enter on an empty line ends the session cleanly, and a malformed
# entry is asked for again instead of aborting the loop with a ValueError.

def _read_score_and_wickets(prompt):
    """Prompt until two integers are entered.

    Returns a (score, wickets) tuple, or None when the user submits a blank
    line to stop.
    """
    while True:
        entry = input(prompt).strip()
        if not entry:
            return None
        try:
            score, wickets = (int(x) for x in entry.split())
        except ValueError:
            # Wrong number of values or non-numeric text: ask again
            print("Please enter two whole numbers separated by a space, or press Enter to stop.")
            continue
        return score, wickets


print("Starting the Match...")
rating = int(input("Enter the rating of the country currently batting: "))
other_rating = int(input("Enter the rating of the other country: "))
target = int(input("Enter the target to be chased: "))

# One sequence of 120 rows x 6 features; rows not yet played stay at zero.
# NOTE(review): presumably the zero rows after position i do not influence the
# prediction at position i because the recurrent layers run forward in time -
# confirm for the saved model.
input_array = np.zeros((1, 120, 6))

# NOTE(review): the loop covers balls 1-119, so the last row is never filled -
# confirm this is intended.
for i in range(119):
    entry = _read_score_and_wickets(f"Enter the score and wickets after ball {i+1} separated by space: ")
    if entry is None:
        print("Stopping the predictions.")
        break
    score, wickets = entry
    # Column order matches the training features:
    # balls, score, target, total wickets, rating, other rating
    input_array[0, i] = (i, score, target, wickets, rating, other_rating)
    print(f"The current prediction of win %age is: {(win_pred_rnn.predict(input_array, verbose = 0)[0,i,0]) * 100: .2f} %")

Starting the Match...


Enter the rating of the country currently batting:  193
Enter the rating of the other country:  264
Enter the target to be chased:  186
Enter the score and wickets after ball 1 separated by space:  0 0


The current prediction of win %age is:  32.65 %


Enter the score and wickets after ball 2 separated by space:  0 0


The current prediction of win %age is:  15.50 %


Enter the score and wickets after ball 3 separated by space:  5 0


The current prediction of win %age is:  8.57 %


Enter the score and wickets after ball 4 separated by space:  5 0


The current prediction of win %age is:  7.72 %


Enter the score and wickets after ball 5 separated by space:  5 0


The current prediction of win %age is:  7.43 %


Enter the score and wickets after ball 6 separated by space:  8 0


The current prediction of win %age is:  5.34 %


Enter the score and wickets after ball 7 separated by space:  13 0


The current prediction of win %age is:  4.59 %


Enter the score and wickets after ball 8 separated by space:  13 0


The current prediction of win %age is:  4.44 %


Enter the score and wickets after ball 9 separated by space:  17 0


The current prediction of win %age is:  4.11 %


Enter the score and wickets after ball 10 separated by space:  18 0


The current prediction of win %age is:  3.81 %


Enter the score and wickets after ball 11 separated by space:  18 0


The current prediction of win %age is:  3.70 %


Enter the score and wickets after ball 12 separated by space:  18 0


The current prediction of win %age is:  3.57 %


Enter the score and wickets after ball 13 separated by space:  19 0


The current prediction of win %age is:  3.46 %


Enter the score and wickets after ball 14 separated by space:  19 0


The current prediction of win %age is:  3.37 %


Enter the score and wickets after ball 15 separated by space:  19 1


The current prediction of win %age is:  3.13 %


Enter the score and wickets after ball 16 separated by space:  19 1


The current prediction of win %age is:  2.84 %


Enter the score and wickets after ball 17 separated by space:  19 1


The current prediction of win %age is:  2.59 %


Enter the score and wickets after ball 18 separated by space:  19 2


The current prediction of win %age is:  2.31 %


Enter the score and wickets after ball 19 separated by space:  19 2


The current prediction of win %age is:  2.08 %


Enter the score and wickets after ball 20 separated by space:  20 2


The current prediction of win %age is:  1.94 %


Enter the score and wickets after ball 21 separated by space:  20 2


The current prediction of win %age is:  1.86 %


Enter the score and wickets after ball 22 separated by space:  20 2


The current prediction of win %age is:  1.82 %


Enter the score and wickets after ball 23 separated by space:  20 2


The current prediction of win %age is:  1.80 %


Enter the score and wickets after ball 24 separated by space:  22 2


The current prediction of win %age is:  1.83 %


Enter the score and wickets after ball 25 separated by space:  22 2


The current prediction of win %age is:  1.87 %


Enter the score and wickets after ball 26 separated by space:  22 2


The current prediction of win %age is:  1.90 %


Enter the score and wickets after ball 27 separated by space:  22 2


The current prediction of win %age is:  1.90 %


Enter the score and wickets after ball 28 separated by space:  23 2


The current prediction of win %age is:  1.92 %


Enter the score and wickets after ball 29 separated by space:  24 2


The current prediction of win %age is:  1.95 %


Enter the score and wickets after ball 30 separated by space:  24 2


The current prediction of win %age is:  1.97 %


Enter the score and wickets after ball 31 separated by space:  28 2


The current prediction of win %age is:  2.04 %


Enter the score and wickets after ball 32 separated by space:  28 3


The current prediction of win %age is:  2.04 %


Enter the score and wickets after ball 33 separated by space:  28 3


The current prediction of win %age is:  1.97 %


Enter the score and wickets after ball 34 separated by space:  30 3


The current prediction of win %age is:  1.92 %


Enter the score and wickets after ball 35 separated by space:  30 3


The current prediction of win %age is:  1.88 %


Enter the score and wickets after ball 36 separated by space:  31 3


The current prediction of win %age is:  1.86 %


Enter the score and wickets after ball 37 separated by space:  31 3


The current prediction of win %age is:  1.84 %


Enter the score and wickets after ball 38 separated by space:  32 3


The current prediction of win %age is:  1.83 %


Enter the score and wickets after ball 39 separated by space:  36 3


The current prediction of win %age is:  1.88 %


Enter the score and wickets after ball 40 separated by space:  40 3


The current prediction of win %age is:  1.99 %


Enter the score and wickets after ball 41 separated by space:  41 3


The current prediction of win %age is:  2.05 %


Enter the score and wickets after ball 42 separated by space:  42 3


The current prediction of win %age is:  2.08 %


Enter the score and wickets after ball 43 separated by space:  42 3


The current prediction of win %age is:  2.07 %


Enter the score and wickets after ball 44 separated by space:  42 3


The current prediction of win %age is:  2.04 %


Enter the score and wickets after ball 45 separated by space:  43 3


The current prediction of win %age is:  2.01 %


Enter the score and wickets after ball 46 separated by space:  46 3


The current prediction of win %age is:  2.04 %


Enter the score and wickets after ball 47 separated by space:  47 3


The current prediction of win %age is:  2.05 %


Enter the score and wickets after ball 48 separated by space:  53 3


The current prediction of win %age is:  2.14 %


Enter the score and wickets after ball 49 separated by space:  55 3


The current prediction of win %age is:  2.23 %


Enter the score and wickets after ball 50 separated by space:  55 3


The current prediction of win %age is:  2.24 %


Enter the score and wickets after ball 51 separated by space:  55 3


The current prediction of win %age is:  2.18 %


Enter the score and wickets after ball 52 separated by space:  56 3


The current prediction of win %age is:  2.15 %


Enter the score and wickets after ball 53 separated by space:  57 3


The current prediction of win %age is:  2.12 %


Enter the score and wickets after ball 54 separated by space:  57 3


The current prediction of win %age is:  2.08 %


Enter the score and wickets after ball 55 separated by space:  58 3


The current prediction of win %age is:  2.02 %


Enter the score and wickets after ball 56 separated by space:  59 3


The current prediction of win %age is:  1.96 %


Enter the score and wickets after ball 57 separated by space:  61 3


The current prediction of win %age is:  1.93 %


Enter the score and wickets after ball 58 separated by space:  62 3


The current prediction of win %age is:  1.88 %


Enter the score and wickets after ball 59 separated by space:  63 3


The current prediction of win %age is:  1.83 %


Enter the score and wickets after ball 60 separated by space:  63 4


The current prediction of win %age is:  1.69 %


Enter the score and wickets after ball 61 separated by space:  64 4


The current prediction of win %age is:  1.54 %


Enter the score and wickets after ball 62 separated by space:  64 4


The current prediction of win %age is:  1.40 %


Enter the score and wickets after ball 63 separated by space:  65 4


The current prediction of win %age is:  1.27 %


Enter the score and wickets after ball 64 separated by space:  71 4


The current prediction of win %age is:  1.24 %


Enter the score and wickets after ball 65 separated by space:  71 4


The current prediction of win %age is:  1.19 %


Enter the score and wickets after ball 66 separated by space:  77 4


The current prediction of win %age is:  1.19 %


Enter the score and wickets after ball 67 separated by space:  78 4


The current prediction of win %age is:  1.17 %


Enter the score and wickets after ball 68 separated by space:  79 4


The current prediction of win %age is:  1.14 %


Enter the score and wickets after ball 69 separated by space:  79 4


The current prediction of win %age is:  1.08 %


Enter the score and wickets after ball 70 separated by space:  80 4


The current prediction of win %age is:  1.04 %


Enter the score and wickets after ball 71 separated by space:  80 4


The current prediction of win %age is:  0.99 %


Enter the score and wickets after ball 72 separated by space:  81 4


The current prediction of win %age is:  0.95 %


Enter the score and wickets after ball 73 separated by space:  85 4


The current prediction of win %age is:  0.94 %


Enter the score and wickets after ball 74 separated by space:  85 4


The current prediction of win %age is:  0.92 %


Enter the score and wickets after ball 75 separated by space:  86 4


The current prediction of win %age is:  0.89 %


Enter the score and wickets after ball 76 separated by space:  87 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 77 separated by space:  88 4


The current prediction of win %age is:  0.83 %


Enter the score and wickets after ball 78 separated by space:  94 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 79 separated by space:  94 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 80 separated by space:  96 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 81 separated by space:  97 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 82 separated by space:  98 4


The current prediction of win %age is:  0.85 %


Enter the score and wickets after ball 83 separated by space:  100 4


The current prediction of win %age is:  0.87 %


Enter the score and wickets after ball 84 separated by space:  102 4


The current prediction of win %age is:  0.89 %


Enter the score and wickets after ball 85 separated by space:  103 4


The current prediction of win %age is:  0.92 %


Enter the score and wickets after ball 86 separated by space:  104 4


The current prediction of win %age is:  0.94 %


Enter the score and wickets after ball 87 separated by space:  105 4


The current prediction of win %age is:  0.96 %


Enter the score and wickets after ball 88 separated by space:  108 4


The current prediction of win %age is:  1.00 %


Enter the score and wickets after ball 89 separated by space:  114 4


The current prediction of win %age is:  1.08 %


Enter the score and wickets after ball 90 separated by space:  114 5


The current prediction of win %age is:  1.11 %


Enter the score and wickets after ball 91 separated by space:  116 5


The current prediction of win %age is:  1.13 %


Enter the score and wickets after ball 92 separated by space:  117 5


The current prediction of win %age is:  1.14 %


Enter the score and wickets after ball 93 separated by space:  123 5


The current prediction of win %age is:  1.22 %


Enter the score and wickets after ball 94 separated by space:  123 6


The current prediction of win %age is:  1.22 %


Enter the score and wickets after ball 95 separated by space:  123 6


The current prediction of win %age is:  1.20 %


Enter the score and wickets after ball 96 separated by space:  124 6


The current prediction of win %age is:  1.18 %


KeyboardInterrupt: Interrupted by user

In [429]:
input_array

array([[[  0.,   0., 186.,   0., 193., 264.],
        [  1.,   0., 186.,   0., 193., 264.],
        [  2.,   5., 186.,   0., 193., 264.],
        [  3.,   5., 186.,   0., 193., 264.],
        [  4.,   5., 186.,   0., 193., 264.],
        [  5.,   8., 186.,   0., 193., 264.],
        [  6.,  13., 186.,   0., 193., 264.],
        [  7.,  13., 186.,   0., 193., 264.],
        [  8.,  17., 186.,   0., 193., 264.],
        [  9.,  18., 186.,   0., 193., 264.],
        [ 10.,  18., 186.,   0., 193., 264.],
        [ 11.,  18., 186.,   0., 193., 264.],
        [ 12.,  19., 186.,   0., 193., 264.],
        [ 13.,  19., 186.,   0., 193., 264.],
        [ 14.,  19., 186.,   1., 193., 264.],
        [ 15.,  19., 186.,   1., 193., 264.],
        [ 16.,  19., 186.,   1., 193., 264.],
        [ 17.,  19., 186.,   2., 193., 264.],
        [ 18.,  19., 186.,   2., 193., 264.],
        [ 19.,  20., 186.,   2., 193., 264.],
        [ 20.,  20., 186.,   2., 193., 264.],
        [ 21.,  20., 186.,   2., 1