In [9]:
#importing libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import livelossplot
import math
import pickle

In [2]:
def _load_pickle(path):
    """Return the object deserialized from the pickle file at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Category vocabularies consumed by get_onehot (players / venue / teams).
players = _load_pickle('Data/Players')
venue = _load_pickle('Data/Venue')
teams = _load_pickle('Data/Teams')
# Presumably the saved feature-column lists for the batting-first / chasing
# models -- TODO confirm; they are not referenced in the cells shown here.
BF_Cols = _load_pickle('Data/BF_Cols')
BS_Cols = _load_pickle('Data/BS_Cols')

In [3]:
def _one_hot_with_vocab(df, column, vocab):
    """One-hot encode ``df[column]`` and pad zero columns for unseen vocabulary.

    Args:
        df: Frame containing ``column``.
        column: Name of the categorical column; also used as the column prefix.
        vocab: Iterable of every category value that must get a column.

    Returns:
        A float DataFrame indexed like ``df`` with one ``<column>_<value>``
        column per value present in ``df[column]`` or listed in ``vocab``.
    """
    # dtype=float keeps observed dummies consistent with the zero-filled padding.
    encoded = pd.get_dummies(df[column], prefix=column, dtype=float)
    # sorted() makes the padded column order deterministic across runs.
    missing = sorted(set(vocab) - set(df[column]))
    padding = pd.DataFrame(
        np.zeros((df.shape[0], len(missing))),
        index=df.index,  # align with df so the column-wise concat adds no NaN rows
        columns=[f"{column}_{value}" for value in missing],
    )
    return pd.concat([encoded, padding], axis=1)


def get_onehot(df):
    """Split ``df`` into a one-hot encoded feature frame and a target frame.

    The categorical columns Toss, Venue, Batting_Team, Bowling_Team, Striker,
    Non_Striker and Bowler are replaced by one-hot columns. Each is padded to
    the full vocabulary taken from the module-level ``teams``, ``venue`` and
    ``players`` collections, so every call yields the same set of columns
    regardless of which categories occur in ``df``.

    Args:
        df: Frame holding the seven categorical columns above, a ``Result``
            column, and any further feature columns (passed through as-is).

    Returns:
        Tuple ``(df_one_hot, df_result)``: the features without the raw
        categorical columns and without ``Result``, and a single-column
        ``Result`` frame. Both keep the index of ``df``.
    """
    # Looked up at call time so the vocabularies may be (re)loaded after definition.
    vocabularies = (
        ("Toss", teams),
        ("Venue", venue),
        ("Batting_Team", teams),
        ("Bowling_Team", teams),
        ("Striker", players),
        ("Non_Striker", players),
        ("Bowler", players),
    )
    encoded = [_one_hot_with_vocab(df, column, vocab) for column, vocab in vocabularies]

    df_result = df[["Result"]].copy()
    raw_columns = [column for column, _ in vocabularies] + ["Result"]
    df_one_hot = pd.concat([df, *encoded], axis=1).drop(columns=raw_columns)
    return df_one_hot, df_result

In [4]:
def get_cont_ids(df):
    """Return positional [start, end) bounds of runs with a constant match key.

    Consecutive rows belong to the same run while their
    (Toss, Venue, Batting_Team, Bowling_Team) values are unchanged.

    Args:
        df: Frame containing the four key columns.

    Returns:
        List of ``[start, end]`` pairs of row *positions* (end exclusive),
        covering every row in order. An empty frame yields ``[[0, 0]]``.
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        return [[0, 0]]

    key_cols = ['Toss', 'Venue', 'Batting_Team', 'Bowling_Team']
    cont_ids = []
    start = 0
    prev = None
    # enumerate() gives row positions; index labels are not safe slice bounds
    # when df does not carry a default 0..n-1 index.
    for pos, curr in enumerate(df[key_cols].itertuples(index=False, name=None)):
        if prev is not None and curr != prev:
            cont_ids.append([start, pos])
            start = pos
        prev = curr
    cont_ids.append([start, n_rows])
    return cont_ids

def get_df_split(df):
    """Cut `df` into the row ranges reported by get_cont_ids.

    Returns a list of DataFrames, one per [start, end) range, each with its
    index reset to start at 0.
    """
    return [
        df[lo:hi].reset_index(drop=True)
        for lo, hi in get_cont_ids(df)
    ]

In [5]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that serves one contiguous segment of `df` per batch.

    The frame is cut by ``get_df_split`` into runs of rows sharing the same
    Toss / Venue / Batting_Team / Bowling_Team values. Each run is one-hot
    encoded with ``get_onehot`` and stored as one ``(inputs, targets)`` pair.

    The leading ``train_fraction`` of the segments forms the training split;
    the remaining tail is the validation split, served when ``validate=True``.

    Attributes:
        inp_cols: Sorted input column names, or None if the split is empty.
        out_cols: Target column names, or None if the split is empty.
        batches: List of ``(inputs, targets)`` NumPy array pairs.
    """

    def __init__(self,
            df: pd.DataFrame,
            shuffle: bool = True,
            validate: bool = False,
            train_fraction: float = 0.80
            ):
        """Build all batches eagerly from `df`.

        Args:
            df: Source frame understood by ``get_df_split`` / ``get_onehot``.
            shuffle: Shuffle the batch order at the end of every epoch.
            validate: Serve the held-out validation split instead of training.
            train_fraction: Share of segments (0..1) assigned to training.

        Raises:
            ValueError: If ``train_fraction`` is outside ``[0, 1]``.
        """
        if not 0.0 <= train_fraction <= 1.0:
            raise ValueError("train_fraction must be between 0 and 1")
        # Lets the Keras base class initialise its own state where it has any.
        super().__init__()
        self.df = df
        self.inp_cols = None
        self.out_cols = None
        self.validate = validate
        self.train_fraction = train_fraction
        self.shuffle = shuffle
        self.batches = self.__fill_batches()
        self.len_dataset = len(self.batches)


    def __shuffle(self):
        """Shuffle the stored batches in place."""
        random.shuffle(self.batches)


    def __fill_batches(self):
        """Encode the segments of the selected split into NumPy batches."""
        df_list = get_df_split(self.df)
        split_at = math.floor(self.train_fraction * len(df_list))
        # Training takes the leading share and validation the held-out tail.
        # (These were previously swapped, training on the small remainder.)
        segments = df_list[split_at:] if self.validate else df_list[:split_at]

        batches = []
        inp_cols, out_cols = None, None
        for segment in segments:
            inp, out = get_onehot(segment)
            # Sorting gives every segment the same feature order.
            inp = inp.reindex(sorted(inp.columns), axis=1)
            batches.append((inp.values, out.values))
            inp_cols, out_cols = inp.columns.to_list(), out.columns.to_list()
        # Column names come from the last encoded segment; they stay None when
        # the split has no segments instead of raising a NameError.
        self.inp_cols = inp_cols
        self.out_cols = out_cols
        return batches

    def __len__(self):
        """Number of batches (segments) in this split."""
        return self.len_dataset


    def __getitem__(self, index):
        """Return batch `index` as ``(x, y)`` with a leading batch axis of 1.

        ``x`` has shape ``(1, rows, features)`` and ``y`` has shape ``(1, n)``,
        where ``n`` is the number of target values in the segment.
        """
        x, y = self.batches[index]
        return np.expand_dims(x, axis=0), np.reshape(y, (1, -1))

    def on_epoch_end(self):
        """Reshuffle the batch order after each epoch when shuffling is on."""
        if self.shuffle:
            self.__shuffle()

In [11]:
def _build_simple_rnn(input_dim):
    """Stack three SimpleRNN layers with dropout and a 57-way softmax head.

    Every recurrent layer returns full sequences, so the model emits one
    57-way distribution per time step of a variable-length input sequence.
    """
    rnn = tf.keras.layers.SimpleRNN
    dropout = tf.keras.layers.Dropout
    return tf.keras.Sequential([
        rnn(512, return_sequences=True, input_shape=(None, input_dim)),
        dropout(0.2),
        rnn(256, return_sequences=True),
        dropout(0.2),
        rnn(128, return_sequences=True),
        dropout(0.5),
        tf.keras.layers.Dense(57, activation='softmax'),
    ])


# NOTE(review): 1572 / 1573 presumably equal the one-hot feature widths of the
# batting-first and chasing datasets -- confirm against the generators' inp_cols.
model_inn_1 = _build_simple_rnn(1572)
model_inn_2 = _build_simple_rnn(1573)

In [10]:
# Raw ball-by-ball tables: BF from Batting_First.csv, BS from Chasing.csv.
BF = pd.read_csv("Data/Batting_First.csv")
BS = pd.read_csv("Data/Chasing.csv")

# One generator per split; validate=True selects DataGenerator's validation split.
bf_train_gen, bf_test_gen = DataGenerator(BF), DataGenerator(BF, validate=True)
bs_train_gen, bs_test_gen = DataGenerator(BS), DataGenerator(BS, validate=True)

In [12]:
# Sparse categorical cross-entropy: targets are integer class ids, not one-hot rows.
model_inn_1.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
# Train for 10 epochs on the batting-first generators.
model_inn_1.fit(bf_train_gen, epochs=10, validation_data=bf_test_gen)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11b1c73c1f0>

In [13]:
# Second fit call on the same model: 10 further epochs from the current weights.
model_inn_1.fit(bf_train_gen, epochs=10, validation_data=bf_test_gen)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11b35dee640>

In [14]:
# Persist the first-innings model in HDF5 format.
# NOTE(review): the "_10" suffix does not match the two 10-epoch fit calls above.
model_inn_1.save(filepath='Models/Inn1_SimpleRNN_10.h5')

In [15]:
# Same training setup as the first-innings model, on the chasing data.
model_inn_2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
# Train for 20 epochs on the chasing generators.
model_inn_2.fit(bs_train_gen, epochs=20, validation_data=bs_test_gen)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x11a00663fd0>

In [18]:
# Persist the second-innings model in HDF5 format.
# NOTE(review): the "_10" suffix does not match the 20-epoch fit call above.
model_inn_2.save(filepath='Models/Inn2_SimpleRNN_10.h5')

In [7]:
# Reload the saved models. Only `tensorflow as tf` is imported in this notebook,
# so go through tf.keras; the bare name `keras` is undefined on a fresh kernel.
model_inn_1 = tf.keras.models.load_model('Models/Inn1_SimpleRNN_10.h5')
model_inn_2 = tf.keras.models.load_model('Models/Inn2_SimpleRNN_10.h5')