In [1]:
import pandas as pd
import numpy as np
import pickle as pkl

import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, LSTM, Input, Dropout, TimeDistributed, LayerNormalization, GlobalAveragePooling1D
from keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
# Load the CSV (its first column becomes the index) and discard any leftover
# 'Unnamed: …' columns; errors='ignore' makes the drop a no-op when they are absent.
df = pd.read_csv("data/bbb.csv", index_col=0).drop(
    columns=['Unnamed: 0', 'Unnamed: 0.1'],
    errors='ignore',
)

In [3]:
# Display the column labels of the loaded frame to see which fields are available.
df.columns

Index(['match_id', 'match_date', 'dl', 'gender', 'venue', 'innings',
       'bat_team', 'bowl_team', 'over', 'ball', 'batter', 'batter_name',
       'bowler', 'bowler_name', 'non_striker', 'runs_batter', 'runs_extras',
       'runs_total', 'wicket_type', 'player_out', 'batter_total_runs',
       'batter_balls_faced', 'bowler_total_runs', 'bowler_balls_bowled',
       'team_total_runs', 'wickets_taken', 'rr', 'target', 'remaining_balls',
       'rrr'],
      dtype='object')

In [None]:
# Create an LSTM-based model to predict the final score of a cricket match

def create_lstm_model(input_shape, lstm_units=(64, 32), dropout_rate=0.2, learning_rate=0.001):
    """Build and compile a stacked-LSTM network with a single linear output.

    The network consists of an ``Input`` layer, one ``LSTM`` + ``Dropout``
    pair per entry of ``lstm_units``, and a final ``Dense(1)`` layer with a
    linear activation. It is compiled with the Adam optimizer and a
    mean-squared-error loss. With the default arguments the architecture is
    LSTM(64) -> Dropout(0.2) -> LSTM(32) -> Dropout(0.2) -> Dense(1).

    Args:
        input_shape: Shape handed to ``Input(shape=...)``. The caller in this
            notebook passes ``(X_train.shape[1], X_train.shape[2])``.
        lstm_units: Iterable with the number of units of each LSTM layer, in
            order. Must contain at least one entry.
        dropout_rate: Rate passed to the ``Dropout`` layer that follows every
            LSTM layer.
        learning_rate: Learning rate passed to ``Adam``.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``lstm_units`` is empty.
    """
    lstm_units = tuple(lstm_units)
    if not lstm_units:
        raise ValueError("lstm_units must contain at least one layer size")

    model = Sequential()
    model.add(Input(shape=input_shape))

    last_index = len(lstm_units) - 1
    for index, units in enumerate(lstm_units):
        # Every LSTM except the last must emit the full sequence so that the
        # following LSTM layer still receives one vector per time step.
        model.add(LSTM(units, return_sequences=index < last_index))
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='linear'))

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')

    return model

# Prepare data for LSTM
def prepare_data(df, target_column='target', time_steps=5):
    """Turn a frame into sliding-window samples for a sequence model.

    Every sample is ``time_steps`` consecutive rows of all columns except
    ``target_column``; its label is the value of ``target_column`` in the row
    immediately after the window.

    Windows are taken over consecutive rows of the whole frame, in the order
    the rows appear. The function does not group rows in any way.
    NOTE(review): if the frame concatenates several matches/innings, windows
    will straddle their boundaries — confirm whether that is intended.

    Args:
        df: Frame holding the feature columns and ``target_column``.
        target_column: Name of the column used as the label.
        time_steps: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, time_steps, n_features)`` and ``y`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(df) - time_steps, 0)``.
        When the frame is too short to form a single window, both arrays are
        empty but ``X`` keeps its three dimensions, so shape-based code
        downstream still works.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1.
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be a positive integer, got {time_steps!r}")

    # Convert to NumPy once instead of slicing the frame row by row.
    features = df.drop(columns=[target_column]).to_numpy()
    target = df[target_column].to_numpy()

    n_windows = len(features) - time_steps
    if n_windows <= 0:
        return (
            np.empty((0, time_steps, features.shape[1]), dtype=features.dtype),
            np.empty((0,), dtype=target.dtype),
        )

    # sliding_window_view yields (n_rows - time_steps + 1, n_features, time_steps).
    # The final window has no following row to serve as its label, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(features, time_steps, axis=0)

    # Reorder to (window, time step, feature) and copy so the result is a
    # writable array that does not alias the frame's memory.
    X = windows[:n_windows].transpose(0, 2, 1).copy()
    y = target[time_steps:].copy()

    return X, y

# Build sliding-window samples, then split them into training and testing sets.
# Only numeric columns are windowed: the raw frame appears to hold text fields
# as well (e.g. venue, bat_team, batter_name), and including them would yield an
# object-dtype feature array that cannot be converted to a tensor for training.
# NOTE(review): missing values are not handled here — confirm the numeric
# columns (including 'target') contain no NaNs before fitting.
# NOTE(review): adjacent windows overlap, so the shuffled split below places
# near-identical windows in both sets; consider a chronological or per-match split.
numeric_df = df.select_dtypes(include='number')
assert 'target' in numeric_df.columns, "'target' must be a numeric column of df"

X, y = prepare_data(numeric_df)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the model: the per-sample input shape is taken from axes 1 and 2 of the
# training windows, then the network is created and its layer summary printed.
input_shape = tuple(X_train.shape[axis] for axis in (1, 2))
model = create_lstm_model(input_shape)
model.summary()

# Callbacks for training. Both watch 'val_loss', so the fit call that uses them
# has to supply validation data (that call is not visible in this part of the notebook).
# - reduce_lr: multiplies the learning rate by 0.2 after 5 epochs without
#   improvement, never going below 1e-6.
# - early_stopping: stops after 10 epochs without improvement and restores the
#   weights of the best epoch.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
