# *IPL Score Prediction using Deep Learning*

# Step 1: Load Dataset

In [None]:
# Attach Google Drive to this Colab runtime so files under it can be read by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings
warnings.filterwarnings("ignore")

# Read the IPL CSV from the mounted Drive (update the path to match your own Drive layout)
# and discard the 'date' and 'mid' columns; errors='ignore' tolerates either being absent.
DATA_PATH = "/content/drive/MyDrive/ipl_data.csv"
ipl = pd.read_csv(DATA_PATH).drop(columns=['date', 'mid'], errors='ignore')

# Step 2: Exploratory Data Analysis (EDA)

In [None]:
# Display basic dataset info (column names, non-null counts, dtypes, memory usage).
# DataFrame.info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line to the output.
print("Dataset Overview:")
ipl.info()

Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 76014 entries, 0 to 76013
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   venue           76014 non-null  object 
 1   bat_team        76014 non-null  object 
 2   bowl_team       76014 non-null  object 
 3   batsman         76014 non-null  object 
 4   bowler          76014 non-null  object 
 5   runs            76014 non-null  int64  
 6   wickets         76014 non-null  int64  
 7   overs           76014 non-null  float64
 8   runs_last_5     76014 non-null  int64  
 9   wickets_last_5  76014 non-null  int64  
 10  striker         76014 non-null  int64  
 11  non-striker     76014 non-null  int64  
 12  total           76014 non-null  int64  
dtypes: float64(1), int64(7), object(5)
memory usage: 7.5+ MB
None


In [None]:
# Preview the first five records under a labelled heading
print("\n🔹 First 5 rows:", ipl.head(), sep="\n")


🔹 First 5 rows:
                   venue               bat_team                    bowl_team  \
0  M Chinnaswamy Stadium  Kolkata Knight Riders  Royal Challengers Bangalore   
1  M Chinnaswamy Stadium  Kolkata Knight Riders  Royal Challengers Bangalore   
2  M Chinnaswamy Stadium  Kolkata Knight Riders  Royal Challengers Bangalore   
3  M Chinnaswamy Stadium  Kolkata Knight Riders  Royal Challengers Bangalore   
4  M Chinnaswamy Stadium  Kolkata Knight Riders  Royal Challengers Bangalore   

       batsman   bowler  runs  wickets  overs  runs_last_5  wickets_last_5  \
0   SC Ganguly  P Kumar     1        0    0.1            1               0   
1  BB McCullum  P Kumar     1        0    0.2            1               0   
2  BB McCullum  P Kumar     2        0    0.2            2               0   
3  BB McCullum  P Kumar     2        0    0.3            2               0   
4  BB McCullum  P Kumar     2        0    0.4            2               0   

   striker  non-striker  total  


In [None]:

# Preview the final five records under a labelled heading
last_rows = ipl.tail()
print("\n🔹 Last 5 rows:", last_rows, sep="\n")



🔹 Last 5 rows:
                                           venue        bat_team  \
76009  Rajiv Gandhi International Stadium, Uppal  Mumbai Indians   
76010  Rajiv Gandhi International Stadium, Uppal  Mumbai Indians   
76011  Rajiv Gandhi International Stadium, Uppal  Mumbai Indians   
76012  Rajiv Gandhi International Stadium, Uppal  Mumbai Indians   
76013  Rajiv Gandhi International Stadium, Uppal  Mumbai Indians   

                    bowl_team     batsman        bowler  runs  wickets  overs  \
76009  Rising Pune Supergiant   KH Pandya  DT Christian   121        7   19.2   
76010  Rising Pune Supergiant   KH Pandya  DT Christian   127        7   19.3   
76011  Rising Pune Supergiant   KH Pandya  DT Christian   128        7   19.4   
76012  Rising Pune Supergiant  MG Johnson  DT Christian   129        7   19.5   
76013  Rising Pune Supergiant   KH Pandya  DT Christian   129        8   19.6   

       runs_last_5  wickets_last_5  striker  non-striker  total  
76009           40    

In [None]:
# Count the null entries in every column (isna is the same check as isnull)
null_counts = ipl.isna().sum()
print("\n🔹 Missing Values:", null_counts, sep="\n")


🔹 Missing Values:
venue             0
bat_team          0
bowl_team         0
batsman           0
bowler            0
runs              0
wickets           0
overs             0
runs_last_5       0
wickets_last_5    0
striker           0
non-striker       0
total             0
dtype: int64


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
numeric_summary = ipl.describe()
print("\n🔹 Numerical Feature Summary:", numeric_summary, sep="\n")


🔹 Numerical Feature Summary:
               runs       wickets         overs   runs_last_5  wickets_last_5  \
count  76014.000000  76014.000000  76014.000000  76014.000000    76014.000000   
mean      74.889349      2.415844      9.783068     33.216434        1.120307   
std       48.823327      2.015207      5.772587     14.914174        1.053343   
min        0.000000      0.000000      0.000000      0.000000        0.000000   
25%       34.000000      1.000000      4.600000     24.000000        0.000000   
50%       70.000000      2.000000      9.600000     34.000000        1.000000   
75%      111.000000      4.000000     14.600000     43.000000        2.000000   
max      263.000000     10.000000     19.600000    113.000000        7.000000   

            striker   non-striker         total  
count  76014.000000  76014.000000  76014.000000  
mean      24.962283      8.869287    160.901452  
std       20.079752     10.795742     29.246231  
min        0.000000      0.000000     67

In [None]:
# Summary (count, unique, top, freq) restricted to the object-typed columns
categorical_summary = ipl.describe(include=['object'])
print("\n🔹 Categorical Feature Summary:", categorical_summary, sep="\n")


🔹 Categorical Feature Summary:
                        venue        bat_team         bowl_team   batsman  \
count                   76014           76014             76014     76014   
unique                     35              14                14       411   
top     M Chinnaswamy Stadium  Mumbai Indians  Delhi Daredevils  SK Raina   
freq                     7443           10213             10245      1921   

                 bowler  
count             76014  
unique              329  
top     Harbhajan Singh  
freq               1404  


Graphs

# Step 3: Feature Engineering & Preprocessing

In [None]:
# Step 3: Feature Engineering
cat_features = ['venue', 'bat_team', 'bowl_team', 'batsman', 'bowler']
num_features = ['runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'striker', 'non-striker']

# Work on a copy so `ipl` keeps its raw values and this cell can be re-run safely.
# Encoding `ipl` in place would, on a second run, re-fit every encoder on the stringified
# integer codes, replacing the original names held in `classes_` and changing the codes.
ipl_encoded = ipl.copy()

# Label-encode each categorical column and keep the fitted encoder per column so the
# same string -> integer mapping can be applied to new inputs.
label_encoders = {}
for col in cat_features:
    le = LabelEncoder()
    ipl_encoded[col] = le.fit_transform(ipl_encoded[col].astype(str))
    label_encoders[col] = le

# Scale the numerical features to the [0, 1] range.
# NOTE(review): the scaler is fitted on every row, i.e. before any train/test split;
# fit it on the training rows only if strict hold-out isolation is required.
scaler = MinMaxScaler()
ipl_encoded[num_features] = scaler.fit_transform(ipl_encoded[num_features])

# Feature matrix (categorical codes first, then the scaled numerics) and target
X = ipl_encoded[cat_features + num_features]
y = ipl_encoded['total']  # Target variable


# Step 4: Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the partition repeatable.
# NOTE(review): this is a row-level random split. If several rows belong to the same
# innings and share one `total`, near-duplicate rows can land on both sides of the
# split and inflate the test metrics - confirm, and consider a split grouped by match.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Step 5: Model Definition & Training

In [None]:
# Define Optimized Neural Network

# Seed Python's `random`, NumPy and TensorFlow in one call so weight initialisation,
# dropout and batch shuffling start from the same state on every fresh run.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input width with an explicit Input object rather than passing
    # `input_shape=` to the first Dense layer.
    tf.keras.Input(shape=(X_train.shape[1],)),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.1),

    Dense(32, activation='relu'),
    Dense(1, activation='linear')  # Output layer: one unbounded value (regression)
])

# Compile Model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mse')

# Early Stopping: stop after 10 epochs without val_loss improvement and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train Model
# Validation data is carved out of the TRAINING set (last 10% of the arrays passed in).
# Early stopping picks the weights by val_loss, so validating on X_test would let the
# test set steer model selection and make the reported test metrics optimistic.
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stop],
)

# Predict on the untouched hold-out set
y_pred = model.predict(X_test)

Epoch 1/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - loss: 16565.2949 - val_loss: 1601.5736
Epoch 2/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - loss: 1321.8706 - val_loss: 849.3312
Epoch 3/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 1022.5631 - val_loss: 835.1245
Epoch 4/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - loss: 975.6440 - val_loss: 792.1475
Epoch 5/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 777.6320 - val_loss: 525.8287
Epoch 6/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - loss: 685.2427 - val_loss: 477.0640
Epoch 7/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - loss: 651.0076 - val_loss: 707.9279
Epoch 8/50
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 643.9680 - val_loss: 510.7

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Score the network on the hold-out set: predict first, then compute each metric once
y_pred = model.predict(X_test)

test_mae = mean_absolute_error(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print(f"MAE: {test_mae}, MSE: {test_mse}, R²: {test_r2}")


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
MAE: 14.706571769714355, MSE: 206.51022338867188, R²: 0.8180624723434449


# Step 6: Save the Model

In [None]:
# Persist the trained network to an HDF5 file in the current working directory
model_file = "cricket_score_prediction.h5"
model.save(model_file)
print("Model saved successfully! ")




Model saved successfully! 


# Step 7: Load the Saved Model & Prediction UI

In [None]:
from tensorflow.keras.models import load_model
import tensorflow as tf
from warnings import filterwarnings

filterwarnings("ignore")

# Restore the saved network from disk. The name "mse" is mapped to a loss object
# explicitly and handed to the loader through `custom_objects`.
loss_lookup = {"mse": tf.keras.losses.MeanSquaredError()}
model = load_model("cricket_score_prediction.h5", custom_objects=loss_lookup)

print("Model loaded successfully! ")


# Step 8: Interactive Score Prediction Widget

# ipywidgets truncates a description that is wider than the default label width;
# 'initial' lets each label grow to fit its text (e.g. 'Non-Striker Score:').
_LABEL_STYLE = {'description_width': 'initial'}


def _class_dropdown(encoder_key, label):
    """Return a Dropdown listing every class known to the fitted encoder `encoder_key`."""
    return widgets.Dropdown(
        options=list(label_encoders[encoder_key].classes_),
        description=label,
        style=_LABEL_STYLE,
    )


# Categorical selectors: options come from the fitted label encoders, so every
# selectable value is guaranteed to be encodable.
venue = _class_dropdown('venue', 'Venue:')
batting_team = _class_dropdown('bat_team', 'Batting Team:')
bowling_team = _class_dropdown('bowl_team', 'Bowling Team:')
striker = _class_dropdown('batsman', 'Striker:')
bowler = _class_dropdown('bowler', 'Bowler:')

# Numeric match-state inputs (all start at 0)
runs = widgets.IntText(description='Runs:', style=_LABEL_STYLE)
wickets = widgets.IntText(description='Wickets:', style=_LABEL_STYLE)
overs = widgets.FloatText(description='Overs:', style=_LABEL_STYLE)
runs_last_5 = widgets.IntText(description='Runs Last 5:', style=_LABEL_STYLE)
wickets_last_5 = widgets.IntText(description='Wickets Last 5:', style=_LABEL_STYLE)
striker_score = widgets.IntText(description='Striker Score:', style=_LABEL_STYLE)
non_striker_score = widgets.IntText(description='Non-Striker Score:', style=_LABEL_STYLE)

# Trigger button and the area that receives the prediction text
predict_button = widgets.Button(description="Predict Runs")
output = widgets.Output()

def _match_state_problems(batting_side, bowling_side, runs_scored, wickets_lost,
                          overs_bowled, runs_recent, wickets_recent,
                          striker_runs, non_striker_runs):
    """Return a list of human-readable problems with the entered match state.

    An empty list means the inputs are acceptable.
    """
    problems = []

    if batting_side == bowling_side:
        problems.append("Batting and bowling teams must be different.")

    labelled_values = [
        ("Runs", runs_scored),
        ("Wickets", wickets_lost),
        ("Overs", overs_bowled),
        ("Runs Last 5", runs_recent),
        ("Wickets Last 5", wickets_recent),
        ("Striker Score", striker_runs),
        ("Non-Striker Score", non_striker_runs),
    ]
    for label, value in labelled_values:
        if value < 0:
            problems.append(f"{label} cannot be negative.")

    if wickets_lost > 10:
        problems.append("Wickets cannot exceed 10.")

    if overs_bowled > 20:
        problems.append("Overs cannot exceed 20.")
    elif overs_bowled >= 0 and round((overs_bowled % 1) * 10) > 6:
        # Overs use cricket notation: the digit after the point counts balls, so the
        # training data only contains x.0 to x.6.
        problems.append("Overs must be in over.ball form with at most 6 balls (e.g. 12.3).")

    return problems


def predict_runs(b):
    """Button callback: predict the final innings total from the current widget values.

    Args:
        b: the Button instance passed by ipywidgets on click (unused).

    Prints either the projected total or a list of input problems into `output`.
    """
    with output:
        clear_output()

        # Refuse to predict on a match state the model cannot meaningfully score
        problems = _match_state_problems(
            batting_team.value, bowling_team.value,
            runs.value, wickets.value, overs.value,
            runs_last_5.value, wickets_last_5.value,
            striker_score.value, non_striker_score.value,
        )
        if problems:
            print("⚠️ Please fix the inputs:")
            for problem in problems:
                print(f"  - {problem}")
            return

        # Convert categorical inputs with the encoders fitted during preprocessing
        encoded_input = [
            label_encoders['venue'].transform([venue.value])[0],
            label_encoders['bat_team'].transform([batting_team.value])[0],
            label_encoders['bowl_team'].transform([bowling_team.value])[0],
            label_encoders['batsman'].transform([striker.value])[0],
            label_encoders['bowler'].transform([bowler.value])[0]
        ]

        # Numerical inputs, in the same column order the scaler was fitted on
        numerical_input = np.array([[
            runs.value, wickets.value, overs.value,
            runs_last_5.value, wickets_last_5.value,
            striker_score.value, non_striker_score.value
        ]], dtype=float)

        # Scale only the numerical part, then join it behind the categorical codes
        scaled_numerical = np.asarray(scaler.transform(numerical_input), dtype=float)
        categorical_part = np.array(encoded_input, dtype=float).reshape(1, -1)
        input_data = np.hstack([categorical_part, scaled_numerical])

        # Predict Runs (verbose=0 keeps the Keras progress bar out of the output area)
        raw_prediction = np.asarray(model.predict(input_data, verbose=0), dtype=float)
        predicted_total = int(round(float(raw_prediction.reshape(-1)[0])))

        # A final total can never be lower than the runs already on the board
        predicted_total = max(predicted_total, int(runs.value))
        print(f"🏏 Predicted Runs: {predicted_total}")

# Wire the click handler to the button
predict_button.on_click(predict_runs)

# Render every control top to bottom, then the button and the area that shows the result
ui_controls = [
    venue, batting_team, bowling_team, striker, bowler,
    runs, wickets, overs, runs_last_5, wickets_last_5,
    striker_score, non_striker_score,
    predict_button, output,
]
display(*ui_controls)




Model loaded successfully! 


Dropdown(description='Venue:', options=('Barabati Stadium', 'Brabourne Stadium', 'Buffalo Park', 'De Beers Dia…

Dropdown(description='Batting Team:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Daredevils', '…

Dropdown(description='Bowling Team:', options=('Chennai Super Kings', 'Deccan Chargers', 'Delhi Daredevils', '…

Dropdown(description='Striker:', options=('A Ashish Reddy', 'A Chandila', 'A Chopra', 'A Choudhary', 'A Flinto…

Dropdown(description='Bowler:', options=('A Ashish Reddy', 'A Chandila', 'A Choudhary', 'A Flintoff', 'A Kumbl…

IntText(value=0, description='Runs:')

IntText(value=0, description='Wickets:')

FloatText(value=0.0, description='Overs:')

IntText(value=0, description='Runs Last 5:')

IntText(value=0, description='Wickets Last 5:')

IntText(value=0, description='Striker Score:')

IntText(value=0, description='Non-Striker Score:')

Button(description='Predict Runs', style=ButtonStyle())

Output()