# IPL Score **Prediction**

In [1]:
# Step 1: Mount Google Drive (Colab only)
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Install necessary libraries
!pip install pandas scikit-learn gradio



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Collecting gradio
  Downloading gradio-5.25.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Down

In [2]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv('/content/drive/MyDrive/processed_ipl_data2.csv', low_memory=False)

# Clean and filter
df = df[df['inning'] == 1]  # Only 1st innings
df.dropna(subset=['batting_team', 'bowling_team', 'total_runs'], inplace=True)

# Calculate current score, wickets, and over
df['current_score'] = df.groupby('match_id')['total_runs'].cumsum()
df['wickets'] = df.groupby('match_id')['is_wicket'].cumsum()
df['current_over'] = df['over'] + df['ball'] / 6

# Get final score of each match
final_scores = df.groupby('match_id')['current_score'].max().reset_index()
final_scores.columns = ['match_id', 'final_score']

# Merge final scores with each ball-level row
data = df.merge(final_scores, on='match_id')

# Add additional features
data['run_rate'] = data['current_score'] / (data['current_over'] + 0.1)
data['balls_left'] = 120 - (data['current_over'] * 6)
data['wickets_left'] = 10 - data['wickets']

# Final features for ML
data = data[['batting_team', 'bowling_team', 'current_over', 'current_score', 'wickets', 'run_rate', 'balls_left', 'wickets_left', 'final_score']]


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import time

# Sample only 10% of data for speed
sampled_data = data.sample(frac=0.1, random_state=42)

X = sampled_data.drop('final_score', axis=1)
y = sampled_data['final_score']

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Encoding
categorical_features = ['batting_team', 'bowling_team']
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features)
], remainder='passthrough')

# Simple Random Forest for fast training
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(
        n_estimators=10,      # Much faster
        max_depth=5,          # Shallow depth
        n_jobs=-1,
        random_state=42
    ))
])

# Time it
start_time = time.time()
pipeline.fit(X_train, y_train)
end_time = time.time()

print(f"✅ Model trained in {end_time - start_time:.2f} seconds")


✅ Model trained in 0.19 seconds


In [4]:
import gradio as gr

# Prediction function
def predict_score(batting_team, bowling_team, current_score, current_over, wickets):
    run_rate = current_score / (current_over + 0.1)
    balls_left = 120 - (current_over * 6)
    wickets_left = 10 - wickets

    input_data = pd.DataFrame({
        'batting_team': [batting_team],
        'bowling_team': [bowling_team],
        'current_over': [current_over],
        'current_score': [current_score],
        'wickets': [wickets],
        'run_rate': [run_rate],
        'balls_left': [balls_left],
        'wickets_left': [wickets_left]
    })

    predicted_score = pipeline.predict(input_data)[0]
    return f"🏏 Predicted Final Score: {int(predicted_score)} runs"

# Teams list
teams = sorted(data['batting_team'].dropna().unique())

# Gradio interface
interface = gr.Interface(
    fn=predict_score,
    inputs=[
        gr.Dropdown(choices=teams, label="Batting Team"),
        gr.Dropdown(choices=teams, label="Bowling Team"),
        gr.Number(label="Current Score", value=50),
        gr.Number(label="Overs Completed", value=7),
        gr.Number(label="Wickets Fallen", value=2)
    ],
    outputs="text",
    title="IPL Final Score Predictor",
    description="Enter live match stats to predict the 1st innings final score!",
)

interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9e902d1470e783b795.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


