# IPL Score Prediction
Predicting final IPL match scores based on the current match situation

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load dataset
# Read the CSV into a DataFrame, report its (rows, columns) shape and
# preview the first rows as the cell output.
df = pd.read_csv('Dataset/ipl.csv')
print("Dataset shape: {}".format(df.shape))
df.head(5)

In [None]:
# Data inspection
# Print the DataFrame summary (columns, dtypes, non-null counts).
# NOTE: no preprocessing is actually applied in this cell.
df.info()

In [None]:
# Feature engineering
# Candidate predictor columns and the name of the column to predict.
features = ['runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5']
target = 'total'

# Keep only the candidate features that are present in the dataset, and say
# explicitly which ones were dropped so the omission is never silent.
available_features = [col for col in features if col in df.columns]
missing_features = [col for col in features if col not in df.columns]
print(f"Available features: {available_features}")
if missing_features:
    print(f"Warning: Missing feature columns: {missing_features}")

# Fail fast: the following cells index df[target] and fit on
# df[available_features], so continuing without them can only end in a less
# helpful error further down.
if target not in df.columns:
    print(f"Error: Target column '{target}' not found")
    print(f"Available columns: {df.columns.tolist()}")
    raise KeyError(f"Target column '{target}' not found in dataset")

if not available_features:
    raise ValueError(
        f"None of the expected feature columns {features} are present in the dataset"
    )

In [None]:
# Prepare data
# Feature matrix (selected columns) and target vector.
X, y = df[available_features], df[target]

# Split data
# 80/20 train/test split; the fixed random_state keeps it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print("Training set: {}, Test set: {}".format(X_train.shape, X_test.shape))

In [None]:
# Train models
# Candidate regressors, keyed by the display name used in the report below.
models = {}
models['Linear Regression'] = LinearRegression()
models['Random Forest'] = RandomForestRegressor(n_estimators=100, random_state=42)
models['Gradient Boosting'] = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Fit each candidate on the training split, score it on the held-out split,
# and keep the fitted estimator together with its metrics.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
    results[name] = dict(model=model, mae=mae, r2=r2)
    print("{} - MAE: {:.2f}, R2: {:.4f}".format(name, mae, r2))

In [None]:
# Select best model
# The winner is the entry with the smallest test-set MAE (first one on ties).
best_model_name, best_entry = min(
    results.items(),
    key=lambda item: item[1]['mae'],
)
best_model = best_entry['model']
print("\nBest Model: {}".format(best_model_name))
print("MAE: {:.2f}".format(best_entry['mae']))
print("R2 Score: {:.4f}".format(best_entry['r2']))

In [None]:
# Save models
# Pickle the best model and the linear-regression model to the working
# directory, one file per model.
for path, fitted in (
    ('best_model.pkl', best_model),
    ('linear_regression_model.pkl', results['Linear Regression']['model']),
):
    with open(path, 'wb') as f:
        pickle.dump(fitted, f)

print("Models saved successfully!")

In [None]:
# Prediction function with validation
def predict_score(current_runs, wickets, overs, runs_last_5, wickets_last_5, model=None):
    """
    Predict the final score, never returning less than the current score.

    Parameters:
    - current_runs: Current runs scored
    - wickets: Wickets fallen
    - overs: Overs completed
    - runs_last_5: Runs scored in last 5 overs
    - wickets_last_5: Wickets fallen in last 5 overs
    - model: Trained model for prediction. Defaults to the module-level
      ``best_model``, looked up at call time so that re-running the training
      cells is picked up without redefining this function.

    Returns:
    - Predicted final score rounded to the nearest integer
      (floored at current_runs before rounding)

    Raises:
    - ValueError: if the model expects a feature this function cannot supply
    """
    if model is None:
        model = best_model

    # Map every supported feature column to the value supplied by the caller.
    situation = {
        'runs': current_runs,
        'wickets': wickets,
        'overs': overs,
        'runs_last_5': runs_last_5,
        'wickets_last_5': wickets_last_5,
    }

    # Build the input with exactly the columns the model was fitted on.
    # scikit-learn estimators fitted on a DataFrame expose them as
    # ``feature_names_in_``; otherwise fall back to the notebook's
    # ``available_features``. Always passing all five values (as before)
    # failed whenever a feature column was missing from the dataset.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = available_features
    feature_names = list(feature_names)

    unknown = [name for name in feature_names if name not in situation]
    if unknown:
        raise ValueError(f"Model expects unsupported feature(s): {unknown}")

    input_data = pd.DataFrame(
        [[situation[name] for name in feature_names]],
        columns=feature_names,
    )

    # Get prediction
    predicted_score = model.predict(input_data)[0]

    # A final total can never be lower than the runs already scored, so use
    # the current runs as a floor for the model output.
    final_prediction = max(predicted_score, current_runs)

    return round(final_prediction)

print("Prediction function defined with validation!")

In [None]:
# Test predictions
print("\n=== Test Predictions ===")

# One row per scenario:
# (heading, current runs, wickets, overs, runs in last 5, wickets in last 5)
scenarios = [
    ("Scenario 1: Early innings (5 overs)", 45, 1, 5.0, 45, 1),
    ("Scenario 2: Middle innings (10 overs)", 95, 2, 10.0, 50, 1),
    ("Scenario 3: Late innings (15 overs)", 150, 4, 15.0, 55, 2),
    ("Scenario 4: Very late innings (18 overs)", 175, 6, 18.0, 40, 3),
]

# Run each scenario through predict_score and confirm the prediction is not
# below the score already on the board.
for heading, current_score, wkts, ovs, last5_runs, last5_wkts in scenarios:
    print("\n" + heading)
    prediction = predict_score(
        current_runs=current_score,
        wickets=wkts,
        overs=ovs,
        runs_last_5=last5_runs,
        wickets_last_5=last5_wkts,
    )
    print("Current Score: {}".format(current_score))
    print("Predicted Final Score: {}".format(prediction))
    print("Valid: {}".format(prediction >= current_score))

In [None]:
# Interactive prediction
def make_prediction():
    """
    Interactively read the match situation and print the predicted final score.

    Prompts on stdin for current runs, wickets fallen, overs completed, and
    the runs and wickets of the last 5 overs, then calls predict_score and
    prints a short summary.

    Returns:
    - The value returned by predict_score

    Raises:
    - ValueError: if an entry cannot be parsed as a number
    """
    print("\n=== IPL Score Predictor ===")
    current_runs = float(input("Enter current runs: "))
    wickets = int(input("Enter wickets fallen: "))
    overs = float(input("Enter overs completed: "))
    runs_last_5 = float(input("Enter runs in last 5 overs: "))
    wickets_last_5 = int(input("Enter wickets in last 5 overs: "))

    predicted = predict_score(current_runs, wickets, overs, runs_last_5, wickets_last_5)

    print(f"\n{'='*50}")
    print(f"Current Score: {int(current_runs)}/{wickets} in {overs} overs")
    print(f"Predicted Final Score: {predicted}")
    print(f"Expected Additional Runs: {predicted - int(current_runs)}")
    print(f"{'='*50}")

    # Validation check
    # predict_score floors its result at current_runs, so a strict `<` test
    # could only fire through rounding of a fractional input. Using `<=`
    # reports the case where the floor was reached.
    if predicted <= current_runs:
        print("\n⚠️  WARNING: Predicted final score does not exceed the current score!")
        print("The model's estimate may have been raised to the current score.")

    return predicted

# Uncomment below to use interactive prediction
# make_prediction()