In [3]:
# Install required packages (if not already installed)
!pip install pandas numpy scikit-learn ipywidgets

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, accuracy_score
import joblib
import ipywidgets as widgets
from IPython.display import display, clear_output

# Mount Google Drive (if you want to save/load models)
# from google.colab import drive
# drive.mount('/content/drive')

## Data Loading and Preprocessing
# Load the dataset from an uploaded file (Colab) or fall back to GitHub.
try:
    # google.colab is only importable inside Colab; elsewhere the import
    # raises ImportError and control drops to the fallback below.
    from google.colab import files
    uploaded = files.upload()
    # files.upload() returns a dict keyed by file name; read the first one.
    data = pd.read_csv(next(iter(uploaded.keys())))
except Exception as load_error:
    # Catch Exception instead of a bare ``except`` so KeyboardInterrupt and
    # SystemExit still stop the cell, and report why the fallback is used
    # rather than switching data sources silently.
    print(f"Upload unavailable ({load_error!r}); loading dataset from GitHub instead.")
    # NOTE(review): this is a placeholder URL - point it at the real repo.
    github_url = "https://raw.githubusercontent.com/yourusername/yourrepo/main/all_champions_trophy_matches_results.csv"
    data = pd.read_csv(github_url)

# Display dataset info
print("Dataset Overview:")
print(f"Number of matches: {len(data)}")
print("\nFirst few matches:")
display(data.head())

# Data Preprocessing
# Create synthetic score data (since actual scores aren't in the dataset).
# WARNING: these values are random draws, so 'total_score' carries no real
# signal - anything trained on it is only a placeholder.
np.random.seed(42)
data['total_score'] = np.random.randint(150, 350, size=len(data))

# For outcome prediction: 1 when the listed winner is Team1, 0 for any other
# 'Winner' value. Vectorized comparison instead of a row-wise apply().
data['winner_binary'] = (data['Winner'] == data['Team1']).astype(int)

# Feature engineering
# Differences are always Team1 minus Team2.
data['batting_strength_diff'] = data['Team1 Avg Batting Ranking'] - data['Team2 Avg Batting Ranking']
data['bowling_strength_diff'] = data['Team1 Avg Bowling Ranking'] - data['Team2 Avg Bowling Ranking']
data['experience_diff'] = data['Team1 Total CTs participated'] - data['Team2 Total CTs participated']

## Model Training
# Target columns for the two tasks
score_target = 'total_score'
outcome_target = 'winner_binary'

# Predictor columns for the score regressor
score_features = [
    'Team1 Avg Batting Ranking',
    'Team2 Avg Bowling Ranking',
    'Team1 W/L ratio over Team2',
    'Team1 Total CTs participated',
    'batting_strength_diff',
    'bowling_strength_diff',
]

# Predictor columns for the outcome classifier
outcome_features = [
    'Team1 Avg Batting Ranking',
    'Team2 Avg Batting Ranking',
    'Team1 Avg Bowling Ranking',
    'Team2 Avg Bowling Ranking',
    'Team1 W/L ratio over Team2',
    'Team1 Total CTs won',
    'Team2 Total CTs won',
    'batting_strength_diff',
    'bowling_strength_diff',
    'experience_diff',
]

# Both tasks use the same 80/20 split settings
split_kwargs = dict(test_size=0.2, random_state=42)

# Train/test split for score prediction
X_score, y_score = data[score_features], data[score_target]
X_score_train, X_score_test, y_score_train, y_score_test = train_test_split(
    X_score, y_score, **split_kwargs)

# Train/test split for outcome prediction
X_outcome, y_outcome = data[outcome_features], data[outcome_target]
X_outcome_train, X_outcome_test, y_outcome_train, y_outcome_test = train_test_split(
    X_outcome, y_outcome, **split_kwargs)

# Fit the score regressor and report its mean absolute error on the test split
print("\nTraining score prediction model...")
score_model = RandomForestRegressor(n_estimators=150, random_state=42).fit(
    X_score_train, y_score_train)
score_preds = score_model.predict(X_score_test)
score_mae = mean_absolute_error(y_score_test, score_preds)
print(f"Score Prediction MAE: {score_mae:.2f}")

# Fit the outcome classifier and report its accuracy on the test split
print("\nTraining outcome prediction model...")
outcome_model = RandomForestClassifier(n_estimators=150, random_state=42).fit(
    X_outcome_train, y_outcome_train)
outcome_preds = outcome_model.predict(X_outcome_test)
outcome_accuracy = accuracy_score(y_outcome_test, outcome_preds)
print(f"Outcome Prediction Accuracy: {outcome_accuracy:.2f}")

# Save models (optional)
# joblib.dump(score_model, 'score_model.pkl')
# joblib.dump(outcome_model, 'outcome_model.pkl')

## Interactive Prediction Interface
print("\n\n=== Cricket Match Prediction System ===")

# Team selection dropdowns: every team that appears on either side of a
# match, de-duplicated and sorted once.
teams = sorted(set(data['Team1'].unique()) | set(data['Team2'].unique()))

team1_dropdown = widgets.Dropdown(options=teams, description='Team 1:')
team2_dropdown = widgets.Dropdown(options=teams, description='Team 2:')
# A Dropdown defaults to its first option, so without this both selectors
# would start on the same team.
if len(teams) > 1:
    team2_dropdown.value = teams[1]

# Match parameters
overs_slider = widgets.IntSlider(min=1, max=50, value=50, description='Overs:')
venue_dropdown = widgets.Dropdown(options=sorted(data['Ground'].unique()), description='Venue:')

# Advanced parameters (hidden by default): the box starts empty and is
# filled in when the toggle is switched on.
advanced_toggle = widgets.ToggleButton(value=False, description='Show Advanced Options')
advanced_box = widgets.VBox(children=[], layout={'border': '1px solid gray', 'padding': '10px'})

def update_advanced_options(change):
    """Fill or empty ``advanced_box`` when the advanced toggle changes.

    Args:
        change: Change notification for the toggle's ``value`` trait;
            ``change['new']`` is the new boolean state.

    The sliders are rebuilt each time the toggle is switched on, so they
    start from their default values again.
    """
    if not change['new']:
        advanced_box.children = []
        return

    # Descriptions longer than the default label width get cut off;
    # 'initial' lets the label take the width it needs.
    wide_label = {'description_width': 'initial'}
    rank_kwargs = dict(min=0, max=100, value=70, step=0.1, style=wide_label)
    count_kwargs = dict(min=0, max=10, value=2, style=wide_label)

    # Order matters: the predict handler reads these widgets by position.
    advanced_box.children = [
        widgets.FloatSlider(description='Team1 Batting Rank:', **rank_kwargs),
        widgets.FloatSlider(description='Team2 Batting Rank:', **rank_kwargs),
        widgets.FloatSlider(description='Team1 Bowling Rank:', **rank_kwargs),
        widgets.FloatSlider(description='Team2 Bowling Rank:', **rank_kwargs),
        widgets.FloatSlider(min=0, max=10, value=1.0, step=0.1,
                            description='Team1 W/L Ratio:', style=wide_label),
        widgets.IntSlider(min=0, max=20, value=5,
                          description='Team1 CT Participations:', style=wide_label),
        widgets.IntSlider(description='Team1 CT Wins:', **count_kwargs),
        widgets.IntSlider(description='Team2 CT Wins:', **count_kwargs),
    ]

# Area where prediction results are rendered
output = widgets.Output()

# Prediction button
predict_button = widgets.Button(button_style='success', description="Predict Match Outcome")

# Rebuild the advanced box whenever the toggle's value changes
advanced_toggle.observe(update_advanced_options, names='value')

def get_team_stats(team_name, matches=None):
    """Summarise a team's stats over every match it appears in.

    The team's own batting/bowling rankings and Champions Trophy counts are
    taken from the ``Team1 ...`` columns on rows where it is Team1 and from
    the ``Team2 ...`` columns on rows where it is Team2, so appearances on
    either side contribute. (Previously Team2-side rows were ignored as soon
    as the team had a single Team1-side row.)

    Args:
        team_name: Team to look up in the 'Team1' / 'Team2' columns.
        matches: DataFrame of matches to search. Defaults to the
            module-level ``data`` frame.

    Returns:
        dict with keys 'batting' and 'bowling' (mean rankings), 'wl_ratio'
        (mean of 'Team1 W/L ratio over Team2' on the team's Team1 rows),
        'ct_participated' and 'ct_won' (maxima). Any value that cannot be
        computed falls back to 70 / 70 / 1.0 / 5 / 2 respectively.
    """
    if matches is None:
        matches = data

    defaults = {
        'batting': 70,
        'bowling': 70,
        'wl_ratio': 1.0,
        'ct_participated': 5,
        'ct_won': 2
    }

    is_team1 = matches['Team1'] == team_name
    is_team2 = matches['Team2'] == team_name
    involved = is_team1 | is_team2
    if not involved.any():
        return defaults

    def own_values(team1_col, team2_col):
        """Return the team's own value per match, from whichever side it was on."""
        picked = matches[team1_col].where(is_team1, matches[team2_col])
        return picked[involved]

    def or_default(value, key):
        """Replace a missing (NaN) statistic with its fallback."""
        return defaults[key] if pd.isna(value) else value

    batting = own_values('Team1 Avg Batting Ranking', 'Team2 Avg Batting Ranking').mean()
    bowling = own_values('Team1 Avg Bowling Ranking', 'Team2 Avg Bowling Ranking').mean()

    # This column is expressed from Team1's side, so only rows where the
    # team is Team1 are used; an empty selection yields NaN -> default.
    wl_ratio = matches.loc[is_team1, 'Team1 W/L ratio over Team2'].mean()

    ct_part = own_values('Team1 Total CTs participated', 'Team2 Total CTs participated').max()
    ct_won = own_values('Team1 Total CTs won', 'Team2 Total CTs won').max()

    return {
        'batting': or_default(batting, 'batting'),
        'bowling': or_default(bowling, 'bowling'),
        'wl_ratio': or_default(wl_ratio, 'wl_ratio'),
        'ct_participated': or_default(ct_part, 'ct_participated'),
        'ct_won': or_default(ct_won, 'ct_won')
    }

def on_predict_button_clicked(b):
    """Run both models for the selected match-up and print the results.

    Reads the current widget selections, builds one feature row per model,
    and writes the predicted score, win probabilities and a few explanatory
    notes into the ``output`` widget.

    Args:
        b: The button instance passed by ``Button.on_click`` (unused).
    """
    with output:
        clear_output()

        # Get selected teams
        team1 = team1_dropdown.value
        team2 = team2_dropdown.value

        # A team cannot play itself; the win-probability lines would be
        # meaningless, so stop early with a hint instead.
        if team1 == team2:
            print("Please select two different teams.")
            return

        # Get team stats
        team1_stats = get_team_stats(team1)
        team2_stats = get_team_stats(team2)

        # If advanced options are shown, use those values. The widgets are
        # read by position, matching the order they are created in.
        if advanced_toggle.value and len(advanced_box.children) > 0:
            (team1_batting, team2_batting, team1_bowling, team2_bowling,
             wl_ratio, team1_ct_part, team1_ct_won, team2_ct_won) = (
                slider.value for slider in advanced_box.children[:8])
        else:
            team1_batting = team1_stats['batting']
            team2_batting = team2_stats['batting']
            team1_bowling = team1_stats['bowling']
            team2_bowling = team2_stats['bowling']
            wl_ratio = team1_stats['wl_ratio']
            team1_ct_part = team1_stats['ct_participated']
            team1_ct_won = team1_stats['ct_won']
            team2_ct_won = team2_stats['ct_won']

        # There is no advanced slider for Team2's participations, so this
        # always comes from the historical stats.
        team2_ct_part = team2_stats['ct_participated']

        overs = overs_slider.value
        venue = venue_dropdown.value

        batting_diff = team1_batting - team2_batting
        bowling_diff = team1_bowling - team2_bowling

        # Prepare features for predictions. Building named DataFrames and
        # selecting by the training column lists keeps the column order in
        # sync with training and avoids scikit-learn's "X does not have
        # valid feature names" warning that a bare array triggers.
        feature_row = pd.DataFrame([{
            'Team1 Avg Batting Ranking': team1_batting,
            'Team2 Avg Batting Ranking': team2_batting,
            'Team1 Avg Bowling Ranking': team1_bowling,
            'Team2 Avg Bowling Ranking': team2_bowling,
            'Team1 W/L ratio over Team2': wl_ratio,
            'Team1 Total CTs participated': team1_ct_part,
            'Team1 Total CTs won': team1_ct_won,
            'Team2 Total CTs won': team2_ct_won,
            'batting_strength_diff': batting_diff,
            'bowling_strength_diff': bowling_diff,
            'experience_diff': team1_ct_part - team2_ct_part,
        }])
        score_input = feature_row[score_features]
        outcome_input = feature_row[outcome_features]

        # Make predictions
        predicted_score = score_model.predict(score_input)[0]
        adjusted_score = predicted_score * (overs / 50)  # Adjust for number of overs

        # predict_proba columns follow outcome_model.classes_; look the
        # labels up instead of assuming both classes are present in order.
        outcome_prob = outcome_model.predict_proba(outcome_input)[0]
        prob_by_label = dict(zip(outcome_model.classes_, outcome_prob))
        team1_win_prob = prob_by_label.get(1, 0.0)  # label 1 = Team1 won
        team2_win_prob = prob_by_label.get(0, 0.0)

        # Display results
        print(f"\n=== Prediction Results for {team1} vs {team2} ===")
        print(f"Venue: {venue}")
        print(f"Match Length: {overs} overs")

        print("\nScore Prediction:")
        print(f"Predicted Score: {adjusted_score:.0f} runs")

        print("\nMatch Outcome Prediction:")
        print(f"Probability {team1} wins: {team1_win_prob*100:.1f}%")
        print(f"Probability {team2} wins: {team2_win_prob*100:.1f}%")

        # Additional insights
        print("\nKey Factors Influencing Prediction:")
        if batting_diff > 10:
            print(f"- {team1} has significantly stronger batting ({team1_batting:.1f} vs {team2_batting:.1f})")
        elif -batting_diff > 10:
            print(f"- {team2} has significantly stronger batting ({team2_batting:.1f} vs {team1_batting:.1f})")

        # NOTE(review): here the team with the LOWER bowling value is called
        # stronger, while for batting the HIGHER value is - confirm that this
        # matches how the ranking columns are defined.
        if bowling_diff > 10:
            print(f"- {team2} has significantly stronger bowling ({team2_bowling:.1f} vs {team1_bowling:.1f})")
        elif -bowling_diff > 10:
            print(f"- {team1} has significantly stronger bowling ({team1_bowling:.1f} vs {team2_bowling:.1f})")

        if wl_ratio > 1.5:
            print(f"- {team1} has strong historical advantage (W/L ratio: {wl_ratio:.2f})")
        elif wl_ratio < 0.67:
            if wl_ratio > 0:
                print(f"- {team2} has strong historical advantage (W/L ratio: {1/wl_ratio:.2f} in their favor)")
            else:
                # The advanced slider allows 0; inverting it would raise
                # ZeroDivisionError, so report the raw ratio instead.
                print(f"- {team2} has strong historical advantage ({team1} W/L ratio: {wl_ratio:.2f})")

# Run the prediction handler on every button click
predict_button.on_click(on_predict_button_clicked)

# Display the UI: selectors on top, then advanced options, button and results
team_row = widgets.HBox([team1_dropdown, team2_dropdown])
match_row = widgets.HBox([overs_slider, venue_dropdown])
display(widgets.VBox([
    team_row,
    match_row,
    advanced_toggle,
    advanced_box,
    predict_button,
    output,
]))

# Usage instructions, printed one step per line
print("\nInstructions:")
for instruction in (
    "1. Select the teams playing",
    "2. Set the match length (overs)",
    "3. Choose the venue",
    "4. Click 'Predict Match Outcome'",
    "5. For more control, toggle 'Show Advanced Options'",
):
    print(instruction)

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.6/1.6 MB 27.7 MB/s eta 0:00:00
Installing collected packages: jedi
Successfully installed jedi-0.19.2


Saving all_champions_trophy_matches_results.csv to all_champions_trophy_matches_results (1).csv
Dataset Overview:
Number of matches: 128

First few matches:


Unnamed: 0,Team1,Team2,Toss,Match days,Winner,Player of the Match,Margin,Ground,Match Date,ODI Int Match,Team1 Avg Batting Ranking,Team2 Avg Batting Ranking,Team1 Avg Bowling Ranking,Team2 Avg Bowling Ranking,Team1 Total CTs participated,Team1 Total CTs won,Team2 Total CTs participated,Team2 Total CTs won,Team1 W/L ratio over Team2
0,South Africa,West Indies,"South Africa, elected to field first",daynight,South Africa,Jacques Kallis,4 wickets,Dhaka,"Nov 1, 1998",ODI # 1364,66.58,63.42,81.5,84.92,0,0,0,0,1.0
1,India,West Indies,"India, elected to bat first",daynight,West Indies,Mervyn Dillon,6 wickets,Dhaka,"Oct 31, 1998",ODI # 1363,58.0,63.92,63.73,86.25,0,0,0,0,0.527
2,South Africa,Sri Lanka,"Sri Lanka, elected to field first",daynight,South Africa,Jacques Kallis,92 runs,Dhaka,"Oct 30, 1998",ODI # 1362,66.92,53.55,83.08,69.18,0,0,0,0,1.166
3,Pakistan,West Indies,"West Indies, elected to bat first",daynight,West Indies,Keith Arthurton,30 runs,Dhaka,"Oct 29, 1998",ODI # 1361,88.36,65.0,70.0,87.17,0,0,0,0,0.806
4,Australia,India,"Australia, elected to field first",daynight,India,Sachin Tendulkar,44 runs,Dhaka,"Oct 28, 1998",ODI # 1360,58.55,58.0,78.73,64.82,0,0,0,0,1.38



Training score prediction model...
Score Prediction MAE: 62.61

Training outcome prediction model...
Outcome Prediction Accuracy: 0.65


=== Cricket Match Prediction System ===


VBox(children=(HBox(children=(Dropdown(description='Team 1:', options=('Afghanistan', 'Australia', 'Bangladesh…


Instructions:
1. Select the teams playing
2. Set the match length (overs)
3. Choose the venue
4. Click 'Predict Match Outcome'
5. For more control, toggle 'Show Advanced Options'
