In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
import joblib

# Load data
ipl_df = pd.read_csv(r'C:\Users\ADMIN\Desktop\IPL_ball_by_ball_updated (1).csv')
ipl_df.head()

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,...,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed
0,335982,2008,2008-04-18,M Chinnaswamy Stadium,2,6.8,Royal Challengers Bangalore,Kolkata Knight Riders,MV Boucher,CL White,...,0,,,,,,,,,
1,335982,2008,2008-04-18,M Chinnaswamy Stadium,2,2.7,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,...,0,,,,,,,,,
2,335982,2008,2008-04-18,M Chinnaswamy Stadium,2,3.1,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,...,0,,,,,,,,,
3,335982,2008,2008-04-18,M Chinnaswamy Stadium,2,3.2,Royal Challengers Bangalore,Kolkata Knight Riders,W Jaffer,JH Kallis,...,0,,,,,,,,,
4,335982,2008,2008-04-18,M Chinnaswamy Stadium,2,3.3,Royal Challengers Bangalore,Kolkata Knight Riders,JH Kallis,W Jaffer,...,0,,,,,,,,,


In [2]:
# Filter for relevant innings
ipl_df = ipl_df[(ipl_df.innings == 1) | (ipl_df.innings == 2)]

# Feature Engineering
ipl_df['total_runs'] = ipl_df['runs_off_bat'] + ipl_df['extras']
ipl_df['isOut'] = ipl_df['player_dismissed'].apply(lambda x: 1 if pd.notna(x) else 0)


In [3]:
# Aggregate data
aggregated_df = ipl_df.groupby(['match_id', 'innings']).agg({
    'total_runs': 'sum',
    'isOut': 'sum',
    'ball': 'count'
}).reset_index()

aggregated_df['run_rate'] = aggregated_df['total_runs'] / (aggregated_df['ball'] / 6)
aggregated_df['remaining_overs'] = 20 - (aggregated_df['ball'] / 6)

In [4]:
# Calculate target for second innings
first_innings_scores = aggregated_df[aggregated_df['innings'] == 1][['match_id', 'total_runs']]
first_innings_scores.columns = ['match_id', 'first_innings_score']
aggregated_df = aggregated_df.merge(first_innings_scores, on='match_id', how='left')
aggregated_df['first_innings_score'] = aggregated_df['first_innings_score'].fillna(0)

In [5]:
# Determine winning team (binary target: 1 for win, 0 for loss)
aggregated_df['winning_team'] = aggregated_df.apply(lambda x: 1 if (x['innings'] == 2 and x['total_runs'] > x['first_innings_score']) else 0, axis=1)

# Prepare data for model training
X_clf = aggregated_df[['total_runs', 'isOut', 'run_rate', 'remaining_overs']]
y_clf = aggregated_df['winning_team']

X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X_clf, y_clf, test_size=0.2, random_state=42)


In [6]:
# Train classification model
clf = RandomForestClassifier()
clf.fit(X_train_clf, y_train_clf)
y_pred_clf = clf.predict(X_test_clf)
print(f'Accuracy: {accuracy_score(y_test_clf, y_pred_clf)}')

Accuracy: 0.8842105263157894


In [10]:
# Prepare data for regression
X_reg = aggregated_df[['total_runs', 'isOut', 'run_rate']]
y_reg = aggregated_df['first_innings_score']

X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

# Train regression model
reg = GradientBoostingRegressor()
reg.fit(X_train_reg, y_train_reg)
y_pred_reg = reg.predict(X_test_reg)
print(f'RMSE: {mean_squared_error(y_test_reg, y_pred_reg, squared=False)}')

# Save models
joblib.dump(clf, 'clf_model.pkl')
joblib.dump(reg, 'reg_model.pkl')


RMSE: 15.54668866902905


['reg_model.pkl']

In [11]:
streamlit_code = """
import streamlit as st
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

# Load models
clf_model = joblib.load('clf_model.pkl')
reg_model = joblib.load('reg_model.pkl')

# Streamlit app function
def run_app():
    st.title('IPL Match Predictor')
    st.sidebar.header('Input Parameters')

    # Input parameters
    runs = st.sidebar.number_input('Current Runs', min_value=0, value=100)
    wickets = st.sidebar.number_input('Wickets Lost', min_value=0, max_value=10, value=2)
    overs = st.sidebar.number_input('Overs Completed', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
    target = st.sidebar.number_input('Target Score', min_value=0, value=180)

    # Derived parameters
    run_rate = runs / (overs + 1e-5)
    remaining_overs = 20 - overs
    required_run_rate = (target - runs) / (remaining_overs + 1e-5)

    # Prediction input
    prediction_input_clf = [[runs, wickets, run_rate, remaining_overs]]
    prediction_input_reg = [[runs, wickets, run_rate]]

    # Predict winning probability
    winning_prob = clf_model.predict_proba(prediction_input_clf)[0][1]

    # Predict target score
    optimal_target = reg_model.predict(prediction_input_reg)[0]

    # Display predictions
    st.subheader('Predictions')
    st.write(f'**Winning Probability:** {winning_prob:.2f}')
    st.write(f'**Optimal Target Score:** {optimal_target:.2f}')

    # Visualization
    fig, ax = plt.subplots()
    sns.lineplot(x=[0, 20], y=[target, target], ax=ax, label='Target Score')
    sns.lineplot(x=[0, overs], y=[0, runs], ax=ax, label='Current Score')
    ax.axvline(x=overs, color='red', linestyle='--', label='Current Over')
    ax.set_xlabel('Overs')
    ax.set_ylabel('Runs')
    ax.set_title('Match Progress')
    ax.legend()

    st.pyplot(fig)

# Run the Streamlit app
if __name__ == '__main__':
    run_app()
"""

with open('streamlit_app.py', 'w') as f:
    f.write(streamlit_code)


In [12]:
import subprocess

subprocess.run(["streamlit", "run", "streamlit_app.py"])


CompletedProcess(args=['streamlit', 'run', 'streamlit_app.py'], returncode=4294967295)

In [8]:
import streamlit as st
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

# Load models
clf_model = joblib.load('clf_model.pkl')
reg_model = joblib.load('reg_model.pkl')

In [9]:
import streamlit as st
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

# Load models
clf_model = joblib.load('clf_model.pkl')
reg_model = joblib.load('reg_model.pkl')

# Streamlit app function
def run_app():
    st.title('IPL Match Predictor')
    st.sidebar.header('Input Parameters')

    # Input parameters
    runs = st.sidebar.number_input('Current Runs', min_value=0, value=100)
    wickets = st.sidebar.number_input('Wickets Lost', min_value=0, max_value=10, value=2)
    overs = st.sidebar.number_input('Overs Completed', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
    target = st.sidebar.number_input('Target Score', min_value=0, value=180)

    # Derived parameters
    run_rate = runs / (overs + 1e-5)
    remaining_overs = 20 - overs
    required_run_rate = (target - runs) / (remaining_overs + 1e-5)

    # Prediction input
    prediction_input_clf = [[runs, wickets, run_rate, remaining_overs]]
    prediction_input_reg = [[runs, wickets, run_rate]]

    # Predict winning probability
    winning_prob = clf_model.predict_proba(prediction_input_clf)[0][1]

    # Predict target score
    optimal_target = reg_model.predict(prediction_input_reg)[0]

    # Display predictions
    st.subheader('Predictions')
    st.write(f'**Winning Probability:** {winning_prob:.2f}')
    st.write(f'**Optimal Target Score:** {optimal_target:.2f}')

    # Visualization
    fig, ax = plt.subplots()
    sns.lineplot(x=[0, 20], y=[target, target], ax=ax, label='Target Score')
    sns.lineplot(x=[0, overs], y=[0, runs], ax=ax, label='Current Score')
    ax.axvline(x=overs, color='red', linestyle='--', label='Current Over')
    ax.set_xlabel('Overs')
    ax.set_ylabel('Runs')
    ax.set_title('Match Progress')
    ax.legend()

    st.pyplot(fig)

# Run the Streamlit app
if __name__ == '__main__':
    from streamlit import cli as stcli
    import sys
    if st._is_running_with_streamlit:
        run_app()
    else:
        sys.argv = ["streamlit", "run", "app.py"]
        stcli.main()


ImportError: cannot import name 'cli' from 'streamlit' (C:\Users\ADMIN\Documents\Custom Office Templates\lib\site-packages\streamlit\__init__.py)

In [9]:
# Streamlit app function
def run_app():
    st.title('IPL Match Predictor')
    st.sidebar.header('Input Parameters')

    # Input parameters
    runs = st.sidebar.number_input('Current Runs', min_value=0, value=100)
    wickets = st.sidebar.number_input('Wickets Lost', min_value=0, max_value=10, value=2)
    overs = st.sidebar.number_input('Overs Completed', min_value=0.0, max_value=20.0, value=10.0, step=0.1)
    target = st.sidebar.number_input('Target Score', min_value=0, value=180)

    # Derived parameters
    run_rate = runs / (overs + 1e-5)
    remaining_overs = 20 - overs
    required_run_rate = (target - runs) / (remaining_overs + 1e-5)

    # Prediction input
    prediction_input_clf = [[runs, wickets, run_rate, remaining_overs]]
    prediction_input_reg = [[runs, wickets, run_rate]]

    # Predict winning probability
    winning_prob = clf_model.predict_proba(prediction_input_clf)[0][1]

    # Predict target score
    optimal_target = reg_model.predict(prediction_input_reg)[0]

    # Display predictions
    st.subheader('Predictions')
    st.write(f'**Winning Probability:** {winning_prob:.2f}')
    st.write(f'**Optimal Target Score:** {optimal_target:.2f}')

    # Visualization
    fig, ax = plt.subplots()
    sns.lineplot(x=[0, 20], y=[target, target], ax=ax, label='Target Score')
    sns.lineplot(x=[0, overs], y=[0, runs], ax=ax, label='Current Score')
    ax.axvline(x=overs, color='red', linestyle='--', label='Current Over')
    ax.set_xlabel('Overs')
    ax.set_ylabel('Runs')
    ax.set_title('Match Progress')
    ax.legend()

    st.pyplot(fig)

In [10]:
# To run the Streamlit app within a Jupyter notebook
if __name__ == '__main__':
    run_app()

2024-07-02 14:36:02.532 
  command:

    streamlit run C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [13]:
pip install streamlit pandas scikit-learn matplotlib seaborn joblib jupyter-streamlit


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement jupyter-streamlit (from versions: none)
ERROR: No matching distribution found for jupyter-streamlit

[notice] A new release of pip is available: 23.3.2 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
pip install --upgrade pip

Collecting pip
  Downloading pip-24.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.1.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/1.8 MB 487.6 kB/s eta 0:00:04
   -- ------------------------------------- 0.1/1.8 MB 871.5 kB/s eta 0:00:02
   -- ------------------------------------- 0.1/1.8 MB 901.1 kB/s eta 0:00:02
   --- ------------------------------------ 0.1/1.8 MB 853.3 kB/s eta 0:00:02
   ---- ----------------------------------- 0.2/1.8 MB 827.9 kB/s eta 0:00:02
   ------ --------------------------------- 0.3/1.8 MB 983.9 kB/s eta 0:00:02
   ------- -------------------------------- 0.3/1.8 MB 1.0 MB/s eta 0:00:02
   --------- ------------------------------ 0.5/1.8 MB 1.1 MB/s eta 0:00:02
   ---------- ----------------------------- 0.5/1.8 MB 1.1 MB/s eta 0:00:02
   ----------- ---------------------------