# Problem Statement: Build an Interactive ML Model for IPL 2023 Final GT vs CSK

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

# Show every row and column of a DataFrame and never wrap or truncate cell text.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# None means "no column-width limit". The older -1 sentinel is deprecated and
# rejected by newer pandas releases.
pd.set_option('display.max_colwidth', None)

In [6]:
# Load the ball-by-ball IPL data (path is relative to the notebook's working directory).
ipl_df = pd.read_csv('IPL_ball_by_ball_updated.csv')

In [7]:
# Peek at the last row to check the most recent delivery in the data.
ipl_df.tail(1)

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed
243814,1370353,2023,2023-05-29,"Narendra Modi Stadium, Ahmedabad",2,14.6,Chennai Super Kings,Gujarat Titans,RA Jadeja,S Dube,MM Sharma,4,0,,,,,,,,,


In [10]:
# Keep only rows from innings 1 and 2 (presumably this drops super-over innings;
# confirm against the dataset). `.copy()` makes the result an independent frame,
# so the column assignments that follow do not write into a slice of the loaded data.
ipl_df = ipl_df[ipl_df.innings.isin([1, 2])].copy()

In [11]:
# Runs added to the team total on each delivery: runs off the bat plus extras.
# Column-wise addition is vectorised, avoiding a Python-level call for every row.
ipl_df['total_runs'] = ipl_df['runs_off_bat'] + ipl_df['extras']

In [13]:
# 1 when a dismissed player is recorded for the delivery, 0 when the field is empty (NaN).
ipl_df['isOut'] = ipl_df['player_dismissed'].notna().astype(int)

# Choosing Teams

In [17]:
# t1 is the side batting first, t2 the side batting second (the chase).
t1, t2 = 'Gujarat Titans', 'Chennai Super Kings'

# Choosing a match between the teams

In [18]:
# List the ids of every match in which t1 batted in innings 1 against t2.
ipl_df[(ipl_df.batting_team == t1) & (ipl_df.bowling_team == t2) & (ipl_df.innings == 1)].match_id.unique()

array([1370353], dtype=int64)

In [19]:
# Id of the match to inspect (its rows are dated 2023-05-29, GT batting first against CSK).
required_match_id = 1370353

In [20]:
# Show the first delivery of the selected match as a sanity check.
ipl_df[ipl_df.match_id == required_match_id].head(1)

Unnamed: 0,match_id,season,start_date,venue,innings,ball,batting_team,bowling_team,striker,non_striker,bowler,runs_off_bat,extras,wides,noballs,byes,legbyes,penalty,wicket_type,player_dismissed,other_wicket_type,other_player_dismissed,total_runs,isOut
243599,1370353,2023,2023-05-29,"Narendra Modi Stadium, Ahmedabad",1,0.1,Gujarat Titans,Chennai Super Kings,WP Saha,Shubman Gill,DL Chahar,0,0,,,,,,,,,,0,0


In [21]:
# Total dismissals recorded while each side was batting, over every row of ipl_df
# (all of that side's deliveries in the data, not only the selected match).
t1_outs = ipl_df.loc[ipl_df['batting_team'] == t1, 'isOut'].sum()
t2_outs = ipl_df.loc[ipl_df['batting_team'] == t2, 'isOut'].sum()

In [22]:
# How often each per-ball run total occurred while each side was batting
# (a Series indexed by the run value).
t1_outcomes = ipl_df.loc[ipl_df['batting_team'] == t1, 'total_runs'].value_counts()
t2_outcomes = ipl_df.loc[ipl_df['batting_team'] == t2, 'total_runs'].value_counts()

In [23]:
# Outcomes modelled for a single delivery: a run value (0, 1, 2, 3, 4 or 6) or 'w' for a wicket.
# Any other per-ball run total is not represented in this list.
outcomes = [0, 1, 2, 3, 4, 6, 'w']

In [24]:
def _count_outcomes(run_counts, dismissals, outcome_labels):
    """Return the number of occurrences of each label in `outcome_labels`.

    run_counts     -- Series mapping a per-ball run total to how often it occurred
    dismissals     -- total number of wickets, reported for the 'w' label
    outcome_labels -- labels to report, in output order

    A run value that never occurred counts as 0. Only that specific case is
    defaulted; any other problem (for example a misspelled variable) still
    raises instead of being silently turned into a zero.
    """
    return [
        int(dismissals) if label == 'w' else int(run_counts.get(label, 0))
        for label in outcome_labels
    ]


t1_outcomes_count = _count_outcomes(t1_outcomes, t1_outs, outcomes)
t2_outcomes_count = _count_outcomes(t2_outcomes, t2_outs, outcomes)

In [25]:
# Outcome counts for t1, in the order of `outcomes` (0, 1, 2, 3, 4, 6, wicket).
t1_outcomes_count

[1208, 1796, 277, 9, 514, 202, 168]

In [26]:
# Outcome counts for t2, in the order of `outcomes` (0, 1, 2, 3, 4, 6, wicket).
t2_outcomes_count

[9171, 11334, 1839, 106, 3029, 1392, 1166]

In [27]:
# Turn each side's outcome counts into per-ball probabilities (each list sums to 1).
t1_pb, t2_pb = (
    [count / sum(counts) for count in counts]
    for counts in (t1_outcomes_count, t2_outcomes_count)
)

In [28]:
# Per-ball outcome probabilities for t1 and t2, in the order of `outcomes`.
t1_pb, t2_pb

([0.28941063727839006,
  0.4302827024436991,
  0.0663632007666507,
  0.002156205079060853,
  0.1231432678485865,
  0.048394825107810256,
  0.04024916147580259],
 [0.32710347041409565,
  0.40425152477083853,
  0.06559189642258444,
  0.003780718336483932,
  0.10803580982273424,
  0.04964867853193994,
  0.04158790170132325])

# Probability Values:

In [29]:
#GT = [0.28941, 0.43028, 0.06636, 0.0, 0.12314, 0.04839,0.04024]
#CSK = [0.32710, 0.40425, 0.06559, 0.0, 0.10803, 0.04964, 0.04158]

In [31]:
def get_pbvalues(teamName):
    """Return the cumulative per-ball outcome probabilities for a team.

    Parameters
    ----------
    teamName : str
        'GT' or 'CSK'.

    Returns
    -------
    tuple
        (p_0, p_1, p_2, p_3, p_4, p_6, p_w): running totals of the
        probabilities of scoring 0, 1, 2, 3, 4 and 6 runs, followed by 1 for
        the wicket band. A uniform draw r in [0, 1) is mapped to the first
        entry with r <= entry, so the wicket probability is 1 - p_6.

    Raises
    ------
    ValueError
        If `teamName` is not a known team.
    """
    # Probabilities of 0, 1, 2, 3, 4 and 6 runs, rounded to 5 decimal places.
    # The 3-run entry is kept at its measured value: setting it to zero would
    # hand that share of deliveries to the wicket band.
    run_probabilities = {
        'GT': (0.28941, 0.43028, 0.06636, 0.00216, 0.12314, 0.04839),
        'CSK': (0.32710, 0.40425, 0.06559, 0.00378, 0.10803, 0.04964),
    }

    if teamName not in run_probabilities:
        raise ValueError(
            "Unknown team %r; expected one of %s"
            % (teamName, sorted(run_probabilities))
        )

    cumulative = []
    running_total = 0.0
    for probability in run_probabilities[teamName]:
        running_total += probability
        cumulative.append(running_total)

    p_0, p_1, p_2, p_3, p_4, p_6 = cumulative
    p_w = 1

    return p_0, p_1, p_2, p_3, p_4, p_6, p_w

## Runs Prediction Model:

In [32]:
def predict_runs(target, current_score, current_wickets, current_overs, total_overs=20):
    """Simulate the rest of the chasing innings ball by ball and return the final score.

    Each remaining delivery draws one uniform random number and maps it to an
    outcome through the cumulative probabilities from get_pbvalues('CSK').

    Parameters
    ----------
    target : number
        Score the chase is measured against; simulation stops once the score exceeds it.
    current_score : number
        Runs already scored.
    current_wickets : int
        Wickets already lost.
    current_overs : number
        Overs already bowled. A fractional value is read as a fraction of an
        over (12.5 = 12 overs and 3 balls), not cricket "12.3" notation.
    total_overs : number, optional
        Length of the innings in overs. Defaults to 20; pass a smaller value
        for a shortened match.

    Returns
    -------
    number
        The simulated score when the innings ends: balls exhausted, ten
        wickets down, or the score has passed `target`.
    """
    # Only the chasing side bats here, so only its thresholds are needed.
    p_0, p_1, p_2, p_3, p_4, p_6, _ = get_pbvalues('CSK')
    run_bands = ((p_0, 0), (p_1, 1), (p_2, 2), (p_3, 3), (p_4, 4), (p_6, 6))

    pred_runs = current_score
    pred_wks = current_wickets

    # A side that is already all out cannot add to its score.
    if pred_wks >= 10:
        return pred_runs

    # int() keeps range() valid when overs are given as a float.
    leftover_balls = int(total_overs * 6 - current_overs * 6)

    for _ in range(leftover_balls):
        r_value = np.random.random()

        for upper_bound, runs in run_bands:
            if r_value <= upper_bound:
                pred_runs += runs
                break
        else:
            # The draw fell above every run band: a wicket, no runs.
            pred_wks += 1
            if pred_wks >= 10:
                break

        if pred_runs > target:
            break

    return pred_runs

In [44]:
# predict_runs(target, current_score, current_wickets, current_overs)
# One simulated chase from 0/0 after 0 overs. No random seed is set, so the result changes on every run.
predict_runs(171, 0, 0, 0)

172

## Winner Function:

In [45]:
def get_win(pred_runs, target):
    """Classify a simulated chase against `target`.

    Returns 'tie' when the score equals the target, 'win' when it is
    higher, and 'lose' otherwise.

    NOTE(review): if `target` means the score needed to win, equalling it
    would normally be a win rather than a tie. Confirm the intended meaning.
    """
    if pred_runs == target:
        return 'tie'
    return 'win' if pred_runs > target else 'lose'

In [72]:
# Monte Carlo estimate of the chase result from a given match state.
#
# Match context: GT made 214/4; the chase was reduced by DLS to 171 from 15 overs.
# NOTE(review): predict_runs simulates 120 - current_overs*6 balls, i.e. a
# 20-over innings, so these runs give CSK more balls than the 15-over chase
# allowed. Confirm whether that is intended.

target = 171

# State of the chase being simulated: score/wickets after this many overs.
current_score, current_wickets, current_overs = 133, 3, 12

# Number of simulated chases.
iter_count = 100

runs_ls = [
    predict_runs(target, current_score, current_wickets, current_overs)
    for _ in range(iter_count)
]
results_ls = [get_win(simulated_runs, target) for simulated_runs in runs_ls]

win_count = results_ls.count('win')
tie_count = results_ls.count('tie')
# Every result that is neither a win nor a tie counts as a loss.
lose_count = len(results_ls) - win_count - tie_count

In [75]:
# (wins, ties, losses) over the simulated chases.
win_count, tie_count, lose_count

(97, 1, 2)

## Find out Runs at a required stage:

In [76]:
# find out runs:
def find_runs(current_score, target, current_wickets, at_overs):
    """Find the lowest score at `at_overs` from which the chase is won at least half the time.

    Candidate scores from `current_score` up to and including `target` are
    tried in increasing order. For each one, 100 chases are simulated with
    predict_runs and classified with get_win.

    Parameters
    ----------
    current_score : int
        Lowest candidate score.
    target : int
        Score being chased; also the highest candidate score.
    current_wickets : int
        Wickets lost at the stage being examined.
    at_overs : int
        Overs bowled at the stage being examined.

    Returns
    -------
    int
        The first candidate score with at least 50 wins out of 100
        simulations, or `current_score` if no candidate qualifies.
    """
    n_simulations = 100
    min_wins = 50

    for candidate_score in range(current_score, target + 1):
        win_count = sum(
            get_win(
                predict_runs(target, candidate_score, current_wickets, at_overs),
                target,
            ) == 'win'
            for _ in range(n_simulations)
        )
        # Candidates are tried in increasing order, so the first to qualify is
        # the answer and higher scores need not be simulated.
        if win_count >= min_wins:
            return candidate_score

    return current_score

## At this stage CSK had scored 133/3 in 12 overs, and the score the model required at that point was also 133, so they were keeping pace with the required run rate.

In [79]:
# find_runs(current_score, target, current_wickets, at_overs)
# Lowest score from 133 upward at which a chase of 171, 3 wickets down after 12 overs, is won in at least half the simulations.
find_runs(133, 171, 3, 12)

133

## Find out Wickets they can afford to lose:

In [98]:
def find_wickets(current_score, target, current_wickets, at_overs):
    """Find the wickets-down count at which the simulated chase stops being viable.

    Wicket counts from `current_wickets` up to 9 are tried in increasing
    order. For each one, 100 chases starting at `current_score` after
    `at_overs` are simulated with predict_runs and classified with get_win.

    Parameters
    ----------
    current_score : int
        Score at the stage being examined.
    target : int
        Score being chased.
    current_wickets : int
        Lowest wickets-down count to try.
    at_overs : int
        Overs bowled at the stage being examined.

    Returns
    -------
    int
        The first wickets-down count with fewer than 45 wins out of 100
        simulations, or `current_wickets` if every count reaches 45 wins.

    NOTE(review): falling back to `current_wickets` when no count drops
    below the threshold returns the lowest value tried. Confirm that this is
    the intended answer for a chase that stays viable at every wicket count.
    """
    n_simulations = 100
    win_floor = 45

    for wickets_down in range(current_wickets, 10):
        win_count = sum(
            get_win(
                predict_runs(target, current_score, wickets_down, at_overs),
                target,
            ) == 'win'
            for _ in range(n_simulations)
        )
        # Counts are tried in increasing order, so the first one below the
        # floor is the answer.
        if win_count < win_floor:
            return wickets_down

    return current_wickets

In [99]:
# Wickets-down count at which fewer than 45 of 100 simulated chases are won,
# for a chase of 171 from 133 after 12 overs, starting the search at 3 wickets.

# find_wickets(current_score, target, current_wickets, at_overs)
find_wickets(133, 171, 3, 12)

9

## Function: Interactive chart

In [108]:
%matplotlib inline
from ipywidgets import interactive
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np

def find_runs_wickets(current_wks, at_overs, target_score):
    """Plot where the chasing side should be (score/wickets) after `at_overs`.

    Parameters
    ----------
    current_wks : int
        Wickets lost, passed to find_runs and find_wickets.
    at_overs : int
        Overs bowled at the stage being plotted.
    target_score : int
        Score being chased; drawn as a dashed horizontal line.

    The search for the required score starts from 133, the score CSK had
    after 12 overs. For `at_overs == 12` the actual match position (133/3)
    is plotted directly, so no simulation is run for that stage.

    Returns None; the figure is shown with plt.show().
    """
    plt.figure(figsize = (16, 6))
    x = np.arange(21)

    if at_overs == 12:
        # Known match state at 12 overs: skip the expensive simulations.
        req_value = 133
        req_wk_value = 3
    else:
        req_value = find_runs(133, target_score, current_wks, at_overs)
        req_wk_value = find_wickets(133, target_score, current_wks, at_overs)

    score_label = str(req_value) + '/' + str(req_wk_value)

    plt.scatter(at_overs, req_value, s = 1200, color = 'red')
    plt.axhline(target_score, ls = '--', color = 'blue')
    plt.text(1, target_score + 10, 'Target Score :' + str(target_score), color = 'darkblue', fontsize = 13)
    # Score/wickets written inside the red marker.
    plt.text(at_overs, req_value, score_label, color = 'white', fontsize = 12, horizontalalignment='center', verticalalignment='center')
    # The same information spelled out below the marker.
    plt.text(at_overs, req_value - 30, 'CSK has to be at ' + score_label + ' after ' + str(at_overs) + ' ov', horizontalalignment='center')
    plt.ylim(50, target_score + 50)
    plt.xticks(x)
    plt.title('Where should CSK be?', fontsize = 20)
    plt.xlabel('Overs')
    plt.ylabel('Score')
    plt.show()

# x=widgets.IntSlider(min=-10, max=30, step=1, value=10)

# find_wickets(current_score, target, current_wickets, at_overs)
# find_wickets(133, 171, 3, 12)

# Interactive Visualization:

In [109]:
print('current_score = CSK: 133/3 (12 overs)')
print('')

# One slider per argument of find_runs_wickets; the plot is redrawn when a slider changes.
wickets_slider = widgets.IntSlider(min=1, max=10, step=1, value=1)
overs_slider = widgets.IntSlider(min=10, max=15, step=1, value=10)
target_slider = widgets.IntSlider(min=0, max=250, step=1, value=171)

interactive_plot = interactive(
    find_runs_wickets,
    current_wks=wickets_slider,
    at_overs=overs_slider,
    target_score=target_slider,
)

# The last child is the output area; fix its height on the layout.
output = interactive_plot.children[-1]
output.layout.height = '450px'
interactive_plot

current_score = CSK: 133/3 (12 overs)



interactive(children=(IntSlider(value=1, description='current_wks', max=10, min=1), IntSlider(value=10, descri…

# This is the Interactive ML Model for IPL Final 2023 - GT vs CSK