In [1]:
# Basics
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ipywidgets as widgets

# Trying out a bunch of things. 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

#Maths 
from numpy import absolute
from numpy import mean
from numpy import std

#Visalization
from matplotlib import pyplot as plt

# Clean up options
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.precision", 2)

In [2]:
# Read All IPL Data
deliveres = pd.read_csv("C:/Users/utkar/OneDrive/Desktop/Analytics/Data/IPL Ball-by-Ball 2008-2020.csv")
matches = pd.read_csv("C:/Users/utkar/OneDrive/Desktop/Analytics/Data/IPL Matches 2008-2020.csv")

# Make a copy
del_df = deliveres.copy()
match_df = matches.copy()

In [3]:
comb = pd.merge(del_df, match_df, on = 'id', how='left')

In [4]:
comb = comb.sort_values(['id' , 'inning' , 'over' , 'ball'] , ascending = [True, True, True, True])

In [5]:
comb.columns

Index(['id', 'inning', 'over', 'ball', 'batsman', 'non_striker', 'bowler',
       'batsman_runs', 'extra_runs', 'total_runs', 'non_boundary', 'is_wicket',
       'dismissal_kind', 'player_dismissed', 'fielder', 'extras_type',
       'batting_team', 'bowling_team', 'Unnamed: 18', 'city', 'date',
       'player_of_match', 'venue', 'neutral_venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'eliminator', 'method', 'umpire1', 'umpire2'],
      dtype='object')

In [6]:
comb = comb[['id' , 'inning' , 'batting_team' , 'bowling_team' , 'over' , 'ball' , 'total_runs' , 'is_wicket' , 'player_dismissed' , 'venue']]

In [7]:
comb = comb.replace(np.NaN, 0)

In [8]:
comb['finalScore'] = comb.groupby(['id' , 'inning'])['total_runs'].transform('sum')

comb['current_score'] = comb.groupby(['id' , 'inning'])['total_runs'].apply(lambda x: x.cumsum())

comb['current_wickets'] = comb.groupby(['id' , 'inning'])['is_wicket'].apply(lambda x: x.cumsum())

tmp = comb.groupby(['id' , 'inning'])['total_runs'].rolling(min_periods = 1, window = 30).sum().reset_index()
comb['prev_30_runs'] = tmp['total_runs'].to_list()

tmp = comb.groupby(['id' , 'inning'])['is_wicket'].rolling(min_periods = 1, window = 30).sum().reset_index()
comb['prev_30_wickets'] = tmp['is_wicket'].to_list()

comb['prev_30_dot_balls'] = comb['total_runs'].apply(lambda x : 1 if x == 0 else 0)
tmp = comb.groupby(['id' , 'inning'])['prev_30_dot_balls'].rolling(min_periods = 1, window = 30).sum().reset_index()
comb['prev_30_dot_balls'] = tmp['prev_30_dot_balls'].to_list()

comb['prev_30_boundaries'] = comb['total_runs'].apply(lambda x : 1 if (x == 4 or x == 6) else 0)
tmp = comb.groupby(['id' , 'inning'])['prev_30_boundaries'].rolling(min_periods = 1, window = 30).sum().reset_index()
comb['prev_30_boundaries'] = tmp['prev_30_boundaries'].to_list()

convert_dict = {'prev_30_runs': int, 
                'prev_30_wickets': int,
                'prev_30_dot_balls': int,
                'prev_30_boundaries': int 
                }

comb = comb.astype(convert_dict)

In [9]:
venue_list = comb['venue'].unique().tolist()
batting_team_list = comb['batting_team'].unique().tolist()
bowling_team_list = comb['bowling_team'].unique().tolist()

In [10]:
comb = pd.get_dummies(data = comb , columns = ['batting_team' , 'bowling_team' , 'venue'])

In [11]:
# Choosing Features 
# Input columns are inning, current score (runs / wickets) , last 30 ball data (runs / wickets / Dot Balls / Boundaries)
# Batting Team, Bowling Team and venue (One Hot encoded)
x = comb.drop(labels = ['id' , 'finalScore' , 'player_dismissed', 'total_runs' , 'is_wicket'] , axis = 1)

# We need prediction for Final Score 
y = comb['finalScore'].values

# Split the data into test/train - 70/30 
x_train , x_test , y_train , y_test = train_test_split(x , y, test_size = 0.3 , random_state = 30)

In [12]:
# Linear Regressor 
linear_regressor = LinearRegression()
linear_regressor.fit(x_train, y_train)

print("Accuracy on training data  - " + str(linear_regressor.score(x_train, y_train) * 100) + "%")
y_pred = linear_regressor.predict(x_test)
print("Accuracy on test data  - " + str(linear_regressor.score(x_test, y_test) * 100) + "%")
y_pred = np.rint(y_pred)
print("Mean Absolute Error - " + str(mean_absolute_error(y_test, y_pred)))

Accuracy on training data  - 45.16391589483165%
Accuracy on test data  - 44.80172782224382%
Mean Absolute Error - 16.336917006943366


In [13]:
# Random Forest Regressor
rdf_regressor = RandomForestRegressor(n_estimators=10,max_features=None)
rdf_regressor.fit(x_train,y_train)

print("Accuracy on training data  - " + str(rdf_regressor.score(x_train, y_train) * 100) + "%")
y_pred = rdf_regressor.predict(x_test)
print("Accuracy on test data  - " + str(rdf_regressor.score(x_test, y_test) * 100) + "%")
y_pred = np.rint(y_pred)
print("Mean Absolute Error - " + str(mean_absolute_error(y_test, y_pred)))

Accuracy on training data  - 97.23890220791898%
Accuracy on test data  - 87.36333926724473%
Mean Absolute Error - 5.992746506779691


In [14]:
# Decision Tree
dt_regressor = DecisionTreeRegressor(criterion='mse', splitter='best',
                             max_depth=None, min_samples_split=2,
                             min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                             max_features=None, random_state=None,
                             max_leaf_nodes=None, min_impurity_decrease=0.0, 
                             min_impurity_split=None, 
                             ccp_alpha=0.0)


dt_regressor.fit(x_train,y_train)

print("Accuracy on training data  - " + str(dt_regressor.score(x_train, y_train) * 100) + "%")
y_pred = dt_regressor.predict(x_test)
print("Accuracy on test data  - " + str(dt_regressor.score(x_test, y_test) * 100) + "%")
y_pred = np.rint(y_pred)
print("Mean Absolute Error - " + str(mean_absolute_error(y_test, y_pred)))

Accuracy on training data  - 99.58089725599156%
Accuracy on test data  - 79.54435023522447%
Mean Absolute Error - 4.6638755362588515


In [15]:
pd.set_option('display.max_columns', None)
x_test.head()

Unnamed: 0,inning,over,ball,current_score,current_wickets,prev_30_runs,prev_30_wickets,prev_30_dot_balls,prev_30_boundaries,batting_team_Chennai Super Kings,batting_team_Deccan Chargers,batting_team_Delhi Daredevils,batting_team_Gujarat Lions,batting_team_Kings XI Punjab,batting_team_Kochi Tuskers Kerala,batting_team_Kolkata Knight Riders,batting_team_Mumbai Indians,batting_team_Pune Warriors,batting_team_Rajasthan Royals,batting_team_Rising Pune Supergiants,batting_team_Royal Challengers Bangalore,batting_team_Sunrisers Hyderabad,bowling_team_Chennai Super Kings,bowling_team_Deccan Chargers,bowling_team_Delhi Daredevils,bowling_team_Gujarat Lions,bowling_team_Kings XI Punjab,bowling_team_Kochi Tuskers Kerala,bowling_team_Kolkata Knight Riders,bowling_team_Mumbai Indians,bowling_team_Pune Warriors,bowling_team_Rajasthan Royals,bowling_team_Rising Pune Supergiants,bowling_team_Royal Challengers Bangalore,bowling_team_Sunrisers Hyderabad,venue_Barabati Stadium,venue_Brabourne Stadium,venue_Buffalo Park,venue_De Beers Diamond Oval,venue_Dr DY Patil Sports Academy,venue_Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium,venue_Dubai International Cricket Stadium,venue_Eden Gardens,venue_Feroz Shah Kotla,venue_Green Park,venue_Himachal Pradesh Cricket Association Stadium,venue_Holkar Cricket Stadium,venue_JSCA International Stadium Complex,venue_Kingsmead,venue_M Chinnaswamy Stadium,"venue_MA Chidambaram Stadium, Chepauk",venue_Maharashtra Cricket Association Stadium,venue_Nehru Stadium,venue_New Wanderers Stadium,venue_Newlands,venue_OUTsurance Oval,"venue_Punjab Cricket Association IS Bindra Stadium, Mohali","venue_Punjab Cricket Association Stadium, Mohali","venue_Rajiv Gandhi International Stadium, Uppal","venue_Sardar Patel Stadium, Motera",venue_Saurashtra Cricket Association Stadium,venue_Sawai Mansingh Stadium,venue_Shaheed Veer Narayan Singh International Stadium,venue_Sharjah Cricket Stadium,venue_Sheikh Zayed Stadium,venue_St George's Park,venue_Subrata Roy Sahara Stadium,venue_SuperSport Park,"venue_Vidarbha Cricket Association Stadium, Jamtha",venue_Wankhede Stadium
24061,2,12,4,82,3,40,1,9,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
189823,2,14,4,145,3,44,2,10,6,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
111833,1,1,4,2,1,2,1,9,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
74691,1,9,5,45,2,23,1,12,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
24097,2,18,1,139,3,53,0,4,6,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [16]:
def finalScorePrediction(test_inning , test_over , test_ball , test_current_score , test_current_wickets , 
                         test_prev_30_runs , test_prev_30_wickets , test_prev_30_dot_balls , test_prev_30_boundaries , 
                        test_venue , test_batting_team , test_bowling_team): 
    print( "Inning  = " + str(test_inning))
    print( "Venue  = " + str(test_venue))
    print( "Batting Team  = " + str(test_batting_team))
    print( "Bowling Team  = " + str(test_bowling_team))
    print( "Over    = " + str(test_over))
    print( "Ball    = " + str(test_ball))
    print( "Current Score  = " + str(test_current_score))
    print( "Current Wickets  = " + str(test_current_wickets))
    print( "Runs Scored in last 30 balls  = " + str(test_prev_30_runs))
    print( "Wickets Lost in last 30 balls  = " + str(test_prev_30_wickets))
    print( "Dot Balls in last 30 balls  = " + str(test_prev_30_dot_balls))
    print( "Boundaries scored in last 30 balls  = " + str(test_prev_30_boundaries))
    print()
    
    input = pd.DataFrame(columns = comb.columns)
    input = input.drop(labels = ['id' , 'finalScore' , 'player_dismissed', 'total_runs' , 'is_wicket'] , axis = 1)
    
    input.at[ 0, 'inning'] = test_inning
    input['over'] = test_over
    input['ball'] = test_ball
    input['current_score'] = test_current_score
    input['current_wickets'] = test_current_wickets
    input['prev_30_runs'] = test_prev_30_runs
    input['prev_30_wickets'] = test_prev_30_wickets
    input['prev_30_dot_balls'] = test_prev_30_dot_balls
    input['prev_30_boundaries'] = test_prev_30_boundaries
    input['venue_' + test_venue] = 1
    input['batting_team_' +  test_batting_team ] = 1
    input['bowling_team_' +  test_bowling_team ] = 1

    input = input.replace(np.nan,0)

    print( "Predicted Score (Random Forest) = " + str(np.rint(rdf_regressor.predict(input))))
    print( "Predicted Score (Decision Tree) = " + str(np.rint(dt_regressor.predict(input))))
    print( "Predicted Score (Linear Regres) = " + str(np.rint(linear_regressor.predict(input)))) 

In [17]:
# Predict
test_inning = 1
test_over = 12
test_ball = 0
test_current_score = 101
test_current_wickets = 2
test_prev_30_runs = 44
test_prev_30_wickets = 0
test_prev_30_dot_balls = 6
test_prev_30_boundaries = 6
test_batting_team = 'Mumbai Indians'
test_bowling_team = 'Kings XI Punjab'
test_venue = 'Punjab Cricket Association IS Bindra Stadium, Mohali'

In [18]:
finalScorePrediction(test_inning , test_over , test_ball , test_current_score , test_current_wickets , 
                         test_prev_30_runs , test_prev_30_wickets , test_prev_30_dot_balls , test_prev_30_boundaries , 
                        test_venue , test_batting_team , test_bowling_team)

Inning  = 1
Venue  = Punjab Cricket Association IS Bindra Stadium, Mohali
Batting Team  = Mumbai Indians
Bowling Team  = Kings XI Punjab
Over    = 12
Ball    = 0
Current Score  = 101
Current Wickets  = 2
Runs Scored in last 30 balls  = 44
Wickets Lost in last 30 balls  = 0
Dot Balls in last 30 balls  = 6
Boundaries scored in last 30 balls  = 6

Predicted Score (Random Forest) = [170.]
Predicted Score (Decision Tree) = [161.]
Predicted Score (Linear Regres) = [183.]


In [19]:
inning = widgets.RadioButtons( options=['1', '2'], description ='Inning',disabled=False , style={'description_width': 'initial'}, value='1')
venue_drop = widgets.Dropdown(options = venue_list , description='Venue' , value='Wankhede Stadium' , style={'description_width': 'initial'})
batting_team_drop = widgets.Dropdown(options = batting_team_list , description='Bowling Team' , value='Royal Challengers Bangalore' , style={'description_width': 'initial'})
bowling_team_drop = widgets.Dropdown(options = batting_team_list , description='Bowling Team' , value='Mumbai Indians' , style={'description_width': 'initial'})

current_score = widgets.IntSlider(min=0, max=300, step=1, description='Current Score',value=73 , style={'description_width': 'initial'})
current_wickets = widgets.IntSlider(min=0, max=10, step=1, description='Current Wickets',value=1 , style={'description_width': 'initial'})
current_over = widgets.IntSlider(min=0, max=19, step=1, description='Overs Finished',value=10 , style={'description_width': 'initial'})
current_ball = widgets.IntSlider(min=0, max=5, step=1, description='Balls Done in Current Over',value=3 , style={'description_width': 'initial'})

last_30_score = widgets.IntSlider(min=0, max=100, step=1, description='Runs Scored in Last 30 Balls',value=40 , style={'description_width': 'initial'})
last_30_wickets = widgets.IntSlider(min=0, max=10, step=1, description='Wickets lost in last 30 balls',value=1 , style={'description_width': 'initial'})
last_30_boundaries = widgets.IntSlider(min=0, max=20, step=1, description='Boundaries hit in last 30 balls',value=5 , style={'description_width': 'initial'})
last_30_dotBalls = widgets.IntSlider(min=0, max=25, step=1, description='DotBalls in the last 30 balls',value=7 , style={'description_width': 'initial'})

In [20]:
display(inning)
display(venue_drop)
display(batting_team_drop)
display(bowling_team_drop)

display(current_score)
display(current_wickets)
display(current_over)
display(current_ball)

display(last_30_score)
display(last_30_wickets)
display(last_30_boundaries)
display(last_30_dotBalls)

RadioButtons(description='Inning', options=('1', '2'), style=DescriptionStyle(description_width='initial'), va…

Dropdown(description='Venue', index=3, options=('M Chinnaswamy Stadium', 'Punjab Cricket Association Stadium, …

Dropdown(description='Bowling Team', index=1, options=('Kolkata Knight Riders', 'Royal Challengers Bangalore',…

Dropdown(description='Bowling Team', index=6, options=('Kolkata Knight Riders', 'Royal Challengers Bangalore',…

IntSlider(value=73, description='Current Score', max=300, style=SliderStyle(description_width='initial'))

IntSlider(value=1, description='Current Wickets', max=10, style=SliderStyle(description_width='initial'))

IntSlider(value=10, description='Overs Finished', max=19, style=SliderStyle(description_width='initial'))

IntSlider(value=3, description='Balls Done in Current Over', max=5, style=SliderStyle(description_width='initi…

IntSlider(value=40, description='Runs Scored in Last 30 Balls', style=SliderStyle(description_width='initial')…

IntSlider(value=1, description='Wickets lost in last 30 balls', max=10, style=SliderStyle(description_width='i…

IntSlider(value=5, description='Boundaries hit in last 30 balls', max=20, style=SliderStyle(description_width=…

IntSlider(value=7, description='DotBalls in the last 30 balls', max=25, style=SliderStyle(description_width='i…

In [21]:
# Predict
test_inning = inning.value
test_batting_team = batting_team_drop.value
test_bowling_team = bowling_team_drop.value
test_venue = venue_drop.value

test_over = current_over.value
test_ball = current_ball.value
test_current_score = current_score.value
test_current_wickets = current_wickets.value

test_prev_30_runs = last_30_score.value
test_prev_30_wickets = last_30_wickets.value
test_prev_30_dot_balls = last_30_dotBalls.value
test_prev_30_boundaries = last_30_boundaries.value

finalScorePrediction(test_inning , test_over , test_ball , test_current_score , test_current_wickets , 
                         test_prev_30_runs , test_prev_30_wickets , test_prev_30_dot_balls , test_prev_30_boundaries , 
                        test_venue , test_batting_team , test_bowling_team)

Inning  = 1
Venue  = Wankhede Stadium
Batting Team  = Royal Challengers Bangalore
Bowling Team  = Mumbai Indians
Over    = 10
Ball    = 3
Current Score  = 73
Current Wickets  = 1
Runs Scored in last 30 balls  = 40
Wickets Lost in last 30 balls  = 1
Dot Balls in last 30 balls  = 7
Boundaries scored in last 30 balls  = 5

Predicted Score (Random Forest) = [173.]
Predicted Score (Decision Tree) = [158.]
Predicted Score (Linear Regres) = [165.]
