In [1]:
import pandas as pd
import numpy as np

import pickle

In [2]:
# Load the IPL dataset from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='ipl.csv')

In [3]:
# Columns removed before any feature engineering takes place.
drop_cols = [
    'mid',
    'venue',
    'batsman',
    'bowler',
    'striker',
    'non-striker',
]

# Rebind `df` to the frame without those columns.
df = df.drop(columns=drop_cols)

In [4]:
# The eight teams kept in the dataset.
selected_teams = [
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Mumbai Indians',
    'Kings XI Punjab',
    'Royal Challengers Bangalore',
    'Delhi Daredevils',
    'Sunrisers Hyderabad',
]

# Keep a row only when both the batting and the bowling side are selected teams.
df = df[
    df['bat_team'].isin(selected_teams)
    & df['bowl_team'].isin(selected_teams)
]

In [5]:
# Discard the first five overs: keep rows whose `overs` value is at least 5.0.
df = df.loc[df['overs'].ge(5.0)]

In [6]:
# One-hot encode the two team columns; every other column passes through unchanged.
df_dummies = pd.get_dummies(df, columns=['bat_team', 'bowl_team'])

# Display the resulting column order.
df_dummies.columns

Index(['date', 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5',
       'total', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils',
       'bat_team_Kings XI Punjab', 'bat_team_Kolkata Knight Riders',
       'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',
       'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',
       'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils',
       'bowl_team_Kings XI Punjab', 'bowl_team_Kolkata Knight Riders',
       'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',
       'bowl_team_Royal Challengers Bangalore',
       'bowl_team_Sunrisers Hyderabad'],
      dtype='object')

In [7]:
# Remove the `date` column from the encoded frame.
df_dummies = df_dummies.drop(columns=['date'])

In [8]:
# Work on an independent (deep) copy so `df_dummies` itself is left untouched.
data_copy = df_dummies.copy(deep=True)

In [9]:
# Regression target: the `total` column. It is read from `data_copy`, the same
# frame the feature matrix is built from, so target and features always share
# the same rows (the values are identical to df['total'] at this point).
label = data_copy['total']

In [10]:
# Feature matrix: every remaining column except the `total` target.
features = data_copy.drop(columns='total')

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    label,
    test_size=0.3,
    random_state=28,
)

# Fit an ordinary least-squares model on the training portion.
LinReg_model = LinearRegression()
LinReg_model.fit(x_train, y_train)

# R^2 on the training and held-out sets (stored, not displayed).
train_score = LinReg_model.score(x_train, y_train)
test_score = LinReg_model.score(x_test, y_test)

In [12]:
# Predicted totals for the held-out rows.
y_hat = LinReg_model.predict(X=x_test)

In [13]:
from sklearn.metrics import r2_score

# Coefficient of determination of the held-out predictions against the true totals.
r_squared = r2_score(y_true=y_test, y_pred=y_hat)
print('The R-square value is: ', r_squared)

The R-square value is:  0.6581102534259382


In [14]:
# Persist the fitted linear-regression model to a pickle file.
filename = 'ipl-prediction-lr-model.pkl'
# The context manager closes the file even if pickle.dump raises,
# so the handle is never leaked.
with open(filename, 'wb') as pickle_out:
    pickle.dump(LinReg_model, pickle_out)

In [17]:
#selecting linear regression
def predict_score(batting_team='Chennai Super Kings', bowling_team='Mumbai Indians', overs=5.1, runs=50, wickets=0, runs_last_5=50, wickets_last_5=0):
    """Predict the final innings total with the fitted ``LinReg_model``.

    A single feature row is built in the column order the model was trained
    on: ``[runs, wickets, overs, runs_last_5, wickets_last_5]``, followed by
    an 8-slot one-hot block for the batting team and an 8-slot one-hot block
    for the bowling team (21 values in total).

    Parameters
    ----------
    batting_team, bowling_team : str
        Team names; each must be one of the eight teams listed in ``teams``.
    overs, runs, wickets, runs_last_5, wickets_last_5 : number
        Current match state, passed to the model unchanged.

    Returns
    -------
    int
        The model's prediction for the row, truncated with ``int()``.

    Raises
    ------
    ValueError
        If either team name is not a known team. Without this check an
        unknown name would leave its one-hot block empty and hand the model
        a row with too few features.
    """
    # Alphabetical order, matching the bat_team_* / bowl_team_* dummy columns
    # of the training frame.
    teams = (
        'Chennai Super Kings',
        'Delhi Daredevils',
        'Kings XI Punjab',
        'Kolkata Knight Riders',
        'Mumbai Indians',
        'Rajasthan Royals',
        'Royal Challengers Bangalore',
        'Sunrisers Hyderabad',
    )

    def one_hot(team, role):
        # Encode `team` as a 0/1 list over `teams`; reject unknown names early.
        if team not in teams:
            raise ValueError(
                "Unknown {} {!r}; expected one of: {}".format(role, team, ', '.join(teams))
            )
        return [1 if candidate == team else 0 for candidate in teams]

    bat_array = one_hot(batting_team, 'batting_team')
    bowl_array = one_hot(bowling_team, 'bowling_team')

    # 2-D array holding one row, as expected by the model's predict().
    final_array = np.array(
        [[runs, wickets, overs, runs_last_5, wickets_last_5] + bat_array + bowl_array]
    )

    # Prediction
    return int(LinReg_model.predict(final_array)[0])

In [19]:
# Example: predict the final total for one mid-innings match state.
final_score = predict_score(
    batting_team='Royal Challengers Bangalore',
    bowling_team='Delhi Daredevils',
    overs=9.2,
    runs=79,
    wickets=2,
    runs_last_5=60,
    wickets_last_5=1,
)

# Report a band from 10 below to 5 above the point prediction.
score_range = (final_score - 10, final_score + 5)
print("The final predicted score (range): {} to {}".format(*score_range))

The final predicted score (range): 165 to 180
