# Machine Learning - Building IPL Score Predictor App

In [1]:
import pandas as pd
import numpy as np
import pickle

In [None]:
# Load the IPL dataset from the GitHub-hosted CSV and preview the first rows.
ipl_csv_url = 'https://raw.githubusercontent.com/ghimiresunil/IPL-First-Innings-Score-Prediction-/master/ipl.csv'
cData = pd.read_csv(ipl_csv_url)
cData.head()

In [None]:
# --- Data Cleaning ---
# Remove the columns that are not used as model features.
# `drop` returns a new frame instead of mutating in place, and
# `errors='ignore'` skips columns that are already gone, so this cell can be
# re-run safely instead of raising KeyError on the second execution.

col_to_remove = ['mid', 'venue', 'batsman', 'bowler', 'striker', 'non-striker']
cData = cData.drop(columns=col_to_remove, errors='ignore')

In [None]:
# Preview the first five rows of the frame after the column drop.
cData.head(n=5)

In [None]:
# List the distinct batting-team values present in the data.
cData['bat_team'].unique()

In [None]:
# Count the distinct batting-team values.
cData['bat_team'].nunique()

In [None]:
# Teams kept for modelling; rows involving any other team are filtered out
# in the cell that builds `data`.
consistent_teams = [
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Mumbai Indians',
    'Kings XI Punjab',
    'Royal Challengers Bangalore',
    'Delhi Daredevils',
    'Sunrisers Hyderabad',
]

In [None]:
# Keep only rows where both the batting and the bowling side are in
# consistent_teams.
batting_ok = cData['bat_team'].isin(consistent_teams)
bowling_ok = cData['bowl_team'].isin(consistent_teams)
data = cData.loc[batting_ok & bowling_ok].copy()

In [None]:
# Discard the first 5 overs of every match: keep rows with overs > 5.0.
past_first_five = data['overs'] > 5.0
new_data = data.loc[past_first_five].copy()

In [None]:
# Display the filtered frame (consistent teams only, overs > 5.0).
new_data

In [None]:
# Print the distinct team values left on the batting and bowling sides.
for team_col in ('bat_team', 'bowl_team'):
    print(new_data[team_col].unique())

In [None]:
# Inspect the column dtypes of the filtered frame.
new_data.dtypes

In [None]:
# Convert the 'date' column to pandas datetimes (month-first parsing) so the
# .dt accessor can be used on it later.
parsed_dates = pd.to_datetime(new_data['date'], dayfirst=False)
new_data['date'] = parsed_dates

In [None]:
# Re-check the dtypes to confirm 'date' is now a datetime column.
new_data.dtypes

In [None]:
## Data Preprocessing
# One-hot encode the two team columns with pandas' get_dummies.

team_columns = ['bat_team', 'bowl_team']
encoded_df = pd.get_dummies(new_data, columns=team_columns).copy()

In [None]:
# Preview the first five rows of the one-hot encoded frame.
encoded_df.head(n=5)

In [None]:
# List the columns of the encoded frame, including the generated dummy columns.
encoded_df.columns

In [None]:
# Inspect the dtypes of the encoded frame.
encoded_df.dtypes

In [None]:
# Time-based split: rows dated up to and including 2016 form the training
# set, rows dated 2017 or later form the test set.

match_year = encoded_df['date'].dt.year
train = encoded_df.loc[match_year <= 2016].copy()
test = encoded_df.loc[match_year >= 2017].copy()

In [None]:
# Separate the target column ('total') from the remaining feature columns
# for each split.
target_col = 'total'

Xtrain = train.drop(columns=target_col).copy()
ytrain = train[target_col].copy()

Xtest = test.drop(columns=target_col).copy()
ytest = test[target_col].copy()

## *Model Building*

In [None]:
# Report the shape of each of the four split objects on one line.
split_shapes = {
    'Xtrain': Xtrain.shape,
    'Xtest': Xtest.shape,
    'ytrain': ytrain.shape,
    'ytest': ytest.shape,
}
print(', '.join(f'{name} {shape}' for name, shape in split_shapes.items()))

In [None]:
# 'date' was used for the year-based split and is not a model feature, so
# remove it from both feature frames. `drop` returns a new frame and
# `errors='ignore'` tolerates an already-removed column, so re-running this
# cell no longer raises KeyError.
Xtrain = Xtrain.drop(columns='date', errors='ignore')
Xtest = Xtest.drop(columns='date', errors='ignore')

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression on the training features and target; the fitted
# estimator is the cell's displayed value.
lir = LinearRegression()
lir.fit(X=Xtrain, y=ytrain)

In [None]:
# Score of the fitted model on the training split (R^2 for a regressor).
lir.score(X=Xtrain, y=ytrain)

In [None]:
# Score of the fitted model on the held-out test split (R^2 for a regressor).
lir.score(X=Xtest, y=ytest)

In [None]:
# Persist the fitted model to disk. The context manager guarantees the file
# handle is flushed and closed, even if pickling raises; the bare open() call
# used previously left the handle open.
with open('lirmodel.pkl', 'wb') as model_file:
    pickle.dump(lir, model_file)

In [None]:
# !pip show scikit-learn

In [None]:
# !pip install -U scikit-learn