![logo](https://admin.thecricketer.com/weblab/sites/96c8b790-b593-bfda-0ba4-ecd3a9fdefc2/resources/images/site/ipl130201.jpg)

<h1 align='center'> 🏆XGBoost for Predicting IPL's🏏1st inning score🏆</h1>

- Importing required libraries 

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle

%matplotlib inline

- loading dataset

In [None]:
# Relative location of the IPL dataset CSV used throughout this notebook.
csv_path = '../input/ipl-dataset/ipl.csv'
data = pd.read_csv(csv_path)

In [None]:
data.head()

### Understand dataset

In [None]:
data.columns

In [None]:
data.shape

- There are 76014 records in our dataset, and there are 15 features.

In [None]:
data.info()

### Check whether any null values are present; if there are, we will handle them with feature engineering

In [None]:
data.isnull().sum()

- That's great: there are no null values present in our dataset.
- Let's also check this using a visualization.

In [None]:
import missingno as mn

# Draw the missing-value matrix for every column, then label the axes and
# title through the pyplot state machine (which targets the current axes).
axis_label_font = {'fontsize': 20}
mn.matrix(data, figsize=(16, 6), color=(0.30, 0.60, 0.71))
plt.xlabel('Features', fontdict=axis_label_font)
plt.ylabel("Number of Records", fontdict=axis_label_font)
plt.title("Check Null values using visualization", fontdict={'fontsize': 23})

In [None]:
# Remove the 'mid' column in place (presumably a match identifier with no
# predictive value — TODO confirm), then preview the result.
data.drop(columns=['mid'], inplace=True)
data.head()

In [None]:
len(data.venue.unique())

In [None]:
data.bat_team.unique()

- Some teams no longer play in the IPL, so we remove their records. These teams are: ['Deccan Chargers','Kochi Tuskers Kerala','Pune Warriors','Rising Pune Supergiants', 'Gujarat Lions','Rising Pune Supergiant']

In [None]:
# Teams whose records are kept for modelling.
playing_teams = [
    'Kolkata Knight Riders',
    'Chennai Super Kings',
    'Rajasthan Royals',
    'Mumbai Indians',
    'Kings XI Punjab',
    'Royal Challengers Bangalore',
    'Delhi Daredevils',
    'Sunrisers Hyderabad',
]

- Now keep only the records where both teams are in `playing_teams`.

In [None]:
# Keep a row only when BOTH the batting and the bowling side are in playing_teams.
batting_is_active = data['bat_team'].isin(playing_teams)
bowling_is_active = data['bowl_team'].isin(playing_teams)
data = data[batting_is_active & bowling_is_active]

In [None]:
data.head()

In [None]:
data.shape

- Some of our records have now been removed according to this condition.

- Remove the records from the first five overs.

In [None]:
# Keep only deliveries from over 5.0 onwards.
# .copy() makes the filtered frame an independent object: later cells assign
# into it (e.g. data['date'] = ...), which on a boolean-indexed slice can emit
# SettingWithCopyWarning in pandas versions without copy-on-write.
data = data[data['overs'] >= 5.0].copy()

In [None]:
data.shape

- More records have been removed according to this condition.

In [None]:
len(data.batsman.unique())

In [None]:
# Discard the batsman, bowler and venue columns in place.
data.drop(columns=['batsman', 'bowler', 'venue'], inplace=True)

In [None]:
data.head()

In [None]:
# Discard the striker / non-striker columns in place.
data.drop(columns=['striker', 'non-striker'], inplace=True)

In [None]:
data.head()

### Now change the date format 
- Converting the column 'date' from string into datetime object

In [None]:
from datetime import datetime  # retained in case other cells rely on this name

# Vectorised parse of the 'YYYY-MM-DD' strings into datetime64 values.
# Unlike a row-wise strptime, this is safe to re-run: an already-converted
# datetime column passes through pd.to_datetime unchanged instead of raising.
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')

In [None]:
data.head()

- Now perform one-hot encoding on the categorical features.

In [None]:
# Expand the two team columns into one indicator column per team
# (named '<column>_<team>'); every other column is carried over unchanged.
team_columns = ['bat_team', 'bowl_team']
encoding_data = pd.get_dummies(data, columns=team_columns)
encoding_data.shape

In [None]:
encoding_data.head()

In [None]:
encoding_data.columns


In [None]:
# Fix an explicit column order: date + match-state features, then the batting
# indicators, then the bowling indicators, with the target 'total' last.
match_state_cols = ['date', 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5']
teams_in_column_order = [
    'Chennai Super Kings',
    'Delhi Daredevils',
    'Kings XI Punjab',
    'Kolkata Knight Riders',
    'Mumbai Indians',
    'Rajasthan Royals',
    'Royal Challengers Bangalore',
    'Sunrisers Hyderabad',
]
batting_cols = ['bat_team_' + team for team in teams_in_column_order]
bowling_cols = ['bowl_team_' + team for team in teams_in_column_order]

encoding_data = encoding_data[match_state_cols + batting_cols + bowling_cols + ['total']]

In [None]:
pd.set_option('display.max_columns',2000)
encoding_data.head()

In [None]:
encoding_data.columns

In [None]:
encoding_data.shape

## Splitting data into train and test according to match year

In [None]:
# Time-based split of the feature columns: matches up to 2016 are used for
# training, matches from 2017 onwards for testing. The target is excluded.
match_year = encoding_data['date'].dt.year
feature_frame = encoding_data.drop(columns='total')
X_train = feature_frame[match_year <= 2016]
X_test = feature_frame[match_year >= 2017]

In [None]:
X_train.head()

In [None]:
X_test.head()

In [None]:
# Target arrays, split by the same year thresholds as the feature frames.
match_year = encoding_data['date'].dt.year
total_scores = encoding_data['total']
Y_train = total_scores[match_year <= 2016].values
Y_test = total_scores[match_year >= 2017].values

In [None]:
X_train.head()

In [None]:
X_train.shape

In [None]:
Y_train = pd.DataFrame({"Total":Y_train})

In [None]:
Y_train.head()

In [None]:
Y_train.shape

In [None]:
Y_test = pd.DataFrame({'Total':Y_test})

In [None]:
Y_test.head()

- Removing Date column

In [None]:
# 'date' was only needed to split by year; remove it from both feature frames.
for feature_split in (X_train, X_test):
    feature_split.drop(columns='date', inplace=True)

## Building Models

In [None]:
from sklearn import metrics

#### Apply XGBoost

In [None]:
import xgboost as xg

In [None]:
xgmodel = xg.XGBRegressor()

In [None]:
xgmodel.fit(X_train,Y_train)

In [None]:
# R^2 on the data the model was fitted to says little about generalisation,
# so report it next to R^2 on the held-out (2017+) split built earlier.
train_r2 = xgmodel.score(X_train, Y_train)
test_r2 = xgmodel.score(X_test, Y_test)
pd.Series({'train_r2': train_r2, 'test_r2': test_r2})

## Predicting Score

In [None]:
score_Prod=xgmodel.predict(X_test)

In [None]:
# Read the current match state interactively. Each call blocks until the user
# answers; int()/float() raise ValueError on non-numeric input.
runs = int(input('Number of runs :'))                      # runs scored so far
wickets = int(input('Number of wickets'))                  # wickets fallen so far
overs = float(input("Current over :"))                     # NOTE(review): training data only covers overs >= 5.0
runs_in_prev_5 = int(input('runs_in_prev_5 :'))            # later placed in the 'runs_last_5' feature slot
wickets_in_prev_5 = int(input('wickets_in_prev_5 :'))      # later placed in the 'wickets_last_5' feature slot

In [None]:
# Team order used for the one-hot vectors. It must match the order of the
# bat_team_* / bowl_team_* indicator columns the model was trained on.
team_order = ['Chennai Super Kings', 'Delhi Daredevils', 'Kings XI Punjab',
              'Kolkata Knight Riders', 'Mumbai Indians', 'Rajasthan Royals',
              'Royal Challengers Bangalore', 'Sunrisers Hyderabad']


def _one_hot_team(team_name):
    """Return a 0/1 list with one slot per entry of ``team_order``.

    The slot belonging to ``team_name`` is 1 and every other slot is 0.

    Raises:
        ValueError: if ``team_name`` is not in ``team_order``. Failing here
            avoids silently building a feature vector of the wrong length.
    """
    if team_name not in team_order:
        raise ValueError(
            "Unknown team {!r}; expected one of: {}".format(team_name, ', '.join(team_order))
        )
    return [1 if team_name == known_team else 0 for known_team in team_order]


temp_array = list()

# Surrounding whitespace is stripped so a stray space does not cause a mismatch.
batting_team = input('Enter name of batting team : ').strip()
temp_array = temp_array + _one_hot_team(batting_team)

# The previous version compared the last bowling option against '' instead of
# 'Sunrisers Hyderabad', so that team could never be encoded as the bowling side.
bowling_team = input('Enter name of bowling team : ').strip()
temp_array = temp_array + _one_hot_team(bowling_team)

In [None]:
print(temp_array)   

In [None]:
# Assemble the single-row feature vector. The order must mirror the training
# columns once 'date' is dropped: runs, wickets, overs, runs_last_5,
# wickets_last_5, then the batting-team and bowling-team indicators.
predict_values = [runs,wickets,overs,runs_in_prev_5,wickets_in_prev_5] + temp_array

In [None]:
Xnew = np.array(predict_values).reshape((1,-1))

In [None]:
# Predict with the XGBoost model trained above. The earlier reference to
# `lin_model` named an object this notebook never defines (NameError).
# predict() returns one value per input row; take the only row and truncate to int.
my_prediction = int(xgmodel.predict(Xnew)[0])

In [None]:
# Report a band of +/- 4% around the point prediction, truncated to whole runs.
lower_bound = int(my_prediction-my_prediction*0.04)
upper_bound = int(my_prediction+my_prediction*0.04)
print("Predicted first inning score is range between {} to {}: ".format(lower_bound, upper_bound))

### [I'm](https://www.linkedin.com/in/rushikesh-lavate/) a newcomer to Kaggle. Please give me honest feedback so I can improve, and give me an upvote if you like this notebook.
# Thank You, everyone