#                               *IPL WIN PROBABILITY PREDICTION*            

In [1]:
import numpy as np 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**1-Importing Libraries**

In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

**2-Data Preprocessing**

In [3]:
# Load the matches table from the Kaggle input directory, then show its
# dimensions and first rows.
match=pd.read_csv('/kaggle/input/ipl-data-set/matches.csv')
print(match.shape)
match.head()

In [4]:
# Load the deliveries table from the Kaggle input directory, then show its
# dimensions and first rows.
delivery=pd.read_csv('/kaggle/input/ipl-data-set/deliveries.csv')
print(delivery.shape)
delivery.head()

In [5]:
# Runs scored in every innings of every match.
# The run column is selected *before* aggregating so that only `total_runs` is
# summed; summing the whole frame would also try to add up the text columns,
# which is wasted work and can raise on recent pandas versions.
total_runs_df = (
    delivery
    .groupby(['match_id', 'inning'])['total_runs']
    .sum()
    .reset_index()
)
total_runs_df.shape

In [6]:
# Keep only the first-innings totals (rows whose `inning` equals 1).
total_runs_df = total_runs_df.query('inning == 1')

In [7]:
# Attach the first-innings total to each match row. The join is an inner join
# (the pandas default), so matches without a first-innings total are dropped.
match = match.merge(
    total_runs_df[['match_id', 'total_runs']],
    how='inner',
    left_on='id',
    right_on='match_id',
)
print(match.shape)
match.head()

In [8]:
# Count how often each side appears in the `team1` column. This covers only the
# team1 slot, so it is not a team's total number of matches played.
match['team1'].value_counts()

In [9]:
# Distinct names that occur in the `team1` column.
match['team1'].unique()

In [10]:
# The franchises kept for modelling; matches involving any other side are
# filtered out further down.
teams = {
    'Chennai Super Kings',
    'Delhi Capitals',
    'Kings XI Punjab',
    'Kolkata Knight Riders',
    'Mumbai Indians',
    'Rajasthan Royals',
    'Royal Challengers Bangalore',
    'Sunrisers Hyderabad',
}

In [11]:
# Fold renamed/defunct franchises into their current names in both team columns.
# Deccan Chargers has the most matches among the old teams (per the value_counts
# above), so it is merged into Sunrisers Hyderabad.
# NOTE(review): only `team1`/`team2` are renamed here; `winner` and the
# batting/bowling team columns of the deliveries table keep the old names —
# confirm that this is intended.
renamed_franchises = (
    ('Delhi Daredevils', 'Delhi Capitals'),
    ('Deccan Chargers', 'Sunrisers Hyderabad'),
)
for team_col in ('team1', 'team2'):
    for old_name, new_name in renamed_franchises:
        match[team_col] = match[team_col].str.replace(old_name, new_name)

In [12]:
# Keep a match only when both of its sides are in the `teams` set.
match = match[match['team1'].isin(teams) & match['team2'].isin(teams)]

In [13]:
# Rows/columns remaining after restricting both sides to the `teams` set.
match.shape

In [14]:
# Distribution of the `dl_applied` flag.
# NOTE(review): presumably marks Duckworth-Lewis adjusted matches — confirm
# against the dataset description.
match['dl_applied'].value_counts()

In [15]:
# Keep only the matches whose `dl_applied` flag is 0.
match = match.loc[match['dl_applied'].eq(0)]
match.shape

In [16]:
# Reduce the match table to the join key, the venue city, the winner and the
# first-innings total merged in earlier.
match = match.loc[:, ['match_id', 'city', 'winner', 'total_runs']]
match.head()

In [17]:
# Join the match-level columns onto every delivery of the retained matches.
# Both tables carry a `total_runs` column, so pandas suffixes them:
# `total_runs_x` is the first-innings total (from `match`) and `total_runs_y`
# is the runs scored off the individual delivery (from `delivery`).
delivery = pd.merge(match, delivery, on='match_id')
delivery.head()

In [18]:
# Size of the merged ball-by-ball table (all innings of the retained matches).
delivery.shape

In [19]:
# Extract the second-innings deliveries.
# .copy() makes this an independent frame: the cells below add several columns
# to `delivery`, and writing them onto a filtered slice triggers pandas'
# SettingWithCopyWarning.
delivery = delivery[delivery['inning'] == 2].copy()
print(delivery.shape)
delivery.head()

In [20]:
# Running score of the batting side after each delivery, per match.
# Only the per-ball run column is accumulated; a cumulative sum over the whole
# frame would also be attempted on the text columns.
delivery['current_score'] = delivery.groupby('match_id')['total_runs_y'].cumsum()

# Runs still needed to win. `total_runs_x` is the first-innings total, so the
# side batting second needs one run more than that.
delivery['runs_left'] = delivery['total_runs_x'] + 1 - delivery['current_score']

In [21]:
# Balls remaining in the innings after this delivery.
# NOTE(review): 126 = 120 + 6, which is right only if `over` is numbered from 1
# and `ball` from 1 (over 1, ball 1 -> 119 left) — confirm against the data.
# Deliveries numbered above 6 within an over would push the count too low.
delivery['balls_left'] = 126 - delivery['over'] * 6 - delivery['ball']
delivery.head(7)

- **Current Run Rate**
- **Required Run Rate**

In [22]:
# Current run rate: runs scored so far per six balls already bowled
# (120 - balls_left is the number of balls bowled).
delivery['crr'] = delivery['current_score'] * 6 / (120 - delivery['balls_left'])

# Required run rate: runs still needed per six balls remaining, to 3 decimals.
delivery['rrr'] = (delivery['runs_left'] * 6 / delivery['balls_left']).round(3)
delivery.head(2)

In [23]:
# Wickets left
# Turn `player_dismissed` into a 0/1 flag: 1 when a dismissal is recorded on the
# delivery, 0 when the value is missing. The check also treats an existing 0 as
# "no wicket", so re-running this cell leaves the column unchanged instead of
# turning every row into a dismissal.
dismissal = delivery['player_dismissed']
is_wicket = dismissal.notna() & (dismissal.astype(str) != '0')
delivery['player_dismissed'] = is_wicket.astype('int')

# Cumulative wickets fallen within each match; only the flag column is
# accumulated rather than every column of the frame.
wickets = delivery.groupby('match_id')['player_dismissed'].cumsum().values
delivery['wickets_left'] = 10 - wickets



In [24]:
#Result
def result(row):
    """Return 1 when the row's batting team is also its winner, otherwise 0.

    `row` is anything indexable by the keys 'batting_team' and 'winner'
    (for example one row of the deliveries frame).
    """
    return 1 if row['batting_team'] == row['winner'] else 0

In [25]:
# Label every delivery with the match outcome for the batting side:
# 1 if the batting team is the recorded winner, else 0 (a missing winner also
# gives 0). The vectorised comparison yields the same values as applying
# `result` row by row, without a Python-level call per delivery.
delivery['result'] = (delivery['batting_team'] == delivery['winner']).astype(int)
delivery.head()

In [26]:
# Extract the relevant feature columns (plus the `result` label, kept last)
# from the delivery dataframe.
# .copy() detaches the selection from `delivery`, so the cleaning steps below
# operate on an independent frame instead of a slice.
final_df = delivery[['batting_team', 'bowling_team', 'city', 'runs_left', 'balls_left',
                     'total_runs_x', 'crr', 'rrr', 'wickets_left', 'result']].copy()

**3-Data Cleaning**

In [29]:
# Number of missing values in each column of the modelling frame.
final_df.isnull().sum()

In [30]:
# Drop rows containing any missing value.
# Reassigning (rather than inplace=True) avoids mutating a frame that was
# sliced out of `delivery`, which raises SettingWithCopyWarning.
final_df = final_df.dropna()
final_df.shape

In [31]:
# Remove rows whose required run rate is meaningless.
# balls_left == 0 makes rrr a division by zero; balls_left < 0 (possible with
# the 126 - (over*6 + ball) formula when a delivery in the last over is
# numbered above 6) gives a negative rate. Only rows with balls still to be
# bowled are kept.
final_df = final_df[final_df['balls_left'] > 0]
print(final_df['rrr'].describe())
final_df.shape

In [32]:
# Shuffle the final DataFrame.
# A fixed random_state keeps the row order, and therefore the train/test split
# and the reported accuracy, identical from one run to the next.
final_df = final_df.sample(frac=1, random_state=20)
final_df.head()

**4- Model Building**

In [33]:
# Train/test split: `result` (the last column of final_df) is the label and
# every other column is a feature; 20% of the rows are held out for testing.
y = final_df['result']
X = final_df.drop(columns='result')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=20
)

In [34]:
# Shapes of the training features and training labels; their row counts
# should be equal.
print(X_train.shape)
print(y_train.shape)

In [35]:
# Preview the training features (three text columns plus six numeric ones).
X_train.head()

In [36]:
# One-hot encode the three categorical columns (dropping the first level of
# each) and pass the numeric columns through unchanged.
# Dense output is requested with `sparse_threshold=0` on the ColumnTransformer
# instead of `OneHotEncoder(sparse=False)`: the `sparse` argument was renamed
# to `sparse_output` and later removed from scikit-learn, whereas
# `sparse_threshold=0` always returns a dense array.
trf = ColumnTransformer(
    [('trf', OneHotEncoder(drop='first'), ['batting_team', 'bowling_team', 'city'])],
    remainder='passthrough',
    sparse_threshold=0,
)

In [37]:
# Pipeline: 'step1' encodes the features with `trf`, 'step2' fits a logistic
# regression (liblinear solver) on the encoded matrix.
pipe = Pipeline([
    ('step1', trf),
    ('step2', LogisticRegression(solver='liblinear')),
])

In [38]:
# Fit the pipeline on the training rows, predict the held-out rows and report
# the share of correct predictions.
y_pred = pipe.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

In [39]:
# Predicted class probabilities for the first test row. match_progression
# below reads column 0 as the losing probability and column 1 as the winning
# probability of the batting side.
pipe.predict_proba(X_test)[0]

In [40]:
# Preview of the shuffled modelling frame (features plus the `result` label).
final_df.head()

**5-Analysis of Match Progression**

In [41]:
def match_progression(x_df,match_id,pipe):
    """Summarise one chase at the last-ball-of-over rows using a fitted model.

    Parameters
    ----------
    x_df : DataFrame
        Ball-by-ball frame holding 'match_id', 'ball' and the nine feature
        columns the model was trained on.
    match_id : value compared against x_df['match_id']
        The match to summarise.
    pipe : fitted estimator exposing predict_proba
        Column 0 of its output is read as the losing probability and column 1
        as the winning probability of the batting side.

    Returns
    -------
    (DataFrame, number)
        A frame with columns 'end_of_over', 'runs_after_over', 'wickets_left',
        'lose_proba' and 'win_proba' (probabilities in percent, one decimal),
        and the chase target (first-innings total + 1).

    Raises
    ------
    ValueError
        If the match has no usable rows (unknown id, or every row filtered out).
    """
    feature_cols = ['batting_team','bowling_team','city','runs_left','balls_left',
                    'total_runs_x','crr','rrr','wickets_left']

    chase = x_df[x_df['match_id'] == match_id]
    # NOTE(review): rows with ball == 6 are taken as the end of each over; if an
    # over contains deliveries numbered above 6 this is not its final ball, and
    # 'end_of_over' below simply numbers the surviving rows 1..n — confirm.
    over_ends = chase[chase['ball'] == 6]
    temp_df = over_ends[feature_cols].dropna()
    # .copy() so the columns added below land on an independent frame.
    temp_df = temp_df[temp_df['balls_left'] != 0].copy()
    if temp_df.empty:
        raise ValueError('no usable end-of-over rows for match_id={!r}'.format(match_id))

    proba = np.asarray(pipe.predict_proba(temp_df))
    temp_df['lose_proba'] = np.round(proba[:, 0]*100,1)
    temp_df['win_proba'] = np.round(proba[:, 1]*100,1)
    temp_df['end_of_over'] = range(1,temp_df.shape[0]+1)

    # 'runs_left' is measured against first-innings total + 1, so the same
    # baseline must be used here; starting from the bare total would understate
    # the first over's runs by one.
    target = temp_df['total_runs_x'].values[0] + 1
    batting_team=temp_df['batting_team'].values[0]
    bowling_team=temp_df['bowling_team'].values[0]

    # Runs scored in an over = runs needed before it minus runs needed after it.
    runs_after = temp_df['runs_left'].to_numpy()
    runs_before = np.concatenate(([target], runs_after[:-1]))
    temp_df['runs_after_over'] = runs_before - runs_after

    print('Batting_team:- ',batting_team)
    print('Bowling_team:- ',bowling_team)
    print("Target-",target)

    temp_df = temp_df[['end_of_over','runs_after_over','wickets_left','lose_proba','win_proba']]
    return temp_df,target

In [42]:
# Over-by-over progression of the chase in match 1, using the fitted pipeline.
# NOTE(review): assumes match_id 1 is still present in `delivery` after the
# team and dl_applied filters applied earlier — confirm.
temp_df,target=match_progression(delivery,1,pipe)
temp_df

In [44]:
# Chase progression per over: win/lose probability (%), wickets in hand and the
# runs scored in the over. matplotlib is already imported (and the inline
# backend enabled) in the imports cell at the top of the notebook, so neither is
# repeated here. Every series is labelled so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(15, 8))
ax.set_facecolor("white")
ax.plot(temp_df['end_of_over'], temp_df['lose_proba'], color='red', linewidth=4,
        label='lose probability (%)')
ax.plot(temp_df['end_of_over'], temp_df['win_proba'], color='green', linewidth=4,
        label='win probability (%)')
ax.plot(temp_df['end_of_over'], temp_df['wickets_left'], color='yellow', linewidth=4,
        label='wickets left')
ax.bar(temp_df['end_of_over'], temp_df['runs_after_over'], label='runs in over')
ax.set_xlabel('end_of_over')
ax.set_title('Target:- ' + str(target))
ax.legend()
plt.show()