### 1. Import libraries : 

In [52]:
import pandas as pd 
import numpy as np 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

### 2. Load Data : 

In [53]:
# Read the ball-by-ball and per-match CSV files from the working directory.
deliveries = pd.read_csv(filepath_or_buffer='deliveries.csv')
matches = pd.read_csv(filepath_or_buffer='matches.csv')

In [54]:
# Preview the first five rows of the per-match table.
matches.head(n=5)

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [55]:
# Preview the first five rows of the ball-by-ball table.
deliveries.head(n=5)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


### 3. Merge to get total runs per match : 

In [56]:
# Total runs scored in each match (all deliveries of the match combined).
# The column is selected *before* aggregating so that only `total_runs` is
# summed.  Summing the whole frame first also tries to add up the text columns
# (batsman, bowler, ...), which is wasted work and may fail on recent pandas
# versions where non-numeric columns are no longer silently dropped.
total_score = deliveries.groupby('match_id')['total_runs'].sum().reset_index()
total_score.columns = ['match_id','total_runs']

#### Merge with matches : 

In [57]:
# Attach each match's run total to its row in `matches` (inner join on the
# match identifier, which is `id` in matches and `match_id` in total_score).
match_data = pd.merge(
    matches,
    total_score,
    how='inner',
    left_on='id',
    right_on='match_id',
)

In [58]:
# Keep relevant columns: location, the two teams, toss details, total runs
# and the winner.
kept_columns = [
    'city', 'venue',
    'team1', 'team2',
    'toss_winner', 'toss_decision',
    'total_runs', 'winner',
]
match_data = match_data[kept_columns]

In [59]:
# Display the reduced per-match frame (pandas truncates the middle rows).
match_data 

Unnamed: 0,city,venue,team1,team2,toss_winner,toss_decision,total_runs,winner
0,Hyderabad,"Rajiv Gandhi International Stadium, Uppal",Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,379,Sunrisers Hyderabad
1,Pune,Maharashtra Cricket Association Stadium,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,371,Rising Pune Supergiant
2,Rajkot,Saurashtra Cricket Association Stadium,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,367,Kolkata Knight Riders
3,Indore,Holkar Cricket Stadium,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,327,Kings XI Punjab
4,Bangalore,M Chinnaswamy Stadium,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,299,Royal Challengers Bangalore
...,...,...,...,...,...,...,...,...
751,Mumbai,Wankhede Stadium,Kolkata Knight Riders,Mumbai Indians,Mumbai Indians,field,280,Mumbai Indians
752,Chennai,M. A. Chidambaram Stadium,Chennai Super Kings,Mumbai Indians,Chennai Super Kings,bat,276,Mumbai Indians
753,Visakhapatnam,ACA-VDCA Stadium,Sunrisers Hyderabad,Delhi Capitals,Delhi Capitals,field,341,Delhi Capitals
754,Visakhapatnam,ACA-VDCA Stadium,Delhi Capitals,Chennai Super Kings,Chennai Super Kings,field,317,Chennai Super Kings


In [60]:
# Drop matches that have a missing value in any of the kept columns.
# NOTE: this does NOT filter out D/L-affected matches -- `dl_applied` is not
# among the columns retained in `match_data`, so only missing values are
# handled here.
# Reassigning (instead of `inplace=True`) avoids mutating a frame that was
# derived by column selection, which can trigger SettingWithCopyWarning.
match_data = match_data.dropna()

In [61]:
# Preprocess deliveries for live match simulation
# Attach match-level context (location, teams, winner) to every delivery.
# `deliveries` is overwritten by this cell, so any previously merged copies of
# these columns are dropped first; otherwise re-running the cell would create
# `_x` / `_y` suffixed duplicates and break later lookups such as 'winner'.
# (Assumes the raw deliveries file has none of these columns itself; on a
# first run the drop is then a no-op.)
match_context_cols = ['id','city','venue','team1','team2','winner']
deliveries = (
    deliveries
    .drop(columns=match_context_cols, errors='ignore')
    .merge(matches[match_context_cols], left_on='match_id', right_on='id')
)

In [62]:
# create new features : 
deliveries['current_score']= deliveries.groupby('match_id')['total_runs'].cumsum()

In [63]:
deliveries['balls_left'] = 120- deliveries.groupby('match_id').cumcount() 

In [64]:
# set the target runs : 

targets = deliveries.groupby('match_id').sum()['total_runs'].reset_index()
targets.columns = ['match_id','target']

In [65]:
# Merge target : 

final_df = deliveries.merge(targets,on='match_id')
final_df['runs_left'] = final_df['target']- final_df['current_score']

In [66]:
# Wickets left : 
# Turn `player_dismissed` into a 0/1 flag: 1 when a dismissed player is
# recorded on the delivery, 0 otherwise.  The column is overwritten, so
# values that are already 0 (or the placeholder '0') are also treated as
# "no wicket"; this keeps the cell safe to re-run instead of flagging every
# delivery as a dismissal on the second run.
dismissed = final_df['player_dismissed']
final_df['player_dismissed'] = (dismissed.notna() & ~dismissed.isin([0, '0'])).astype(int)

# Wickets fallen so far are counted per innings: each batting side starts
# with 10 wickets, so the count must not carry over between innings.
final_df['wickets'] = final_df.groupby(['match_id','inning'])['player_dismissed'].cumsum()

final_df['wickets_left'] = 10 - final_df['wickets']

In [67]:
# Run Rates : 
# Current (crr) and required (rrr) run rate, in runs per over.
# A rate is undefined when its divisor is exactly zero (no overs bowled yet
# for crr, no overs remaining for rrr).  Those rows are set to 0.0
# explicitly: padding the divisor with a tiny epsilon instead turns them into
# ~1e10-sized outliers, which dominate the mean/variance used for scaling.
overs_bowled = (120 - final_df['balls_left']) / 6
overs_left = final_df['balls_left'] / 6

# `.where` blanks zero divisors to NaN, so the division yields NaN there and
# `fillna` supplies the 0.0 placeholder.
final_df['crr'] = (final_df['current_score'] / overs_bowled.where(overs_bowled != 0)).fillna(0.0)
final_df['rrr'] = (final_df['runs_left'] / overs_left.where(overs_left != 0)).fillna(0.0)


In [68]:
# label : 
# 1 when the side batting on this delivery is the recorded match winner,
# otherwise 0.
batting_side_won = final_df['batting_team'].eq(final_df['winner'])
final_df['result'] = batting_side_won.astype(int)

In [69]:
# Final features : 
# Model inputs (three categorical columns plus numeric match-state columns)
# and the binary label.
feature_columns = [
    'batting_team', 'bowling_team', 'city',
    'runs_left', 'balls_left', 'wickets_left',
    'total_runs', 'crr', 'rrr',
]
X = final_df[feature_columns]
y = final_df['result']

In [70]:
# One Hot encode categorical : 
# Expand the team and city columns into indicator columns; the numeric
# columns pass through unchanged.
categorical_columns = ['batting_team','bowling_team','city']
X = pd.get_dummies(data=X, columns=categorical_columns)

In [71]:
# Train - Test -Split : 
# Hold out 20% of the rows, stratified on the label, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

In [72]:
# Pipeline : 
# Standardise every feature, then fit a logistic regression with default
# settings.
scaler_step = ('scaler', StandardScaler())
model_step = ('model', LogisticRegression())
pipe = Pipeline(steps=[scaler_step, model_step])

In [73]:
# Train the model : 
# Fit the scaler and the classifier on the training split only.
pipe.fit(X=X_train, y=y_train)

In [75]:
import pickle

In [76]:
# save the model : 
# Persist the fitted pipeline and the one-hot encoded column index of X
# (presumably so prediction code can rebuild inputs in the same column
# layout -- confirm against the consumer of columns.pkl).
# `with` blocks guarantee each file is flushed and closed; the handles were
# previously opened inline and never closed explicitly.
with open('ipl_model.pkl','wb') as model_file:
    pickle.dump(pipe, model_file)

with open('columns.pkl','wb') as columns_file:
    pickle.dump(X.columns, columns_file)