In [447]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

In [448]:
matches = pd.read_csv("iplDatasets/matches.csv")
matches.shape

(756, 18)

In [449]:
matches.head()

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [450]:
deliveries = pd.read_csv(r"iplDatasets/deliveries.csv")
deliveries.shape

(179078, 21)

In [451]:
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


In [452]:
deliveries.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

In [453]:
total_score = deliveries.groupby(["match_id","inning"]).sum()["total_runs"].reset_index()

In [454]:
total_score = total_score[total_score["inning"] == 1]
total_score

Unnamed: 0,match_id,inning,total_runs
0,1,1,207
2,2,1,184
4,3,1,183
6,4,1,163
8,5,1,157
...,...,...,...
1518,11347,1,143
1520,11412,1,136
1522,11413,1,171
1524,11414,1,155


In [455]:
match_df = matches.merge(total_score[["match_id","total_runs"]] , left_on="id",right_on="match_id")
match_df

Unnamed: 0,id,Season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3,match_id,total_runs
0,1,IPL-2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,,1,207
1,2,IPL-2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,,2,184
2,3,IPL-2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,,3,183
3,4,IPL-2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,,4,163
4,5,IPL-2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,,5,157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,11347,IPL-2019,Mumbai,05-05-2019,Kolkata Knight Riders,Mumbai Indians,Mumbai Indians,field,normal,0,Mumbai Indians,0,9,HH Pandya,Wankhede Stadium,Nanda Kishore,O Nandan,S Ravi,11347,143
752,11412,IPL-2019,Chennai,07-05-2019,Chennai Super Kings,Mumbai Indians,Chennai Super Kings,bat,normal,0,Mumbai Indians,0,6,AS Yadav,M. A. Chidambaram Stadium,Nigel Llong,Nitin Menon,Ian Gould,11412,136
753,11413,IPL-2019,Visakhapatnam,08-05-2019,Sunrisers Hyderabad,Delhi Capitals,Delhi Capitals,field,normal,0,Delhi Capitals,0,2,RR Pant,ACA-VDCA Stadium,,,,11413,171
754,11414,IPL-2019,Visakhapatnam,10-05-2019,Delhi Capitals,Chennai Super Kings,Chennai Super Kings,field,normal,0,Chennai Super Kings,0,6,F du Plessis,ACA-VDCA Stadium,Sundaram Ravi,Bruce Oxenford,Chettithody Shamshuddin,11414,155


In [456]:
match_df["team1"].unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants',
       'Delhi Capitals'], dtype=object)

In [457]:
teams = ['Sunrisers Hyderabad', 'Mumbai Indians','Royal Challengers Bangalore','Kolkata Knight Riders'
         , 'Kings XI Punjab' ,'Chennai Super Kings', 'Rajasthan Royals',  'Delhi Capitals']

In [458]:
match_df = match_df.replace(['Rising Pune Supergiant','Rising Pune Supergiants','Delhi Daredevils','Deccan Chargers'],
                           ['Chennai Super Kings','Chennai Super Kings', 'Delhi Capitals','Sunrisers Hyderabad'])

In [459]:
match_df = match_df[ (match_df["team1"].isin(teams)) & (match_df["team2"].isin(teams) & (match_df["winner"].isin(teams))) ]
match_df = match_df[match_df["dl_applied"] == 0]

In [460]:
match_df.columns

Index(['id', 'Season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3', 'match_id', 'total_runs'],
      dtype='object')

In [461]:
match_df = match_df[['match_id', 'result',  'winner', 'total_runs',"city"]]

In [462]:
deliveries_df = match_df.merge(deliveries,on = "match_id")
deliveries_df = deliveries_df[deliveries_df["inning"] == 2]

In [463]:
deliveries_df.head()

Unnamed: 0,match_id,result,winner,total_runs_x,city,inning,batting_team,bowling_team,over,ball,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs_y,player_dismissed,dismissal_kind,fielder
125,1,normal,Sunrisers Hyderabad,207,Hyderabad,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,1,...,0,0,0,0,1,0,1,,,
126,1,normal,Sunrisers Hyderabad,207,Hyderabad,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,2,...,0,0,0,0,0,0,0,,,
127,1,normal,Sunrisers Hyderabad,207,Hyderabad,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,3,...,0,0,0,0,0,0,0,,,
128,1,normal,Sunrisers Hyderabad,207,Hyderabad,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,4,...,0,0,0,0,2,0,2,,,
129,1,normal,Sunrisers Hyderabad,207,Hyderabad,2,Royal Challengers Bangalore,Sunrisers Hyderabad,1,5,...,0,0,0,0,4,0,4,,,


In [464]:
deliveries_df.columns

Index(['match_id', 'result', 'winner', 'total_runs_x', 'city', 'inning',
       'batting_team', 'bowling_team', 'over', 'ball', 'batsman',
       'non_striker', 'bowler', 'is_super_over', 'wide_runs', 'bye_runs',
       'legbye_runs', 'noball_runs', 'penalty_runs', 'batsman_runs',
       'extra_runs', 'total_runs_y', 'player_dismissed', 'dismissal_kind',
       'fielder'],
      dtype='object')

In [465]:
deliveries_df["current_score"] = deliveries_df[["total_runs_y","match_id"]].groupby("match_id").cumsum()["total_runs_y"]

In [466]:
deliveries_df["runs_left"] = deliveries_df["total_runs_x"] - deliveries_df["current_score"]

In [467]:
deliveries_df = deliveries_df[deliveries_df["runs_left"]  >= 0]

In [468]:
deliveries_df["balls_left"]  = 120 - ((deliveries_df["over"]-1)*6 + deliveries["ball"])

In [469]:
deliveries_df["crr"] =  (deliveries_df["current_score"] / (120 - deliveries_df["balls_left"]))*6  #Current Run Rate

In [470]:
deliveries_df["rrr"] =  (deliveries_df["runs_left"]/ deliveries_df["balls_left"])*6  # Required run rate

In [471]:
def winner(x):

    return 1 if x["batting_team"] == x["winner"] else 0

deliveries_df["result"] = deliveries_df.apply(winner,axis=1)

In [472]:
deliveries_df["player_dismissed"] = deliveries_df["player_dismissed"] .fillna(0)

deliveries_df["player_dismissed"] = deliveries_df["player_dismissed"].apply(lambda x : 0 if x==0 else 1)

wickets_fallen = deliveries_df[["player_dismissed","match_id"]].groupby("match_id").cumsum()["player_dismissed"]

deliveries_df["wickets"] = 10 - wickets_fallen


In [473]:
deliveries_df.columns

Index(['match_id', 'result', 'winner', 'total_runs_x', 'city', 'inning',
       'batting_team', 'bowling_team', 'over', 'ball', 'batsman',
       'non_striker', 'bowler', 'is_super_over', 'wide_runs', 'bye_runs',
       'legbye_runs', 'noball_runs', 'penalty_runs', 'batsman_runs',
       'extra_runs', 'total_runs_y', 'player_dismissed', 'dismissal_kind',
       'fielder', 'current_score', 'runs_left', 'balls_left', 'crr', 'rrr',
       'wickets'],
      dtype='object')

In [474]:
final = deliveries_df[['batting_team','bowling_team','city','runs_left','balls_left','wickets','total_runs_x','crr','rrr','result']]

In [475]:
x = final.drop(["result"],axis = "columns")
y = final["result"]

In [476]:
train_x, test_x , train_y , test_y = train_test_split(x,y,test_size = 0.1)

In [477]:
obj_cols = []

for column in final.columns:

    if(final[column].dtype == "object"):
        obj_cols.append(column)
    
obj_cols

['batting_team', 'bowling_team', 'city']

In [478]:
obj_pipe =ColumnTransformer([ ("col_trans",OneHotEncoder(sparse=False,drop='first'),obj_cols) ],n_jobs=True)

model = Pipeline([("columns_transformer",obj_pipe),
                       ("model",LogisticRegression(solver='liblinear'))])

In [479]:
model.fit(train_x,train_y)



In [480]:
model.score(test_x,test_y)

0.7056775575790037

In [481]:
model.predict_proba(test_x)

array([[0.42746901, 0.57253099],
       [0.44030463, 0.55969537],
       [0.47018673, 0.52981327],
       ...,
       [0.37224489, 0.62775511],
       [0.49722569, 0.50277431],
       [0.76503678, 0.23496322]])

In [482]:
def get_predictions( match_id , data = deliveries_df  , mode = model):

    match_  = data[data["match_id"] == match_id]

    match_ =  match_[match_["ball"] == 6]

    ip = match_[['batting_team' ,'bowling_team',"city" ,"total_runs_x",'runs_left','balls_left' ,'crr' ,'rrr',"wickets"]]

    prob_ = model.predict_proba(ip)

    print(prob_)



    

In [483]:
train_x

Unnamed: 0,batting_team,bowling_team,city,runs_left,balls_left,wickets,total_runs_x,crr,rrr
40522,Chennai Super Kings,Kings XI Punjab,Chennai,11,9.0,5,136,6.756757,7.333333
134217,Mumbai Indians,Kings XI Punjab,Indore,134,73.0,9,184,6.382979,11.013699
66643,Chennai Super Kings,Rajasthan Royals,Chennai,6,2.0,7,146,7.118644,18.000000
115676,Chennai Super Kings,Kings XI Punjab,Chandigarh,54,64.0,8,130,8.142857,5.062500
147047,Rajasthan Royals,Mumbai Indians,Mumbai,34,34.0,8,191,10.953488,6.000000
...,...,...,...,...,...,...,...,...,...
91984,Sunrisers Hyderabad,Kings XI Punjab,Sharjah,162,101.0,9,193,9.789474,9.623762
149156,Rajasthan Royals,Mumbai Indians,Jaipur,162,117.0,10,171,18.000000,8.307692
28786,Delhi Daredevils,Chennai Super Kings,Johannesburg,159,116.0,10,163,6.000000,8.224138
77798,Royal Challengers Bangalore,Kolkata Knight Riders,Bangalore,39,31.0,8,154,7.752809,7.548387


In [484]:
a = deliveries_df[deliveries_df["match_id"] == 1]

In [486]:
get_predictions(1)

[[0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]
 [0.52749482 0.47250518]]
