### Importing Packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold  
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import metrics
import shap
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Importing dataset and preprocessing it

In [2]:
# Load the raw match records. Every bare expression below is echoed because
# the import cell sets ast_node_interactivity to "all".
matches=pd.read_csv('matches.csv')
matches.info()
matches.head()
matches.shape

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 577 entries, 0 to 576
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               577 non-null    int64  
 1   season           577 non-null    int64  
 2   city             570 non-null    object 
 3   date             577 non-null    object 
 4   team1            577 non-null    object 
 5   team2            577 non-null    object 
 6   toss_winner      577 non-null    object 
 7   toss_decision    577 non-null    object 
 8   result           577 non-null    object 
 9   dl_applied       577 non-null    int64  
 10  winner           574 non-null    object 
 11  win_by_runs      577 non-null    int64  
 12  win_by_wickets   577 non-null    int64  
 13  player_of_match  574 non-null    object 
 14  venue            577 non-null    object 
 15  umpire1          577 non-null    object 
 16  umpire2          577 non-null    object 
 17  umpire3         

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2008,Bangalore,4/18/2008,Kolkata Knight Riders,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Kolkata Knight Riders,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
1,2,2008,Chandigarh,4/19/2008,Chennai Super Kings,Kings XI Punjab,Chennai Super Kings,bat,normal,0,Chennai Super Kings,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,
2,3,2008,Delhi,4/19/2008,Rajasthan Royals,Delhi Daredevils,Rajasthan Royals,bat,normal,0,Delhi Daredevils,0,9,MF Maharoof,Feroz Shah Kotla,Aleem Dar,GA Pratapkumar,
3,4,2008,Mumbai,4/20/2008,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,normal,0,Royal Challengers Bangalore,0,5,MV Boucher,Wankhede Stadium,SJ Davis,DJ Harper,
4,5,2008,Kolkata,4/20/2008,Deccan Chargers,Kolkata Knight Riders,Deccan Chargers,bat,normal,0,Kolkata Knight Riders,0,5,DJ Hussey,Eden Gardens,BF Bowden,K Hariharan,


(577, 18)

In [3]:
# Preview the first seven rows of the raw data.
matches.head(7)

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2008,Bangalore,4/18/2008,Kolkata Knight Riders,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Kolkata Knight Riders,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
1,2,2008,Chandigarh,4/19/2008,Chennai Super Kings,Kings XI Punjab,Chennai Super Kings,bat,normal,0,Chennai Super Kings,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,
2,3,2008,Delhi,4/19/2008,Rajasthan Royals,Delhi Daredevils,Rajasthan Royals,bat,normal,0,Delhi Daredevils,0,9,MF Maharoof,Feroz Shah Kotla,Aleem Dar,GA Pratapkumar,
3,4,2008,Mumbai,4/20/2008,Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,bat,normal,0,Royal Challengers Bangalore,0,5,MV Boucher,Wankhede Stadium,SJ Davis,DJ Harper,
4,5,2008,Kolkata,4/20/2008,Deccan Chargers,Kolkata Knight Riders,Deccan Chargers,bat,normal,0,Kolkata Knight Riders,0,5,DJ Hussey,Eden Gardens,BF Bowden,K Hariharan,
5,6,2008,Jaipur,4/21/2008,Kings XI Punjab,Rajasthan Royals,Kings XI Punjab,bat,normal,0,Rajasthan Royals,0,6,SR Watson,Sawai Mansingh Stadium,Aleem Dar,RB Tiffin,
6,7,2008,Hyderabad,4/22/2008,Deccan Chargers,Delhi Daredevils,Deccan Chargers,bat,normal,0,Delhi Daredevils,0,9,V Sehwag,"Rajiv Gandhi International Stadium, Uppal",IL Howell,AM Saheba,


In [4]:
# Show the matches that have no recorded winner.
matches[matches['winner'].isna()]

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
241,242,2011,Delhi,5/21/2011,Delhi Daredevils,Pune Warriors,Delhi Daredevils,bat,no result,0,,0,0,,Feroz Shah Kotla,SS Hazare,RJ Tucker,
486,487,2015,Bangalore,4/29/2015,Royal Challengers Bangalore,Rajasthan Royals,Rajasthan Royals,field,no result,0,,0,0,,M Chinnaswamy Stadium,JD Cloete,PG Pathak,
511,512,2015,Bangalore,5/17/2015,Delhi Daredevils,Royal Challengers Bangalore,Royal Challengers Bangalore,field,no result,0,,0,0,,M Chinnaswamy Stadium,HDPK Dharmasena,K Srinivasan,


In [5]:
# Label matches without a winner as 'Draw'. Assign the result back instead of
# calling fillna(inplace=True) on the selected column: the chained in-place
# form emits a FutureWarning on recent pandas and does not update `matches`
# at all once Copy-on-Write is active.
matches['winner'] = matches['winner'].fillna('Draw')

In [6]:
# Spot-check one of the previously winner-less rows (index label 241).
matches.loc[241,'winner']

'Draw'

In [7]:
# Full franchise name -> short code, applied to every column of `matches`.
team_abbreviations = {
    'Mumbai Indians': 'MI',
    'Kolkata Knight Riders': 'KKR',
    'Royal Challengers Bangalore': 'RCB',
    'Deccan Chargers': 'DC',
    'Chennai Super Kings': 'CSK',
    'Rajasthan Royals': 'RR',
    'Delhi Daredevils': 'DD',
    'Gujarat Lions': 'GL',
    'Kings XI Punjab': 'KXIP',
    'Sunrisers Hyderabad': 'SRH',
    'Rising Pune Supergiants': 'RPS',
    'Kochi Tuskers Kerala': 'KTK',
    'Pune Warriors': 'PW',
}
matches.replace(list(team_abbreviations.keys()),
                list(team_abbreviations.values()),
                inplace=True)


matches.head(2)

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2008,Bangalore,4/18/2008,KKR,RCB,RCB,field,normal,0,KKR,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
1,2,2008,Chandigarh,4/19/2008,CSK,KXIP,CSK,bat,normal,0,CSK,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,


In [8]:
# One shared team-code table; each team column gets its own copy, and the
# 'winner' column additionally maps the 'Draw' placeholder to 14.
team_codes = {'MI':1,'KKR':2,'RCB':3,'DC':4,'CSK':5,'RR':6,'DD':7,
              'GL':8,'KXIP':9,'SRH':10,'RPS':11,'KTK':12,'PW':13}
encode = {column: dict(team_codes) for column in ('team1', 'team2', 'toss_winner')}
encode['winner'] = dict(team_codes, Draw=14)
matches.replace(encode, inplace=True)
matches.head(2)

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2008,Bangalore,4/18/2008,2,3,3,field,normal,0,2,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
1,2,2008,Chandigarh,4/19/2008,5,9,5,bat,normal,0,5,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,


In [9]:
# Show the matches that have no recorded city.
matches[matches['city'].isna()]

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
402,403,2014,,4/19/2014,1,3,3,field,normal,0,3,0,7,PA Patel,Dubai International Cricket Stadium,Aleem Dar,AK Chaudhary,
403,404,2014,,4/19/2014,2,7,2,bat,normal,0,7,0,4,JP Duminy,Dubai International Cricket Stadium,Aleem Dar,VA Kulkarni,
407,408,2014,,4/23/2014,5,6,6,field,normal,0,5,7,0,RA Jadeja,Dubai International Cricket Stadium,HDPK Dharmasena,RK Illingworth,
409,410,2014,,4/25/2014,10,7,10,bat,normal,0,10,4,0,AJ Finch,Dubai International Cricket Stadium,M Erasmus,S Ravi,
410,411,2014,,4/25/2014,1,5,1,bat,normal,0,5,0,7,MM Sharma,Dubai International Cricket Stadium,BF Bowden,M Erasmus,
415,416,2014,,4/28/2014,3,9,9,field,normal,0,9,0,5,Sandeep Sharma,Dubai International Cricket Stadium,BF Bowden,S Ravi,
417,418,2014,,4/30/2014,10,1,1,field,normal,0,10,15,0,B Kumar,Dubai International Cricket Stadium,HDPK Dharmasena,M Erasmus,


In [10]:
# Fill the missing cities with 'Dubai'. Assign the result back rather than
# using fillna(inplace=True) on the selected column, which is deprecated
# chained assignment and silently leaves `matches` unchanged under
# Copy-on-Write.
matches['city'] = matches['city'].fillna('Dubai')
matches.describe()
matches.info()

Unnamed: 0,id,season,team1,team2,toss_winner,dl_applied,winner,win_by_runs,win_by_wickets,umpire3
count,577.0,577.0,577.0,577.0,577.0,577.0,577.0,577.0,577.0,0.0
mean,289.0,2012.029463,5.417678,5.481802,5.365685,0.025997,5.218371,13.715771,3.363951,
std,166.709828,2.486247,3.281985,3.291946,3.255295,0.159263,3.196267,23.619282,3.416049,
min,1.0,2008.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,
25%,145.0,2010.0,3.0,3.0,3.0,0.0,2.0,0.0,0.0,
50%,289.0,2012.0,5.0,5.0,5.0,0.0,5.0,0.0,3.0,
75%,433.0,2014.0,8.0,7.0,7.0,0.0,7.0,20.0,6.0,
max,577.0,2016.0,13.0,13.0,13.0,1.0,14.0,144.0,10.0,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 577 entries, 0 to 576
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               577 non-null    int64  
 1   season           577 non-null    int64  
 2   city             577 non-null    object 
 3   date             577 non-null    object 
 4   team1            577 non-null    int64  
 5   team2            577 non-null    int64  
 6   toss_winner      577 non-null    int64  
 7   toss_decision    577 non-null    object 
 8   result           577 non-null    object 
 9   dl_applied       577 non-null    int64  
 10  winner           577 non-null    int64  
 11  win_by_runs      577 non-null    int64  
 12  win_by_wickets   577 non-null    int64  
 13  player_of_match  574 non-null    object 
 14  venue            577 non-null    object 
 15  umpire1          577 non-null    object 
 16  umpire2          577 non-null    object 
 17  umpire3         

In [11]:
# Row counts per toss winner / per match winner; print the second group
# (position 1) of each.
xx = matches.groupby("toss_winner").size()
print(xx.iloc[1])
yy = matches.groupby("winner").size()
print(yy.iloc[1])

69
68


In [12]:
# `dicVal` maps team code strings to integers; demonstrate both directions.
dicVal = encode['winner']
print(dicVal['MI'])
# Reverse lookup: first team whose integer code is 1.
print(next(team for team, code in dicVal.items() if code == 1))

1
MI


In [13]:
# Keep only the columns used for modelling (six predictors plus the target).
df_matches = matches[['team1','team2','city','toss_decision','toss_winner','venue','winner']]
# NOTE(review): this echoes the full `matches` frame, not `df_matches` — confirm that is intended.
matches.head(2)

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2008,Bangalore,4/18/2008,2,3,3,field,normal,0,2,140,0,BB McCullum,M Chinnaswamy Stadium,Asad Rauf,RE Koertzen,
1,2,2008,Chandigarh,4/19/2008,5,9,5,bat,normal,0,5,33,0,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",MR Benson,SL Shastri,


In [14]:
# Work on an explicit, independent copy. Wrapping an existing frame in
# pd.DataFrame(...) does not guarantee a copy, so the label-encoding
# assignments made to `df` later could leak into `df_matches` (or raise
# SettingWithCopyWarning) depending on the pandas version.
df = df_matches.copy()
df.describe()

Unnamed: 0,team1,team2,toss_winner,winner
count,577.0,577.0,577.0,577.0
mean,5.417678,5.481802,5.365685,5.218371
std,3.281985,3.291946,3.255295,3.196267
min,1.0,1.0,1.0,1.0
25%,3.0,3.0,3.0,2.0
50%,5.0,5.0,5.0,5.0
75%,8.0,7.0,7.0,7.0
max,13.0,13.0,13.0,14.0


In [15]:
# Distinct city names present before label encoding.
df["city"].unique()

array(['Bangalore', 'Chandigarh', 'Delhi', 'Mumbai', 'Kolkata', 'Jaipur',
       'Hyderabad', 'Chennai', 'Cape Town', 'Port Elizabeth', 'Durban',
       'Centurion', 'East London', 'Johannesburg', 'Kimberley',
       'Bloemfontein', 'Ahmedabad', 'Cuttack', 'Nagpur', 'Dharamsala',
       'Kochi', 'Indore', 'Visakhapatnam', 'Pune', 'Raipur', 'Ranchi',
       'Abu Dhabi', 'Sharjah', 'Dubai', 'Rajkot', 'Kanpur'], dtype=object)

In [16]:
# Distinct venue names present before label encoding.
df["venue"].unique()

array(['M Chinnaswamy Stadium',
       'Punjab Cricket Association Stadium, Mohali', 'Feroz Shah Kotla',
       'Wankhede Stadium', 'Eden Gardens', 'Sawai Mansingh Stadium',
       'Rajiv Gandhi International Stadium, Uppal',
       'MA Chidambaram Stadium, Chepauk', 'Dr DY Patil Sports Academy',
       'Newlands', "St George's Park", 'Kingsmead', 'SuperSport Park',
       'Buffalo Park', 'New Wanderers Stadium', 'De Beers Diamond Oval',
       'OUTsurance Oval', 'Brabourne Stadium',
       'Sardar Patel Stadium, Motera', 'Barabati Stadium',
       'Vidarbha Cricket Association Stadium, Jamtha',
       'Himachal Pradesh Cricket Association Stadium', 'Nehru Stadium',
       'Holkar Cricket Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Subrata Roy Sahara Stadium',
       'Shaheed Veer Narayan Singh International Stadium',
       'JSCA International Stadium Complex', 'Sheikh Zayed Stadium',
       'Sharjah Cricket Stadium', 'Dubai International Cricket St

In [None]:
# Integer-code the city column and show the code assigned to one city.
cat_list = df["city"]
encoded_data, mapping_index = cat_list.factorize()
print(encoded_data)
print(mapping_index)
print(mapping_index.get_loc("Visakhapatnam"))

In [None]:
# Integer-code the venue column and show the code assigned to one venue.
cat_list1 = df["venue"]
encoded_data1, mapping_index1 = cat_list1.factorize()
print(encoded_data1)
print(mapping_index1)
print(mapping_index1.get_loc("Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium"))

In [None]:
# Integer-code the toss decision and show the code assigned to "field".
cat_list2 = df["toss_decision"]
encoded_data2, mapping_index2 = cat_list2.factorize()
print(mapping_index2)
print(mapping_index2.get_loc("field"))

In [None]:
# Per-team counts of tosses won and matches won, printed with team codes
# translated back to their abbreviations.
temp1 = df['toss_winner'].value_counts(sort=True)
temp2 = df['winner'].value_counts(sort=True)

# Invert the name -> code table once instead of scanning it for every row.
code_to_team = {code: team for team, code in dicVal.items()}

# Series.iteritems() was removed in pandas 2.0; items() is the supported
# spelling and behaves the same on older releases.
print('No of toss winners by each team')
for idx, val in temp1.items():
    print('{} -> {}'.format(code_to_team[idx], val))
print('No of match winners by each team')
for idx, val in temp2.items():
    print('{} -> {}'.format(code_to_team[idx], val))

In [None]:
# Histogram of the integer team codes in the toss_winner column.
df['toss_winner'].hist(bins=50)

In [None]:
# Histogram of the integer team codes in the winner column.
df['winner'].hist(bins=50)

In [None]:
# Side-by-side bar charts: tosses won (left) and matches won (right) per team code.
fig = plt.figure(figsize=(8, 4))

# Left panel: labels are set before plotting, as in the original ordering.
ax1 = fig.add_subplot(121)
ax1.set_xlabel('toss_winner')
ax1.set_ylabel('Count of toss winners')
ax1.set_title("toss winners")
temp1.plot.bar(ax=ax1)

# Right panel: plot first, then label.
ax2 = fig.add_subplot(122)
temp2.plot.bar(ax=ax2)
ax2.set_xlabel('winner')
ax2.set_ylabel('Count of match winners')
ax2.set_title("Match winners")

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Any modelling rows still lacking a city.
df[df['city'].isna()]

In [None]:
# Label-encode the remaining string columns in place.
var_mod = ['city','toss_decision','venue']
# A single encoder object is re-fitted for every column, so after the loop
# `labelencoder.classes_` describes only the LAST column in var_mod ('venue').
labelencoder = LabelEncoder()
for i in var_mod:
    df[i] = labelencoder.fit_transform(df[i])
# Bare `df` echoes the whole encoded frame.
df 

In [None]:
# Preview the fully numeric modelling frame.
df.head(10)

### Building models

In [None]:
def classification_model(model, data, predictors, outcome):
    """Fit ``model`` on ``data`` and print its accuracy on that same data.

    Note that the reported figure is *training* accuracy: the model is scored
    on the rows it was fitted on, so it says nothing about generalisation.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    data : DataFrame holding both predictor and outcome columns.
    predictors : list of column names used as features.
    outcome : column name, or a list of column names, holding the target.

    Returns
    -------
    None. The predictions and the accuracy are printed.
    """
    features = data[predictors]
    target = data[outcome]
    # A one-element list selects a single-column DataFrame; estimators expect
    # a 1-D target and otherwise emit a DataConversionWarning.
    if getattr(target, 'ndim', 1) == 2 and target.shape[1] == 1:
        target = target.iloc[:, 0]

    model.fit(features, target)
    predictions = model.predict(features)
    print(predictions)
    # accuracy_score takes (y_true, y_pred) in that order.
    accuracy = metrics.accuracy_score(target, predictions)
    print('Accuracy : %s' % '{0:.3%}'.format(accuracy))

### LogisticRegression

In [None]:
# Target and feature column lists; later model cells reuse both names.
outcome_var=['winner']
predictor_var = ['team1', 'team2', 'city', 'toss_winner', 'venue', 'toss_decision']
# Logistic regression with library-default settings.
model_LR =LogisticRegression()
classification_model(model_LR, df, predictor_var, outcome_var)

### Gaussian naive bayes

In [None]:
# Gaussian naive Bayes fitted and scored on the same frame.
from sklearn.naive_bayes import GaussianNB
model_GNB = GaussianNB() 
classification_model(model_GNB, df,predictor_var,outcome_var)

### KNN

In [None]:
# 3-nearest-neighbours classifier fitted and scored on the same frame.
from sklearn.neighbors import KNeighborsClassifier
model_KNN = KNeighborsClassifier(n_neighbors=3)
classification_model(model_KNN, df,predictor_var,outcome_var)

In [None]:
# Quick look at the modelling frame.
df.head(3)

### SVM

In [None]:
# RBF-kernel support vector classifier with C=1 and gamma=1.
from sklearn import svm
model_SVM = svm.SVC(kernel='rbf', C=1, gamma=1) 
classification_model(model_SVM, df,predictor_var,outcome_var)

### Gradient Boosting

In [None]:
# Gradient boosting: 1000 depth-3 trees, learning rate 0.1, seeded for repeatability.
from sklearn.ensemble import GradientBoostingClassifier
model_GB = GradientBoostingClassifier(n_estimators = 1000, learning_rate = 0.1, max_depth = 3, random_state = 0)
classification_model(model_GB, df,predictor_var,outcome_var)

### Decision Tree

In [None]:
# Gini-criterion decision tree. Seeded like the boosting models so that
# re-running the notebook reproduces the same tree.
from sklearn import tree
model_DT = tree.DecisionTreeClassifier(criterion='gini', random_state=0)
classification_model(model_DT, df,predictor_var,outcome_var)

### RandomForest

In [None]:
# 100-tree random forest. Seeded so the predictions and feature importances
# reported further down are reproducible across runs.
model_RF = RandomForestClassifier(n_estimators=100, random_state=0)
classification_model(model_RF, df,predictor_var,outcome_var)

### Extreme Gradient Boosting

In [None]:
# XGBoost classifier: 1000 trees, learning rate 0.03, depth limited to 3.
# The keyword was previously misspelled as `max_dept`, so the depth limit
# was never applied and the library default was used instead.
# NOTE(review): recent xgboost releases expect class labels 0..n_classes-1,
# while `winner` starts at 1 — confirm against the installed version.
from xgboost import XGBClassifier
model_XGB = XGBClassifier(n_estimators = 1000, learning_rate = 0.03, max_depth = 3, random_state = 0)
classification_model(model_XGB, df, predictor_var, outcome_var)

In [None]:
# Quick look at the modelling frame.
df.head()

### Predicting on Test data

In [None]:
# Load the hold-out matches to predict.
test=pd.read_csv("test.csv")
test.head()

In [None]:
# Remove the match date and the ground-truth winner before predicting.
test = test.drop(columns=["date", "winner"])

In [None]:
# Apply the same team-code table used on the training data.
# NOTE(review): only the team columns are mapped here; city / venue /
# toss_decision are not label-encoded in this cell — confirm test.csv
# already stores them as the integer codes the models were trained on.
test.replace(encode, inplace=True)
test.head(2)

In [None]:
# Predict using exactly the training feature columns, in training order.
# Passing the whole `test` frame relied on test.csv containing those six
# columns and nothing else, in the same order.
test_features = test[predictor_var]
out_XGB = model_XGB.predict(test_features)
out_RF = model_RF.predict(test_features)

In [None]:
# Translate predicted integer codes back to team abbreviations. The inverse
# table is built once rather than re-listing dicVal's keys and values for
# every single prediction.
code_to_team = {code: team for team, code in dicVal.items()}

predicted_winner_XGB = [code_to_team[i] for i in out_XGB]
predicted_winner_RF = [code_to_team[i] for i in out_RF]

In [None]:
# Share of hold-out matches whose actual winner equals the XGBoost prediction.
ipl = pd.read_csv('test.csv')
total = len(ipl['winner'])
# Pair each actual winner with its prediction instead of maintaining a
# separate positional counter by hand.
ctr = sum(actual == predicted
          for actual, predicted in zip(ipl['winner'], predicted_winner_XGB))
print(ctr)
print("Accuracy of our prediction of recent IPL is ", ctr/total)

In [None]:
# Share of hold-out matches whose actual winner equals the random-forest prediction.
ipl = pd.read_csv('test.csv')
total = len(ipl['winner'])
# Pair each actual winner with its prediction instead of maintaining a
# separate positional counter by hand.
ctr = sum(actual == predicted
          for actual, predicted in zip(ipl['winner'], predicted_winner_RF))
print(ctr)
print("Accuracy of our prediction of recent IPL is ", ctr/total)

In [None]:
# Dump both lists of predicted team abbreviations.
print(predicted_winner_XGB)
print(predicted_winner_RF)

In [None]:
# Assemble the results table: match details from test.csv, the actual winner,
# and both models' predictions; then write it to disk.
sub = ipl[['team1', 'team2', 'venue', 'toss_winner',
           'city', 'toss_decision', 'date', 'winner']].rename(
    columns={'date': 'date of match', 'winner': 'actual_winner'})
sub['Predicted_winner_XGB'] = predicted_winner_XGB
sub['Predicted_winner_RF'] = predicted_winner_RF
sub.to_csv('ipl2.csv', index=False)

In [None]:
# Preview the results table that was written to ipl2.csv.
sub.head(5)

In [None]:
# Feature importances of both tree ensembles, most important first.
imp_input_RF = pd.Series(data=model_RF.feature_importances_, index=predictor_var)
imp_input_RF = imp_input_RF.sort_values(ascending=False)
print(imp_input_RF)

imp_input_XGB = pd.Series(data=model_XGB.feature_importances_, index=predictor_var)
imp_input_XGB = imp_input_XGB.sort_values(ascending=False)
print(imp_input_XGB)

In [None]:
# Of the tosses won by team code 1 (MI), how many did that team also win?
team_code = 1
plt.style.use('fivethirtyeight')
df_fil = df[df['toss_winner'] == team_code]
# Vectorised count; no dependence on the frame having exactly 577 rows.
count = int((df_fil['winner'] == team_code).sum())
slices = [count, len(df_fil) - count]
plt.pie(slices,labels=['Toss & win','Toss & lose'],startangle=90,shadow=True,explode=(0,0),autopct='%1.1f%%',colors=['g','b'])
fig = plt.gcf()
fig.set_size_inches(6,6)
plt.show()

In [None]:
# Of the tosses won by team code 5 (CSK), how many did that team also win?
team_code = 5
plt.style.use('fivethirtyeight')
# The denominator must be this team's toss wins. It was previously filtered
# with toss_winner == 1 (a different team), which made the "lose" slice wrong.
df_fil = df[df['toss_winner'] == team_code]
# Vectorised count; no dependence on the frame having exactly 577 rows.
count = int((df_fil['winner'] == team_code).sum())
slices = [count, len(df_fil) - count]
plt.pie(slices,labels=['Toss & win','Toss & lose'],startangle=90,shadow=True,explode=(0,0),autopct='%1.1f%%',colors=['g','b'])
fig = plt.gcf()
fig.set_size_inches(6,6)
plt.show()

In [None]:
# Of the tosses won by team code 9 (KXIP), how many did that team also win?
team_code = 9
plt.style.use('fivethirtyeight')
# The denominator must be this team's toss wins. It was previously filtered
# with toss_winner == 1 (a different team), which made the "lose" slice wrong.
df_fil = df[df['toss_winner'] == team_code]
# Vectorised count; no dependence on the frame having exactly 577 rows.
count = int((df_fil['winner'] == team_code).sum())
slices = [count, len(df_fil) - count]
plt.pie(slices,labels=['Toss & win','Toss & lose'],startangle=90,shadow=True,explode=(0,0),autopct='%1.1f%%',colors=['g','b'])
fig = plt.gcf()
fig.set_size_inches(6,6)
plt.show()

In [None]:
# Across all teams: share of matches where the toss winner also won the match.
plt.style.use('fivethirtyeight')
df_fil=df[df['toss_winner']==df['winner']]

# Use the actual row count rather than a hard-coded 577.
slices=[len(df_fil),(len(df)-len(df_fil))]

plt.pie(slices,labels=['Toss & win','Toss & lose'],startangle=90,shadow=True,explode=(0,0),autopct='%1.1f%%',colors=['r','g'])
fig = plt.gcf()
fig.set_size_inches(6,6)
plt.show()

In [None]:
# Head-to-head record between CSK and RCB, broken down by venue.
team1 = dicVal['CSK']
team2 = dicVal['RCB']

# A match qualifies when each of the two teams appears on either side.
involves_team1 = (df_matches['team1'] == team1) | (df_matches['team2'] == team1)
involves_team2 = (df_matches['team1'] == team2) | (df_matches['team2'] == team2)
mtemp = df_matches[involves_team1 & involves_team2]

sns.countplot(x='venue', hue='winner', data=mtemp, palette='Set3')
plt.xticks(rotation='vertical')
leg = plt.legend(loc='upper right')
fig = plt.gcf()
fig.set_size_inches(10, 6)
plt.show()
# The encoder was last fitted on 'venue', so this names the venue with code 15.
labelencoder.classes_[15]

In [None]:
# Load shap's JavaScript so interactive plots render in the notebook.
shap.initjs()
mybooster = model_XGB.get_booster()
# Serialise the booster once and drop the first 4 bytes of the raw buffer.
# NOTE(review): presumably a workaround for shap failing to parse the header
# that some xgboost versions prepend — confirm it is still needed.
model_bytearray = mybooster.save_raw()[4:]
def myfun(self=None):
    # Replacement for Booster.save_raw that returns the trimmed buffer.
    return model_bytearray

# Monkey-patch this booster instance so callers of save_raw get the trimmed bytes.
mybooster.save_raw = myfun

In [None]:
# Build a tree explainer on the patched booster and compute SHAP values for
# the training features.
explainer = shap.TreeExplainer(mybooster)
shap_values = explainer.shap_values(df[predictor_var])

### Visualizing how each feature value affects the model

In [None]:
# Force plot for the first entry of expected_value / shap_values over all rows.
shap.force_plot(explainer.expected_value[0], shap_values[0], df[predictor_var])

In [None]:
# One force plot per entry of shap_values. Expression results inside a loop
# are not echoed by the notebook (even with ast_node_interactivity = "all"),
# so each plot object has to be displayed explicitly.
from IPython.display import display

for i in range(len(shap_values)):
    display(shap.force_plot(explainer.expected_value[i], shap_values[i], df[predictor_var]))

In [None]:
# Dependence plot of every predictor against every entry of shap_values.
for name in predictor_var:
    for class_shap_values in shap_values:
        shap.dependence_plot(name, class_shap_values, df[predictor_var])

In [None]:
# Overall summary of SHAP values across the predictors.
shap.summary_plot(shap_values, df[predictor_var])