# Importing Libraries 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('fivethirtyeight')


import plotly
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.express as px


import cufflinks as cf
cf.go_offline()

# Loading The Datasets

In [2]:
matches = pd.read_csv('matches.csv')
deliveries = pd.read_csv('deliveries.csv')
matches.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,2017,Pune,2017-04-06,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,2017,Rajkot,2017-04-07,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,2017,Indore,2017-04-08,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,2017,Bangalore,2017-04-08,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [3]:
deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


In [4]:
matches.shape

(756, 18)

In [5]:
deliveries.shape

(179078, 21)

In [6]:
matches.columns

Index(['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner',
       'toss_decision', 'result', 'dl_applied', 'winner', 'win_by_runs',
       'win_by_wickets', 'player_of_match', 'venue', 'umpire1', 'umpire2',
       'umpire3'],
      dtype='object')

In [7]:
matches.season.value_counts().sort_values(ascending = False)

2013    76
2012    74
2011    73
2010    60
2014    60
2016    60
2018    60
2019    60
2015    59
2017    59
2008    58
2009    57
Name: season, dtype: int64

In [8]:
matches.team1.value_counts()

Mumbai Indians                 101
Kings XI Punjab                 91
Chennai Super Kings             89
Royal Challengers Bangalore     85
Kolkata Knight Riders           83
Delhi Daredevils                72
Rajasthan Royals                67
Sunrisers Hyderabad             63
Deccan Chargers                 43
Pune Warriors                   20
Gujarat Lions                   14
Rising Pune Supergiant           8
Kochi Tuskers Kerala             7
Rising Pune Supergiants          7
Delhi Capitals                   6
Name: team1, dtype: int64

In [9]:
matches.isnull().sum().sort_values(ascending = False)

umpire3            637
city                 7
player_of_match      4
winner               4
umpire1              2
umpire2              2
toss_winner          0
season               0
date                 0
team1                0
team2                0
result               0
toss_decision        0
dl_applied           0
win_by_runs          0
win_by_wickets       0
venue                0
id                   0
dtype: int64

# Removed Inconsistent Teams and Added Short Names

In [10]:
matches.replace(to_replace = [ 'Delhi Daredevils']   , value = ['Delhi Capitals' ] , inplace = True)
deliveries.replace(to_replace = ['Delhi Daredevils'  ] , value = [ 'Delhi Capitals'], inplace = True)

consistent_teams = ['Sunrisers Hyderabad', 'Mumbai Indians',
        'Kolkata Knight Riders', 'Royal Challengers Bangalore', 
        'Delhi Capitals', 'Kings XI Punjab','Chennai Super Kings', 'Rajasthan Royals']


# Taking data of only consistent teams
matches_2 = matches[ (matches.team1.isin( consistent_teams )) & (matches.team2.isin( consistent_teams ))]
deliveries_2 = deliveries[ (deliveries.batting_team.isin( consistent_teams )) & (deliveries.bowling_team.isin( consistent_teams )) ]

dic = {'Sunrisers Hyderabad' : 'SRH' , 'Kolkata Knight Riders' : 'KKR',
       'Royal Challengers Bangalore' : 'RCB' , 'Kings XI Punjab' : 'KXIP',
       'Mumbai Indians' : 'MI' , 'Chennai Super Kings' : 'CSK' , 
       'Rajasthan Royals' : 'RR' , 'Delhi Capitals' : 'DC'
      }

# Replacing names of teams to their short names
matches_2.replace( dic , inplace = True )
deliveries_2.replace( dic , inplace = True )



In [11]:
matches_2.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2017,Hyderabad,2017-04-05,SRH,RCB,RCB,field,normal,0,SRH,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
4,5,2017,Bangalore,2017-04-08,RCB,DC,RCB,bat,normal,0,RCB,15,0,KM Jadhav,M Chinnaswamy Stadium,,,
6,7,2017,Mumbai,2017-04-09,KKR,MI,MI,field,normal,0,MI,0,4,N Rana,Wankhede Stadium,Nitin Menon,CK Nandan,
7,8,2017,Indore,2017-04-10,RCB,KXIP,RCB,bat,normal,0,KXIP,0,8,AR Patel,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
9,10,2017,Mumbai,2017-04-12,SRH,MI,MI,field,normal,0,MI,0,4,JJ Bumrah,Wankhede Stadium,Nitin Menon,CK Nandan,


In [12]:
deliveries_2.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,SRH,RCB,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,SRH,RCB,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,SRH,RCB,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,SRH,RCB,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,SRH,RCB,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


In [13]:
matches_2.shape

(571, 18)

In [14]:
matches_2.drop('umpire3' , axis = 1 , inplace = True)

# Exploratory Data Analysis on Matches Dataset

___
# Let's find out the winning %age of each team
___

In [15]:
win_prcntage = ( matches_2.winner.value_counts() / (matches_2.team1.value_counts() + matches_2.team2.value_counts()) )* 100

In [16]:
win_prcntage = win_prcntage.to_frame().reset_index().rename( columns = { 'index' : 'Team_Name' , 0 : 'Win %age'})
win_prcntage.sort_values( by = 'Win %age' , ascending = False , inplace = True)

In [17]:
win_prcntage.iplot(kind = 'bar' , x = 'Team_Name' , y = 'Win %age' , title = 'Win %age of each team[2008 - 2019]' , xTitle = 'Teams' , yTitle = 'Win %age')  

# Teams Featured in Most Number of Season

In [18]:
# Team Which Featured in Each Season

matches_2.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2
0,1,2017,Hyderabad,2017-04-05,SRH,RCB,RCB,field,normal,0,SRH,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong
4,5,2017,Bangalore,2017-04-08,RCB,DC,RCB,bat,normal,0,RCB,15,0,KM Jadhav,M Chinnaswamy Stadium,,
6,7,2017,Mumbai,2017-04-09,KKR,MI,MI,field,normal,0,MI,0,4,N Rana,Wankhede Stadium,Nitin Menon,CK Nandan
7,8,2017,Indore,2017-04-10,RCB,KXIP,RCB,bat,normal,0,KXIP,0,8,AR Patel,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin
9,10,2017,Mumbai,2017-04-12,SRH,MI,MI,field,normal,0,MI,0,4,JJ Bumrah,Wankhede Stadium,Nitin Menon,CK Nandan


In [19]:
lis = matches_2.team1.unique()
dic = {}
for values in lis:
    dic[values] = 0
for season_no in  matches_2.groupby('season'):
    for team in dic:
        if team in season_no[1].team1.unique():
            dic[team] += 1
print(dic)

{'SRH': 7, 'RCB': 12, 'KKR': 12, 'KXIP': 12, 'DC': 12, 'MI': 12, 'CSK': 10, 'RR': 10}


In [20]:
team_vs_seasons = pd.DataFrame(dic.items()).rename(columns={0:'Team Name', 1:'Season Count'})
team_vs_seasons.sort_values(by='Season Count', ascending= False, inplace=True)
team_vs_seasons.head()

Unnamed: 0,Team Name,Season Count
1,RCB,12
2,KKR,12
3,KXIP,12
4,DC,12
5,MI,12


In [21]:
team_vs_seasons.iplot(kind = 'bar' , x = 'Team Name' , y = 'Season Count' , title = 'Season Count of each team[2008 - 2019]' , xTitle = 'Team Name' , yTitle = 'Season Count')  

# Player of the Match Vs Season

In [22]:
matches_2.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2
0,1,2017,Hyderabad,2017-04-05,SRH,RCB,RCB,field,normal,0,SRH,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong
4,5,2017,Bangalore,2017-04-08,RCB,DC,RCB,bat,normal,0,RCB,15,0,KM Jadhav,M Chinnaswamy Stadium,,
6,7,2017,Mumbai,2017-04-09,KKR,MI,MI,field,normal,0,MI,0,4,N Rana,Wankhede Stadium,Nitin Menon,CK Nandan
7,8,2017,Indore,2017-04-10,RCB,KXIP,RCB,bat,normal,0,KXIP,0,8,AR Patel,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin
9,10,2017,Mumbai,2017-04-12,SRH,MI,MI,field,normal,0,MI,0,4,JJ Bumrah,Wankhede Stadium,Nitin Menon,CK Nandan


In [23]:
matches.player_of_match.value_counts()

CH Gayle          21
AB de Villiers    20
RG Sharma         17
MS Dhoni          17
DA Warner         17
                  ..
R McLaren          1
RP Singh           1
AD Mathews         1
DJG Sammy          1
J Archer           1
Name: player_of_match, Length: 226, dtype: int64

In [24]:
m_of_m_count = matches['player_of_match'].value_counts().head(15).to_frame().reset_index().rename(columns = {'index': 'player_name', 'player_of_match': 'count'})

In [25]:
m_of_m_count.iplot(kind = 'bar' , x = 'player_name' , y = 'count' , title = 'Man of the match[2008 - 2019]' , xTitle = 'player_name' , yTitle = 'count')  

# Team Vs Numer of Matches Played in Each City

In [26]:
matches_2.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2
0,1,2017,Hyderabad,2017-04-05,SRH,RCB,RCB,field,normal,0,SRH,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong
4,5,2017,Bangalore,2017-04-08,RCB,DC,RCB,bat,normal,0,RCB,15,0,KM Jadhav,M Chinnaswamy Stadium,,
6,7,2017,Mumbai,2017-04-09,KKR,MI,MI,field,normal,0,MI,0,4,N Rana,Wankhede Stadium,Nitin Menon,CK Nandan
7,8,2017,Indore,2017-04-10,RCB,KXIP,RCB,bat,normal,0,KXIP,0,8,AR Patel,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin
9,10,2017,Mumbai,2017-04-12,SRH,MI,MI,field,normal,0,MI,0,4,JJ Bumrah,Wankhede Stadium,Nitin Menon,CK Nandan


In [27]:
list(matches_2.city.unique())

['Hyderabad',
 'Bangalore',
 'Mumbai',
 'Indore',
 'Kolkata',
 'Delhi',
 'Chandigarh',
 'Jaipur',
 'Chennai',
 'Cape Town',
 'Port Elizabeth',
 'Durban',
 'Centurion',
 'East London',
 'Johannesburg',
 'Kimberley',
 'Bloemfontein',
 'Ahmedabad',
 'Dharamsala',
 'Pune',
 'Raipur',
 'Ranchi',
 'Abu Dhabi',
 'Sharjah',
 nan,
 'Cuttack',
 'Visakhapatnam',
 'Mohali',
 'Bengaluru']

In [28]:
def team_matches_city(city_name):
    for value in matches_2.groupby('city'):
        if value[0] == city_name:
            matches = (value[1].team1.value_counts() + value[1].team2.value_counts())
            return matches

In [29]:
city_name = 'Mumbai'
matches_in_a_particular_city = team_matches_city(city_name)
matches_in_a_particular_city = matches_in_a_particular_city.to_frame().reset_index().rename(columns = {'index' : 'Team Name' , 0 : 'Count'})
matches_in_a_particular_city.sort_values(by = "Count" , ascending = False , inplace = True)

In [30]:
 matches_in_a_particular_city.iplot(kind = 'bar' , x = 'Team Name' , y = 'Count' , title = 'Teams vs no of matches played in' + city_name + '[2008 - 2019]' , xTitle = 'Teams' , yTitle = 'Count')  

____
# Key players for different teams
___

In [31]:
dic = {'Sunrisers Hyderabad' : 'SRH' , 'Kolkata Knight Riders' : 'KKR',
       'Royal Challengers Bangalore' : 'RCB' , 'Kings XI Punjab' : 'KXIP',
       'Mumbai Indians' : 'MI' , 'Chennai Super Kings' : 'CSK' , 
       'Rajasthan Royals' : 'RR' , 'Delhi Capitals' : 'DC'
      }
matches.replace(dic , inplace = True)

In [32]:
def key_players(team_name):
    for value in matches.groupby('winner'):
        if value[0] == team_name:
            return value[1]['player_of_match'].value_counts().head()

In [33]:
df = key_players('RCB').to_frame().reset_index().rename(columns = {'index' : 'Player' , 'player_of_match' : 'Count'})

In [34]:
df.iplot(kind = 'bar' , x = 'Player' , y = 'Count' , title = 'Player vs no of MOM count' , xTitle = 'Player' , yTitle = 'Count')  

#  Man of the Match Player Vs teams  

In [35]:
def player_MOM_for_teams(player_name):
    for value in matches.groupby('player_of_match'):
        if value[0] == player_name:
            return value[1]['winner'].value_counts()


player_name = 'V Kohli'
df = player_MOM_for_teams(  player_name  ).to_frame().reset_index().rename(columns = {'index' : 'Team' , 'winner' : 'Count'})

In [36]:
px.pie( df , values='Count', names='Team', title='Player vs MOM count for different teams' ,color_discrete_sequence=px.colors.sequential.RdBu)

# Average Win by Run While Chasing And Defending

In [37]:
def avg_win_by_runs_and_wickets_of_a_team_while_defending_and_chasing( team_name , given_df ):
    for value in given_df.groupby('winner'):
        if value[0] == team_name:

            total_win_by_runs = sum(list(value[1]['win_by_runs']))
            total_win_by_wickets = sum(list(value[1]['win_by_wickets']))
            
            if 0 in list(value[1]['win_by_runs'].value_counts().index):
                x = value[1]['win_by_runs'].value_counts()[0]
            else:
                x = 0
                
            if 0 in list(value[1]['win_by_wickets'].value_counts().index):
                y = value[1]['win_by_wickets'].value_counts()[0]
            else:
                y = 0
                
            number_of_times_given_team_win_while_defending = (len(value[1]) - x )
            number_of_times_given_team_win_while_chasing = (len(value[1]) - y )
            
            average_runs_by_which_a_given_team_wins_while_defending = total_win_by_runs / number_of_times_given_team_win_while_defending
            average_wickets_by_which_a_given_team_wins_while_chasing = total_win_by_wickets / number_of_times_given_team_win_while_chasing
            
            print('number_of_times_given_team_win_while_defending :' , number_of_times_given_team_win_while_defending ) 
            print('number_of_times_given_team_win_while_chasing   :' , number_of_times_given_team_win_while_chasing )
            print()
            print('average_runs_by_which_a_given_team_wins_while_defending  : ' ,average_runs_by_which_a_given_team_wins_while_defending )
            print('average_wickets_by_which_a_given_team_wins_while_chasing : ' ,average_wickets_by_which_a_given_team_wins_while_chasing)
            

In [38]:
avg_win_by_runs_and_wickets_of_a_team_while_defending_and_chasing('RCB' , matches)

number_of_times_given_team_win_while_defending : 35
number_of_times_given_team_win_while_chasing   : 48

average_runs_by_which_a_given_team_wins_while_defending  :  35.77142857142857
average_wickets_by_which_a_given_team_wins_while_chasing :  6.541666666666667


In [39]:
avg_win_by_runs_and_wickets_of_a_team_while_defending_and_chasing('CSK' , matches)

number_of_times_given_team_win_while_defending : 52
number_of_times_given_team_win_while_chasing   : 48

average_runs_by_which_a_given_team_wins_while_defending  :  34.19230769230769
average_wickets_by_which_a_given_team_wins_while_chasing :  6.020833333333333


# Wining %age of Team by Toss Decision

In [40]:
def win_visu_by_toss(team_name):
    datas = matches[(matches['toss_winner']==team_name) & (matches['winner']==team_name)]
    count = datas['toss_decision'].value_counts()
    win_bat = count['bat']/(count['field']+count['bat'])*100
    win_field = count['field']/(count['bat']+count['field'])*100
    print("field_count = "+ str(count['field']))
    print("bat_count = " + str(count['bat']))
    print("Win %age if fielding is choosen = " + str(win_field))
    print("Win %age if batting is choosen = " + str(win_bat))
    print()
    print()
    data = [['Fielding', win_field], ['Batting', win_bat]]
    data = pd.DataFrame (data,columns=['Decision','Win_%age'])
    return(px.pie( data , values= 'Win_%age' , names='Decision', title='Win %age For '+ team_name + ' for toss decision',color_discrete_sequence=px.colors.sequential.Rainbow))

            
team_name = str(input("Enter Team Name : "))
plot = win_visu_by_toss(team_name)
plot

Enter Team Name : MI
field_count = 31
bat_count = 25
Win %age if fielding is choosen = 55.35714285714286
Win %age if batting is choosen = 44.642857142857146




# Matches Played Vs Win Number

In [41]:
matches_played=pd.concat([matches['team1'],matches['team2']], axis=0)
matches_played=matches_played.value_counts().reset_index()
matches_played.columns=['Team','Total Matches']
matches_played['wins']=matches['winner'].value_counts().reset_index()['winner']
matches_played.set_index('Team',inplace=True)
matches_played

Unnamed: 0_level_0,Total Matches,wins
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
MI,187,109
RCB,180,100
KKR,178,92
DC,177,84
KXIP,176,82
CSK,164,77
RR,147,75
SRH,108,58
Deccan Chargers,75,29
Pune Warriors,46,13


In [42]:
win_percentage = round(matches_played['wins']/matches_played['Total Matches'],3)*100

# Win %age of Each Team

In [43]:
Teams = [ 'MI', 'RCB', 'KKR', 'K11P', 'CSK', 'DD', 'RR', 'SH', 'DC', 'PW', 'GL', 'RPSG', 'DC', 'KTK', 'RPSGS']
  
data = ([58.3, 55.6, 51.7, 47.7, 50. , 46.6, 45.6, 53.7, 38.7, 28.3, 40. ,
       62.5, 62.5, 42.9, 35.7])
pie_plot = go.Pie(labels = Teams, values = data)
iplot([pie_plot])

# Win %age comparison b/w Two Teams

In [44]:
A , B =  input("Enter the team names separated by space : ").split(' ')
def compare_teams(A , B):
    new_df = matches_2[ ( (matches_2['team1'] == A) & (matches_2['team2'] == B) ) | ((matches_2['team1'] == B) & (matches_2['team2'] == A)) ]
    new_df = new_df.winner.value_counts().to_frame().reset_index().rename( columns = {'index' : 'Team' , 'winner' : 'win %age'}) 
    
    fig = px.pie( new_df , values='win %age', names='Team', title='Comparison of win %age b/w ' + A +' and ' + B ,color_discrete_sequence=px.colors.sequential.RdBu)
    return fig
compare_teams(A , B)

Enter the team names separated by space : CSK MI


___
# Top 5 cricket stadiums

In [45]:
top_5_venue = matches.venue.value_counts().head(5)
top_5_venue_data = pd.DataFrame({
    'venue': top_5_venue.index,
    'count': top_5_venue.values
})

In [46]:
px.pie( top_5_venue_data , values='count', names='venue', title='Most popular venues [2008 - 2019]', color_discrete_sequence=px.colors.sequential.RdBu)

# Umpires to feature in max number of matches

In [47]:
# Creating list for each umpires
umpire1 = list(matches.umpire1)
umpire2 = list(matches.umpire2)
umpire3 = list(matches.umpire3)

# Concating all of the lists
umpire1.extend(umpire2)
umpire1.extend(umpire3)

In [48]:
# Created the dataframe for umpires
new_data = pd.DataFrame(umpire1, columns=['umpires'])
umpire_data = new_data.umpires.value_counts().head(10)
umpire_dataset = pd.DataFrame({
        'umpires': umpire_data.index,
        'count': umpire_data.values
})

In [49]:
px.pie( umpire_dataset , values='count', names='umpires', title='Umpires to feature in max num of matches [2008 - 2019]', color_discrete_sequence=px.colors.sequential.RdBu)

# Creating df for season Winners and Runner-ups

In [50]:
lis = []
for value in matches.groupby('season'):

    
    if value[1].tail(1).winner.values[0] == value[1].tail(1).team1.values[0]:
        runner_up = value[1].tail(1).team2.values[0]
    else:
        runner_up = value[1].tail(1).team1.values[0]
    lis.append([ value[0] , value[1].tail(1).winner.values[0] , runner_up ] )
print(lis)

[[2008, 'RR', 'CSK'], [2009, 'Deccan Chargers', 'RCB'], [2010, 'CSK', 'MI'], [2011, 'CSK', 'RCB'], [2012, 'KKR', 'CSK'], [2013, 'MI', 'CSK'], [2014, 'KKR', 'KXIP'], [2015, 'MI', 'CSK'], [2016, 'SRH', 'RCB'], [2017, 'MI', 'Rising Pune Supergiant'], [2018, 'CSK', 'SRH'], [2019, 'MI', 'CSK']]


In [51]:
winners = pd.DataFrame(lis , columns = ['Season' , 'Winner' , 'RunnerUp'])

# Season Winners Effective Visualisations

In [52]:
season_winners = winners['Winner'].value_counts().to_frame().reset_index().rename(columns = {'index' : 'Winner_Team' , 'Winner' : 'Count'})

In [53]:
px.pie( season_winners , values='Count', names='Winner_Team', title='Season Winners [2008 - 2019]', color_discrete_sequence=px.colors.sequential.RdBu)

# Season Runner-Ups Effective Visualisations

In [54]:
season_runner_ups = winners['RunnerUp'].value_counts().to_frame().reset_index().rename(columns = {'index' : 'Runner_up' , 'RunnerUp' : 'Count'})

In [55]:
px.pie( season_runner_ups , values='Count', names='Runner_up', title='Season Runner Ups [2008 - 2019]', color_discrete_sequence=px.colors.sequential.RdBu)

# Exploratory Data Analysis on Deliveries Dataset

In [56]:
for value in deliveries.groupby('batsman'):
    if value[0] == 'DA Warner':
        print(value[1]['batsman_runs'].sum())

4741


In [57]:
deliveries['batsman_runs'].sum()

223286

In [58]:
batsmen = matches[['id','season']].merge(deliveries, left_on = 'id', right_on = 'match_id', how = 'left')

In [59]:
batsmen.head()

Unnamed: 0,id,season,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,2017,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,...,0,0,0,0,0,0,0,,,
1,1,2017,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,...,0,0,0,0,0,0,0,,,
2,1,2017,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,...,0,0,0,0,4,0,4,,,
3,1,2017,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,...,0,0,0,0,0,0,0,,,
4,1,2017,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,...,0,0,0,0,0,2,2,,,


In [60]:
 def player_runs_across_season(player_name):   
    dic = dict()
    for i in matches.season.unique():
        dic[i] = 0
    for ids in list(deliveries.match_id.unique()):
        season = int(matches[(matches.id == ids)]['season'])
        values = int(deliveries[(deliveries.match_id == ids) & (deliveries.batsman == player_name)].batsman_runs.sum())
        dic[season] += values
    dicc = {}
    dic11 = list(dic.keys())
    dic11.sort()
    for i in dic11:
        dicc[i] = dic[i]
    return dicc
    
player_1 = input('enter player 1 ')
player_2 = input('enter player 2 ')
dic1 = player_runs_across_season(player_1)
dic2 = player_runs_across_season(player_2)

enter player 1 V Kohli
enter player 2 MS Dhoni


In [61]:
 
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(dic1.keys()), y=list(dic1.values()),
                    mode='lines + markers',
                    name= player_1 ))
fig.add_trace(go.Scatter(x=list(dic2.keys()), y=list(dic2.values()),
                    mode='lines+markers',
                    name= player_2))

fig.show()

# Number of fours and sixes  across seasons

In [62]:
def boundaries_counter(given_df):
    lis = []
    for value in given_df.groupby('season'):
        lis.append([ value[0] , value[1]['batsman_runs'].value_counts()[4] , value[1]['batsman_runs'].value_counts()[6] ])
    boundaries = pd.DataFrame( lis , columns = ['Season' , "4's" , "6's"] )
    return boundaries

In [63]:
boundaries = boundaries_counter(batsmen)

In [64]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=boundaries['Season'], y=boundaries["4's"],
                    mode='lines + markers',
                    name= "4's" ))
fig.add_trace(go.Scatter(x=boundaries['Season'], y=boundaries["6's"],
                    mode='lines+markers',
                    name= "6's"))

fig.show()

# Top - 15 fielders

In [65]:
top_15_fielders = (batsmen.fielder.value_counts().head(15)).to_frame().reset_index().rename(columns = {'index' : 'Player' , 'fielder' : 'Count'})
top_15_fielders

Unnamed: 0,Player,Count
0,MS Dhoni,159
1,KD Karthik,152
2,RV Uthappa,125
3,SK Raina,115
4,AB de Villiers,114
5,PA Patel,97
6,RG Sharma,92
7,V Kohli,90
8,KA Pollard,85
9,NV Ojha,82


In [66]:
top_15_fielders.iplot(kind = 'bar' , x = 'Player' , y = 'Count' , title = 'Fielder vs No. of dismissals[2008 - 2019]' , xTitle = 'Fielder' , yTitle = 'Count')  

In [67]:
def avg_partnership(player_A1, player_A2, player_B1, player_B2):
    data11 = deliveries[((deliveries['batsman'] == player_A1) | (deliveries['batsman'] == player_A2)) & ((deliveries['non_striker'] == player_A1) | (deliveries['non_striker'] == player_A2))]
    print('Avg Partnership of Pair 1 = '+ str(data11.batsman_runs.sum()/len(data11['match_id'].unique())))
    data12 = deliveries[((deliveries['batsman'] == player_B1) | (deliveries['batsman'] == player_B2)) & ((deliveries['non_striker'] == player_B1) | (deliveries['non_striker'] == player_B2))]
    print('Avg Partnership of Pair 2 = '+ str(data11.batsman_runs.sum()/len(data12['match_id'].unique())))
    ls= [['Pair 1', data11.batsman_runs.sum()/len(data11['match_id'].unique())],['Pair 2', data11.batsman_runs.sum()/len(data12['match_id'].unique())]]
    dataf = pd.DataFrame(ls, columns = ['Pairs', 'Avg_Runs'])
    return(px.pie( dataf , values='Avg_Runs', names='Pairs', title='Avg Runs For different Pairs ',color_discrete_sequence=px.colors.sequential.RdBu))
    
    

player_A1 = input('Enter First Batsman of First Pair : ')
player_A2 = input('Enter Second Batsman of First Pair : ')
player_B1 = input('Enter First Batsman of Second Pair : ')
player_B2 = input('Enter Second Batsman of Second Pair : ')

plot = avg_partnership(player_A1, player_A2, player_B1, player_B2)
plot

Enter First Batsman of First Pair : MS Dhoni
Enter Second Batsman of First Pair : SK Raina
Enter First Batsman of Second Pair : V Kohli
Enter Second Batsman of Second Pair : AB de Villiers
Avg Partnership of Pair 1 = 26.62264150943396
Avg Partnership of Pair 2 = 20.75


# Conclusion

- **CSK** has the maximum win percentage, whereas **DC** has minimum win percentage.
- **SRH** has played least number of seasons from 2008-2019.
- **CH Gayle** is the player with maximum number of **Man of the match Award**.
- **MI** has played maximum matches in its home town that is Mumbai.
- **AB de Villiers** is the key player to **RCB**.
- **MI** while chossing fielding had won most number of matches.
- **MI** had played maximum number of matches from 2008-2019 and also had won maximum matche sof all teams.
- **Eden Garden** is the most popular venue in the IPL.
- **S Ravi** is the most featured Umpire in the IPL.
- **CSK** has been the most time Runner Up in IPL.
- **MS Dhoni** has been top fielder in IPL for having maximum number of Dismissals.