In [160]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


import nbformat
# Show which nbformat version is installed in this kernel.
print(nbformat.__version__)

# Switch plotly into its offline notebook mode before any figure is built.
plotly.offline.init_notebook_mode(connected=True)

5.10.4


In [161]:
# Folder that holds both CSV files; edit this one line to relocate the data.
DATA_DIR = r'F:\DataAnalysis\IPL_project'

# Read deliveries.csv and matches.csv from that folder.
Df_deliveries = pd.read_csv(rf'{DATA_DIR}\deliveries.csv')
Df_matches = pd.read_csv(rf'{DATA_DIR}\matches.csv')

## Data Cleaning

In [162]:
# Parse the match dates into datetime values.
Df_matches['date'] = pd.to_datetime(Df_matches['date'])

# Collapse split-year season labels into a single year.
#   * '2009/10' is mapped explicitly to '2010'.
#   * Any other label containing '/' loses its last three characters
#     (for example '2020/21' becomes '2020').
# The replacement is applied to the season column only, so an identical string
# in another column of the matches table is left untouched.
# NOTE(review): a label such as '2007/08' becomes '2007' under this rule;
# confirm that is the intended season year.
season = Df_matches['season'].replace({'2009/10': '2010'})
has_slash = season.str.contains('/', regex=False, na=False)
Df_matches['season'] = season.mask(has_slash, season.str[:-3])

In [163]:
# Old value -> new value. The replacement is applied to every column of the
# deliveries table, so any cell equal to a key is rewritten.
# NOTE(review): 'Gujarat Lions' is folded into 'Gujarat Titans' here; confirm
# these are meant to be treated as one team.
team_renames = {
    'Delhi Daredevils': 'Delhi Capitals',
    'Gujarat Lions': 'Gujarat Titans',
    'Royal Challengers Bangalore': 'Royal Challengers Bengaluru',
    'Rising Pune Supergiants': 'Rising Pune Supergiant',
    'Kings XI Punjab': 'Punjab Kings',
}
Df_deliveries = Df_deliveries.replace(team_renames)

In [164]:
# Look up each delivery's season in the matches table through its match id.
season_by_match = Df_matches.set_index('id')['season']
Df_deliveries['season'] = Df_deliveries['match_id'].map(season_by_match)

In [165]:
import pickle

# Save the cleaned deliveries table to 'Df_deliveries.pkl' in the working directory.
# pickle.dump returns None, so its result is deliberately not bound to a name.
with open('Df_deliveries.pkl', 'wb') as file:
    pickle.dump(Df_deliveries, file)

In [166]:
# Total batsman_runs per batter, highest total first.
run_bat = (
    Df_deliveries
    .groupby('batter')['batsman_runs']
    .sum()
    .reset_index()
    .sort_values(by='batsman_runs', ascending=False)
)

In [167]:
# First ten rows of run_bat (it is sorted by runs, descending).
rb = run_bat.head(10)

# Horizontal bars; the rows are reversed before being handed to plotly.
fig = px.bar(
    rb.iloc[::-1],
    x='batsman_runs',
    y='batter',
    color='batsman_runs',
    color_continuous_scale=px.colors.sequential.Aggrnyl_r,
    text_auto='0.2s',
    labels={'batter': 'Batsman Name', 'batsman_runs': 'Total Run'},
)

# Kept as the cell's last expression so the figure is displayed.
fig.update_layout(
    title={
        'text': 'Top 10 Run Scorer',
        'x': 0.5,
        'y': 0.95,
        'font': {'weight': 'bold'},
    },
    xaxis={'visible': False},
    showlegend=False,
    plot_bgcolor='white',
)

In [168]:
# Deliveries on which the batter scored exactly six runs.
runs_sixes = Df_deliveries.loc[Df_deliveries['batsman_runs'] == 6]

# Number of such deliveries per batter, largest count first.
bat_sixes = (
    runs_sixes
    .groupby('batter')['batsman_runs']
    .size()
    .reset_index(name='six_count')
    .sort_values(by='six_count', ascending=False)
)

In [169]:
# First ten rows of bat_sixes (it is sorted by six_count, descending).
bs = bat_sixes.head(10)

# Horizontal bars; the rows are reversed before being handed to plotly.
fig = px.bar(
    bs.iloc[::-1],
    x='six_count',
    y='batter',
    color='six_count',
    color_continuous_scale=px.colors.sequential.Aggrnyl_r,
    text_auto=True,
    labels={'six_count': 'Number of Sixes', 'batter': 'Batsman'},
)

# Kept as the cell's last expression so the figure is displayed.
fig.update_layout(
    title={
        'text': "Batsman with highest number of Sixes",
        'x': 0.5,
        'y': 0.95,
        'font': {'weight': 'bold'},
    },
    xaxis={'visible': False},
    plot_bgcolor='white',
)

In [170]:
# Deliveries flagged as a wicket.
# NOTE(review): every is_wicket row is credited to the bowler regardless of
# dismissal_kind; confirm that is intended.
wickets = Df_deliveries.loc[Df_deliveries['is_wicket'] == 1]

# Count per bowler (the is_wicket value stays as a column), largest first.
wicket_count = (
    wickets
    .groupby(['bowler'])['is_wicket']
    .value_counts()
    .reset_index(name='No_wicket')
    .sort_values(by='No_wicket', ascending=False)
)

In [171]:
# Every bowler is plotted; the x-axis range below limits the initial view
# to the leading bars.
wc = wicket_count

fig = px.bar(
    wc,
    x='bowler',
    y='No_wicket',
    text_auto=True,
    labels={'bowler': 'Name of Bowler', 'No_wicket': 'Number of Wicket'},
)

# Kept as the cell's last expression so the figure is displayed.
fig.update_layout(
    title={
        'text': "Bowler with highest number of Wickets",
        'x': 0.5,
        'y': 0.95,
        'font': {'weight': 'bold'},
    },
    xaxis={'range': [-0.5, 10.5]},
    plot_bgcolor='white',
)

#### Batting Average

In [172]:
# How many times each player appears in player_dismissed, with the player
# column renamed to 'batter'.
Dis_count = (
    Df_deliveries['player_dismissed']
    .value_counts()
    .reset_index(name='dismissed_count')
    .rename(columns={'player_dismissed': 'batter'})
)

In [173]:
# Left join keeps every batter from run_bat; batters missing from Dis_count
# get a missing dismissed_count and therefore a missing average.
bats_stats = run_bat.merge(Dis_count, how='left', on='batter')
bats_stats['Batting Avg'] = bats_stats['batsman_runs'].div(bats_stats['dismissed_count'])

In [174]:
# Display the batting table ordered by total runs, highest first.
bats_stats.sort_values('batsman_runs', ascending=False)

Unnamed: 0,batter,batsman_runs,dismissed_count,Batting Avg
0,V Kohli,8014,207.0,38.714976
1,S Dhawan,6769,193.0,35.072539
2,RG Sharma,6630,223.0,29.730942
3,DA Warner,6567,164.0,40.042683
4,SK Raina,5536,171.0,32.374269
...,...,...,...,...
653,YA Abdulla,0,,
652,Yash Dayal,0,1.0,0.000000
651,Y Prithvi Raj,0,,
650,K Yadav,0,,


In [175]:
# Runs per season (rows) and batter (columns); missing combinations become 0.
BatsmanScore = Df_deliveries.pivot_table(
    index=['season'],
    columns='batter',
    values='batsman_runs',
    aggfunc='sum',
    fill_value=0,
)

# A temporary 'Total' row orders the batter columns by their overall runs;
# it is removed again before 'season' is turned back into a regular column.
BatsmanScore.loc['Total'] = BatsmanScore.sum()
BatsmanScore = (
    BatsmanScore
    .sort_values(by='Total', axis=1, ascending=False)
    .drop('Total')
    .reset_index()
)

In [176]:
# BatsmanScore carries 'season' as its first column, so slicing columns[0:6]
# would plot the season labels as a series. Select batter columns only: these
# are the five leading batters, the same ones the original slice reached.
top_batters = [col for col in BatsmanScore.columns if col != 'season'][:5]

fig = px.line(BatsmanScore, x='season', y=top_batters, line_shape='spline')

Text = 'Runs of Batsman over Season'

fig.update_layout(
    showlegend=True,
    title=dict(
        text=Text,
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

fig.show()

In [177]:
# Runs per season (rows) and batter (columns).
BatsmanScore = Df_deliveries.pivot_table(index=['season'], columns='batter', values='batsman_runs', aggfunc='sum', fill_value=0)

# Dismissals per season, counted against the player named in player_dismissed.
# Counting by the 'batter' column instead would charge a dismissal to whoever
# was on strike, which differs whenever the other player is the one given out.
# The table is reindexed to BatsmanScore's shape so both line up cell by cell.
dismissals = Df_deliveries.dropna(subset=['player_dismissed'])
DismisalPiv = (
    pd.crosstab(dismissals['season'], dismissals['player_dismissed'])
    .reindex(index=BatsmanScore.index, columns=BatsmanScore.columns, fill_value=0)
)

# Average = runs / dismissals. A season with no dismissal yields inf or NaN;
# both are shown as 0.
batAve = (
    BatsmanScore.div(DismisalPiv)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# A temporary 'Total' row (sum of the seasonal averages) orders the columns;
# it is dropped again before 'season' becomes a regular column.
batAve.loc['Total'] = batAve.sum()
batAve = (
    batAve
    .sort_values(by='Total', ascending=False, axis=1)
    .drop('Total')
    .reset_index()
)

In [178]:
# Column 0 of batAve is 'season'; column 1 is the first batter column.
batsman = batAve.columns[1] # feel free to change Batsman name as required

# Plot the chosen batsman. The same name drives both the line and the title,
# so changing `batsman` above changes the whole chart, not only its heading.
fig = px.line(batAve, x='season', y=batsman, line_shape='spline')

fig.update_layout(
    showlegend=True,
    title=dict(
        text=f"Batting Average of {batsman}",
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

fig.show()


In [179]:
# Runs per season (rows) and batter (columns).
BatsmanScore = Df_deliveries.pivot_table(index=['season'], columns='batter', values='batsman_runs', aggfunc='sum', fill_value=0)

# Deliveries that count as a ball faced: those with no extra, plus byes,
# leg-byes and no-balls. Byes are included alongside leg-byes so both kinds
# of run scored without the bat are treated the same way; other extras
# (wides, for instance) stay excluded.
# NOTE(review): assumes byes are labelled 'byes' in extras_type, matching the
# 'legbyes' / 'noballs' spelling; verify against the data.
condition = (
    Df_deliveries['extras_type'].isin(['byes', 'legbyes', 'noballs'])
    | Df_deliveries['extras_type'].isna()
)

ballFaced = Df_deliveries[condition].pivot_table(index=['season'], columns='batter', values='total_runs', aggfunc='count', fill_value=0)

# Strike rate = runs per 100 balls faced; inf and NaN cells are shown as 0.
StrickRate_DF = (
    BatsmanScore.div(ballFaced / 100)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# A temporary 'Total' row (sum of the seasonal rates) orders the columns;
# it is dropped again before 'season' becomes a regular column.
StrickRate_DF.loc['Total'] = StrickRate_DF.sum()
StrickRate_DF = (
    StrickRate_DF
    .sort_values(by='Total', ascending=False, axis=1)
    .drop('Total')
    .reset_index()
)

In [180]:
# Make 'season' the index. Guarded so the cell can be re-run: once 'season'
# has been moved to the index, a second set_index would raise KeyError.
if 'season' in StrickRate_DF.columns:
    StrickRate_DF = StrickRate_DF.set_index('season')

In [181]:
# First column of the strike-rate table; assign another column name to plot
# a different batsman.
batsman = StrickRate_DF.columns[0]
fig = px.line(StrickRate_DF.reset_index(), x='season', y=batsman, line_shape='spline')

fig.update_layout(
    showlegend=True,
    title=dict(
        text=f"Strick Rate of {batsman}",
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

# A leftover bare `StrickRate_DF.columns[3]` lookup was removed here: its
# value was never used and it could raise IndexError on a narrow table.
fig.show()

In [182]:
# List the distinct values found in the dismissal_kind column.
dismissal_kinds = Df_deliveries['dismissal_kind']
dismissal_kinds.unique()

array([nan, 'caught', 'bowled', 'run out', 'lbw', 'retired hurt',
       'stumped', 'caught and bowled', 'hit wicket',
       'obstructing the field', 'retired out'], dtype=object)

In [183]:
# Drop deliveries whose dismissal is one of the kinds listed below. These
# labels are values of dismissal_kind, so the filter has to test that column;
# testing extras_type never matches and removes nothing.
# NOTE(review): 'run out' is not in the list; confirm whether it should be.
excluded_dismissals = ['retired hurt', 'obstructing the field', 'retired out']
bowler_DF = Df_deliveries[~Df_deliveries['dismissal_kind'].isin(excluded_dismissals)]

# One row per bowler: distinct matches, total runs, wickets, and the number
# of delivery rows.
bowl_DF = (
    bowler_DF
    .groupby(['bowler'])
    .agg(
        matc_Count=('match_id', 'nunique'),
        total_run=('total_runs', 'sum'),
        wicket=('is_wicket', 'sum'),
        balls=('ball', 'count'),
    )
    .reset_index()
)

In [184]:
# Derived bowling figures:
#   Bowling Avg = runs per wicket
#   Economy     = runs per six balls
#   SR          = balls per wicket
runs_conceded = bowl_DF['total_run']
wickets_taken = bowl_DF['wicket']
balls_bowled = bowl_DF['balls']

bowl_DF['Bowling Avg'] = runs_conceded / wickets_taken
bowl_DF['Economy'] = runs_conceded / (balls_bowled / 6)
bowl_DF['SR'] = balls_bowled / wickets_taken

In [185]:
# Division by zero wickets produces +/-inf; show those cells as 0 instead.
bowl_DF = bowl_DF.replace([np.inf, -np.inf], 0)

In [186]:
# Column used for the ranking. Other numeric columns of bowl_DF that can be
# used here: balls, Bowling Avg, Economy, SR.
by = 'wicket'
bowl_DF = bowl_DF.sort_values(by=[by], ascending=False)

# All bowlers are plotted; the x-axis range limits the initial view to the
# leading bars.
fig = px.bar(bowl_DF, x='bowler', y=by, text_auto='0.2s')
fig.update_layout(
    title={
        'text': f'Ranking bowlers by {by}',
        'x': 0.5,
        'font': {'weight': 'bold'},
    },
    xaxis={'range': [-0.5, 10]},
)
fig.show()

In [187]:

# Wickets per season (rows) and bowler (columns); missing combinations are 0.
wicket_taker = bowler_DF.pivot_table(
    index='season',
    columns='bowler',
    values='is_wicket',
    aggfunc='sum',
    fill_value=0,
)

# A temporary 'total' row orders the bowler columns by overall wickets; it is
# dropped again before 'season' is turned back into a regular column.
wicket_taker.loc['total'] = wicket_taker.sum()
wicket_taker = (
    wicket_taker
    .sort_values(by='total', axis=1, ascending=False)
    .drop('total')
    .reset_index()
)

# Column 0 is 'season', so column 1 is the first bowler column.
bowler = wicket_taker.columns[1] # feel free to change bowler name as required

fig = px.line(wicket_taker, x='season', y=bowler, line_shape='spline')

fig.update_layout(
    title={
        'text': f"Wicket take by {bowler} over Season",
        'x': 0.5,
        'y': 0.95,
        'font': {'size': 20, 'color': 'black', 'weight': 'bold'},
    },
    showlegend=True,
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

# Bare expression so the notebook renders the figure.
fig

In [188]:
# All three tables share the same layout: one row per season, one column per
# bowler. Cells with no data stay missing (no fill value is given).
season_by_bowler = {'index': 'season', 'columns': 'bowler'}

# Runs summed per season and bowler.
CR = bowler_DF.pivot_table(values='total_runs', aggfunc="sum", **season_by_bowler)

# Wickets summed per season and bowler.
wk = bowler_DF.pivot_table(values='is_wicket', aggfunc="sum", **season_by_bowler)

# Number of delivery rows per season and bowler.
balls = bowler_DF.pivot_table(values='total_runs', aggfunc='size', **season_by_bowler)

In [189]:
# Bowling average per season = runs / wickets.
# inf (no wicket) and NaN (no data) cells are set to 0 *before* the column
# totals are taken, so a single inf cannot swamp a bowler's total. The
# fillna result is kept; calling fillna without assigning it changes nothing.
bowlAvg = (
    CR.div(wk)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# A temporary 'Total' row orders the bowler columns; it is dropped again
# before 'season' becomes a regular column.
bowlAvg.loc['Total'] = bowlAvg.sum()
bowlAvg = (
    bowlAvg
    .sort_values(by='Total', ascending=False, axis=1)
    .drop('Total')
    .reset_index()
)

# `bowler` is the name chosen in an earlier cell.
fig = px.line(bowlAvg, x='season', y=bowler, line_shape='spline')

fig.update_layout(
    showlegend=True,
    title=dict(
        text=f"Bowling Average of {bowler} Over Seasons",
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')

fig.show()

In [190]:
# Bowling strike rate per season = balls / wickets.
# inf (no wicket) and NaN (no data) cells are set to 0 *before* the column
# totals are taken, so a single inf cannot swamp a bowler's total. The
# fillna result is kept; calling fillna without assigning it changes nothing.
SRate = (
    balls.div(wk)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# A temporary 'Total' row orders the bowler columns; it is dropped again
# before 'season' becomes a regular column.
SRate.loc['Total'] = SRate.sum()
SRate = (
    SRate
    .sort_values(by='Total', ascending=False, axis=1)
    .drop('Total')
    .reset_index()
)

# `bowler` is the name chosen in an earlier cell.
fig = px.line(SRate, x='season', y=bowler, line_shape='spline')

fig.update_layout(
    showlegend=True,
    title=dict(
        text=f"Strick Rate of {bowler} Over Seasons",
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')
fig.show()

In [191]:
# Economy per season = runs per six balls.
# inf and NaN (no data) cells are set to 0 *before* the column totals are
# taken. The fillna result is kept; calling fillna without assigning it
# changes nothing.
ECO = (
    CR.div(balls / 6)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# A temporary 'Total' row orders the bowler columns; it is dropped again
# before 'season' becomes a regular column.
ECO.loc['Total'] = ECO.sum()
ECO = (
    ECO
    .sort_values(by='Total', ascending=False, axis=1)
    .drop('Total')
    .reset_index()
)

# `bowler` is the name chosen in an earlier cell.
fig = px.line(ECO, x='season', y=bowler, line_shape='spline')

fig.update_layout(
    showlegend=True,
    title=dict(
        text=f"Economy of {bowler} Over Seasons",
        x=0.5,
        y=0.95,
        font=dict(size=20, color='black', weight='bold'),
    ),
    plot_bgcolor='white',
)

fig.update_xaxes(gridcolor='lightgrey')
fig.update_yaxes(gridcolor='lightgrey')
fig.show()

In [192]:
# Runs per (match, batter), keeping only totals of at least 100.
centuries = (
    Df_deliveries
    .groupby(['match_id', 'batter'])['batsman_runs']
    .sum()
    .reset_index(name='batsman_runs')
    .loc[lambda totals: totals['batsman_runs'] >= 100]
)

# Number of qualifying matches per batter.
centuries_count = centuries.value_counts('batter')

fig = px.bar(centuries_count)

# The x-axis range limits the initial view to the leading bars.
fig.update_layout(
    title={
        'text': f"Batsman Ranked based on 100s",
        'x': 0.5,
        'y': 0.95,
        'font': {'size': 20, 'color': 'black', 'weight': 'bold'},
    },
    xaxis={'range': [-0.5, 10.5]},
    showlegend=False,
    plot_bgcolor='white',
)

fig.show()

In [193]:
# Runs per (match, batter), keeping only totals of at least 50.
# NOTE(review): there is no upper bound, so totals of 100 and above are
# counted here as well; confirm that is intended.
fiftys = (
    Df_deliveries
    .groupby(['match_id', 'batter'])['batsman_runs']
    .sum()
    .reset_index(name='batsman_runs')
    .loc[lambda totals: totals['batsman_runs'] >= 50]
)

# Number of qualifying matches per batter.
fiftys_count = fiftys.value_counts('batter')
fig = px.bar(fiftys_count)

# The x-axis range limits the initial view to the leading bars.
fig.update_layout(
    title={
        'text': f"Batsman Ranked based on 50s",
        'x': 0.5,
        'y': 0.95,
        'font': {'size': 20, 'color': 'black', 'weight': 'bold'},
    },
    xaxis={'range': [-0.5, 10.5]},
    showlegend=False,
    plot_bgcolor='white',
)

fig.show()

In [194]:
# Per-over counts of three event types, each as a one-column frame indexed
# by over.
sixes = Df_deliveries.loc[Df_deliveries['batsman_runs'] == 6]
favOvr_6 = sixes['over'].value_counts().to_frame().rename(columns={'count': 'Sixes'})

fours = Df_deliveries.loc[Df_deliveries['batsman_runs'] == 4]
favOvr_4 = fours['over'].value_counts().to_frame().rename(columns={'count': 'Fours'})

wkts = Df_deliveries.loc[Df_deliveries['is_wicket'] == 1]
wktsOvr = wkts['over'].value_counts().to_frame().rename(columns={'count': 'Wickets'})

# Outer joins keep every over that appears in at least one of the three frames.
Ovrmerge = favOvr_6.merge(favOvr_4, on='over', how='outer')
Ovrmerge_final = Ovrmerge.merge(wktsOvr, on='over', how='outer')

# Turn each over's counts into percentages of that over's combined total.
ovrTotal = Ovrmerge_final.sum(axis=1)
Ovrmerge_final = Ovrmerge_final.div(ovrTotal / 100, axis=0)

# Long format (over, Type, counts) for plotting; rows without a value are dropped.
melted_Ovr = (
    Ovrmerge_final
    .reset_index()
    .melt(id_vars='over', value_name="counts", var_name='Type')
    .dropna()
)

In [195]:
# Stacked bars per over, one colour per event type.
# NOTE(review): the plotted 'counts' are per-over percentages, while the axis
# label calls them counts; the legend is also hidden.
fig = px.bar(
    melted_Ovr,
    x='over',
    y='counts',
    color='Type',
    barmode='relative',
    text_auto='0.2s',
    labels={'over': 'Overs in a match', 'counts': 'Count of Boundries and Wicket'},
)

fig.update_layout(
    title={
        'text': f"Boundries and Wickets Trends in Overs",
        'x': 0.5,
        'y': 0.95,
        'font': {'size': 20, 'color': 'black', 'weight': 'bold'},
    },
    xaxis={'range': [-0.5, 20.5]},
    showlegend=False,
    plot_bgcolor='white',
)

fig.show()