In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

Important: Shaffer 4c's : Clear, clean, concise and captivating for any data visualization

In [None]:
matches = pd.read_csv('/content/matches.csv')
deliveries = pd.read_csv('/content/deliveries.csv')

ipl = deliveries.merge(matches, left_on='match_id', right_on='id')

In [None]:
matches.columns

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2'],
      dtype='object')

In [None]:
ipl.shape

(260920, 37)

In [None]:
total_runs = ipl['total_runs'].sum()
total_runs

np.int64(347756)

In [None]:
team_map = {
    'Deccan Chargers': 'Sunrisers Hyderabad',
    'Delhi Daredevils': 'Delhi Capitals',
    'Rising Pune Supergiant': 'Rising Pune Supergiants',
    'Pune Warriors': 'Rising Pune Supergiants',
    'Royal Challengers Bangalore': 'Royal Challengers Bengaluru',
    'Kings XI Punjab': 'Punjab Kings',
    'Gujarat Lions': 'Gujarat Titans'
}

ipl.replace(team_map, inplace=True)

In [None]:
ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,toss_decision,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2
0,335982,1,Kolkata Knight Riders,Royal Challengers Bengaluru,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
1,335982,1,Kolkata Knight Riders,Royal Challengers Bengaluru,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
2,335982,1,Kolkata Knight Riders,Royal Challengers Bengaluru,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
3,335982,1,Kolkata Knight Riders,Royal Challengers Bengaluru,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen
4,335982,1,Kolkata Knight Riders,Royal Challengers Bengaluru,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,field,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen


In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')

In [None]:
matches['season'] = matches['season'].astype(str).str.extract(r'(\d{4})')
matches['season'] = matches['season'].astype(int)

## Scatter Plot

In [None]:
# Scatter plot is plotted between two continous variables
# Problem Statement: Draw a Scatter plot between Batsman Avg(X Axis) and Batsman Strike rate(Y Axis) of the top 50 Batsman.

#Top 50
top_50 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(50).index.to_list()
top_50_df = ipl[ipl['batter'].isin(top_50)]

#calculating runs and strike rate
runs = top_50_df.groupby('batter')['batsman_runs'].sum()
balls = top_50_df.groupby('batter')['batsman_runs'].count()
sr = (runs / balls) * 100
sr = sr.reset_index()
sr.columns = ['batsman', 'strike rate']

# Calculating avg
outs = ipl[ipl['player_dismissed'].isin(top_50)]
player_outs = outs['player_dismissed'].value_counts()

# Calculating Avg and creating new result df
avg = (runs / player_outs)
avg = avg.reset_index()
avg.columns = ['batsman', 'average']
player_avg_sr = avg.merge(sr, on='batsman')


### Using GO

In [None]:
# Plotting Scatter plot
trace = go.Scatter(x=player_avg_sr['average'], y=player_avg_sr['strike rate'], mode='markers', marker={'color': '#eaa315', 'size':player_avg_sr['average']},hovertext=player_avg_sr['batsman'])
data = [trace]
layout = go.Layout(title="Top 50 Batsman AVG vs SR in IPL", xaxis={'title':'Average'}, yaxis={'title':'Strike Rate'})
fig = go.Figure(data, layout)
fig.show()


### Using PX

In [None]:
px.scatter(player_avg_sr, x='average', y='strike rate', size='average', hover_name='batsman', color_discrete_sequence=['#eaa315'])


## Line Plot

### Using GO

In [None]:
# Problem Statement: Season by Season batting performance of a batsman
batsman = "V Kohli"
batsman2 = "RG Sharma"
batsman_df = ipl[ipl['batter'] == batsman]
batsman_df = batsman_df.groupby('season')['batsman_runs'].sum().reset_index()
batsman_df2 = ipl[ipl['batter'] == batsman2]
batsman_df2 = batsman_df2.groupby('season')['batsman_runs'].sum().reset_index()

In [None]:
trace = go.Scatter(x=batsman_df['season'], y=batsman_df['batsman_runs'], mode='lines+markers', marker={'color': '#152fea'}, name=batsman)
trace2 = go.Scatter(x=batsman_df2['season'], y=batsman_df2['batsman_runs'], mode='lines+markers', marker={'color': '#ea5615'}, name=batsman2)
data = [trace, trace2]
layout = go.Layout(title='Batsman Season by Season Performance', xaxis={'title': 'Season'}, yaxis={'title': 'Runs'})
fig = go.Figure(data, layout)
fig.show()

In [None]:

def compare_different_batsman(batsmans):
  comp_batsman_df = ipl[ipl['batter'].isin(batsmans)]
  comp_batsman_df = comp_batsman_df.groupby(['batter', 'season'])['batsman_runs'].sum().reset_index()
  comp_batsman_df = comp_batsman_df.pivot_table(index='season', columns='batter', values='batsman_runs')

  data = []
  for batsman in comp_batsman_df.columns:
    trace = go.Scatter(x=comp_batsman_df.index, y=comp_batsman_df[batsman], mode='lines+markers', name=batsman)
    data.append(trace)

  layout = go.Layout(title='Batsman Season by Season Performance', xaxis={'title': 'Season'}, yaxis={'title': 'Runs'})
  fig = go.Figure(data, layout)
  fig.show()

compare_different_batsman(['V Kohli', 'RG Sharma', 'MS Dhoni'])

### Using PX

In [None]:
comp_batsman_df = ipl[ipl['batter'].isin(['V Kohli', 'RG Sharma'])]
comp_batsman_df = comp_batsman_df.groupby(['batter', 'season'])['batsman_runs'].sum().reset_index()
comp_batsman_df = comp_batsman_df.pivot_table(index='season', columns='batter', values='batsman_runs')

fig = px.line(comp_batsman_df, x=comp_batsman_df.index, y=comp_batsman_df.columns, markers=True)
fig.update_layout(title='Season-wise Comparision', xaxis_title='Season', yaxis_title='Runs')
fig.show()

## Bar Plot

In [None]:
# Top 10
top_10 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.to_list()
top_10_df = ipl[ipl['batter'].isin(top_10)]

# Calculating Total Runs
top_10_df = top_10_df.groupby('batter')['batsman_runs'].sum().reset_index().sort_values(by='batsman_runs')
top_10_df

Unnamed: 0,batter,batsman_runs
3,KD Karthik,4843
6,RV Uthappa,4954
1,CH Gayle,4997
0,AB de Villiers,5181
4,MS Dhoni,5243
8,SK Raina,5536
2,DA Warner,6567
5,RG Sharma,6630
7,S Dhawan,6769
9,V Kohli,8014


### Using GO

In [None]:
trace = go.Bar(x=top_10_df['batter'], y=top_10_df['batsman_runs'])

data = [trace]
layout = go.Layout(title='Top 10 Batsman Runs', xaxis={'title': 'Batsman'}, yaxis={'title': 'Runs'})
fig = go.Figure(data, layout)
fig.show()

In [None]:
# Top 10
top_10 = ipl.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.to_list()
top_10_df = ipl[(ipl['batter'].isin(top_10)) & (ipl['inning'].isin([1, 2]))]

# Calculating Total Runs
top_10_df = top_10_df.groupby(['batter', 'inning'])['batsman_runs'].sum().reset_index()
top_10_df = top_10_df.pivot_table(index='batter', columns='inning', values='batsman_runs')
top_10_df.columns = ['first', 'second']

trace1 = go.Bar(
    x=top_10_df.index,
    y=top_10_df['first'],
    name='1st Innings',
    marker_color='blue'
)

trace2 = go.Bar(
    x=top_10_df.index,
    y=top_10_df['second'],
    name='2nd Innings',
    marker_color='orange'
)

data = [trace1, trace2]
layout = go.Layout(title='Top 10 Batsman Runs by Innings', xaxis={'title': 'Batsman'}, yaxis={'title': 'Runs'}, barmode='stack') #default: group
fig = go.Figure(data, layout)
fig.show()

### Using PX

In [None]:
fig = px.bar(top_10_df, x=top_10_df.index, y=top_10_df.columns, barmode='group')
fig.update_layout(title=dict(text="Top 10 Batsman Runs by Innings", x=0.5), xaxis_title="Batsman", yaxis_title="Runs", legend=dict(title="Innings"))

In [None]:
fig = px.bar(top_10_df, x=top_10_df.index, y=top_10_df.columns, barmode='stack')
fig.update_layout(title=dict(text="Top 10 Batsman Runs by Innings", x=0.5), xaxis_title="Batsman", yaxis_title="Runs", legend=dict(title="Innings"))

In [None]:
top_50_df = top_50_df[top_50_df['batsman_runs'] == 6]
top_50_df = top_50_df.groupby('batter')['batsman_runs'].count().reset_index()
top_50_df.columns = ['batsman', 'sixes']

In [None]:
player_avg_sr_sixes = player_avg_sr.merge(top_50_df, on='batsman')
sixes = player_avg_sr_sixes['sixes']
size_scaled = np.interp(sixes, (sixes.min(), sixes.max()), (10, 60))

In [None]:
hovertext = player_avg_sr_sixes.apply(
    lambda row: f"{row['batsman']}<br>Sixes: {row['sixes']}",
    axis=1
)

trace = go.Scatter(x=player_avg_sr_sixes['average'],
                   y=player_avg_sr_sixes['strike rate'],
                   mode='markers', marker={'color': '#eaa315', 'size':size_scaled},
                   hovertext=hovertext, # custom hover text
                   hoverinfo='text' # shows only hovertext, not x/y values
                   )
data = [trace]
layout = go.Layout(title="Top 50 Batsman AVG vs SR in IPL", xaxis={'title':'Average'}, yaxis={'title':'Strike Rate'})
fig = go.Figure(data, layout)
fig.show()

## Box Plot

### Using GO

In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')

In [None]:
match_agg = ipl.groupby(['match_id', 'season'])['total_runs'].sum().reset_index()
match_agg

Unnamed: 0,match_id,season,total_runs
0,335982,2007/08,304
1,335983,2007/08,447
2,335984,2007/08,261
3,335985,2007/08,331
4,335986,2007/08,222
...,...,...,...
1090,1426307,2024,429
1091,1426309,2024,323
1092,1426310,2024,346
1093,1426311,2024,314


In [None]:
trace = go.Box(x=match_agg['total_runs'], name="Total Runs")

data = [trace]
layout = go.Layout(title="Total Runs in Each Match by Season", xaxis={'title': 'Total runs'})
fig = go.Figure(data, layout)
fig.show()

In [None]:
match_agg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1095 entries, 0 to 1094
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   match_id    1095 non-null   int64 
 1   season      1095 non-null   object
 2   total_runs  1095 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 25.8+ KB


In [None]:
trace1 = go.Box(x=match_agg[match_agg['season']=='2016']['total_runs'], name="2016")
trace2 = go.Box(x=match_agg[match_agg['season']=='2024']['total_runs'], name="2024")

data = [trace1, trace2]
layout = go.Layout(title="Total Runs in Each Match in 2017 and 2018 Season", xaxis={'title': 'Total runs'}, legend=dict(title='Season'))
fig = go.Figure(data, layout)
fig.show()

### Using PX

In [None]:
fig = px.box(match_agg, x=match_agg['total_runs'])
fig.update_layout(title=dict(text="Total Runs in each match", x=0.5), xaxis_title="Total Runs")
fig.show()

## Distplot

In [None]:
import plotly.figure_factory as ff

In [None]:
player_avg_sr_sixes
hist_data = [player_avg_sr_sixes['strike rate']]
group_labels = ['Strike Rate']
fig = ff.create_distplot(hist_data, group_labels)
fig.show()

In [None]:
hist_data = [player_avg_sr_sixes['strike rate'], player_avg_sr_sixes['sixes']]
group_labels = ['Strike Rate', 'Sixes']
fig = ff.create_distplot(hist_data, group_labels, bin_size=20)
fig.show()

## Histogram

In [None]:
player_avg_sr_sixes

Unnamed: 0,batsman,average,strike rate,sixes
0,AB de Villiers,39.853846,148.580442,253
1,AD Russell,28.930233,164.224422,209
2,AJ Finch,24.904762,123.349057,78
3,AM Rahane,30.142857,120.32141,103
4,AT Rayudu,28.051613,124.584527,173
5,BB McCullum,27.711538,126.848592,130
6,CH Gayle,39.65873,142.121729,359
7,DA Miller,35.658537,134.684477,134
8,DA Warner,40.042683,135.429986,236
9,DR Smith,28.392857,132.279534,117


### Using GO

In [None]:
trace = go.Histogram(x=player_avg_sr_sixes['average'], xbins={'size': 2, 'start':25, 'end':100})
data = [trace]
laytout = go.Layout(title=dict(text = "Top 50 Batsman average analysis", x=0.5), xaxis=dict(title="Average"), yaxis={'title': 'Frequency'} )
fig = go.Figure(data, laytout)

fig.show()

### Using PX

In [None]:
fig = px.histogram(player_avg_sr_sixes, x=player_avg_sr_sixes['average'], nbins=20)
fig.update_layout(title='Top 50 Batsman average analysis', yaxis_title="Frequency")
fig.show()

## Heatmap

### Using GO

In [None]:
#Problem Statement : Sxies hit by each team in each over
ipl['batting_team'] = ipl['batting_team'].replace({'Deccan Chargers':'Sunrisers Hyderabad', 'Delhi Daredevils':'Delhi Capitals', 'Rising Pune Supergiant':'Rising Pune Supergiants', 'Pune Warriors':'Rising Pune Supergiants', 'Royal Challengers Bangaluru':'Royal Challengers Bengaluru', 'Kings XI Punjab':'Punjab Kings', 'Gujarat Lions':'Gujarat Titans'})
sixes_over = ipl[ipl['batsman_runs'] == 6]
sixes_over = sixes_over.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

In [None]:
sixes_over['batting_team'].unique()

array(['Chennai Super Kings', 'Delhi Capitals', 'Gujarat Titans',
       'Kochi Tuskers Kerala', 'Kolkata Knight Riders',
       'Lucknow Super Giants', 'Mumbai Indians', 'Punjab Kings',
       'Rajasthan Royals', 'Rising Pune Supergiants',
       'Royal Challengers Bengaluru', 'Sunrisers Hyderabad'], dtype=object)

In [None]:
trace = go.Heatmap(x=sixes_over['batting_team'], y=sixes_over['over'], z=sixes_over['batsman_runs'], text=sixes_over['batsman_runs'] , texttemplate='%{text}')
layout = go.Layout(title=dict(text="Sixes by each Team in each Over"), xaxis=dict(title="Team"), yaxis=dict(title="Over"))
data = [trace]
fig = go.Figure(data, layout)
fig.show()

In [None]:
dots_over = ipl[ipl['batsman_runs'] == 0]
dots_over = dots_over.groupby(['batting_team', 'over'])['batsman_runs'].count().reset_index()

In [None]:
trace1 = go.Heatmap(x=sixes_over['batting_team'], y=sixes_over['over'], z=sixes_over['batsman_runs'], text=sixes_over['batsman_runs'] , texttemplate='%{text}', showscale=True, colorbar=dict(x=0.46))
trace2 = go.Heatmap(x=dots_over['batting_team'], y=dots_over['over'], z=dots_over['batsman_runs'], text=dots_over['batsman_runs'] , texttemplate='%{text}', showscale=True)

fig = make_subplots(rows=1, cols=2, subplot_titles=("6's", "0's"))

fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig.update_layout(title=dict(text="Sixes and Dots by Over by each team"))
# fig.update_yaxes()
fig.show()

## Section 1: Basic Plots (Bar, Pie, Line, Scatter) – Problems #1–30

In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')

In [None]:
# Problem Statement: Total matches played per season (bar)
# one way from combined df
unique_matches = ipl.drop_duplicates(subset='match_id')
total_matches_by_season = unique_matches.groupby('season').size().reset_index(name='total_matches')
# name='total_matches' doesn't rename the index. It gives a name to the values (count) column that came from .size()

# second way only from matches column
# total_matches_by_season = matches.groupby('season').size().reset_index(name='total_matches)
total_matches_by_season

Unnamed: 0,season,total_matches
0,2007/08,58
1,2009,57
2,2009/10,60
3,2011,73
4,2012,74
5,2013,76
6,2014,60
7,2015,59
8,2016,60
9,2017,59


In [None]:
trace = go.Bar(x=total_matches_by_season['season'], y=total_matches_by_season['total_matches'], text=total_matches_by_season['total_matches'], texttemplate='%{text}')
layout = go.Layout(title=dict(text='Total Matches by Season', x=0.5, xanchor='center'), xaxis=dict(title='Season'), yaxis=dict(title='Total Matches'))
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.bar(total_matches_by_season, x=total_matches_by_season['season'], y=total_matches_by_season['total_matches'], text_auto=True)
fig.update_layout(title=dict(text='Total Matches played by Season', x=0.5, xanchor='center'), xaxis_title='Season', yaxis_title='Total Matches')
# 'left' = Title starts at middle and extends right
# 'center' = Title is centered perfectly
# 'right'	= Title ends at the middle
fig.show()

In [None]:
# Problem Statement: Total wins by each team (horizontal bar)
wins = ipl[~ipl['winner'].isna()]
wins = wins.drop_duplicates(subset='match_id')
wins = wins.groupby('winner').size().reset_index(name='total wins')

In [None]:
trace = go.Bar(x=wins['total wins'], y=wins['winner'], orientation='h', text=wins['total wins'], textposition='outside', texttemplate='%{text} w') #textposition: ['inside', 'outside', 'auto', 'none']
layout = go.Layout(title=dict(text='Total Wins by each Team'), xaxis=dict(title='Total Wins'), yaxis=dict(title='Team'))
fig = go.Figure([trace], layout)
fig.show()

In [None]:
trace = go.Bar(x=wins['total wins'], y=wins['winner'], orientation='h', text=[f"{team}: {win} wins" for team, win in zip(wins['winner'], wins['total wins'])], textposition='auto', texttemplate='%{text}') #textposition: ['inside', 'outside', 'auto', 'none']
layout = go.Layout(title=dict(text='Total Wins by each Team'), xaxis=dict(title='Total Wins'), yaxis=dict(title='Team'))
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.bar(wins, x=wins['total wins'], y=wins['winner'], text_auto=True)
fig.update_layout(title=dict(text='Total Wins by each Team in IPL'), xaxis_title='Total wins', yaxis_title='Team')
fig.show()

In [None]:
fig = px.bar(wins, x='total wins', y='winner', text='total wins')
fig.update_traces(texttemplate='%{text} w', textposition='auto')
fig.update_layout(title=dict(text='Total Wins by each Team in IPL'), xaxis_title='Total wins', yaxis_title='Team')
fig.show()

In [None]:
# Problem Statement: Matches played in each city (pie)
city_matches = matches.groupby('city').size().reset_index(name='matches played')

In [None]:
trace = go.Pie(labels=city_matches['city'], values=city_matches['matches played'], hole= 0.3, marker=dict(line=dict(color='black', width=1)))
fig = go.Figure([trace])
fig.show()

In [None]:
fig = px.pie(city_matches, names='city', values='matches played')
fig.update_layout(title=dict(text='Total Matches played in each City'))
fig.update_traces(marker=dict(line=dict(color='black', width=1)))
fig.show()

In [None]:
matches.columns

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2'],
      dtype='object')

In [None]:
# Problem Statement: Toss decisions per season (bar + color)
toss_decisions = matches.groupby(['season', 'toss_decision']).size().reset_index(name='count')
toss_decisions = toss_decisions.pivot_table(index='season', columns='toss_decision', values='count')

In [None]:
trace1 = go.Bar(x=toss_decisions.index, y=toss_decisions['bat'], marker_color='aqua', name='Batting First')
trace2 = go.Bar(x=toss_decisions.index, y=toss_decisions['field'], marker_color='aquamarine', name='Fielding First')
fig = go.Figure()
fig.add_trace(trace1)
fig.add_trace(trace2)
fig.show()

In [None]:
fig = px.bar(toss_decisions, x=toss_decisions.index, y=toss_decisions.columns, color='toss_decision')
fig.update_layout(title=dict(text='Toss Decision each season', x=0.5, xanchor='center'), xaxis_title='Season', yaxis_title='Count', legend=dict(title='Toss Decision'), barmode='group')
fig.show()

In [None]:
# Problem Statement: Number of matches won after batting/fielding first
matches.columns

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2'],
      dtype='object')

In [None]:
first_bat_field = matches[matches['result'].isin(['runs', 'wickets'])]
first_bat_field = first_bat_field.groupby('result').size().reset_index(name='count')
first_bat_field.replace({'runs': 'Batting First', 'wickets':'Fielding First'}, inplace=True)

In [None]:
trace = go.Pie(labels=first_bat_field['result'], values=first_bat_field['count'], textinfo='label+percent', hoverinfo='label+value+percent', marker=dict(line=dict(color='white', width=2)), hole=0.3)
layout = go.Layout(title=dict(text='No.of Matches won after batting/fielding first', x=0.5, xanchor='center'), legend=dict(title='Innings Type'))
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.pie(first_bat_field, names='result', values='count')
fig.update_layout(title=dict(text='No.of Matches won after batting/fielding first', x=0.5, xanchor='center'), legend=dict(title='Innings Type'))
fig.update_traces(marker=dict(line=dict(color='white', width=2)), textinfo='label+percent', hoverinfo='label+value+percent', hole=0.3)
fig.show()

In [None]:
# Problem Statement: Top 10 winning captains (bar with different colors)
captain_map = {'Royal Challengers Bengaluru': 'Rajat Patidar', 'Chennai Super Kings':'MS Dhoni', 'Rajasthan Royals':'Sanju Samson', 'Mumbai Indians':'Hardik Pandya', 'Sunrisers Hyderabad':'Pat Cummins', 'Punjab Kings':'Shreyas Iyer', 'Kolkata Knight Riders':'Ajinkya Rahane', 'Delhi Capitals': 'Axar Patel', 'Kochi Tuskers Kerala':'Parthiv Patel', 'Rising Pune Supergiants':'Steve Smith', 'Gujarat Titans':'Suresh Raina', 'Lucknow Super Giants':'Rishab Pant'}
matches.replace(team_map, inplace=True)
matches['Captain'] = matches['winner'].map(captain_map)
matches

Unnamed: 0,id,season,city,date,match_type,player_of_match,venue,team1,team2,toss_winner,...,winner,result,result_margin,target_runs,target_overs,super_over,method,umpire1,umpire2,Captain
0,335982,2007,Bangalore,2008-04-18,League,BB McCullum,M Chinnaswamy Stadium,Royal Challengers Bengaluru,Kolkata Knight Riders,Royal Challengers Bengaluru,...,Kolkata Knight Riders,runs,140.0,223.0,20.0,N,,Asad Rauf,RE Koertzen,Ajinkya Rahane
1,335983,2007,Chandigarh,2008-04-19,League,MEK Hussey,"Punjab Cricket Association Stadium, Mohali",Punjab Kings,Chennai Super Kings,Chennai Super Kings,...,Chennai Super Kings,runs,33.0,241.0,20.0,N,,MR Benson,SL Shastri,MS Dhoni
2,335984,2007,Delhi,2008-04-19,League,MF Maharoof,Feroz Shah Kotla,Delhi Capitals,Rajasthan Royals,Rajasthan Royals,...,Delhi Capitals,wickets,9.0,130.0,20.0,N,,Aleem Dar,GA Pratapkumar,Axar Patel
3,335985,2007,Mumbai,2008-04-20,League,MV Boucher,Wankhede Stadium,Mumbai Indians,Royal Challengers Bengaluru,Mumbai Indians,...,Royal Challengers Bengaluru,wickets,5.0,166.0,20.0,N,,SJ Davis,DJ Harper,Rajat Patidar
4,335986,2007,Kolkata,2008-04-20,League,DJ Hussey,Eden Gardens,Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,...,Kolkata Knight Riders,wickets,5.0,111.0,20.0,N,,BF Bowden,K Hariharan,Ajinkya Rahane
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1090,1426307,2024,Hyderabad,2024-05-19,League,Abhishek Sharma,"Rajiv Gandhi International Stadium, Uppal, Hyd...",Punjab Kings,Sunrisers Hyderabad,Punjab Kings,...,Sunrisers Hyderabad,wickets,4.0,215.0,20.0,N,,Nitin Menon,VK Sharma,Pat Cummins
1091,1426309,2024,Ahmedabad,2024-05-21,Qualifier 1,MA Starc,"Narendra Modi Stadium, Ahmedabad",Sunrisers Hyderabad,Kolkata Knight Riders,Sunrisers Hyderabad,...,Kolkata Knight Riders,wickets,8.0,160.0,20.0,N,,AK Chaudhary,R Pandit,Ajinkya Rahane
1092,1426310,2024,Ahmedabad,2024-05-22,Eliminator,R Ashwin,"Narendra Modi Stadium, Ahmedabad",Royal Challengers Bengaluru,Rajasthan Royals,Rajasthan Royals,...,Rajasthan Royals,wickets,4.0,173.0,20.0,N,,KN Ananthapadmanabhan,MV Saidharshan Kumar,Sanju Samson
1093,1426311,2024,Chennai,2024-05-24,Qualifier 2,Shahbaz Ahmed,"MA Chidambaram Stadium, Chepauk, Chennai",Sunrisers Hyderabad,Rajasthan Royals,Rajasthan Royals,...,Sunrisers Hyderabad,runs,36.0,176.0,20.0,N,,Nitin Menon,VK Sharma,Pat Cummins


In [None]:
top_10_winning_captains = matches.groupby('Captain').size().reset_index(name='Wins').sort_values(by='Wins', ascending=False)[:10]
top_10_winning_captains

Unnamed: 0,Captain,Wins
2,Hardik Pandya,144
3,MS Dhoni,138
0,Ajinkya Rahane,131
6,Rajat Patidar,123
5,Pat Cummins,117
1,Axar Patel,115
9,Shreyas Iyer,112
8,Sanju Samson,112
11,Suresh Raina,41
10,Steve Smith,27


In [None]:
trace = go.Bar(x=top_10_winning_captains['Captain'], y=top_10_winning_captains['Wins'], marker_color=['#FF5733', '#33FF57', '#3357FF', '#FFC300', '#DAF7A6', '#C70039', '#900C3F', '#581845', '#00FFFF', '#FF00FF'])
layout = go.Layout(title=dict(text='Top 10 winning captains', x=0.5, xanchor='center'), xaxis=dict(title='Captain'), yaxis=dict(title='Wins'))
fig = go.Figure([trace], layout)

fig.show()

In [None]:
fig = px.bar(top_10_winning_captains, x='Captain', y='Wins', color='Captain')
fig.update_layout(title=dict(text='Top 10 Winning Captains'), legend=dict(y=0.95, yanchor='middle', orientation='h', x=0, xanchor='left'))
fig.update_traces(textposition='outside')
fig.show()

In [None]:
# Problem Statement: Year-wise match-winning trend of MI (line)
mi_matches = matches[matches['winner'] == 'Mumbai Indians']
mi_matches = mi_matches.groupby('season').size().reset_index(name='Wins')

In [None]:


hovertext = mi_matches.apply(
    lambda row: f"Season: {row['season']} <br>Wins: {row['Wins']} <br>Losses: {16 - int(row['Wins'])}", axis=1
)
trace = go.Scatter(x=mi_matches['season'], y=mi_matches['Wins'], mode='lines+markers', text=hovertext, hoverinfo='text', line=dict(color='royalblue'), marker=dict(size=8))
layout = go.Layout(title=dict(text='Total Wins by MI every season'),
                   xaxis=dict(title='Season',
                              titlefont=dict(color='teal', size=16, family='Arial'),
                              tickfont=dict(color='darkblue', size=13, family='Courier New')),
                   yaxis=dict(title='Wins'))
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.line(mi_matches, x='season', y='Wins', markers=True)
fig.update_traces(line=dict(color='royalblue'), marker=dict(size=8), text=hovertext, hovertemplate='%{text}')
fig.update_layout(title=dict(text='Total Wins by MI every season', x=0.5, xanchor='center'),
                   xaxis=dict(title='Season',
                              titlefont=dict(color='teal', size=16, family='Courier New'),
                              tickfont=dict(color='darkblue', size=13, family='Courier New')),
                   yaxis=dict(title='Wins',
                              titlefont=dict(color='teal', size=16, family='Courier New'),
                              tickfont=dict(color='darkblue', size=13, family='Courier New')))
fig.show()

In [None]:
#Problem statement: Toss winners across seasons (stacked bar)
toss_winners = matches.groupby(['season', 'toss_winner']).size().reset_index(name='count')
toss_winners = toss_winners.pivot_table(index='season', columns='toss_winner', values='count')
toss_winners.fillna(value=0, inplace=True)
toss_winners

toss_winner,Chennai Super Kings,Delhi Capitals,Gujarat Titans,Kochi Tuskers Kerala,Kolkata Knight Riders,Lucknow Super Giants,Mumbai Indians,Punjab Kings,Rajasthan Royals,Rising Pune Supergiants,Royal Challengers Bengaluru,Sunrisers Hyderabad
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2007,5.0,6.0,0.0,0.0,6.0,0.0,8.0,8.0,11.0,0.0,5.0,9.0
2009,17.0,17.0,0.0,0.0,14.0,0.0,15.0,11.0,10.0,0.0,14.0,19.0
2011,9.0,9.0,0.0,8.0,7.0,0.0,6.0,10.0,7.0,5.0,6.0,6.0
2012,7.0,10.0,0.0,0.0,7.0,0.0,11.0,8.0,10.0,6.0,6.0,9.0
2013,8.0,5.0,0.0,0.0,12.0,0.0,12.0,7.0,11.0,9.0,5.0,7.0
2014,10.0,4.0,0.0,0.0,9.0,0.0,6.0,7.0,8.0,0.0,9.0,7.0
2015,10.0,5.0,0.0,0.0,8.0,0.0,7.0,7.0,6.0,0.0,10.0,6.0
2016,0.0,8.0,8.0,0.0,6.0,0.0,9.0,6.0,0.0,7.0,6.0,10.0
2017,0.0,8.0,7.0,0.0,9.0,0.0,11.0,4.0,0.0,6.0,9.0,5.0
2018,11.0,8.0,0.0,0.0,9.0,0.0,5.0,7.0,6.0,0.0,7.0,7.0


In [None]:
fig = go.Figure()

for each_column in toss_winners.columns:
  fig.add_trace(go.Bar(x=toss_winners.index, y=toss_winners[each_column], name=each_column))
fig.update_layout(barmode='stack', title=dict(text='Toss Winners across different seasons', x=0.5, xanchor='center'), xaxis_title='Season', yaxis_title='Count')

fig.show()

In [None]:
fig = px.bar(toss_winners, x=toss_winners.index, y=toss_winners.columns)
fig.update_layout(barmode='stack', title=dict(text='Toss Winners across different seasons', x=0.5, xanchor='center'), xaxis_title='Season', yaxis_title='Count')
fig.show()

In [None]:
# Problem Statment: Bar chart of matches per venue sorted by count
venue_map = {
    'Arun Jaitley Stadium, Delhi': 'Arun Jaitley Stadium',
    'Brabourne Stadium, Mumbai': 'Brabourne Stadium',
    'Dr DY Patil Sports Academy, Mumbai': 'Dr DY Patil Sports Academy',
    'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
    'Eden Gardens, Kolkata': 'Eden Gardens',
    'M Chinnaswamy Stadium, Bengaluru': 'M Chinnaswamy Stadium',
    'M.Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
    'MA Chidambaram Stadium, Chepauk': 'MA Chidambaram Stadium',
    'MA Chidambaram Stadium, Chepauk, Chennai': 'MA Chidambaram Stadium',
    'Maharashtra Cricket Association Stadium, Pune': 'Maharashtra Cricket Association Stadium',
    'Narendra Modi Stadium, Ahmedabad': 'Narendra Modi Stadium',
    'Punjab Cricket Association IS Bindra Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium',
    'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh': 'Punjab Cricket Association IS Bindra Stadium',
    'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association IS Bindra Stadium',
    'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium',
    'Rajiv Gandhi International Stadium, Uppal, Hyderabad': 'Rajiv Gandhi International Stadium',
    'Sardar Patel Stadium, Motera': 'Narendra Modi Stadium',
    'Sawai Mansingh Stadium, Jaipur': 'Sawai Mansingh Stadium',
    'Zayed Cricket Stadium, Abu Dhabi': 'Sheikh Zayed Stadium',
    'Himachal Pradesh Cricket Association Stadium, Dharamsala': 'Himachal Pradesh Cricket Association Stadium',
    'Wankhede Stadium, Mumbai': 'Wankhede Stadium'
}
matches['venue'] = matches['venue'].replace(venue_map)
venue_count = matches.groupby('venue').size().reset_index(name='count').sort_values(by='count', ascending=False)[:10]

In [None]:
trace = go.Bar(x=venue_count['venue'], y=venue_count['count'])
layout = go.Layout(title=dict(text='Top 10 Venues in IPL'), xaxis_title='Venue', yaxis_title='Frequency')
fig = go.Figure([trace], layout)
fig.show()

In [None]:
# Problem Statement: Runs scored vs balls faced for top 5 batsmen (scatter)
runs = ipl.groupby('batter')['batsman_runs'].sum().reset_index(name='Total_runs')
balls = ipl.groupby('batter').size().reset_index(name='Total_balls')
batting_stats = runs.merge(balls, on='batter')
batting_stats.rename(columns={'batter': 'Batsman'}, inplace=True)

top_5_batsman = batting_stats.sort_values(by='Total_runs', ascending=False)[:5]


In [None]:
hovertext = top_5_batsman.apply(
    lambda row: f"Batsman: {row['Batsman']}<br>Runs: {row['Total_runs']} <br>Balls: {row['Total_balls']}", axis=1
)

trace = go.Scatter(x=top_5_batsman['Total_runs'], y=top_5_batsman['Total_balls'], mode='markers', text=hovertext, hoverinfo='text')
fig = go.Figure([trace])
fig.update_layout(title=dict(text='Top 5 Batsman in IPL'), xaxis=dict(title='Runs'), yaxis=dict(title='Balls'))
fig.show()

In [None]:
top_5_batsman['hover'] = top_5_batsman.apply(
    lambda row: f"Batsman: {row['Batsman']}<br>Runs: {row['Total_runs']} <br>Balls: {row['Total_balls']}", axis=1
)

trace = go.Scatter(x=top_5_batsman['Total_runs'], y=top_5_batsman['Total_balls'], mode='markers+text', text=top_5_batsman['hover'])
fig = go.Figure([trace])
fig.update_layout(title=dict(text='Top 5 Batsman in IPL'), xaxis=dict(title='Runs'), yaxis=dict(title='Balls'))
fig.show()

In [None]:
fig = px.scatter(top_5_batsman, x='Total_runs', y='Total_balls', color='Batsman')
fig.update_layout(title=dict(text='Top 5 Batsman in IPL'), xaxis=dict(title='Runs'), yaxis=dict(title='Balls'))
fig.update_traces(text=top_5_batsman['hover'], hovertemplate='%{text}', marker=dict(size=4))
fig.show()

In [None]:
# Problem Statement: Runs scored per season by Virat Kohli (line)
runs = ipl.groupby(['season','batter'])['batsman_runs'].sum().reset_index(name='Total_runs')
virat_runs = runs[runs['batter'] == 'V Kohli']
virat_runs

Unnamed: 0,season,batter,Total_runs
138,2007/08,V Kohli,165
290,2009,V Kohli,246
456,2009/10,V Kohli,307
636,2011,V Kohli,557
808,2012,V Kohli,364
983,2013,V Kohli,639
1110,2014,V Kohli,359
1245,2015,V Kohli,505
1383,2016,V Kohli,973
1521,2017,V Kohli,308


In [None]:
trace = go.Scatter(x=virat_runs['season'], y=virat_runs['Total_runs'], mode='lines+markers')
fig = go.Figure([trace])
fig.update_layout(title=dict(text='Runs scored per season by Virat Kohli'), xaxis=dict(title='Season', tickfont=dict(family='Courier New')), yaxis=dict(title='Runs', tickfont=dict(family='Courier New')))
fig.show()

In [None]:
fig = px.line(virat_runs, x='season', y='Total_runs', markers=True)
fig.update_traces(line=dict(width=3), marker=dict(size=12))
fig.update_layout(title=dict(text='Runs scored per season by Virat Kohli'), xaxis=dict(title='Season', tickfont=dict(family='Courier New')), yaxis=dict(title='Runs', tickfont=dict(family='Courier New')))
fig.show()

In [None]:
# Problem Statement: Runs scored vs balls faced with marker size = 4s
fours = ipl[ipl['batsman_runs'] == 4].groupby('batter').size().reset_index(name='Total_fours')
runs = ipl.groupby('batter')['batsman_runs'].sum().reset_index(name='Total_runs')
balls = ipl.groupby('batter').size().reset_index(name='Total_balls')

batsman_stats = runs.merge(balls, on='batter').merge(fours, on='batter')
batsman_stats.rename(columns={'batter':'Batsman'}, inplace=True)
batsman_stats.sort_values(by='Total_runs', ascending=False)
# batsman_stats = batsman_stats[:20]

Unnamed: 0,Batsman,Total_runs,Total_balls,Total_fours
508,V Kohli,8014,6236,708
416,S Dhawan,6769,5483,768
388,RG Sharma,6630,5183,599
119,DA Warner,6567,4849,663
441,SK Raina,5536,4177,506
...,...,...,...,...
262,Liton Das,4,4,1
14,A Tomar,4,8,1
402,RS Sodhi,4,2,1
161,GD McGrath,4,5,1


In [None]:
trace = go.Scatter(x=batsman_stats['Total_balls'], y=batsman_stats['Total_runs'], mode='markers',marker=dict(size=batsman_stats['Total_fours'],
                               color='royalblue',
                               sizemode='area',
                               sizeref=2.*max(batsman_stats['Total_fours'])/(40.**2),
                               sizemin=5), text=batsman_stats['Batsman'], hovertemplate="Batsman: %{text} <br>Runs: %{y}<br>Balls: %{x}<br>Fours: %{marker.size}<extra></extra>")
fig = go.Figure([trace])
# fig.update_traces(marker=dict(size=batsman_stats['Total_fours'],
#                                color='royalblue',
#                                sizemode='area',
#                                sizeref=2.*max(batsman_stats['Total_fours'])/(40.**2),
#                                sizemin=5), text=batsman_stats['Batsman'], hovertemplate="Batsman: %{text} <br>Runs: %{y}<br>Balls: %{x}<br>Fours: %{marker.size}<extra></extra>")
fig.show()

In [None]:
fig = px.scatter(batsman_stats, x='Total_balls', y='Total_runs', size='Total_fours')
fig.update_traces(marker=dict(size=batsman_stats['Total_fours'],
                               color='teal',
                               sizemode='area', # default=diameter
                               sizemin=5), text=batsman_stats['Batsman'], hovertemplate="Batsman: %{text} <br>Runs: %{y}<br>Balls: %{x}<br>Fours: %{marker.size}<extra></extra>")
fig.show()

In [None]:
# Problem Statement: Line plot for all teams’ win count over seasons (facet_col/team)
team_wins = matches.groupby(['season', 'winner']).size().reset_index(name='Wins')
team_wins = team_wins.pivot_table(index='season', columns='winner', values='Wins', fill_value=0)
# team_wins.fillna(value=0, inplace=True)

fig = make_subplots(rows=4, cols=3, horizontal_spacing=0.03, shared_xaxes=True) # we can mention to add title for each subplot subplot_titles=team_wins.columns

row = col = 1
for each_column in team_wins.columns:
  fig.add_trace(go.Scatter(x=team_wins.index, y=team_wins[each_column], mode='lines+markers', name=each_column), row=row, col=col)
  col += 1
  if col > 3:
    col = 1
    row += 1

fig.update_layout(height=1000)

fig.show()

In [None]:
team_wins = team_wins.reset_index().melt(id_vars='season',
    var_name='winner',
    value_name='Wins').sort_values(by=['winner', 'season'])

In [None]:
fig = px.line(team_wins, x='season', y='Wins', color='winner',facet_col='winner', facet_col_wrap=3, markers=True, height=1000)
# fig.for_each_xaxis(lambda axis: axis.update(title=''))
# fig.for_each_yaxis(lambda axis: axis.update(title=''))
# # Custom subplot title mapping
# team_wins_list = team_wins['winner'].unique().tolist()
# # After fig is created:
# for i, annotation in enumerate(fig.layout.annotations):
#     annotation.text = team_wins_list[i]
fig.update_layout(title=dict(text='Team Wins Per Season – IPL', x=0.5))
fig.show()

In [None]:
# Problem Statement: Toss win vs match win scatter plot with shape-coded teams
two_wins = matches.dropna(subset=['winner', 'toss_winner'])

In [None]:
matchup = two_wins.groupby(['team1', 'team2', 'venue']).size().reset_index(name='matches played')
two_wins = two_wins.merge(matchup, on=['team1', 'team2', 'venue'])

two_wins['hover'] = two_wins.apply(lambda row: f"Matches Played: {row['matches played']}<br>Venue: {row['venue']}", axis=1)

trace = go.Scatter(x=two_wins['toss_winner'], y=two_wins['winner'], mode='markers', text=two_wins['hover'], hoverinfo='text', marker=dict(symbol='diamond', color='teal', size=10, opacity=0.6))
fig = go.Figure([trace])
fig.show()

In [None]:
fig = px.scatter(two_wins, x='toss_winner', y='winner', color='winner', symbol='toss_winner')
fig.update_layout(showlegend=False)
fig.show()

In [None]:
# Problem statement: Multi-line chart: Top 3 teams win count over seasons
team_wins = matches.groupby(['season', 'winner']).size().reset_index(name='wins')
top_3_teams = (
    team_wins.groupby('winner')['wins'].sum()
    .sort_values(ascending=False)
    .head(3)
    .index.tolist()
)
top_3_teams_wins = team_wins[team_wins['winner'].isin(top_3_teams)]
top_3_teams_wins

Unnamed: 0,season,winner,wins
0,2007,Chennai Super Kings,9
2,2007,Kolkata Knight Riders,6
3,2007,Mumbai Indians,7
8,2009,Chennai Super Kings,17
10,2009,Kolkata Knight Riders,10
11,2009,Mumbai Indians,16
16,2011,Chennai Super Kings,11
19,2011,Kolkata Knight Riders,8
20,2011,Mumbai Indians,10
26,2012,Chennai Super Kings,10


In [None]:
top_3_teams_wins_pivot = top_3_teams_wins.pivot_table(index='season', columns='winner', values='wins', fill_value=0)

fig = go.Figure()
for each_column in top_3_teams_wins_pivot.columns:
  fig.add_trace(go.Scatter(x=top_3_teams_wins_pivot.index, y=top_3_teams_wins_pivot[each_column], mode='lines+markers', name=each_column))
fig.show()

In [None]:
top_3_teams_wins_melt = top_3_teams_wins_pivot.reset_index().melt(id_vars='season', var_name='winner', value_name='wins')

fig = px.line(top_3_teams_wins_melt, x='season', y='wins', color='winner', markers=True)
fig.show()

In [None]:
# Problem Statement: Batting average vs strike rate scatter with hover labels

#calculating runs and strike rate
runs = ipl.groupby('batter')['batsman_runs'].sum()
balls = ipl.groupby('batter')['batsman_runs'].count()
sr = (runs / balls) * 100
sr = sr.reset_index()
sr.columns = ['batsman', 'strike rate']

# Calculating avg
player_outs = ipl['player_dismissed'].value_counts()

# Calculating Avg and creating new result df
avg = (runs / player_outs)
avg = avg.reset_index()
avg.columns = ['batsman', 'average']
player_avg_sr = avg.merge(sr, on='batsman')
player_avg_sr

Unnamed: 0,batsman,average,strike rate
0,A Ashish Reddy,18.666667,142.857143
1,A Badoni,24.384615,125.544554
2,A Chandila,,57.142857
3,A Chopra,8.833333,70.666667
4,A Choudhary,25.000000,125.000000
...,...,...,...
668,Yashpal Singh,11.750000,70.149254
669,Younis Khan,3.000000,42.857143
670,Yudhvir Singh,5.500000,137.500000
671,Yuvraj Singh,24.810811,124.784776


In [None]:
player_avg_sr['hover'] = player_avg_sr.apply(lambda row: f"Batsman: {row['batsman']}<br>Average: {row['average']}<br>Strike Rate: {row['strike rate']}", axis=1)

trace = go.Scatter(x=player_avg_sr['average'], y=player_avg_sr['strike rate'], mode='markers', hovertext=player_avg_sr['hover'], hoverinfo='text')
fig = go.Figure([trace])
fig.show()

In [None]:
# Problem Statement: Runs vs strike rate: bubble plot
runs = runs.reset_index()

In [None]:
runs.columns = ['batsman', 'runs']
player_avg_sr = player_avg_sr.merge(runs, on='batsman')
player_avg_sr = player_avg_sr[:50]

In [None]:
player_avg_sr['average'] = player_avg_sr['average'].fillna(value=5)

In [None]:
trace = go.Scatter(x=player_avg_sr['runs'], y=player_avg_sr['strike rate'], mode='markers',marker=dict(size=player_avg_sr['average'],
                               color='royalblue',
                               sizemode='area',
                               sizeref=2.*max(player_avg_sr['average'])/(40.**2),
                               sizemin=5), text=player_avg_sr['batsman'], hovertemplate="Batsman: %{text} <br>Runs: %{x}<br>Average: %{marker.size}<extra></extra>")
fig = go.Figure([trace])
fig.show()

In [None]:
fig = px.scatter(player_avg_sr, x='runs', y='strike rate', size='average', hover_name='batsman', hover_data={'runs': True, 'strike rate': False})

fig.show()

In [None]:
# Problem Statement: Average win margin across venues (line plot)
avg_runs_margin = matches[matches['result'] == 'runs']
avg_runs_margin = avg_runs_margin.groupby('venue')['result_margin'].mean().reset_index(name='runs margin').sort_values(by='runs margin', ascending=False)
avg_runs_margin['runs margin'] = avg_runs_margin['runs margin'].astype(int)

avg_balls_margin = matches[matches['result'] == 'wickets']
avg_balls_margin = avg_balls_margin.groupby('venue')['result_margin'].mean().reset_index(name='wickets margin').sort_values(by='wickets margin', ascending=False)
avg_balls_margin['wickets margin'] = avg_balls_margin['wickets margin'].astype(int)

In [None]:
trace = go.Scatter(x=avg_runs_margin['venue'], y=avg_runs_margin['runs margin'], mode='lines+markers')
trace1 = go.Scatter(x=avg_balls_margin['venue'], y=avg_balls_margin['wickets margin'], mode='lines+markers')
layout = go.Layout(height=1000)
fig = go.Figure([trace, trace1], layout)
fig.show()

In [None]:
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Scatter(x=avg_runs_margin['venue'], y=avg_runs_margin['runs margin'], mode='lines+markers', name='Runs Margin', hovertemplate="<extra></extra>"), row=1, col=1 )
fig.add_trace(go.Scatter(x=avg_balls_margin['venue'], y=avg_balls_margin['wickets margin'], mode='lines+markers', name='Wickets Margin'), row=1, col=2)
fig.update_layout(height=1000, title=dict(text='Win Margins by Runs and Wickets', x=0.5))
fig.show()

In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')

In [None]:
#Problem Statement: Histogram of runs scored per delivery
trace = go.Histogram(y=ipl['total_runs'], nbinsy=8)
layout = go.Layout(title="Runs scored per delivery", xaxis_title="Runs", yaxis_title="Delivery")
fig = go.Figure([trace], layout)
fig.show()


In [None]:
fig = px.histogram(ipl, x='total_runs', nbins=8, title="Runs scored per delivery")
fig.show()

In [None]:
# Problem Statement: Runs per over per inning
runs_per_over = ipl[ipl['inning'].isin([1, 2])]
runs_per_over = runs_per_over.groupby(['inning', 'over'])['total_runs'].sum().reset_index(name='total runs')
runs_per_over['inning'] = runs_per_over['inning'].astype(str)
runs_per_over

Unnamed: 0,inning,over,total runs
0,1,0,6425
1,1,1,7861
2,1,2,8783
3,1,3,8903
4,1,4,9364
5,1,5,9245
6,1,6,7329
7,1,7,7993
8,1,8,8245
9,1,9,8340


In [None]:
fig = px.bar(runs_per_over, x='over', y='total runs', color='inning', color_discrete_map={"1": 'royalblue', "2": 'orange'}, title='Runs per over per inning', text_auto=True)
fig.show()

In [None]:
runs_per_over = runs_per_over.pivot_table(index='over', columns='inning', values='total runs')

In [None]:
fig = px.bar(runs_per_over, x=runs_per_over.index, y=runs_per_over.columns)
fig.show()

In [None]:
runs_per_over

inning,1,2
over,Unnamed: 1_level_1,Unnamed: 2_level_1
0,6425.0,6940.0
1,7861.0,8302.0
2,8783.0,9112.0
3,8903.0,9512.0
4,9364.0,9201.0
5,9245.0,9283.0
6,7329.0,7510.0
7,7993.0,7988.0
8,8245.0,8394.0
9,8340.0,8013.0


In [None]:
fig = go.Figure()

fig.add_trace(go.Bar(x=runs_per_over.index + 1,y=runs_per_over['1'],name='Inning 1',marker_color='royalblue', text=runs_per_over['1'], textposition='auto', hovertemplate="Over: %{x} <br>Runs: %{text}<extra></extra>"))

fig.add_trace(go.Bar(x=runs_per_over.index + 1,y=runs_per_over['2'],name='Inning 2',marker_color='orange',text=runs_per_over['2'], textposition='auto', hovertemplate="Over: %{x}<br>Runs: %{text}<extra></extra>"))

fig.update_layout(
    barmode='stack',
    title='Runs per Over per Inning',
    xaxis_title='Over',
    yaxis_title='Total Runs',
    legend=dict(title='Inning')
)

fig.show()


In [None]:
# Problem Statement: Bar plot of win margins by team
avg_runs_margin = matches[matches['result'] == 'runs']
avg_runs_margin = avg_runs_margin.groupby('winner')['result_margin'].mean().reset_index(name='runs margin').sort_values(by='runs margin', ascending=False)

fig = px.bar(avg_runs_margin, x='winner', y='runs margin',
             labels={'winner': 'Team', 'runs margin': 'Avg runs margin'})
fig.show()

In [None]:
#Problem Statement: Box plot of win margins by team
win_by_runs = matches[matches['result'] == 'runs']
trace = go.Box(x=win_by_runs['winner'], y=win_by_runs['result_margin'], boxpoints='all', jitter=0.4, marker_color='orange')
fig = go.Figure([trace])
fig.update_layout(
    title='Win Margin by Runs (Box Plot)',
    xaxis_title='Team',
    yaxis_title='Win Margin (Runs)',
    xaxis_tickangle=45,
    height=600
)
fig.show()
#jitter in Plotly (used with go.Box and go.Violin) adds random noise along the x-axis to scatter the individual data points when you're displaying them on top of the box plot.

In [None]:
#Problem Statement: Violin plot of player runs across seasons
virat_runs = ipl[ipl['batter'] == 'V Kohli']
virat_runs = virat_runs.groupby(['season', 'match_id'])['batsman_runs'].sum().reset_index()
fig = px.violin(virat_runs, x='season', y='batsman_runs', points='all', box=True)
fig.show()

In [None]:
# Problem Statement: Box plot of match target per venue
trace = go.Box(x=matches['venue'], y=matches['target_runs'], boxpoints='all', jitter=0.2, marker_color='teal')
fig = go.Figure([trace])
fig.update_layout(height=1000)
fig.show()

In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2'],
      dtype='object')

In [None]:
# Problem Statement: Violin of runs for top 5 players
top_5_batsman_list = top_5_batsman['Batsman'].to_list()
top_5_batsman = ipl[ipl['batter'].isin(top_5_batsman_list)]
top_5_batsman = top_5_batsman.groupby(['season','batter','match_id'])['batsman_runs'].sum().reset_index(name='score')
top_5_batsman

Unnamed: 0,season,batter,match_id,score
0,2007/08,RG Sharma,335986,0
1,2007/08,RG Sharma,335988,66
2,2007/08,RG Sharma,335990,36
3,2007/08,RG Sharma,335999,76
4,2007/08,RG Sharma,336002,17
...,...,...,...,...
1095,2024,V Kohli,1426290,42
1096,2024,V Kohli,1426296,92
1097,2024,V Kohli,1426300,27
1098,2024,V Kohli,1426306,47


In [None]:
fig = make_subplots(rows=5, cols=1, subplot_titles=top_5_batsman_list)
for i, each_batsman in enumerate(top_5_batsman_list):
  batsman_stats = top_5_batsman[top_5_batsman['batter'] == each_batsman]
  fig.add_trace(go.Violin(x=batsman_stats['season'], y=batsman_stats['score'], name=each_batsman, points='all', box_visible=True), row=i + 1, col=1)
fig.update_layout(height=1000)
fig.show()

In [None]:
fig = px.violin(top_5_batsman, x='season', y='score', color='batter', facet_row='batter')
fig.for_each_xaxis(lambda axis: axis.update(matches=None))
fig.update_layout(
    height=1000,
    title='Violin Plot of Top 5 Batsmen Runs per Season'
)
fig.show()

In [None]:
# Problem Statement: Histogram: Wickets per match
ipl_2024 = ipl[ipl['season'] == '2024']
wickets_per_match = ipl_2024.groupby('match_id')['is_wicket'].sum().reset_index(name='wickets')
wickets_per_match

Unnamed: 0,match_id,wickets
0,1422119,10
1,1422120,15
2,1422121,14
3,1422122,10
4,1422123,15
...,...,...
66,1426307,11
67,1426309,12
68,1426310,14
69,1426311,16


In [None]:
fig = px.histogram(wickets_per_match, x='wickets',nbins=15,
    title='Distribution of Wickets per Match – IPL 2024',
    labels={'wickets': 'Wickets per Match'})
fig.update_layout(bargap=0.2)
fig.show()

In [None]:
wickets_per_match_all_seasons = ipl.groupby('match_id')['is_wicket'].sum().reset_index(name='wickets')
fig = px.histogram(wickets_per_match_all_seasons, x='wickets',nbins=15,
    title='Distribution of Wickets of all seasons',
    labels={'wickets': 'Wickets per Match'})
fig.update_layout(bargap=0.2)
fig.show()

In [None]:
# Problem Statement: Violin of dot balls faced by batsmen
dot_balls = ipl[(ipl['batsman_runs'] == 0) & (ipl['batter'].isin(top_10))]
dot_balls = dot_balls.groupby(['season', 'batter'])['batsman_runs'].size().reset_index(name='dot balls')

In [None]:
trace = go.Violin(x=dot_balls['batter'], y=dot_balls['dot balls'], points='all', jitter=0.3, marker_color='orange', box_visible=True)
fig = go.Figure([trace])
fig.show()

## Section 2: Heatmaps, Correlations, Subplots – Problems #31–60

In [None]:
# Heatmap of total matches per city and team
matches_team_city = pd.melt(
    matches,
    id_vars=['city'],
    value_vars=['team1', 'team2'],
    var_name='team_type',
    value_name='team'
)
matches_team_city

Unnamed: 0,city,team_type,team
0,Bangalore,team1,Royal Challengers Bengaluru
1,Chandigarh,team1,Punjab Kings
2,Delhi,team1,Delhi Capitals
3,Mumbai,team1,Mumbai Indians
4,Kolkata,team1,Kolkata Knight Riders
...,...,...,...
2185,Hyderabad,team2,Sunrisers Hyderabad
2186,Ahmedabad,team2,Kolkata Knight Riders
2187,Ahmedabad,team2,Rajasthan Royals
2188,Chennai,team2,Rajasthan Royals


In [None]:
matches_team_city = matches_team_city.groupby(['city', 'team']).size().reset_index(name='total matches')
matches_team_city

Unnamed: 0,city,team,total matches
0,Abu Dhabi,Chennai Super Kings,8
1,Abu Dhabi,Delhi Capitals,7
2,Abu Dhabi,Kolkata Knight Riders,14
3,Abu Dhabi,Mumbai Indians,12
4,Abu Dhabi,Punjab Kings,7
...,...,...,...
285,Visakhapatnam,Mumbai Indians,4
286,Visakhapatnam,Punjab Kings,2
287,Visakhapatnam,Rajasthan Royals,1
288,Visakhapatnam,Rising Pune Supergiants,3


In [None]:
trace = go.Heatmap(x=matches_team_city['city'], y=matches_team_city['team'], z=matches_team_city['total matches'], colorscale='Viridis', colorbar=dict(title='Matches'))
layout = go.Layout(title=dict(text='Heatmap of total matches per city and team', x=0.5), xaxis_title='City', yaxis_title='Team', height=700)
fig = go.Figure([trace])
fig.show()

In [None]:
fig = px.density_heatmap(matches_team_city, x='city', y='team', z='total matches', color_continuous_scale='Viridis', title='Heatmap of total matches per city and team')
fig.show()

In [None]:
# Heatmap of team wins vs opponent teams
valid_matches = matches.dropna(subset=('winner'))

valid_matches['opponent'] = valid_matches.apply(lambda row: row['team2'] if row['winner'] == row['team1'] else row['team1'], axis=1)
valid_matches = valid_matches.groupby(['winner', 'opponent']).size().reset_index(name='wins')
valid_matches

Unnamed: 0,winner,opponent,wins
0,Chennai Super Kings,Delhi Capitals,19
1,Chennai Super Kings,Gujarat Titans,3
2,Chennai Super Kings,Kochi Tuskers Kerala,1
3,Chennai Super Kings,Kolkata Knight Riders,19
4,Chennai Super Kings,Lucknow Super Giants,1
...,...,...,...
115,Sunrisers Hyderabad,Mumbai Indians,14
116,Sunrisers Hyderabad,Punjab Kings,19
117,Sunrisers Hyderabad,Rajasthan Royals,13
118,Sunrisers Hyderabad,Rising Pune Supergiants,6


In [None]:
trace = go.Heatmap(x=valid_matches['winner'], y=valid_matches['opponent'], z=valid_matches['wins'], colorscale='Viridis', colorbar=dict(title='Wins'))
layout = go.Layout(title=dict(text='Heatmap of team wins vs opponent teams', x=0.5), xaxis_title='Winner', yaxis_title='Opponent', height=500)
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.density_heatmap(valid_matches, x='winner', y='opponent', z='wins', color_continuous_scale='Viridis', labels={'wins': 'Wins'})
fig.update_layout(title=dict(text='Heatmap of team wins vs opponent teams', x=0.5), xaxis_title='Winner', yaxis_title='Opponent', height=500)
fig.update_coloraxes(colorbar_title='Wins')
fig.show()

In [None]:
# Problem Statement: Correlation heatmap for runs, wickets, boundaries
ipl['boundaries'] = ipl['total_runs'].apply(lambda row: 1 if row in [4, 6] else 0)
rwb = ipl[['total_runs', 'is_wicket', 'boundaries']]
corr_matrix = rwb.corr()
corr_matrix

Unnamed: 0,total_runs,is_wicket,boundaries
total_runs,1.0,-0.181955,0.894744
is_wicket,-0.181955,1.0,-0.101632
boundaries,0.894744,-0.101632,1.0


In [None]:
trace = go.Heatmap(x=corr_matrix.index, y=corr_matrix.columns, z=corr_matrix.values, colorscale='RdBu', colorbar=dict(title='Correlation'), zmin=-1, zmax=1, texttemplate='%{z:.3f}')
layout = go.Layout(title='Correlation heatmap for runs, wickets, boundaries')
fig = go.Figure([trace], layout)
fig.show()

In [None]:
fig = px.imshow(corr_matrix, text_auto=True, color_continuous_scale='RdBu', title='Correlation heatmap for runs, wickets, boundaries', labels=dict(color='Correlation'))
fig.show()

In [None]:
# Problem Statement: Toss win vs match win correlation matrix
valid_matches = matches.dropna(subset=('toss_winner', 'winner'))
valid_matches['toss_win'] = (matches['team1'] == matches['toss_winner']).astype(int)
valid_matches['match_win'] = (matches['team1'] == matches['winner']).astype(int)
corr_matrix = valid_matches[['toss_win', 'match_win']].corr()
corr_matrix

Unnamed: 0,toss_win,match_win
toss_win,1.0,0.021162
match_win,0.021162,1.0


In [None]:
fig = px.imshow(corr_matrix, text_auto=True, color_continuous_scale='RdBu', title='Toss win vs match win correlation matrix', labels=dict(color='Correlation'))
fig.show()

In [None]:
# Problem Statement: Heatmap of runs per bowler-batsman pair
bowler_batsman = ipl.groupby(['bowler', 'batter'])['batsman_runs'].sum().reset_index(name='total_runs')
bowler_batsman = bowler_batsman[bowler_batsman['batter'].isin(top_10)]
bowler_batsman = bowler_batsman[bowler_batsman['total_runs'] > 100]
bowler_batsman_pivot = bowler_batsman.pivot_table(index='batter', columns='bowler', values='total_runs', fill_value=0)
bowler_batsman_pivot

bowler,A Mishra,B Kumar,CH Morris,DJ Bravo,DS Kulkarni,DW Steyn,Harbhajan Singh,I Sharma,JD Unadkat,JJ Bumrah,...,Mohammed Shami,P Kumar,PP Chawla,R Ashwin,RA Jadeja,SL Malinga,SP Narine,Sandeep Sharma,UT Yadav,YS Chahal
batter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AB de Villiers,0.0,0.0,0.0,0.0,0.0,0.0,110.0,0.0,0.0,131.0,...,0.0,0.0,0.0,0.0,111.0,124.0,0.0,107.0,0.0,0.0
CH Gayle,0.0,131.0,0.0,0.0,0.0,0.0,102.0,0.0,0.0,0.0,...,0.0,103.0,116.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DA Warner,0.0,0.0,0.0,0.0,0.0,0.0,113.0,0.0,0.0,0.0,...,109.0,124.0,115.0,122.0,116.0,0.0,195.0,0.0,0.0,156.0
KD Karthik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,124.0,0.0,...,0.0,0.0,0.0,0.0,105.0,0.0,0.0,0.0,0.0,0.0
MS Dhoni,0.0,0.0,0.0,0.0,0.0,111.0,0.0,0.0,105.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
RG Sharma,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,146.0,105.0,0.0,0.0,143.0,0.0,170.0,0.0
RV Uthappa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,123.0,0.0,0.0,0.0,0.0,0.0,0.0
S Dhawan,0.0,0.0,0.0,132.0,127.0,0.0,147.0,0.0,0.0,105.0,...,113.0,0.0,105.0,0.0,119.0,0.0,105.0,0.0,103.0,106.0
SK Raina,122.0,0.0,0.0,0.0,0.0,0.0,132.0,0.0,0.0,0.0,...,0.0,0.0,175.0,0.0,0.0,0.0,118.0,110.0,0.0,0.0
V Kohli,162.0,129.0,109.0,157.0,101.0,0.0,0.0,112.0,0.0,145.0,...,107.0,0.0,140.0,179.0,157.0,0.0,127.0,104.0,168.0,0.0


In [None]:
trace = go.Heatmap(x=bowler_batsman_pivot.columns, y=bowler_batsman_pivot.index, z=bowler_batsman_pivot.values, colorscale='Viridis')
fig = go.Figure([trace])
fig.show()

In [None]:
fig = px.density_heatmap(bowler_batsman, x=bowler_batsman['bowler'], y=bowler_batsman['batter'], z=bowler_batsman['total_runs'], color_continuous_scale='Viridis')
fig.show()

In [None]:
# Problem Statement: Heatmap of over vs total runs
over_runs = ipl.groupby('over')['total_runs'].sum()
trace = go.Heatmap(x=over_runs.index + 1, y=['Total Runs'], z=[over_runs.values.tolist()], colorscale='Viridis', colorbar=dict(title='Total Runs'), texttemplate="%{z}", hovertemplate="Over: %{x}<br>Total Runs: %{z}<extra></extra>")
layout = go.Layout(title='Heatmap of over vs total runs', xaxis_title='Over', width=1300)
fig = go.Figure([trace], layout)
fig.show()

In [None]:
# Problem Statement: Match count by venue and toss decision
venue_toss_decision = matches[['venue', 'toss_decision']]
venue_toss_decision = venue_toss_decision.groupby(['venue', 'toss_decision']).size().unstack(fill_value=0)
venue_toss_decision

toss_decision,bat,field
venue,Unnamed: 1_level_1,Unnamed: 2_level_1
Arun Jaitley Stadium,10,20
Barabati Stadium,2,5
"Barsapara Cricket Stadium, Guwahati",1,2
"Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow",6,8
Brabourne Stadium,9,18
Buffalo Park,3,0
De Beers Diamond Oval,2,1
Dr DY Patil Sports Academy,10,27
Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium,7,8
Dubai International Cricket Stadium,19,27


In [None]:
trace = go.Heatmap(x=venue_toss_decision.columns, y=venue_toss_decision.index, z=venue_toss_decision.values, colorscale='Viridis', colorbar=dict(title='No. of Toss Decisions'), text=venue_toss_decision.values, texttemplate='%{z}',hovertemplate="Toss Decision: %{x}<br>Stadium: %{y}<br>Count: %{z}<extra></extra>")
fig = go.Figure([trace])
fig.update_layout(title='Match count by venue and toss decision', xaxis_title='Toss Decision', yaxis_title='Stadium', height=1000)
fig.show()

In [None]:
fig = px.density_heatmap(venue_toss_decision, labels=dict(x="Toss Decision", y="Venue", color="Match Count"),
    color_continuous_scale="Blues", title='Match count by venue and toss decision')
fig.show()

In [None]:
# Problem Statement: Heatmap: Run rate across innings

runs_per_over = ipl[ipl['inning'].isin([1,2])]
runs_per_over = runs_per_over.groupby(['inning', 'over']).agg(
    total_runs = ('total_runs', 'sum'),
    total_balls = ('total_runs', 'count')
).reset_index()
runs_per_over['run_rate'] = runs_per_over['total_runs'] / runs_per_over['total_balls'] * 6
runs_per_over_matrix = runs_per_over.pivot_table(index='inning', columns='over', values='run_rate')
runs_per_over_matrix

over,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
inning,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,5.601569,6.840609,7.749706,7.852124,8.266,8.187454,6.494462,7.112265,7.344121,7.427638,7.579418,7.755333,7.692835,8.111857,8.570792,8.578167,9.119644,9.655694,10.2875,11.047634
2,6.067317,7.242222,8.043549,8.427643,8.162945,8.289626,6.744499,7.16734,7.562162,7.26583,7.571993,7.696249,7.925641,8.009659,8.13666,8.635816,8.848485,9.371367,9.320173,9.920047


In [None]:
trace = go.Heatmap(x=runs_per_over_matrix.columns + 1, y=[f"Inning {i}" for i in runs_per_over_matrix.index], z=runs_per_over_matrix.values, colorscale='Blues', colorbar=dict(title='Run Rate'),
                   text=runs_per_over_matrix.round(2).values, texttemplate="%{text}",
                   hovertemplate="Over: %{x}<br>%{y}<br>Run Rate: %{text}<extra></extra>")
fig = go.Figure([trace])
fig.show()

In [None]:
# Problem Statement: Annotated heatmap: Top 5 bowlers by economy and wickets
bowlers_economy = ipl.groupby('bowler').agg(
    total_runs = ('total_runs', 'sum'),
    total_wickets = ('is_wicket', 'sum'),
    total_balls = ('is_wicket', 'size')
)
bowlers_economy['total_overs'] = bowlers_economy['total_balls'] / 6
bowlers_economy['economy'] = bowlers_economy['total_runs'] / bowlers_economy['total_overs']
bowlers_economy = bowlers_economy.sort_values(by=['total_wickets', 'economy'], ascending=[False, True])
bowlers_economy = bowlers_economy.head(5)
bowlers_economy

Unnamed: 0_level_0,total_runs,total_wickets,total_balls,total_overs,economy
bowler,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
YS Chahal,4681,213,3628,604.666667,7.741455
DJ Bravo,4436,207,3296,549.333333,8.075243
PP Chawla,5179,201,3895,649.166667,7.97792
SP Narine,4672,200,4146,691.0,6.761216
R Ashwin,5435,198,4679,779.833333,6.969438


In [None]:
z = [bowlers_economy['economy'].round(2).tolist(), bowlers_economy['total_wickets'].tolist()]
x = bowlers_economy.index.tolist()
y = ['Economy', 'Wickets']

fig = ff.create_annotated_heatmap(z, x=x, y=y, colorscale='Viridis', showscale=True, colorbar_title='Correlation')
fig.update_layout(title='Annotated heatmap: Top 5 bowlers by economy and wickets')
fig.show()

In [None]:
ipl['dismissal_kind'].unique()

array([nan, 'caught', 'bowled', 'run out', 'lbw', 'retired hurt',
       'stumped', 'caught and bowled', 'hit wicket',
       'obstructing the field', 'retired out'], dtype=object)

In [None]:
# Problem Statement: Player dismissal modes by team
dismissals = ipl.dropna(subset='dismissal_kind')
dismissals = dismissals.groupby(['batting_team', 'dismissal_kind']).size().reset_index(name='count')
dismissals_pivot = dismissals.pivot_table(index='batting_team', columns='dismissal_kind', values='count', fill_value=0)
dismissals_pivot

dismissal_kind,bowled,caught,caught and bowled,hit wicket,lbw,obstructing the field,retired hurt,retired out,run out,stumped
batting_team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Chennai Super Kings,183.0,794.0,37.0,1.0,79.0,1.0,1.0,0.0,104.0,45.0
Delhi Capitals,262.0,952.0,33.0,0.0,103.0,1.0,0.0,0.0,132.0,39.0
Gujarat Titans,78.0,271.0,10.0,1.0,26.0,0.0,0.0,1.0,34.0,14.0
Kochi Tuskers Kerala,20.0,44.0,2.0,0.0,7.0,0.0,0.0,0.0,11.0,2.0
Kolkata Knight Riders,244.0,945.0,36.0,1.0,100.0,1.0,1.0,0.0,127.0,36.0
Lucknow Super Giants,33.0,190.0,6.0,1.0,20.0,0.0,1.0,0.0,18.0,7.0
Mumbai Indians,239.0,1007.0,46.0,2.0,111.0,0.0,7.0,0.0,127.0,34.0
Punjab Kings,273.0,936.0,41.0,0.0,102.0,0.0,1.0,1.0,136.0,39.0
Rajasthan Royals,238.0,794.0,53.0,2.0,67.0,0.0,0.0,1.0,117.0,40.0
Rising Pune Supergiants,94.0,250.0,17.0,0.0,25.0,0.0,1.0,0.0,48.0,21.0


In [None]:
fig = ff.create_annotated_heatmap(x=dismissals_pivot.columns.tolist(), y=dismissals_pivot.index.tolist(), z=dismissals_pivot.values, colorscale='oranges', showscale=True)
fig.show()

In [None]:
# Problem Statement: Subplot: Bar (runs), line (SR) for top 3 batsmen
top_3_batsman_list = ipl.groupby('batter')['batsman_runs'].sum().reset_index(name='total_runs').sort_values(by='total_runs', ascending=False).head(3)['batter'].tolist()
top_3_batsman_stats = ipl[ipl['batter'].isin(top_3_batsman_list)]
top_3_batsman_stats = top_3_batsman_stats.groupby(['batter', 'season']).agg(
    total_runs = ('batsman_runs', 'sum'),
    total_balls = ('batsman_runs', 'size')
).reset_index()
top_3_batsman_stats['strike_rate'] = top_3_batsman_stats['total_runs'] / top_3_batsman_stats['total_balls'] * 100
top_3_batsman_stats_pivot = top_3_batsman_stats.pivot_table(index='season', columns='batter', values=['total_runs', 'strike_rate'])
top_3_batsman_stats_pivot.columns

MultiIndex([('strike_rate', 'RG Sharma'),
            ('strike_rate',  'S Dhawan'),
            ('strike_rate',   'V Kohli'),
            ( 'total_runs', 'RG Sharma'),
            ( 'total_runs',  'S Dhawan'),
            ( 'total_runs',   'V Kohli')],
           names=[None, 'batter'])

In [None]:
top_3_batsman_stats_pivot['strike_rate']

batter,RG Sharma,S Dhawan,V Kohli
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007/08,146.376812,110.38961,98.214286
2009,112.074303,86.956522,109.333333
2009/10,130.322581,106.111111,142.12963
2011,122.368421,126.984127,117.758985
2012,123.714286,127.008929,109.309309
2013,129.638554,118.70229,135.66879
2014,127.035831,113.554217,119.269103
2015,142.182891,120.477816,127.848101
2016,131.451613,114.383562,148.549618
2017,119.784173,125.065274,119.844358


In [None]:
fig = make_subplots(rows=2, cols=1, subplot_titles=['Runs over Season', 'Strike Rate over Season'], shared_xaxes=True)
for batsman in top_3_batsman_stats_pivot['strike_rate'].columns:
  fig.append_trace(go.Bar(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[('total_runs', batsman)], name=batsman, hovertemplate=f"Batsman: {batsman}<br>" + "Season: %{x}<br>Runs: %{y}<extra></extra>"), row=1, col=1)
  fig.append_trace(go.Scatter(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[('strike_rate', batsman)], name=batsman, mode='lines+markers', hovertemplate=f"Batsman: {batsman}<br>" + "Season: %{x}<br>Strike Rate: %{y}<extra></extra>"), row=2, col=1)
fig.update_layout(
    height=600,
    title=dict(text='Top 3 Batsmen: Runs and Strike Rate Over Seasons', x=0.5),
    showlegend=True

)
fig.show()

In [None]:
top_3_batsman_stats_pivot['strike_rate'].columns

Index(['RG Sharma', 'S Dhawan', 'V Kohli'], dtype='object', name='batter')

In [None]:
# Problem Statement: Subplot: Matches played, won, lost by top 5 teams
top_5_teams = matches.groupby('winner').size().reset_index(name='total_wins').sort_values(by='total_wins', ascending=False).head(5)['winner'].to_list()
valid_matches = matches[(matches['team1'].isin(top_5_teams)) | (matches['team2'].isin(top_5_teams))]
valid_matches

played = pd.Series(dtype=int)
won = pd.Series(dtype=int)
lost = pd.Series(dtype=int)

for team in top_5_teams:
  played[team] = valid_matches[(valid_matches['team1'] == team) | (valid_matches['team2'] == team)].shape[0]
  won[team] = valid_matches[valid_matches['winner'] == team].shape[0]
  lost[team] = played[team] - won[team]

top_5_matches_stats = pd.DataFrame({
    'played': played,
    'won': won,
    'lost': lost
})

top_5_matches_stats

Unnamed: 0,played,won,lost
Mumbai Indians,261,144,117
Chennai Super Kings,238,138,100
Kolkata Knight Riders,251,131,120
Royal Challengers Bengaluru,255,123,132
Sunrisers Hyderabad,257,117,140


In [None]:
fig = make_subplots(rows=3, cols=1, subplot_titles=['Total Matches Played', 'Total Matches Won', 'Total Matches Lost'])
for i, each in enumerate(top_5_matches_stats.columns):
  fig.append_trace(go.Bar(x=top_5_matches_stats.index, y=top_5_matches_stats[each], name=each), row=i + 1, col=1)
fig.update_layout(height=1000)
fig.show()

In [None]:
#Problem Statement: Subplots with shared x-axis: wickets, boundaries per over
wickets = ipl[ipl['is_wicket'] == 1]
wickets = wickets.groupby(['over']).size().reset_index(name='total_wickets')

boundaries = ipl[ipl['total_runs'].isin([4, 6])]
boundaries = boundaries.groupby(['over']).size().reset_index(name='total_boundaries')

wickets_boundaries = boundaries.merge(wickets, on='over')
wickets_boundaries

Unnamed: 0,over,total_boundaries,total_wickets
0,0,1949,463
1,1,2431,503
2,2,2758,533
3,3,2833,548
4,4,2849,576
5,5,2826,540
6,6,1434,444
7,7,1627,472
8,8,1780,537
9,9,1682,498


In [None]:
fig = make_subplots(rows=2, cols=1, subplot_titles=['Total Wickets in each Over', 'Total Boundaries in each Over'], shared_xaxes=True)
fig.append_trace(go.Bar(x=wickets_boundaries['over'] + 1, y=wickets_boundaries['total_wickets'], name='Wickets'), row=1, col=1)
fig.append_trace(go.Bar(x=wickets_boundaries['over'] + 1, y=wickets_boundaries['total_boundaries'], name='Boundaries'), row=2, col=1)
fig.update_layout(title='Wickets and Boundaries per over')
fig.update_yaxes(title='Wickets', row=1, col=1)
fig.update_yaxes(title='Boundaries', row=2, col=1)
fig.update_xaxes(title='Over', type='category')
fig.show()

In [None]:
# Problem Statement: Annotate match with highest margin
max_run_margin = matches[matches['result'] == 'runs'].loc[matches['result_margin'].idxmax()]
max_run_margin

Unnamed: 0,620
id,1082635
season,2017
city,Delhi
date,2017-05-06
match_type,League
player_of_match,LMP Simmons
venue,Feroz Shah Kotla
team1,Delhi Capitals
team2,Mumbai Indians
toss_winner,Delhi Capitals


In [None]:
trace = go.Scatter(x=matches['id'], y=matches['result_margin'], mode='markers')
fig = go.Figure([trace])
fig.add_annotation(x=max_run_margin['id'], y=max_run_margin['result_margin'], showarrow=True, ax=0, ay=50, text=f"Max runs margin: {max_run_margin['result_margin']} runs",
                   arrowhead=2, # Available arrowhead Options (0–8)
                   arrowsize=1,arrowwidth=3,arrowcolor='red',font=dict(color='black', size=12),
                    bgcolor='lightyellow',
                    bordercolor='black',
                    borderwidth=2)
fig.show()

In [None]:
matches.columns

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2', 'Captain'],
      dtype='object')

In [None]:
# Problem Statement: Annotate player with most sixes in a season
sixes = ipl[ipl['batsman_runs'] == 6]
season_sixes = sixes.groupby(['season', 'batter'])['batsman_runs'].count().reset_index(name='count')
# Convert all season values to string
season_sixes['season'] = season_sixes['season'].astype(str)

# Optional: For consistency, extract only the starting year
season_sixes['season'] = season_sixes['season'].str.extract(r'(\d{4})')  # captures the year part

# Convert to int if you want a numeric axis
season_sixes['season'] = season_sixes['season'].astype(int)

In [None]:
fig = px.scatter(season_sixes, x='season', y='count')
for each_season in season_sixes['season'].unique():
    each_season_df = season_sixes[season_sixes['season'] == each_season]
    each_season_max = each_season_df.loc[each_season_df['count'].idxmax()]

    fig.add_annotation(
        x=each_season_max['season'],
        y=each_season_max['count'],
        text=each_season_max['batter'],  # shows player name
        showarrow=True,
        arrowhead=2,
        arrowcolor='orange',
        arrowsize=1.2,
        arrowwidth=2,
        ax=0,
        ay=-40  # arrow pointing upward to the point
    )
fig.show()

In [None]:
matches.columns

Index(['id', 'season', 'city', 'date', 'match_type', 'player_of_match',
       'venue', 'team1', 'team2', 'toss_winner', 'toss_decision', 'winner',
       'result', 'result_margin', 'target_runs', 'target_overs', 'super_over',
       'method', 'umpire1', 'umpire2', 'Captain'],
      dtype='object')

In [None]:
# Problem Statements:
# 1. Annotations on line chart top 3 player runs
# 2. Compare 3 players: Subplots with stats
top_3_batsman_list = ipl.groupby('batter')['batsman_runs'].sum().reset_index(name='total_runs').sort_values(by='total_runs', ascending=False).head(3)['batter'].tolist()
top_3_batsman_stats = ipl[ipl['batter'].isin(top_3_batsman_list)]
top_3_batsman_stats = top_3_batsman_stats.groupby(['batter', 'season']).agg(
    total_runs = ('batsman_runs', 'sum'),
    total_balls = ('batsman_runs', 'size'),
    outs = ('is_wicket', 'sum'),
).reset_index()
top_3_batsman_stats['strike_rate'] = top_3_batsman_stats['total_runs'] / top_3_batsman_stats['total_balls'] * 100
top_3_batsman_stats
top_3_batsman_boundaries = ipl[(ipl['batter'].isin(top_3_batsman_list)) & (ipl['batsman_runs'].isin([4, 6]))]
top_3_batsman_boundaries_stats = top_3_batsman_boundaries.groupby(['batter', 'season']).size().reset_index(name='total_boundaries')
top_3_batsman_stats = top_3_batsman_stats.merge(top_3_batsman_boundaries_stats, on=['batter', 'season'])

In [None]:
top_3_batsman_stats_pivot = top_3_batsman_stats.pivot_table(index='season', columns='batter', values=['total_runs', 'total_balls','outs','strike_rate', 'total_boundaries'])
top_3_batsman_stats_pivot

Unnamed: 0_level_0,outs,outs,outs,strike_rate,strike_rate,strike_rate,total_balls,total_balls,total_balls,total_boundaries,total_boundaries,total_boundaries,total_runs,total_runs,total_runs
batter,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli
season,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2007/08,11.0,10.0,12.0,146.376812,110.38961,98.214286,276.0,308.0,168.0,57.0,43.0,22.0,404.0,340.0,165.0
2009,13.0,4.0,11.0,112.074303,86.956522,109.333333,323.0,46.0,225.0,40.0,3.0,30.0,362.0,40.0,246.0
2009/10,18.0,10.0,13.0,130.322581,106.111111,142.12963,310.0,180.0,216.0,50.0,26.0,38.0,404.0,191.0,307.0
2011,10.0,12.0,13.0,122.368421,126.984127,117.758985,304.0,315.0,473.0,45.0,54.0,71.0,372.0,400.0,557.0
2012,14.0,13.0,12.0,123.714286,127.008929,109.309309,350.0,448.0,333.0,57.0,76.0,43.0,433.0,569.0,364.0
2013,14.0,8.0,14.0,129.638554,118.70229,135.66879,415.0,262.0,471.0,63.0,42.0,87.0,538.0,311.0,639.0
2014,14.0,12.0,14.0,127.035831,113.554217,119.269103,307.0,332.0,301.0,47.0,56.0,39.0,390.0,377.0,359.0
2015,15.0,13.0,14.0,142.182891,120.477816,127.848101,339.0,293.0,395.0,62.0,51.0,58.0,482.0,353.0,505.0
2016,12.0,13.0,12.0,131.451613,114.383562,148.549618,372.0,438.0,655.0,66.0,59.0,122.0,489.0,501.0,973.0
2017,14.0,13.0,11.0,119.784173,125.065274,119.844358,278.0,383.0,257.0,40.0,62.0,34.0,333.0,479.0,308.0


In [None]:
colors = {
    'V Kohli': 'red',
    'S Dhawan': 'blue',
    'RG Sharma': 'green'
}
fig = make_subplots(rows=5, cols=1, subplot_titles=['Total Runs', 'Total Balls', 'No. of Outs', 'Strike Rate',  'Total Boundaries'])
for i, batsman in enumerate(top_3_batsman_list):
  for j, column in enumerate(top_3_batsman_stats.columns[2:]):
    fig.append_trace(go.Scatter(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[(column, batsman)], mode='lines+markers', name=f'{batsman}',
                line=dict(color=colors[batsman]),hovertemplate="Season: %{x}<br>" + f"{column}" + ": %{y}<extra></extra>", showlegend=(j == 1)), row=j + 1, col=1)
fig.update_layout(title='Top 3 Batsman Stats over Season', height=1000)
fig.show()

In [None]:
# Problem Statement: Add player photos using layout images
top_3_batsman_stats = ipl[ipl['batter'].isin(top_3_batsman_list)]
top_3_batsman_stats = top_3_batsman_stats.groupby('batter').agg(
    total_runs = ('batsman_runs', 'sum'),
)
top_3_batsman_stats

image_urls = {'V Kohli': 'https://documents.iplt20.com/ipl/IPLHeadshot2025/2.png', 'S Dhawan': 'https://documents.iplt20.com/ipl/IPLHeadshot2024/11.png', 'RG Sharma': 'https://documents.iplt20.com/ipl/IPLHeadshot2025/6.png'}

fig = px.bar(top_3_batsman_stats, x=top_3_batsman_stats.index, y='total_runs')

for i, batsman in enumerate(top_3_batsman_stats.index):
  fig.add_layout_image(source=image_urls[batsman], x=batsman, y=top_3_batsman_stats.loc[batsman, 'total_runs'] + 1000, xref='x', yref='y', xanchor='center', yanchor='top', layer='above', sizex=1, sizey=2000)
fig.update_yaxes(range=[0, max(top_3_batsman_stats['total_runs']) + 1000])
fig.show()

In [None]:
# Problem Statement: Subplot for dot balls vs boundaries across innings
dot_balls = ipl[ipl['batsman_runs'] == 0]
boundaries = ipl[ipl['batsman_runs'].isin([4, 6])]
dot_balls_stats = dot_balls.groupby('inning').size().reset_index(name='dot_balls_count')
boundaries_stats = boundaries.groupby('inning').size().reset_index(name='boundaries_count')
combine_stats = boundaries_stats.merge(dot_balls_stats, on='inning')
combine_stats = combine_stats[:2]

In [None]:
fig = make_subplots(rows=1, cols=2, subplot_titles=['Boundaries', 'Dot Balls'], shared_yaxes=False)
for i, column in enumerate(combine_stats.columns[1:], start=1):
  fig.append_trace(go.Bar(x=combine_stats['inning'].values.astype(str), y=combine_stats[column], name=column, hovertemplate='Inning: %{x}<br>Count: %{y}<extra></extra>'), row=1, col=i)
  for j in range(2):
    fig.add_annotation(x=j, y=combine_stats[column].iloc[j], xref=f'x{i}', yref=f'y{i}', text=f"{combine_stats[column].iloc[j]}", ax=0, ay=-40, arrowhead=2, arrowcolor='orange', arrowsize=3)
fig.update_layout(title='Bot balls vs Boundaries across innings', xaxis_title='Inning', yaxis_title='Frequency')
fig.show()

In [None]:
# Problem Statements: Gradient color scale for run rates
runs_per_over = ipl[ipl['inning'].isin([1,2])]
runs_per_over = runs_per_over.groupby(['inning', 'over']).agg(
    total_runs = ('total_runs', 'sum'),
    total_balls = ('total_runs', 'count')
).reset_index()
runs_per_over['run_rate'] = runs_per_over['total_runs'] / runs_per_over['total_balls'] * 6
runs_per_over_matrix = runs_per_over.pivot_table(index='inning', columns='over', values='run_rate')
runs_per_over_matrix

over,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
inning,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,5.601569,6.840609,7.749706,7.852124,8.266,8.187454,6.494462,7.112265,7.344121,7.427638,7.579418,7.755333,7.692835,8.111857,8.570792,8.578167,9.119644,9.655694,10.2875,11.047634
2,6.067317,7.242222,8.043549,8.427643,8.162945,8.289626,6.744499,7.16734,7.562162,7.26583,7.571993,7.696249,7.925641,8.009659,8.13666,8.635816,8.848485,9.371367,9.320173,9.920047


In [None]:
fig = make_subplots(rows=2, cols=1, subplot_titles=['Innings 1', 'Innings 2'])
fig.append_trace(go.Bar(x=runs_per_over_matrix.columns, y=runs_per_over_matrix.iloc[0], marker=dict(color=runs_per_over_matrix.iloc[0],coloraxis='coloraxis'), name='Innings 1'), row=1, col=1)
fig.append_trace(go.Bar(x=runs_per_over_matrix.columns, y=runs_per_over_matrix.iloc[1], marker=dict(color=runs_per_over_matrix.iloc[1], coloraxis='coloraxis'), name='Innings 2'), row=2, col=1)
fig.update_layout(title='Gradient color scale for run rates', showlegend=False, coloraxis=dict(colorscale='Viridis', colorbar_title='Run Rate'))
fig.show()

# With coloraxis:
# You explicitly link multiple traces to a common color scale.

# You control the colorbar only once in layout.coloraxis

In [None]:
# Problem Statement: Color map of city popularity using color axis
city_popularity = matches.groupby('city').size().reset_index(name='popularity_by_matches').sort_values(by='popularity_by_matches', ascending=False)
city_popularity = city_popularity[city_popularity['popularity_by_matches'] > 10]
city_popularity

Unnamed: 0,city,popularity_by_matches
26,Mumbai,173
23,Kolkata,93
10,Delhi,90
8,Chennai,85
16,Hyderabad,77
2,Bangalore,65
7,Chandigarh,61
18,Jaipur,57
30,Pune,51
0,Abu Dhabi,37


In [None]:
trace = go.Bar(x=city_popularity['city'], y=city_popularity['popularity_by_matches'], marker=dict(color=city_popularity['popularity_by_matches'], coloraxis='coloraxis'))
fig = go.Figure([trace])
fig.update_layout(title="Color map of city popularity using color axis", xaxis_title='City', yaxis_title='Popularity', coloraxis=dict(colorscale='Viridis', colorbar_title='Popularity'))
fig.show()

In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2', 'boundaries'],
      dtype='object')

In [None]:
# Problem Statement: Color per dismissal type
dismissals = ipl.dropna(subset='dismissal_kind')
dismissals = dismissals.groupby(['dismissal_kind']).size().reset_index(name='count').sort_values(by='count', ascending=False)
dismissals

Unnamed: 0,dismissal_kind,count
1,caught,8063
0,bowled,2212
8,run out,1114
4,lbw,800
2,caught and bowled,367
9,stumped,358
6,retired hurt,15
3,hit wicket,15
5,obstructing the field,3
7,retired out,3


In [None]:
trace = go.Bar(x=dismissals['dismissal_kind'], y=dismissals['count'], marker=dict(color=dismissals['count'], colorscale='Viridis', colorbar_title='Count'))
fig = go.Figure([trace])
fig.show()

In [None]:
# Problem Statement: Use Plotly template themes (dark, ggplot2)
for theme in ['plotly', 'plotly_white', 'plotly_dark', 'ggplot2', 'seaborn', 'simple_white', 'presentation']:
    fig = px.bar(dismissals, x='dismissal_kind', y='count', color='dismissal_kind', template=theme, title=f"Theme: {theme}")
    fig.show()

In [None]:
# Problem Statement: Toggle light/dark mode using layout templates
light_theme = {
    "paper_bgcolor": "white",
    "plot_bgcolor": "white",
    "font": {"color": "black"},
    "xaxis": {"gridcolor": "lightgray"},
    "yaxis": {"gridcolor": "lightgray"},
    "updatemenus[0].bgcolor": "white",
    "updatemenus[0].font.color": "black"
}
dark_theme = {
    "paper_bgcolor": "black",
    "plot_bgcolor": "black",
    "font": {"color": "white"},
    "xaxis": {"gridcolor": "gray"},
    "yaxis": {"gridcolor": "gray"},
    "updatemenus[0].bgcolor": "black",
    "updatemenus[0].font.color": "white"
}

trace = go.Bar(x=dismissals['dismissal_kind'], y=dismissals['count'], marker=dict(color=dismissals['count'], colorscale='Viridis', colorbar_title='Count'))
fig = go.Figure([trace])

fig.update_layout(
    updatemenus = [dict(
        type="buttons",
        direction="right",
        x=1,
        xanchor="right",
        y=1.15,
        showactive=False,
        buttons=[
                dict(label="Light Mode", method="relayout", args=[light_theme]),
                dict(label="Dark Mode", method="relayout", args=[dark_theme])
            ]
    )]
)
fig.update_layout(light_theme)
fig.show()

In [None]:
# Problem Statement:
# 1. Change axis line color per subplot
# 2. Player comparison with marker size, color, shape
colors = {
    'V Kohli': 'red',
    'S Dhawan': 'blue',
    'RG Sharma': 'green'
}

markers = {
    'V Kohli': 'circle',
    'S Dhawan': 'square',
    'RG Sharma': 'diamond'
}

# Create subplots
fig = make_subplots(rows=5, cols=1, subplot_titles=['Total Runs', 'Total Balls', 'No. of Outs', 'Strike Rate', 'Total Boundaries'])

# Add scatter traces with lines+markers
for i, batsman in enumerate(top_3_batsman_list):
    for j, column in enumerate(top_3_batsman_stats.columns[2:]):
        visible = True if batsman == top_3_batsman_list[0] else False
        fig.add_trace(
            go.Scatter(
                x=top_3_batsman_stats_pivot.index,
                y=top_3_batsman_stats_pivot[(column, batsman)],
                mode='lines+markers',
                name=batsman,
                marker=dict(symbol=markers[batsman], size=10),
                line=dict(color=colors[batsman], width=2),
                hovertemplate=f"Season: %{{x}}<br>{column}: %{{y}}<extra></extra>",
                showlegend=(j == 0),  # Show legend once per player
                visible=visible
            ),
            row=j + 1,
            col=1
        )

# Update axes line colors individually
fig.update_xaxes(linecolor='red', linewidth=2, row=1, col=1)
fig.update_xaxes(linecolor='purple', linewidth=2, row=2, col=1)
fig.update_xaxes(linecolor='green', linewidth=2, row=3, col=1)
fig.update_xaxes(linecolor='orange', linewidth=2, row=4, col=1)
fig.update_xaxes(linecolor='black', linewidth=2, row=5, col=1)

fig.update_yaxes(linecolor='blue', linewidth=2)

# Dropdown logic
dropdown_buttons = []
for batsman in top_3_batsman_list:
    visibility = []
    for b in top_3_batsman_list:
        for _ in range(5):  # 5 subplots
            visibility.append(b == batsman)
    dropdown_buttons.append(
        dict(
            label=batsman,
            method='update',
            args=[
                {"visible": visibility},
                {"title": f"Stats for {batsman} over Seasons"}
            ]
        )
    )

fig.update_layout(
    updatemenus=[
        dict(
            type='dropdown',
            direction='down',
            x=1.05,
            xanchor='left',
            y=1.15,
            showactive=True,
            buttons=dropdown_buttons
        )
    ],
    title="Stats for V Kohli over Seasons",
    showlegend=True,
    height=1000
)

fig.show()
print(dropdown_buttons)

[{'label': 'V Kohli', 'method': 'update', 'args': [{'visible': [True, True, True, True, True, False, False, False, False, False, False, False, False, False, False]}, {'title': 'Stats for V Kohli over Seasons'}]}, {'label': 'S Dhawan', 'method': 'update', 'args': [{'visible': [False, False, False, False, False, True, True, True, True, True, False, False, False, False, False]}, {'title': 'Stats for S Dhawan over Seasons'}]}, {'label': 'RG Sharma', 'method': 'update', 'args': [{'visible': [False, False, False, False, False, False, False, False, False, False, True, True, True, True, True]}, {'title': 'Stats for RG Sharma over Seasons'}]}]


## Interactivity, Dropdowns, ML Evaluation – Problems #61–100


In [None]:
# Problem Statement: Dropdown to show team’s yearly wins
team_wins = matches.groupby(['season', 'winner']).size().reset_index(name='Wins')
team_wins = team_wins.pivot_table(index='season', columns='winner', values='Wins', fill_value=0)

fig = go.Figure()
for i, team in enumerate(team_wins.columns):
  fig.add_trace(go.Bar(x=team_wins.index.tolist(), y=team_wins[team], visible=(i == 0), name=team))

dropdown_buttons = [
    dict(
        label=team,
        method='update',
        args = [
            {'visible': [t == team for t in team_wins.columns]},
            {'title': f"Yearly Wins for {team}"}
        ]
    )
    for team in team_wins.columns
]

fig.update_layout(
    updatemenus = [dict(
        type='dropdown',
        direction='down',
        x=1,
        xanchor='right',
        y=1.15,
        buttons = dropdown_buttons,
        bgcolor="white",
        font=dict(color="black"),
        bordercolor="gray",
        borderwidth=1,
        pad={"r": 10, "t": 10},
    )],
    title= "Yearly wins for Chennai Super Kings"
)
fig.show()

In [None]:
# Problem Statement: Dropdown to show individual player stats
top_10_winning_captains

Unnamed: 0,Captain,Wins
2,Hardik Pandya,144
3,MS Dhoni,138
0,Ajinkya Rahane,131
6,Rajat Patidar,123
5,Pat Cummins,117
1,Axar Patel,115
9,Shreyas Iyer,112
8,Sanju Samson,112
11,Suresh Raina,41
10,Steve Smith,27


In [None]:
# Problem Statement: Year slider to filter top scorers
top_scores = ipl.groupby(['season', 'batter'])['batsman_runs'].sum().reset_index(name='total runs').sort_values(by=['season', 'total runs'], ascending=[True, False])
fig = go.Figure()
for i, season in enumerate(top_scores['season'].unique()):
  df = top_scores[top_scores['season'] == season][:10]
  fig.add_trace(go.Bar(x=df['batter'], y=df['total runs'], visible=(i == 0)))

dropdown_buttons = [
    dict(
        label=season,
        method='update',
        args=[
            {
                'visible': [s == season for s in top_scores['season'].unique()],
                'title': f"Top Scores for the Season {season}"
            }
        ]
    )
    for season in top_scores['season'].unique()
]
fig.update_layout(
    updatemenus = [
        dict(
            type='dropdown',
            direction='down',
            x=1, xanchor='center',
            y=1.15,
            buttons = dropdown_buttons
        )
    ]
)
fig.show()

In [None]:
# Problem Statement: Animation: Over-by-over score progress
final_2016 = ipl[(ipl['season'] == '2016') & (ipl['match_type'] == 'Final')]
final_2016 = final_2016.groupby(['inning', 'over'])['total_runs'].sum().groupby(level=0).cumsum().reset_index(name='score')
final_2016_pivot = final_2016.pivot_table(index='over', columns='inning', values='score')
final_2016_pivot

inning,1,2
over,Unnamed: 1_level_1,Unnamed: 2_level_1
0,7.0,5.0
1,12.0,18.0
2,21.0,26.0
3,27.0,42.0
4,46.0,55.0
5,59.0,59.0
6,65.0,69.0
7,75.0,79.0
8,88.0,100.0
9,97.0,112.0


In [None]:

fig = go.Figure()


for inning in final_2016_pivot.columns:
    fig.add_trace(go.Scatter(
        x=[], y=[], mode='lines+markers', name=f"Inning {inning}"
    ))


fig.update_layout(
    title='IPL Finals 2016: Comparison of Both Innings',
    xaxis=dict(title="Over", range=[1, 20]),
    yaxis=dict(title="Inning Score", range=[0, max(final_2016_pivot[1].max(), final_2016_pivot[2].max()) + 10]),
    updatemenus=[dict(
        type="buttons",
        showactive=False,
        buttons=[dict(
            label='Play',
            method='animate',
            args=[None, {
                'frame': {'duration': 500, 'redraw': True},
                'fromcurrent': True,
                'transition': {'duration': 300}
            }]
        )]
    )],
    sliders=[
        dict(
            active=0,
            y=0,
            x=0,
            len=1,
            pad=dict(t=50),
            currentvalue=dict(visible=True, prefix="Over: ", xanchor="right", font=dict(size=14)),
            steps=[
                dict(
                    method="animate",
                    args=[
                        [str(k)],  # This assumes frame name like '2025_1', '2025_2'
                        dict(mode="immediate", frame=dict(duration=300, redraw=True), fromcurrent=True, transition=dict(duration=0))
                    ],
                    label=str(k)
                )
                for k in range(1, 21)
            ]
        )
    ]
)

frames = []
for k in range(1, len(final_2016_pivot) + 1):
    frame_data = []
    for inning in final_2016_pivot.columns:
        frame_data.append(go.Scatter(
            x=final_2016_pivot.index[:k],
            y=final_2016_pivot[inning][:k],
            mode='lines+markers',
            name=f"Inning {inning}"
        ))
    frames.append(go.Frame(data=frame_data, name=str(k)))
fig.frames = frames

fig.show()


In [None]:
# Problem Statement: Animation: Over-by-over score progress in different seasons(Final Match)
# final_matches = {}
# for each_season in ipl['season'].unique()[-4:]:
#   final = ipl[(ipl['season'] == each_season) & (ipl['match_type'] == 'Final')]
#   final = final.groupby(['inning', 'over'])['total_runs'].sum().groupby(level=0).cumsum().reset_index(name='score')
#   final_pivot = final.pivot_table(index='over', columns='inning', values='score')
#   print(final_pivot.columns)
#   final_matches[each_season] = final_pivot

In [None]:
# fig = go.Figure()
# dropdown_buttons = []
# season_frames = {}
# colors = ['blue', 'green']

# # Add traces and build season-specific frames
# for season, df in final_matches.items():
#     # Add traces for each inning
#     for i, inning in enumerate(df.columns):
#         fig.add_trace(go.Scatter(
#             x=[],
#             y=[],
#             mode='lines+markers',
#             name=f"Inning {inning} - {season}",
#             line=dict(color=colors[i]),
#             visible=(season == '2021')
#         ))

#     # Build animation frames for this season
#     season_frames[season] = []
#     for k in range(1, len(df) + 1):
#         frame_data = []
#         for i, inning in enumerate(df.columns):
#             frame_data.append(go.Scatter(
#                 x=df.index[:k],
#                 y=df[inning][:k],
#                 mode="lines+markers",
#                 name=f"Inning {inning} - {season}",
#                 line=dict(color=colors[i])
#             ))
#         season_frames[season].append(go.Frame(data=frame_data, name=f"{season}_{k}"))

# # Combine all frames from all seasons
# fig.frames = [frame for all_frames in season_frames.values() for frame in all_frames]

# # Add dropdown buttons for each season with animation
# for season in final_matches:
#     # Calculate trace visibility
#     visibility_mask = []
#     for s in final_matches:
#         visibility_mask.extend([s == season] * len(final_matches[s].columns))

#     dropdown_buttons.append(dict(
#         label=f"Season {season}",
#         method='update',
#         args=[
#             {'visible': visibility_mask},
#             {
#                 'title': f"IPL {season} Final: Score Comparison"
#             }
#         ],
#         # Add custom animation via "args2" using 'animate' method separately
#         args2=[
#             None,
#             {
#                 'frame': {'duration': 500, 'redraw': True},
#                 'fromcurrent': True,
#                 'transition': {'duration': 300},
#                 'mode': 'immediate'
#             }
#         ],
#         execute=True
#     ))

# # Single "Play" button just plays from current season
# play_button = dict(
#     type='buttons',
#     direction='right',
#     x=1,
#     y=1.1,
#     xanchor='right',
#     showactive=False,
#     buttons=[
#         dict(
#             label='Play',
#             method='animate',
#             args=[
#                 [f"2025_{k}" for k in range(1, 21)],  # Default: 2025
#                 {
#                     'frame': {'duration': 500, 'redraw': True},
#                     'fromcurrent': True,
#                     'transition': {'duration': 300}
#                 }
#             ]
#         )
#     ]
# )

# # Final layout
# fig.update_layout(
#     title="IPL 2021 Final: Score Comparison",
#     xaxis=dict(title="Over", range=[1, 20]),
#     yaxis=dict(title="Inning Score"),
#     updatemenus=[
#         play_button,
#         dict(
#             buttons=dropdown_buttons,
#             direction="down",
#             showactive=True,
#             x=1,
#             y=1.2,
#             xanchor='right'
#         )
#     ],
#     showlegend=True,
#     height=600
# )

# fig.show()


In [None]:
# Problem Statement: Buttons to toggle views (bar/pie)
top_scores = ipl.groupby(['season', 'batter'])['batsman_runs'].sum().reset_index(name='total runs').sort_values(by=['season', 'total runs'], ascending=[True, False])

fig = go.Figure()
season = '2016'
df = top_scores[top_scores['season'] == season].head(5)
fig.add_trace(go.Bar(x=df['batter'], y=df['total runs'], visible=True))
fig.add_trace(go.Pie(labels=df['batter'], values=df['total runs'], visible=False))

chart_type_buttons = []
chart_types = ['Bar', 'Pie']
for chart_type in chart_types:
  visibility = [False] * 2
  if chart_type =='Bar':
    visibility[0] = True
  else:
    visibility[1] = True

  chart_type_buttons.append(dict(
      label=chart_type,
      method='update',
      args = [
          dict(visible=visibility),
          dict(title=f"Top 5 Scorers - {season} {chart_type} view")
      ]
  ))

# Layout
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction='right',
            buttons=chart_type_buttons,
            x=0.9, xanchor='left',
            y=1.15,
            showactive=True
        )
    ],
    title=f"Top 5 Scorers - Season 2007/08 Bar view"
)

fig.show()

In [None]:
# Problem Statement: Buttons to toggle SR vs 4s vs 6s
player = 'RG Sharma'
player_stats = ipl[ipl['batter'] == player]
player_stats_fours = player_stats[player_stats['batsman_runs'] == 4]
player_stats_fours = player_stats_fours.groupby('season').size().reset_index(name='total fours')
player_stats_sixes = player_stats[player_stats['batsman_runs'] == 6]
player_stats_sixes = player_stats_sixes.groupby('season').size().reset_index(name='total sixes')
player_stats = player_stats.groupby('season').agg(
    total_runs = ('batsman_runs', 'sum'),
    total_balls = ('batsman_runs', 'size')
).reset_index()
player_stats['strike_rate'] = player_stats['total_runs'] / player_stats['total_balls'] * 100
player_stats = player_stats.merge(player_stats_fours, on='season').merge(player_stats_sixes, on='season')
player_stats

Unnamed: 0,season,total_runs,total_balls,strike_rate,total fours,total sixes
0,2007/08,404,276,146.376812,38,19
1,2009,362,323,112.074303,22,18
2,2009/10,404,310,130.322581,36,14
3,2011,372,304,122.368421,32,13
4,2012,433,350,123.714286,39,18
5,2013,538,415,129.638554,35,28
6,2014,390,307,127.035831,31,16
7,2015,482,339,142.182891,41,21
8,2016,489,372,131.451613,49,17
9,2017,333,278,119.784173,31,9


In [None]:
fig = go.Figure()
fig.add_trace(go.Bar(x=player_stats['season'], y=player_stats['strike_rate'], visible=True, text=player_stats['strike_rate'].round(2)))
fig.add_trace(go.Bar(x=player_stats['season'], y=player_stats['total sixes'], visible=False, text=player_stats['total sixes']))
fig.add_trace(go.Bar(x=player_stats['season'], y=player_stats['total fours'], visible=False, text=player_stats['total fours']))

buttons = []
stats = ['Strike Rate', 'Sixes', 'Fours']
for i, each_stat in enumerate(stats):
  visibility = [False] * len(stats)
  visibility[i] = True

  buttons.append(dict(
      label=each_stat,
      method='update',
      args = [
          dict(visible=visibility),
          dict(title=f"{player} {each_stat} over Seasons")
      ]
  ))

fig.update_layout(
    updatemenus = [dict(
        type='buttons',
        direction='right',
        x=1, y=1.15,
        xanchor='right',
        buttons = buttons,
        showactive=True,
    )],
    title=f"{player} Strike Rate over Seasons",
    xaxis_title='Season',
    yaxis_title='Count'
)
fig.show()

In [None]:
# Problem Statement: Hover mode = x unified
colors = {
    'V Kohli': 'red',
    'S Dhawan': 'blue',
    'RG Sharma': 'green'
}
fig = make_subplots(rows=5, cols=1, subplot_titles=['Total Runs', 'Total Balls', 'No. of Outs', 'Strike Rate',  'Total Boundaries'])
for i, batsman in enumerate(top_3_batsman_list):
  for j, column in enumerate(top_3_batsman_stats.columns[2:]):
    fig.append_trace(go.Scatter(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[(column, batsman)], mode='lines+markers', name=f'{batsman}',
                line=dict(color=colors[batsman]),hovertemplate=f"{batsman}" + ": %{y}<extra></extra>", showlegend=(j == 1)), row=j + 1, col=1)
fig.update_layout(title='Top 3 Batsman Stats over Season', height=1000, hovermode='x unified')
fig.show()

In [None]:
# Problem Statement: Highlight powerplay overs with shaded shapes
trace = go.Scatter(x=over_runs.index + 1, y=over_runs.values, mode='lines+markers')
fig = go.Figure([trace])
fig.add_shape(type='rect', x0=1, x1=6, y0=0, y1=20000, fillcolor='lightblue', opacity=0.4, layer='below', line_width=0)
fig.add_shape(type='rect', x0=6, x1=15, y0=0, y1=20000, fillcolor='lightpink', opacity=0.4, layer='below', line_width=0)
fig.add_shape(type='rect', x0=15, x1=20, y0=0, y1=20000, fillcolor='lightgreen', opacity=0.4, layer='below', line_width=0)
fig.add_annotation(x=3.5, y=12000, text='Powerplay Overs', showarrow=False, font=dict(size=12), bgcolor="lightblue")
fig.add_annotation(x=10.5, y=12000, text='Middle Overs', showarrow=False, font=dict(size=12), bgcolor="lightpink")
fig.add_annotation(x=18 ,y=12000, text='Depth Overs', showarrow=False, font=dict(size=12), bgcolor="lightseagreen")
fig.show()

In [None]:
# Problem Statment: Shade super over matches on x-axis
season_matches = matches[['season', 'target_runs', 'super_over']]

fig = go.Figure()
seasons = season_matches['season'].unique()

# This will keep track of how many traces and shapes per season
season_shapes = []
season_annotations = []

for i, season in enumerate(seasons):
    df = season_matches[season_matches['season'] == season].copy()
    total_matches = len(df)
    df.index = list(range(1, total_matches + 1))  # Index becomes match number

    # Add main trace
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['target_runs'],
        mode='lines+markers',
        name=f"Season {season}",
        visible=(i == 0)
    ))

    # Add shapes and annotations for super over matches in this season
    shapes = []
    annotations = []
    super_over_matches = df[df['super_over'] != 'N']

    for match_idx in super_over_matches.index:
        shapes.append(dict(
            type='rect',
            xref='x',
            yref='y',
            x0=match_idx - 0.3,
            x1=match_idx + 0.3,
            y0=0,
            y1=df['target_runs'].max() + 10,
            fillcolor='lightblue',
            opacity=0.3,
            line=dict(width=0),
            layer='below'
        ))

        annotations.append(dict(
            x=match_idx,
            y=df['target_runs'].max() + 10,
            text='Super Over',
            showarrow=False,
            font=dict(size=10),
            bgcolor='lightblue',
        ))

    season_shapes.append(shapes)
    season_annotations.append(annotations)

# Dropdown buttons
dropdown_buttons = []
for i, season in enumerate(seasons):
    visibility = [False] * len(seasons)
    visibility[i] = True
    dropdown_buttons.append(dict(
        label=str(season),
        method='update',
        args=[
            {'visible': visibility},
            {
                'title': f"Season {season} - Super Over Highlights",
                'shapes': season_shapes[i],
                'annotations': season_annotations[i]
            }
        ]
    ))

# Add layout and show
fig.update_layout(
    title=f"Season {seasons[0]} - Super Over Highlights",
    xaxis_title='Match Number',
    yaxis_title='Target Runs',
    updatemenus=[dict(
        type='dropdown',
        direction='down',
        x=1, y=1.2,
        xanchor='right',
        buttons=dropdown_buttons
    )],
    shapes=season_shapes[0],
    annotations=season_annotations[0]
)

fig.show()


In [None]:
# Problem statement: Annotate longest win streak in team plot
season_2024 = matches[matches['season'] == 2024]
teams = season_2024['team1'].unique()
win_streak = {}

for team in teams:
    team_matches = season_2024[
        (season_2024['team1'] == team) | (season_2024['team2'] == team)
    ].sort_values(by='id').reset_index(drop=True)

    max_streak = 0
    streak = 0

    for i in range(len(team_matches)):
        if team_matches.loc[i, 'winner'] == team:
            streak += 1
            max_streak = max(max_streak, streak)
        else:
            streak = 0

    win_streak[team] = max_streak

teams_win_streak = pd.DataFrame(win_streak.values(), index=teams).reset_index()
teams_win_streak.rename(columns={'index': 'team', 0:'win streak'}, inplace=True)
teams_win_streak

Unnamed: 0,team,win streak
0,Royal Challengers Bengaluru,6
1,Delhi Capitals,2
2,Kolkata Knight Riders,6
3,Rajasthan Royals,4
4,Gujarat Titans,1
5,Punjab Kings,2
6,Chennai Super Kings,2
7,Sunrisers Hyderabad,4
8,Lucknow Super Giants,3
9,Mumbai Indians,2


In [None]:
trace = go.Bar(x=teams_win_streak['team'], y=teams_win_streak['win streak'])
fig = go.Figure([trace])
max_win_streak = max(teams_win_streak['win streak'])
for idx in range(len(teams_win_streak)):
  if teams_win_streak.loc[idx, 'win streak'] == max_win_streak:
    fig.add_annotation(x=teams_win_streak.loc[idx, 'team'], y=max_win_streak, text=f'High Win Streak: {max_win_streak}', ax=0, arrowhead=2, arrowsize=2, bgcolor='lightblue', arrowwidth=1)

fig.update_layout(title=dict(text='Highest Win Streak in Season 2024', x=0.5), xaxis_title='Team', yaxis_title='Win Streak')
fig.show()

In [None]:
# Problem Statement: Draw penalty zone for low SR in scatter
batsman_stats = ipl.groupby('batter').agg(
    total_runs = ('batsman_runs', 'sum'),
    total_balls = ('batsman_runs', 'size')
).reset_index()
batsman_stats['strike_rate'] = batsman_stats['total_runs'] / batsman_stats['total_balls'] * 100
batsman_stats['relative_runs'] = batsman_stats['total_runs'] / batsman_stats['total_runs'].max() * 100
batsman_stats

Unnamed: 0,batter,total_runs,total_balls,strike_rate,relative_runs
0,A Ashish Reddy,280,196,142.857143,3.493886
1,A Badoni,634,505,125.544554,7.911155
2,A Chandila,4,7,57.142857,0.049913
3,A Chopra,53,75,70.666667,0.661343
4,A Choudhary,25,20,125.000000,0.311954
...,...,...,...,...,...
668,Yashpal Singh,47,67,70.149254,0.586474
669,Younis Khan,3,7,42.857143,0.037434
670,Yudhvir Singh,22,16,137.500000,0.274520
671,Yuvraj Singh,2754,2207,124.784776,34.364861


In [None]:
trace = go.Scatter(x=batsman_stats['strike_rate'], y=batsman_stats['relative_runs'], mode='markers', text=batsman_stats['batter'],
    marker=dict(size=10, color='blue'),
    hovertemplate='Batter: %{text}<br>SR: %{x:.1f}<br>Runs: %{y}<extra></extra>')
fig = go.Figure([trace])
fig.add_shape(type='rect', x0=0, x1=120, y0=0, y1=120, fillcolor='lightblue', opacity=0.3, line=dict(width=0),layer='below')
fig.add_annotation(x=60, y=60, text='Lower Strike < 120', showarrow=False, bgcolor='lightblue')
fig.show()

In [None]:
# Problem Statement: Annotate max sixes using xref='paper'
season_2024 = ipl[ipl['season'] == '2024']
season_2024_sixes = season_2024[season_2024['total_runs'] == 6]
season_2024_sixes = season_2024_sixes.groupby('batting_team').size().reset_index(name='total_sixes')
season_2024_sixes

Unnamed: 0,batting_team,total_sixes
0,Chennai Super Kings,107
1,Delhi Capitals,133
2,Gujarat Titans,68
3,Kolkata Knight Riders,140
4,Lucknow Super Giants,101
5,Mumbai Indians,132
6,Punjab Kings,118
7,Rajasthan Royals,112
8,Royal Challengers Bengaluru,164
9,Sunrisers Hyderabad,176


In [None]:
trace = go.Bar(x=season_2024_sixes['batting_team'], y=season_2024_sixes['total_sixes'], text=season_2024_sixes['total_sixes'])
fig =go.Figure([trace])
fig.add_shape(type='rect', x0=0.9, x1=1, y0=0, y1=season_2024_sixes['total_sixes'].max() + 12, fillcolor='lightblue', opacity=0.7, line=dict(width=0),layer='below', xref='paper')
fig.add_annotation(x=9, y=season_2024_sixes['total_sixes'].max() + 7, text='Highest Sixes', showarrow=False, font=dict(color='teal', size=14))
fig.add_shape(type='rect', x0=0.2, x1=0.3, y0=0, y1=season_2024_sixes['total_sixes'].max() + 12, fillcolor='lightblue', opacity=0.7, line=dict(width=0),layer='below', xref='paper')

fig.add_annotation(x=2, y=season_2024_sixes['total_sixes'].max() + 7, text='Lowest Sixes', showarrow=False, font=dict(color='teal', size=14))
fig.update_layout(title='Annotate Highest and Lowest sixes hit in Season 2024', xaxis_title='Team', yaxis_title='Count')
fig.show()

In [None]:
seasons = ipl['season'].unique()
fig = go.Figure()

dropdown_buttons = []
initial_shapes = []
initial_annotations = []

for i, each_season in enumerate(seasons):
    season_df = ipl[ipl['season'] == each_season]
    season_sixes = season_df[season_df['total_runs'] == 6]
    season_sixes = season_sixes.groupby('batting_team').size().reset_index(name='total_sixes')

    lowest = season_sixes['total_sixes'].min()
    highest = season_sixes['total_sixes'].max()

    trace = go.Bar(
        x=season_sixes['batting_team'],
        y=season_sixes['total_sixes'],
        text=season_sixes['total_sixes'],
        visible=(i == 0),
        name=str(each_season)
    )
    fig.add_trace(trace)

    # Shapes and annotations per bar
    shapes = []
    annotations = []
    for idx, row in season_sixes.iterrows():
        if row['total_sixes'] == lowest:
            shapes.append(dict(type='rect', xref='x', yref='y',
                               x0=idx - 0.5, x1=idx + 0.5,
                               y0=0, y1=row['total_sixes'] + 10,
                               fillcolor='lightblue', opacity=0.4, layer='below'))
            annotations.append(dict(x=row['batting_team'], y=row['total_sixes'] + 5,
                                    text="Lowest", showarrow=False,
                                    font=dict(color='teal')))
        elif row['total_sixes'] == highest:
            shapes.append(dict(type='rect', xref='x', yref='y',
                               x0=idx - 0.5, x1=idx + 0.5,
                               y0=0, y1=row['total_sixes'] + 10,
                               fillcolor='lightcoral', opacity=0.4, layer='below'))
            annotations.append(dict(x=row['batting_team'], y=row['total_sixes'] + 5,
                                    text="Highest", showarrow=False,
                                    font=dict(color='darkred')))
    if i == 0:
        initial_shapes = shapes
        initial_annotations = annotations

    dropdown_buttons.append(dict(
        label=str(each_season),
        method='update',
        args=[
            {'visible': [j == i for j in range(len(seasons))]},
            {'title': f"Sixes by Teams - {each_season}",
             'shapes': shapes,
             'annotations': annotations}
        ]
    ))

fig.update_layout(
    title=f"Sixes by Teams - {seasons[0]}",
    updatemenus=[dict(
        type='dropdown',
        direction='down',
        showactive=True,
        x=1,
        y=1.15,
        buttons=dropdown_buttons
    )],
    xaxis_title='Batting Team',
    yaxis_title='Total Sixes',
    shapes=initial_shapes,
    annotations=initial_annotations
)

fig.show()


In [None]:
# Diverging color scale for run diff (red-white-green)
season = matches[matches['season'] == 2024]
season_runs = season[season['result'] == 'runs'].copy()
season_runs['conceded'] = season_runs['target_runs'] - season_runs['result_margin']
season_wickets = season[season['result'] == 'wickets'].copy()
season_wickets['conceded'] = season_wickets['target_runs'] - 1
run_stats = pd.concat([season_runs, season_wickets], axis=0).reset_index(drop=True)
run_stats = run_stats[['winner', 'target_runs', 'conceded']]
run_stats.rename(columns={'winner': 'team', 'target_runs': 'scored'}, inplace=True)
run_stats = run_stats.groupby('team').sum()
run_stats['run_diff'] = run_stats['scored'] - run_stats['conceded']
run_stats = run_stats.sort_values(by='run_diff', ascending=False)
run_stats

Unnamed: 0_level_0,scored,conceded,run_diff
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kolkata Knight Riders,2042.0,1786.0,256.0
Chennai Super Kings,1249.0,1057.0,192.0
Royal Challengers Bengaluru,1382.0,1210.0,172.0
Sunrisers Hyderabad,1941.0,1776.0,165.0
Lucknow Super Giants,1294.0,1191.0,103.0
Delhi Capitals,1364.0,1289.0,75.0
Gujarat Titans,904.0,860.0,44.0
Mumbai Indians,799.0,759.0,40.0
Rajasthan Royals,1612.0,1573.0,39.0
Punjab Kings,945.0,940.0,5.0


In [None]:
hovertemplate = run_stats.apply(lambda row: f"Scored: {row['scored']}<br>" + f"Conceded: {row['conceded']}<br>" + f"Run Diff: {row['run_diff']}<extra></extra>", axis=1)

trace = go.Bar(x=run_stats.index, y=run_stats['run_diff'], marker=dict(colorscale='RdYlGn', colorbar=dict(title='Run diff'), color=run_stats['run_diff']), hovertemplate=hovertemplate)
fig = go.Figure([trace])
fig.update_layout(title='Each Teams Performance - Diff of Scored and Conceded')
fig.add_shape(type='rect', x0=-0.6, x1=0.6, y0=0, y1=run_stats['run_diff'].iloc[0] + 20, fillcolor='lightblue', opacity=0.3, layer='below')
fig.add_annotation(x=0, y=run_stats['run_diff'].iloc[0] + 10, text='Best Performer', showarrow=False, font=dict(color='teal', size=14))
fig.show()

In [None]:
# Problem Statement: Manual colorscale for player form
def assign_color(runs):
    if runs > 650:
        return '#004529'  # Elite - deep green
    elif runs > 600:
        return '#006837'  # Excellent
    elif runs > 575:
        return '#238b45'  # Strong
    elif runs > 550:
        return '#41ab5d'  # Good
    elif runs > 525:
        return '#78c679'  # Above average
    elif runs > 500:
        return '#addd8e'  # Average
    elif runs > 475:
        return '#d9f0a3'  # Meh
    elif runs > 450:
        return '#f7fcb9'  # Below average
    elif runs > 425:
        return '#fee391'  # Weak
    else:
        return '#f03b20'  # Poor
fig = go.Figure()
dropdown_buttons = []
for each_season in seasons:
  season_df = ipl[ipl['season'] == each_season]
  top_10 = season_df.groupby('batter')['batsman_runs'].sum().sort_values(ascending=False).head(10).index.to_list()
  top_10_batsman = season_df[season_df['batter'].isin(top_10)]
  top_10_batsman = top_10_batsman.groupby(['batter'])['batsman_runs'].sum().reset_index()
  top_10_batsman = top_10_batsman.sort_values(by='batsman_runs', ascending=False)
  top_10_batsman['color'] = top_10_batsman['batsman_runs'].apply(assign_color)
  fig.add_trace(go.Bar(x=top_10_batsman['batter'], y=top_10_batsman['batsman_runs'], marker=dict(color=top_10_batsman['color']), text=top_10_batsman['batsman_runs'], textposition='auto', visible=(each_season == seasons[0])))
  dropdown_buttons.append(dict(
        label=str(each_season),
        method='update',
        args=[
            {'visible': [j == i for j in range(len(seasons))]},
            {'title': f"Top 10 Performers - {each_season}"}
        ]
    ))

fig.update_layout(updatemenus=[dict(
                    type='dropdown',
                    direction='down',
                    x=1, y=1.15,
                    xanchor='center',
                    showactive=True,
                    buttons=dropdown_buttons)],
                  title=f"Top 10 Performers - {seasons[0]}",
                  xaxis_title='Batsman', yaxis_title='Runs')
fig.show()

In [None]:
trace = go.Bar(x=top_10_batsman['batter'], y=top_10_batsman['batsman_runs'], marker=dict(color=top_10_batsman['batsman_runs'], colorscale='RdYlGn', colorbar=dict(title='Player Form')))
fig = go.Figure([trace])
fig.show()

In [None]:
# Problem Statement: Sync coloraxis across subplots
valid_matches = matches.dropna(subset=('winner'))

valid_matches['opponent'] = valid_matches.apply(lambda row: row['team2'] if row['winner'] == row['team1'] else row['team1'], axis=1)
valid_matches = valid_matches.groupby(['season', 'winner', 'opponent']).size().reset_index(name='wins')
season_2023_2024 = valid_matches[valid_matches['season'].isin([2023, 2024])]
season_2023_2024['season'].unique()

array([2023, 2024])

In [None]:
fig = make_subplots(rows=1, cols=2, subplot_titles=['Season 2023', 'Season 2024'])
for i, season in enumerate(season_2023_2024['season'].unique()):
  df = season_2023_2024[season_2023_2024['season'] == season]
  print(i)
  fig.append_trace(go.Heatmap(x=df['winner'], y=df['opponent'], z=df['wins'], coloraxis='coloraxis'), row=1, col=i + 1)

fig.update_layout(title='Season 2023 and 2024 wins heatmap', coloraxis=dict(
    colorscale='Viridis', colorbar=dict(title='Wins')
), width=1500)
fig.show()

0
1


In [None]:
# Problem Statement: Legend group for batting metrics
top_3_batsman_stats_pivot

Unnamed: 0_level_0,outs,outs,outs,strike_rate,strike_rate,strike_rate,total_balls,total_balls,total_balls,total_boundaries,total_boundaries,total_boundaries,total_runs,total_runs,total_runs
batter,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli,RG Sharma,S Dhawan,V Kohli
season,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2007/08,11.0,10.0,12.0,146.376812,110.38961,98.214286,276.0,308.0,168.0,57.0,43.0,22.0,404.0,340.0,165.0
2009,13.0,4.0,11.0,112.074303,86.956522,109.333333,323.0,46.0,225.0,40.0,3.0,30.0,362.0,40.0,246.0
2009/10,18.0,10.0,13.0,130.322581,106.111111,142.12963,310.0,180.0,216.0,50.0,26.0,38.0,404.0,191.0,307.0
2011,10.0,12.0,13.0,122.368421,126.984127,117.758985,304.0,315.0,473.0,45.0,54.0,71.0,372.0,400.0,557.0
2012,14.0,13.0,12.0,123.714286,127.008929,109.309309,350.0,448.0,333.0,57.0,76.0,43.0,433.0,569.0,364.0
2013,14.0,8.0,14.0,129.638554,118.70229,135.66879,415.0,262.0,471.0,63.0,42.0,87.0,538.0,311.0,639.0
2014,14.0,12.0,14.0,127.035831,113.554217,119.269103,307.0,332.0,301.0,47.0,56.0,39.0,390.0,377.0,359.0
2015,15.0,13.0,14.0,142.182891,120.477816,127.848101,339.0,293.0,395.0,62.0,51.0,58.0,482.0,353.0,505.0
2016,12.0,13.0,12.0,131.451613,114.383562,148.549618,372.0,438.0,655.0,66.0,59.0,122.0,489.0,501.0,973.0
2017,14.0,13.0,11.0,119.784173,125.065274,119.844358,278.0,383.0,257.0,40.0,62.0,34.0,333.0,479.0,308.0


In [None]:
fig = go.Figure()
for batsman in top_3_batsman_stats_pivot['strike_rate'].columns:
  fig.add_trace(go.Bar(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[('total_runs', batsman)], name=batsman, hovertemplate=f"Batsman: {batsman}<br>" + "Season: %{x}<br>Runs: %{y}<extra></extra>", legendgroup='batting', text=top_3_batsman_stats_pivot[('total_runs', batsman)]))
  fig.add_trace(go.Scatter(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[('strike_rate', batsman)], name=batsman, mode='lines+markers', hovertemplate=f"Batsman: {batsman}<br>" + "Season: %{x}<br>Strike Rate: %{y}<extra></extra>", legendgroup='batting', yaxis='y2'))
fig.update_layout(
    height=600,
    title=dict(text='Top 3 Batsmen: Runs and Strike Rate Over Seasons', x=0.5),
    showlegend=True,
    yaxis=dict(title='Total Runs'),
    yaxis2= dict(title='Strike Rate', overlaying='y', side='right')

)
fig.show()

In [None]:
# Problem Statement: Unified hover mode for multi-line chart
# Problem Statement: Custom hoverlabel styling
# Problem Statement: Hide legend for clean export
fig = go.Figure()
for batsman in top_3_batsman_stats_pivot['strike_rate'].columns:
  fig.add_trace(go.Scatter(x=top_3_batsman_stats_pivot.index, y=top_3_batsman_stats_pivot[('strike_rate', batsman)], name=batsman, mode='lines+markers',hovertemplate=f"Batsman: {batsman}<br>" + "Strike Rate: %{y}<extra></extra>"))
fig.update_layout(
    height=600,
    title=dict(text='Top 3 Batsmen: Runs and Strike Rate Over Seasons', x=0.5),
    showlegend=True,
    yaxis= dict(title='Strike Rate'),
    hovermode='x unified',
    hoverlabel=dict(
        bgcolor='orange',
        font_size=13,
        font_family='Courier New',
        font_color='black'
    ),
    updatemenus = [dict(
        type='buttons',
        x=1.1, y=1.15, xanchor='right',
        direction='right',
        buttons = [
            dict(label='Show Legend', method='relayout', args=[{'showlegend': True}]),
            dict(label='High Legend', method='relayout', args=[{'showlegend': False}])
        ]

    )]
)
fig.show()

In [None]:
# Problem Statement: Color map for strike rate >140
batsman_stats['color'] = batsman_stats.apply(lambda row: 'green' if row['strike_rate'] > 140 else 'red', axis=1)
trace = go.Scatter(x=batsman_stats['strike_rate'], y=batsman_stats['relative_runs'], mode='markers', text=batsman_stats['batter'],
    marker=dict(size=10, color=batsman_stats['color']),
    hovertemplate='Batter: %{text}<br>SR: %{x:.1f}<br>Runs: %{y}<extra></extra>')
fig = go.Figure([trace])
fig.add_shape(type='rect', x0=141, x1=max(batsman_stats['strike_rate']), y0=0, y1=120, fillcolor='lightblue', opacity=0.3, line=dict(width=0),layer='below')
fig.add_annotation(x=220, y=60, text='Strike Rate > 140', showarrow=False, bgcolor='lightblue')
fig.show()

In [None]:
# Problem Statement: Manual color mapping for dismissal type
dismissals = ipl.dropna(subset='dismissal_kind')
dismissals = dismissals.groupby('dismissal_kind').size().reset_index(name='count')
dismissals = dismissals[~dismissals['dismissal_kind'].isin(['hit wicket', 'obstructing the field', 'retired out', 'retired hurt'])]
dismissals

Unnamed: 0,dismissal_kind,count
0,bowled,2212
1,caught,8063
2,caught and bowled,367
4,lbw,800
8,run out,1114
9,stumped,358


In [None]:
def assign_color(dismissal):
    if dismissal == 'caught':
        return '#004529'
    elif dismissal == 'bowled':
        return '#006837'
    elif dismissal == 'run out':
        return '#238b45'
    elif dismissal == 'lbw':
        return '#41ab5d'
    elif dismissal == 'caught and bowled':
        return '#78c679'
    elif dismissal == 'stumped':
        return '#addd8e'
dismissals['color'] = dismissals['dismissal_kind'].apply(assign_color)
fig = go.Figure()
fig.add_trace(go.Bar(x=dismissals['dismissal_kind'], y=dismissals['count'], marker=dict(color=dismissals['color'])))
fig.show()

In [None]:
# Problem Statement: Color gradient for win margin
seasons = matches['season'].unique()
dropdown_buttons = []
fig = make_subplots(rows=2, cols=1, subplot_titles=['Win Margin by Runs', 'Win Margin by Wickets'])
for i, season in enumerate(seasons):
  season_df = matches[matches['season'] == season]
  runs_df = season_df[season_df['result'] == 'runs']
  wickets_df = season_df[season_df['result'] == 'wickets']
  wickets_df = wickets_df.groupby('result_margin').size().reset_index(name='count')

  fig.add_trace(go.Histogram(x=runs_df['result_margin'], xbins=dict(start=0, end=runs_df['result_margin'].max()+5, size=5), visible=(i == 0), marker=dict(color='rgba(0, 128, 255, 0.7)', line=dict(width=1, color='black')), name='Runs Margin', texttemplate='%{y}', hovertemplate='<span style="color:white">Bin Group: %{x}<br>Count: %{y}</span><extra></extra>', textfont=dict(color='white')), row=1, col=1)
  fig.append_trace(go.Bar(x=wickets_df['result_margin'], y=wickets_df['count'], text=wickets_df['count'], visible=(i == 0),  marker=dict(color='rgba(0, 128, 255, 0.7)', line=dict(width=1, color='black')), hovertemplate='<span style="color:white">Wickets: %{x}<br>Count: %{y}</span><extra></extra>', textfont=dict(color='white')), row=2, col=1)

  visibility = [False] * (2 * len(seasons))
  visibility[2 * i] = True
  visibility[2 * i + 1] = True
  dropdown_buttons.append(dict(
      label=str(season),
      method='update',
      args = [
          dict(visible=visibility),
          dict(title=f'Win Margin in Season {season}')
      ]
  ))

fig.update_layout(title=f'Win Margin in Season {seasons[0]}', showlegend=False, bargap=0.2,
                  updatemenus=[dict(
                      type='dropdown',
                      direction='down',
                      x=1, y=1.15, xanchor='center',
                      buttons=dropdown_buttons,
                      showactive=True
                  )])
fig.update_xaxes(title='Runs', row=1, col=1)
fig.update_xaxes(title='Wickets', row=2, col=1)
fig.update_yaxes(title='Count')
fig.show()

In [None]:
# Problem Statement: Custom tickvals/ticktext for team abbreviations
seasons = ipl['season'].unique()
fig = go.Figure()

dropdown_buttons = []
initial_shapes = []
initial_annotations = []

for i, each_season in enumerate(seasons):
    season_df = ipl[ipl['season'] == each_season]
    season_sixes = season_df[season_df['total_runs'] == 6]
    season_sixes = season_sixes.groupby('batting_team').size().reset_index(name='total_sixes')
    season_sixes['batting_team_sf'] = season_sixes['batting_team'].apply(lambda team_name: ''.join([char for char in team_name if char.isupper()]))
    lowest = season_sixes['total_sixes'].min()
    highest = season_sixes['total_sixes'].max()

    trace = go.Bar(
        x=season_sixes['batting_team'],
        y=season_sixes['total_sixes'],
        text=season_sixes['total_sixes'],
        visible=(i == 0),
        name=str(each_season)
    )
    fig.add_trace(trace)

    # Shapes and annotations per bar
    shapes = []
    annotations = []
    for idx, row in season_sixes.iterrows():
        if row['total_sixes'] == lowest:
            shapes.append(dict(type='rect', xref='x', yref='y',
                               x0=idx - 0.5, x1=idx + 0.5,
                               y0=0, y1=row['total_sixes'] + 10,
                               fillcolor='lightblue', opacity=0.4, layer='below'))
            annotations.append(dict(x=row['batting_team'], y=row['total_sixes'] + 5,
                                    text="Lowest", showarrow=False,
                                    font=dict(color='teal')))
        elif row['total_sixes'] == highest:
            shapes.append(dict(type='rect', xref='x', yref='y',
                               x0=idx - 0.5, x1=idx + 0.5,
                               y0=0, y1=row['total_sixes'] + 10,
                               fillcolor='lightcoral', opacity=0.4, layer='below'))
            annotations.append(dict(x=row['batting_team'], y=row['total_sixes'] + 5,
                                    text="Highest", showarrow=False,
                                    font=dict(color='darkred')))
    if i == 0:
        initial_shapes = shapes
        initial_annotations = annotations

    dropdown_buttons.append(dict(
        label=str(each_season),
        method='update',
        args=[
            {'visible': [j == i for j in range(len(seasons))]},
            {'title': f"Sixes by Teams - {each_season}",
             'shapes': shapes,
             'annotations': annotations}
        ]
    ))

fig.update_layout(
    title=f"Sixes by Teams - {seasons[0]}",
    updatemenus=[dict(
        type='dropdown',
        direction='down',
        showactive=True,
        x=1,
        y=1.15,
        buttons=dropdown_buttons
    )],
    xaxis_title='Batting Team',
    yaxis_title='Total Sixes',
    shapes=initial_shapes,
    annotations=initial_annotations,
    xaxis=dict(
        tickmode='array',
        tickvals=season_sixes['batting_team'],
        ticktext=season_sixes['batting_team_sf']
    )
)

fig.show()


In [None]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'fielder', 'id', 'season', 'city', 'date',
       'match_type', 'player_of_match', 'venue', 'team1', 'team2',
       'toss_winner', 'toss_decision', 'winner', 'result', 'result_margin',
       'target_runs', 'target_overs', 'super_over', 'method', 'umpire1',
       'umpire2', 'boundaries'],
      dtype='object')