In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Read the ball-by-ball deliveries CSV into the working DataFrame.
file_path = 'deliveries.csv'
deliveries_df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Tally the NaN entries in every column to see which fields need imputing.
missing_values = deliveries_df.isna().sum()
print(missing_values)

match_id                0
inning                  0
batting_team            0
bowling_team            0
over                    1
ball                    1
batter                  1
bowler                  1
non_striker             1
batsman_runs            1
extra_runs              1
total_runs              1
extras_type         19065
is_wicket               1
player_dismissed    19290
dismissal_kind      19290
fielder             19569
dtype: int64


In [None]:

# Impute missing values by assigning results back to the frame.
# The chained form `df[col].fillna(..., inplace=True)` acts on an intermediate
# object; under pandas Copy-on-Write (default from pandas 3.0) that object is a
# copy, so the original frame would silently stay unfilled. `fillna(method=...)`
# is also deprecated in favour of the dedicated `.ffill()` method.

# Sequence/context fields: carry the previous delivery's value forward.
ffill_columns = ['over', 'ball', 'batter', 'bowler', 'non_striker']
deliveries_df[ffill_columns] = deliveries_df[ffill_columns].ffill()

# Remaining fields: substitute an explicit default per column.
fill_defaults = {
    'batsman_runs': 0,                     # missing runs treated as no runs scored
    'extra_runs': 0,                       # missing extras treated as none awarded
    'total_runs': 0,                       # total should never be null; assume 0
    'is_wicket': 0,                        # missing flag treated as no wicket
    'extras_type': 'No Extra',             # placeholder label for "no extra on this ball"
    'player_dismissed': 'Not Out',         # placeholder label for "nobody dismissed"
    'dismissal_kind': 'Not Applicable',    # placeholder label for "no dismissal"
    'fielder': 'No Fielder',               # placeholder label for "no fielder involved"
}
deliveries_df = deliveries_df.fillna(value=fill_defaults)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  deliveries_df['over'].fillna(method='ffill', inplace=True)  # Forward fill since over numbers are sequential
  deliveries_df['over'].fillna(method='ffill', inplace=True)  # Forward fill since over numbers are sequential
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  deliveries_d

In [None]:
# Re-count NaN entries per column to confirm the imputation left none behind.
missing_values = deliveries_df.isna().sum()
print(missing_values)

match_id            0
inning              0
batting_team        0
bowling_team        0
over                0
ball                0
batter              0
bowler              0
non_striker         0
batsman_runs        0
extra_runs          0
total_runs          0
extras_type         0
is_wicket           0
player_dismissed    0
dismissal_kind      0
fielder             0
dtype: int64


In [None]:

# Count deliveries per extras type.
# Deliveries without an extra carry the 'No Extra' placeholder written during
# imputation; it is not an extras type, so it is excluded here. Otherwise that
# single filler slice would dominate the pie and hide the real distribution.
extra_counts = (
    deliveries_df.loc[deliveries_df['extras_type'] != 'No Extra', 'extras_type']
    .value_counts()  # NaN (if any remain) is dropped by default
    .reset_index()
)
extra_counts.columns = ['extras_type', 'count']

# Pie chart of the share of each actual extras type.
fig = px.pie(extra_counts, values='count', names='extras_type',
             title="Distribution of Extra Types",
             color_discrete_sequence=px.colors.qualitative.Set3)

fig.show()

In [None]:
# Count how many times each player was dismissed.
# Deliveries without a dismissal carry the 'Not Out' placeholder written during
# imputation; it is not a player, so it is excluded here. Otherwise it would be
# ranked as the most-dismissed "player" and swamp the chart.
player_dismissed_counts = (
    deliveries_df.loc[deliveries_df['player_dismissed'] != 'Not Out', 'player_dismissed']
    .value_counts()  # NaN (if any remain) is dropped by default
    .reset_index()
)
player_dismissed_counts.columns = ['player_dismissed', 'count']

# Keep the ten players with the highest dismissal counts.
top_10_dismissed = player_dismissed_counts.nlargest(10, 'count')

# Pie chart of the relative share among those ten players.
fig = px.pie(top_10_dismissed, values='count', names='player_dismissed',
             title="Top 10 Players Dismissed",
             color_discrete_sequence=px.colors.qualitative.Pastel)

fig.show()

In [None]:
# Sum the extra runs conceded by each bowling side, smallest total first.
extra_runs_by_team = (
    deliveries_df
    .groupby('bowling_team')['extra_runs']
    .sum()
    .reset_index()
    .sort_values(by='extra_runs', ascending=True)
)

# Horizontal bars, one per bowling team, shaded by the size of the total.
fig = px.bar(
    data_frame=extra_runs_by_team,
    x='extra_runs',
    y='bowling_team',
    orientation='h',
    color='extra_runs',
    color_continuous_scale=px.colors.sequential.Turbo,
    labels={'extra_runs': 'Total Extra Runs', 'bowling_team': 'Bowling Team'},
    title='Total Extra Runs Conceded by Teams (Ascending Order)',
)

fig.show()

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Re-read the CSV, rebinding `deliveries_df` to the file's contents as stored on disk.
deliveries_df = pd.read_csv(filepath_or_buffer='deliveries.csv')

# Teams compared against `batting_team` further down in this cell.
team_runs = 'Mumbai Indians'
team_sixes = 'Royal Challengers Bangalore'

# Teams compared against `bowling_team` further down in this cell.
team_wickets = 'Chennai Super Kings'
team_dot_balls = 'Kolkata Knight Riders'
team_boundaries = 'Delhi Capitals'
team_dismissals = 'Rajasthan Royals'

# Team name associated with the extras chart.
team_extras = 'Sunrisers Hyderabad'
# Total runs per over number for the selected batting side.
runs_per_over = (
    deliveries_df
    .loc[lambda frame: frame['batting_team'] == team_runs]
    .groupby('over')['total_runs']
    .sum()
    .reset_index()
)
fig1 = px.bar(
    data_frame=runs_per_over,
    x='over',
    y='total_runs',
    color='total_runs',
    color_continuous_scale='Viridis',
    title=f'Total Runs Scored Per Over - {team_runs}',
)
fig1.show()
# Number of deliveries with a recorded dismissal, per over number, while the
# selected side was bowling.
wickets_per_over = (
    deliveries_df
    .loc[lambda frame: frame['player_dismissed'].notna() & (frame['bowling_team'] == team_wickets)]
    .groupby('over')
    .size()
    .reset_index(name='wickets')
)
fig2 = px.bar(
    data_frame=wickets_per_over,
    x='over',
    y='wickets',
    color='wickets',
    color_continuous_scale='Reds',
    title=f'Wicket Distribution Across Overs - {team_wickets}',
)
fig2.show()
# Top six-hitters, preferably restricted to the `team_sixes` batting side.
# Each filter is computed once and reused for both the diagnostics and the chart.
all_sixes = deliveries_df[deliveries_df['batsman_runs'] == 6]
sixes_rcb = all_sixes[all_sixes['batting_team'] == team_sixes]

# Diagnostic row counts for the team-restricted and unrestricted filters.
print('Number of rows where there are sixes and the team is Royal challengers bangalore:')
print(sixes_rcb.shape[0])
print('Number of rows where there are sixes:')
print(all_sixes.shape[0])

# If the team filter matches nothing, fall back to sixes from every team and
# record that in the label so the chart title describes the data actually shown.
if sixes_rcb.empty:
    sixes_filtered = all_sixes
    sixes_scope = 'All Teams'
else:
    sixes_filtered = sixes_rcb
    sixes_scope = team_sixes

# Ten batters with the most sixes within the chosen scope.
sixes = sixes_filtered['batter'].value_counts().nlargest(10).reset_index()
sixes.columns = ['batsman', 'sixes']
fig3 = px.pie(sixes, values='sixes', names='batsman',
              title=f'Top 10 Players with Most Sixes - {sixes_scope}',
              color_discrete_sequence=px.colors.qualitative.Pastel)
fig3.show()

# Share of each bowler's deliveries that conceded zero total runs, for the
# selected bowling side.
# NOTE(review): no minimum-deliveries cut-off is applied, so a bowler with very
# few balls can rank at the top — confirm that is intended.
dot_balls = (
    deliveries_df
    .loc[lambda frame: (frame['total_runs'] == 0) & (frame['bowling_team'] == team_dot_balls)]
    .groupby('bowler')
    .size()
    .reset_index(name='dot_balls')
)
balls_bowled = (
    deliveries_df
    .loc[lambda frame: frame['bowling_team'] == team_dot_balls]
    .groupby('bowler')
    .size()
    .reset_index(name='total_balls')
)

# Default (inner) merge: only bowlers present in both tables are kept.
dot_ball_percentage = dot_balls.merge(balls_bowled, on='bowler')
dot_ball_percentage['dot_percentage'] = (
    dot_ball_percentage['dot_balls'].div(dot_ball_percentage['total_balls']).mul(100)
)

top_dot_bowlers = dot_ball_percentage.nlargest(10, 'dot_percentage')
fig4 = px.scatter(
    data_frame=top_dot_bowlers,
    x='bowler',
    y='dot_percentage',
    size='dot_percentage',
    color='dot_percentage',
    color_continuous_scale='Magma',
    title=f'Top 10 Bowlers with Highest Dot Ball Percentage - {team_dot_balls}',
)
fig4.show()
# Ten bowlers of the selected side with the most deliveries hit for four or six.
boundaries_conceded = (
    deliveries_df
    .loc[lambda frame: frame['batsman_runs'].isin([4, 6]) & (frame['bowling_team'] == team_boundaries)]
    .groupby('bowler')
    .size()
    .nlargest(10)
    .reset_index(name='boundaries')
)
fig5 = px.bar(
    data_frame=boundaries_conceded,
    x='bowler',
    y='boundaries',
    color='boundaries',
    color_continuous_scale='Rainbow',
    title=f'Top 10 Bowlers Conceding Most Boundaries - {team_boundaries}',
)
fig5.show()
# Total extra runs conceded by every bowling side, smallest total first.
# The data is not filtered to a single team, so the title does not name
# `team_extras`; naming one team would misdescribe an all-teams comparison.
extra_runs_by_team = deliveries_df.groupby('bowling_team')['extra_runs'].sum().reset_index()
extra_runs_by_team = extra_runs_by_team.sort_values(by='extra_runs', ascending=True)
fig6 = px.bar(extra_runs_by_team, x='extra_runs', y='bowling_team', orientation='h',
              title='Total Extra Runs Conceded by Teams',
              labels={'extra_runs': 'Total Extra Runs', 'bowling_team': 'Bowling Team'},
              color='extra_runs',
              color_continuous_scale='Cividis')
fig6.show()

# Frequency of each dismissal kind recorded while the selected side was bowling.
dismissal_counts = (
    deliveries_df
    .loc[deliveries_df['bowling_team'] == team_dismissals, 'dismissal_kind']
    .value_counts()
    .reset_index()
)
dismissal_counts.columns = ['dismissal_kind', 'count']
fig7 = px.pie(
    data_frame=dismissal_counts,
    names='dismissal_kind',
    values='count',
    color_discrete_sequence=px.colors.qualitative.Set3,
    title=f'Distribution of Dismissal Types - {team_dismissals}',
)
fig7.show()

Number of rows where there are sixes and the team is Royal challengers bangalore:
1488
Number of rows where there are sixes:
13051


In [None]:
import pandas as pd
import plotly.express as px

# Read the deliveries CSV into `df` for the team-level wicket statistics.
df = pd.read_csv(filepath_or_buffer='deliveries.csv')

# Count deliveries with a recorded dismissal per bowling team, lowest first.
wickets = (
    df
    .loc[lambda frame: frame['player_dismissed'].notna()]
    .groupby('bowling_team')
    .size()
    .reset_index(name='total_wickets')
    .sort_values('total_wickets')
)
fig1 = px.bar(
    data_frame=wickets,
    x='bowling_team',
    y='total_wickets',
    color='total_wickets',
    color_continuous_scale='Viridis',
    labels={'bowling_team': 'Team', 'total_wickets': 'Total Wickets'},
    title='Total Wickets Per Team (Ascending)',
)
fig1.show()

# Number of distinct match ids in which each team appears as the bowling side,
# lowest first.
matches = (
    df
    .groupby('bowling_team')['match_id']
    .nunique()
    .reset_index(name='total_matches')
    .sort_values('total_matches')
)
fig2 = px.bar(
    data_frame=matches,
    x='bowling_team',
    y='total_matches',
    color='total_matches',
    color_continuous_scale='Viridis',
    labels={'bowling_team': 'Team', 'total_matches': 'Total Matches'},
    title='Total Matches Played Per Team (Ascending)',
)
fig2.show()

# Wickets divided by matches for each team (aligned on team name), lowest first.
avg_wickets = (
    wickets.set_index('bowling_team')['total_wickets']
    .div(matches.set_index('bowling_team')['total_matches'])
    .reset_index(name='avg_wickets')
    .sort_values('avg_wickets')
)
fig3 = px.bar(
    data_frame=avg_wickets,
    x='bowling_team',
    y='avg_wickets',
    color='avg_wickets',
    color_continuous_scale='Viridis',
    labels={'bowling_team': 'Team', 'avg_wickets': 'Avg Wickets'},
    title='Average Wickets Per Match (Ascending)',
)
fig3.show()

# Scale each team's per-match wicket average up to the largest match count of
# any team, so totals are comparable across unequal numbers of matches.
max_matches = matches['total_matches'].max()
normalized_wickets = (
    avg_wickets.set_index('bowling_team')['avg_wickets']
    .mul(max_matches)
    .reset_index(name='normalized_wickets')
    .sort_values('normalized_wickets')
)
fig4 = px.bar(
    data_frame=normalized_wickets,
    x='bowling_team',
    y='normalized_wickets',
    color='normalized_wickets',
    color_continuous_scale='Viridis',
    labels={'bowling_team': 'Team', 'normalized_wickets': 'Normalized Wickets'},
    title='Normalized Total Wickets (Max Matches Played, Ascending)',
)
fig4.show()