# Dictionary with Stadiums

In [1]:
import pandas as pd

In [2]:
ball_by_ball_df = pd.read_csv('../raw_data/IPL_Ball_by_Ball_2008_2022.csv')

In [3]:
# ball by ball is not particularly useful. We want to calculate the innings total. For this we must group by id and calculate the total runs for each innings. 

ball_by_ball_df['innings_total'] = ball_by_ball_df.groupby(['ID', 'innings'])['total_run'].transform('sum')

In [4]:
# Next we need to load up the match data. This will give us match level data for all IPL matches between 2008 and 2022

match_df = pd.read_csv('../raw_data/IPL_Matches_2008_2022.csv')

In [5]:
# Merge this DFs on ID to create a dataframe that will not only include the ball by ball data, but also information about who won the match, how it was won etc. 

complete_df = ball_by_ball_df.merge(match_df, on='ID')

In [6]:
complete_df.head()

Unnamed: 0,ID,innings,overs,ballnumber,batter,bowler,non-striker,extra_type,batsman_run,extras_run,...,SuperOver,WinningTeam,WonBy,Margin,method,Player_of_Match,Team1Players,Team2Players,Umpire1,Umpire2
0,1312200,1,0,1,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0,...,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon
1,1312200,1,0,2,YBK Jaiswal,Mohammed Shami,JC Buttler,legbyes,0,1,...,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon
2,1312200,1,0,3,JC Buttler,Mohammed Shami,YBK Jaiswal,,1,0,...,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon
3,1312200,1,0,4,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0,...,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon
4,1312200,1,0,5,YBK Jaiswal,Mohammed Shami,JC Buttler,,0,0,...,N,Gujarat Titans,Wickets,7.0,,HH Pandya,"['YBK Jaiswal', 'JC Buttler', 'SV Samson', 'D ...","['WP Saha', 'Shubman Gill', 'MS Wade', 'HH Pan...",CB Gaffaney,Nitin Menon


In [7]:
complete_df.columns

Index(['ID', 'innings', 'overs', 'ballnumber', 'batter', 'bowler',
       'non-striker', 'extra_type', 'batsman_run', 'extras_run', 'total_run',
       'non_boundary', 'isWicketDelivery', 'player_out', 'kind',
       'fielders_involved', 'BattingTeam', 'innings_total', 'City', 'Date',
       'Season', 'MatchNumber', 'Team1', 'Team2', 'Venue', 'TossWinner',
       'TossDecision', 'SuperOver', 'WinningTeam', 'WonBy', 'Margin', 'method',
       'Player_of_Match', 'Team1Players', 'Team2Players', 'Umpire1',
       'Umpire2'],
      dtype='object')

In [8]:
complete_df.Venue.unique()

array(['Narendra Modi Stadium, Ahmedabad', 'Eden Gardens, Kolkata',
       'Wankhede Stadium, Mumbai', 'Brabourne Stadium, Mumbai',
       'Dr DY Patil Sports Academy, Mumbai',
       'Maharashtra Cricket Association Stadium, Pune',
       'Dubai International Cricket Stadium', 'Sharjah Cricket Stadium',
       'Zayed Cricket Stadium, Abu Dhabi', 'Arun Jaitley Stadium, Delhi',
       'MA Chidambaram Stadium, Chepauk, Chennai', 'Sheikh Zayed Stadium',
       'Rajiv Gandhi International Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'MA Chidambaram Stadium', 'Wankhede Stadium',
       'Punjab Cricket Association IS Bindra Stadium',
       'M.Chinnaswamy Stadium', 'Arun Jaitley Stadium', 'Eden Gardens',
       'Sawai Mansingh Stadium',
       'Maharashtra Cricket Association Stadium',
       'Holkar Cricket Stadium',
       'Rajiv Gandhi International Stadium, Uppal',
       'M Chinnaswamy Stadium', 'Feroz Shah Kotla', 'Green Park',
       'Punjab Cricket A

In [9]:
complete_df.Team1.unique()

array(['Rajasthan Royals', 'Royal Challengers Bangalore',
       'Sunrisers Hyderabad', 'Delhi Capitals', 'Chennai Super Kings',
       'Gujarat Titans', 'Lucknow Super Giants', 'Kolkata Knight Riders',
       'Punjab Kings', 'Mumbai Indians', 'Kings XI Punjab',
       'Delhi Daredevils', 'Rising Pune Supergiant', 'Gujarat Lions',
       'Rising Pune Supergiants', 'Pune Warriors', 'Deccan Chargers',
       'Kochi Tuskers Kerala'], dtype=object)

In [10]:
pd.DataFrame({'team': complete_df.Team1.unique(), 'stadium': ''})

Unnamed: 0,team,stadium
0,Rajasthan Royals,
1,Royal Challengers Bangalore,
2,Sunrisers Hyderabad,
3,Delhi Capitals,
4,Chennai Super Kings,
5,Gujarat Titans,
6,Lucknow Super Giants,
7,Kolkata Knight Riders,
8,Punjab Kings,
9,Mumbai Indians,


In [11]:
stadium = {
    'Rajasthan Royals': 'Sawai Mansingh Stadium',
    'Royal Challengers Bangalore': 'M.Chinnaswamy Stadium',
    'Sunrisers Hyderabad': 'Rajiv Gandhi International Stadium',
    'Delhi Capitals': 'Arun Jaitley Stadium',
    'Chennai Super Kings': 'MA Chidambaram Stadium',
    'Gujarat Titans': 'Narendra Modi Stadium, Ahmedabad', # stadium built in 2021
    'Lucknow Super Giants': 'Wankhede Stadium',
    'Kolkata Knight Riders': 'Eden Gardens',
    'Punjab Kings': 'Himachal Pradesh Cricket Association Stadium',
    'Mumbai Indians': 'Wankhede Stadium',
    'Kings XI Punjab': 'Himachal Pradesh Cricket Association Stadium',
    'Delhi Daredevils': 'Arun Jaitley Stadium',
    'Rising Pune Supergiant': 'Maharashtra Cricket Association Stadium',
    'Pune Warriors': 'Dr DY Patil Sports Academy',
    'Deccan Chargers': 'Rajiv Gandhi International Stadium',
    'Kochi Tuskers Kerala':'Nehru Stadium'
}

In [12]:
stadium_df = pd.DataFrame(list(stadium.items()), columns=['team', 'stadium'])

In [13]:
stadium_df

Unnamed: 0,team,stadium
0,Rajasthan Royals,Sawai Mansingh Stadium
1,Royal Challengers Bangalore,M.Chinnaswamy Stadium
2,Sunrisers Hyderabad,Rajiv Gandhi International Stadium
3,Delhi Capitals,Arun Jaitley Stadium
4,Chennai Super Kings,MA Chidambaram Stadium
5,Gujarat Titans,"Narendra Modi Stadium, Ahmedabad"
6,Lucknow Super Giants,Wankhede Stadium
7,Kolkata Knight Riders,Eden Gardens
8,Punjab Kings,Himachal Pradesh Cricket Association Stadium
9,Mumbai Indians,Wankhede Stadium


In [14]:
new_df = complete_df[['Team1', 'Team2', 'Venue']]

In [15]:
games = new_df[new_df['Team1'] == 'Gujarat Titans']

In [16]:
games['Venue'].value_counts()

Venue
Dr DY Patil Sports Academy, Mumbai               974
Maharashtra Cricket Association Stadium, Pune    459
Wankhede Stadium, Mumbai                         237
Name: count, dtype: int64