In [10]:
import pandas as pd
import numpy as np
import pickle

In [3]:
ball2ball = pd.read_csv('../data/ipl_ball_by_ball_2008_2022.csv')

In [3]:
ball2ball.columns

Index(['id', 'innings', 'overs', 'ball_number', 'batter', 'bowler',
       'non_striker', 'extra_type', 'batsman_run', 'extras_run', 'total_run',
       'non_boundary', 'iswicket_delivery', 'player_out', 'dismisal_kind',
       'fielders_involved', 'batting_team'],
      dtype='object')

In [4]:
matches = pd.read_csv('../data/ipl_matches_2008_2022.csv')

In [5]:
matches.columns

Index(['id', 'city', 'match_date', 'season', 'match_number', 'team1', 'team2',
       'venue', 'toss_winner', 'toss_decision', 'superover', 'winning_team',
       'won_by', 'margin', 'method', 'player_of_match', 'umpire1', 'umpire2'],
      dtype='object')

In [7]:
matches = matches.dropna(subset = 'match_date')

950

run this to add year column to ball2ball dataframe

In [8]:
import pandas as pd

# Assuming your original DataFrame is named 'original_df'
# Example:
# original_df = pd.read_csv('your_csv_file.csv')

# Create a new DataFrame with 'id' and 'year'
new_df = matches[['id', 'match_date']].copy()

# Convert 'match_date' to datetime format
new_df['match_date'] = pd.to_datetime(new_df['match_date'], format='%d-%m-%Y')

# Extract the year and create a new 'year' column
new_df['year'] = new_df['match_date'].dt.year

# Drop the 'match_date' column if you no longer need it
new_df.drop('match_date', axis=1, inplace=True)

# Print or use the new DataFrame 'new_df'
ball2ball = pd.merge(new_df, ball2ball, on='id', how='inner')


In [20]:
ball2ball['extra_type'].unique()

array([nan, 'legbyes', 'wides', 'byes', 'noballs', 'penalty'],
      dtype=object)

In [30]:
from collections import defaultdict
bowler_economy = {}
for index, row in ball2ball.iterrows():
    bowler = row['bowler']
    year = row['year']
    extra_type = row['extra_type']
    total_run = row['total_run']
    extras_run = row['extras_run']
    if year not in bowler_economy:
        bowler_economy[year] = {}
    if bowler not in bowler_economy[year]:
        bowler_economy[year][bowler] = {}
        bowler_economy[year][bowler]['runs_conceeded'] = 0
        bowler_economy[year][bowler]['balls_bowled'] = 0
    bowler_economy[year][bowler]['runs_conceeded'] += total_run - extras_run if extra_type in ['byes','legbyes'] else total_run
    bowler_economy[year][bowler]['balls_bowled'] += 1 if extra_type not in ['wides', 'noballs', 'penalty'] else 0
            


In [32]:
for year in bowler_economy:
    for player in bowler_economy[year]:
        bowler_economy[year][player]['economy'] = bowler_economy[year][player]['runs_conceeded'] * 6 / bowler_economy[year][player]['balls_bowled']

In [40]:
best_economies = {}
for year in bowler_economy:
    sorted_economies = sorted(bowler_economy[year], key = lambda x: bowler_economy[year][x]['economy'])
    best5 = sorted_economies[0:5]
    best_economies[year] = {player: round(bowler_economy[year][player]['economy'],2) for player in best5}
    
best_economies

{2022: {'A Badoni': 5.5,
  'SP Narine': 5.57,
  'Mohsin Khan': 5.97,
  'PH Solanki': 6.33,
  'Tilak Varma': 6.5},
 2021: {'Imran Tahir': 4.0,
  'MC Henriques': 4.5,
  'AK Markram': 5.75,
  'N Rana': 6.0,
  'Harpreet Brar': 6.04},
 2020: {'Rashid Khan': 5.34,
  'Mohammad Nabi': 5.75,
  'Washington Sundar': 5.96,
  'CH Gayle': 6.0,
  'J Yadav': 6.14},
 2019: {'S Dube': 4.8,
  'AS Roy': 5.5,
  'SK Raina': 6.0,
  'STR Binny': 6.29,
  'RA Jadeja': 6.35},
 2018: {'IS Sodhi': 5.87,
  'L Ngidi': 6.0,
  'DJM Short': 6.33,
  'Rashid Khan': 6.74,
  'S Lamichhane': 6.83},
 2017: {'NB Singh': 3.75,
  'Mohammad Nabi': 5.27,
  'R Tewatia': 5.44,
  'Avesh Khan': 5.75,
  'P Negi': 6.12},
 2016: {'N Rana': 3.0,
  'Sachin Baby': 4.8,
  'MR Marsh': 5.0,
  'YK Pathan': 5.5,
  'JW Hastings': 5.55},
 2015: {'RN ten Doeschate': 4.0,
  'J Yadav': 4.14,
  'V Kohli': 5.45,
  'R Ashwin': 5.85,
  'S Nadeem': 6.14},
 2014: {'Ankit Sharma': 5.38,
  'AR Patel': 6.14,
  'R Rampaul': 6.17,
  'SP Narine': 6.43,
  'Harbh

In [45]:
import pickle
with open('best_economies.pkl','wb') as bef:
    pickle.dump(best_economies,bef)

In [6]:
ball2ball['dismisal_kind'].unique()

array([nan, 'caught', 'caught and bowled', 'run out', 'bowled', 'stumped',
       'lbw', 'hit wicket', 'retired hurt', 'retired out',
       'obstructing the field'], dtype=object)

In [11]:
grouped_df = ball2ball.groupby(['year', 'batter'])['batsman_run'].sum().reset_index()
top_scorers_df = grouped_df.groupby('year').apply(lambda x: x.nlargest(5, 'batsman_run')).reset_index(drop=True)
# Print or use the resulting DataFrame 'grouped_df'
# top_scorers_dict = top_scorers_df.to_dict(orient='records')

# Print or use the resulting dictionary 'top_scorers_dict'
top_scorers_dict_2d = {}

# Iterate through the rows of the DataFrame and populate the 2D dictionary
for index, row in top_scorers_df.iterrows():
    year = row['year']
    batter = row['batter']
    batsman_run = row['batsman_run']

    # If the year is not in the dictionary, add it
    if year not in top_scorers_dict_2d:
        top_scorers_dict_2d[year] = {}

    # Add the batter and batsman_run to the dictionary for the specific year
    top_scorers_dict_2d[year][batter] = batsman_run

# Print or use the resulting 2D dictionary 'top_scorers_dict_2d'
print(top_scorers_dict_2d)
with open('top_scorers.pkl','wb') as tf:
    pickle.dump(top_scorers_dict_2d,tf)

{2008: {'SE Marsh': 616, 'G Gambhir': 534, 'ST Jayasuriya': 514, 'SR Watson': 472, 'GC Smith': 441}, 2009: {'ML Hayden': 572, 'AC Gilchrist': 495, 'AB de Villiers': 465, 'SK Raina': 434, 'TM Dilshan': 418}, 2010: {'SR Tendulkar': 618, 'JH Kallis': 572, 'SK Raina': 528, 'SC Ganguly': 493, 'M Vijay': 458}, 2011: {'CH Gayle': 608, 'V Kohli': 557, 'SR Tendulkar': 553, 'SE Marsh': 504, 'MEK Hussey': 492}, 2012: {'CH Gayle': 733, 'G Gambhir': 590, 'S Dhawan': 569, 'AM Rahane': 560, 'V Sehwag': 495}, 2013: {'MEK Hussey': 733, 'CH Gayle': 720, 'V Kohli': 639, 'SK Raina': 548, 'SR Watson': 543}, 2014: {'RV Uthappa': 660, 'DR Smith': 566, 'GJ Maxwell': 552, 'DA Warner': 528, 'SK Raina': 523}, 2015: {'DA Warner': 562, 'AM Rahane': 540, 'LMP Simmons': 540, 'AB de Villiers': 513, 'V Kohli': 505}, 2016: {'V Kohli': 973, 'DA Warner': 848, 'AB de Villiers': 687, 'G Gambhir': 501, 'S Dhawan': 501}, 2017: {'DA Warner': 641, 'G Gambhir': 498, 'S Dhawan': 479, 'SPD Smith': 472, 'SK Raina': 442}, 2018: {'K

In [52]:
ball2ball.columns

Index(['id', 'year', 'innings', 'overs', 'ball_number', 'batter', 'bowler',
       'non_striker', 'extra_type', 'batsman_run', 'extras_run', 'total_run',
       'non_boundary', 'iswicket_delivery', 'player_out', 'dismisal_kind',
       'fielders_involved', 'batting_team'],
      dtype='object')

In [54]:
grouped_df = ball2ball.groupby(['year', 'bowler'])['iswicket_delivery'].count().reset_index()

grouped_df
top_scorers_df = grouped_df.groupby('year').apply(lambda x: x.nlargest(5, 'iswicket_delivery')).reset_index(drop=True)
# Print or use the resulting DataFrame 'grouped_df'
# top_scorers_dict = top_scorers_df.to_dict(orient='records')

# Print or use the resulting dictionary 'top_scorers_dict'
top_scorers_dict_2d = {}

# Iterate through the rows of the DataFrame and populate the 2D dictionary
for index, row in top_scorers_df.iterrows():
    year = row['year']
    batter = row['bowler']
    batsman_run = row['batsman_run']

    # If the year is not in the dictionary, add it
    if year not in top_scorers_dict_2d:
        top_scorers_dict_2d[year] = {}

    # Add the batter and batsman_run to the dictionary for the specific year
    top_scorers_dict_2d[year][batter] = batsman_run

# Print or use the resulting 2D dictionary 'top_scorers_dict_2d'
print(top_scorers_dict_2d)
with open('top_scorers.pkl','wb') as tf:
    pickle.dump(top_scorers_dict_2d,tf)


Unnamed: 0,year,bowler,iswicket_delivery
0,2008,A Kumble,236
1,2008,A Mishra,123
2,2008,A Nehra,280
3,2008,A Nel,18
4,2008,A Symonds,44
...,...,...,...
1668,2022,VR Iyer,24
1669,2022,Washington Sundar,170
1670,2022,YBK Jaiswal,1
1671,2022,YS Chahal,429


In [23]:
ge = open('../data/player_stats/top_bowlers_by_season_corrected_economy.pkl','rb')
me = open('../data/player_stats/best_economies.pkl','rb')
ce = open('../data/player_stats/top_bowlers_by_season_int_year.pkl','rb')
gpt_economy = pickle.load(ge)
my_economy = pickle.load(me)
corrected_economy = pickle.load(ce)

In [20]:
my_economy

{2022: {'A Badoni': 5.5,
  'SP Narine': 5.57,
  'Mohsin Khan': 5.97,
  'PH Solanki': 6.33,
  'Tilak Varma': 6.5},
 2021: {'Imran Tahir': 4.0,
  'MC Henriques': 4.5,
  'AK Markram': 5.75,
  'N Rana': 6.0,
  'Harpreet Brar': 6.04},
 2020: {'Rashid Khan': 5.34,
  'Mohammad Nabi': 5.75,
  'Washington Sundar': 5.96,
  'CH Gayle': 6.0,
  'J Yadav': 6.14},
 2019: {'S Dube': 4.8,
  'AS Roy': 5.5,
  'SK Raina': 6.0,
  'STR Binny': 6.29,
  'RA Jadeja': 6.35},
 2018: {'IS Sodhi': 5.87,
  'L Ngidi': 6.0,
  'DJM Short': 6.33,
  'Rashid Khan': 6.74,
  'S Lamichhane': 6.83},
 2017: {'NB Singh': 3.75,
  'Mohammad Nabi': 5.27,
  'R Tewatia': 5.44,
  'Avesh Khan': 5.75,
  'P Negi': 6.12},
 2016: {'N Rana': 3.0,
  'Sachin Baby': 4.8,
  'MR Marsh': 5.0,
  'YK Pathan': 5.5,
  'JW Hastings': 5.55},
 2015: {'RN ten Doeschate': 4.0,
  'J Yadav': 4.14,
  'V Kohli': 5.45,
  'R Ashwin': 5.85,
  'S Nadeem': 6.14},
 2014: {'Ankit Sharma': 5.38,
  'AR Patel': 6.14,
  'R Rampaul': 6.17,
  'SP Narine': 6.43,
  'Harbh

In [21]:
ball2ball['year'].unique()

array([2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012,
       2011, 2010, 2009, 2008])

In [22]:
matches['year'].unique()

KeyError: 'year'

In [38]:
twe = open('../data/player_stats/season_player_strike_rates.pkl','rb')
top_wicket_takers = pickle.load(twe)
top_wicket_takers

{'2007/08': {'BB McCullum': [205.19480519480518, 100.0, 200.0, 50.0],
  'V Sehwag': [300.0,
   218.60465116279067,
   85.71428571428571,
   184.6153846153846,
   169.04761904761904,
   200.0,
   127.77777777777777,
   80.95238095238095,
   0.0,
   0.0,
   182.14285714285714,
   247.3684210526316,
   300.0,
   75.0],
  'LRPL Taylor': [176.9230769230769,
   209.52380952380955,
   147.22222222222223,
   193.33333333333334],
  'YK Pathan': [200.0,
   116.66666666666667,
   210.3448275862069,
   175.0,
   166.66666666666669,
   133.33333333333331,
   33.33333333333333,
   165.85365853658536,
   61.53846153846154,
   260.0,
   240.0,
   120.0,
   50.0,
   214.28571428571428,
   140.0],
  'MEK Hussey': [214.81481481481484, 55.55555555555556, 127.02702702702702]},
 '2009': {'DR Smith': [152.17391304347828,
   240.0,
   165.51724137931035,
   87.5,
   213.0434782608696,
   133.33333333333331,
   146.875,
   50.0],
  'AC Gilchrist': [118.18181818181819,
   157.77777777777777,
   166.666666666666

array([0, 1])