In [2]:
import pandas as pd 
import numpy as np
import zipfile
import os
import json
from tqdm import tqdm

In [63]:
def convert_over_to_df(over_data):
    """Flatten the deliveries of a single over into a DataFrame.

    Parameters
    ----------
    over_data
        Per-delivery records for one over (expected to be a list of dicts).
        Every record needs a ``'runs'`` mapping; ``'extras'`` (dict) and
        ``'wickets'`` (list of dicts) are optional.

    Returns
    -------
    pandas.DataFrame
        The original columns (minus ``'wickets'``) followed by
        ``runs_by_bat``, ``extra_runs``, ``total``, ``delivery``,
        ``extra_type``, ``wicket_type``, ``player_out`` and ``fielder``.
        The nested ``'runs'`` column is kept.
    """
    frame = pd.DataFrame(over_data)

    # Pull the three counters out of the nested 'runs' mapping.
    for column, key in (('runs_by_bat', 'batter'),
                        ('extra_runs', 'extras'),
                        ('total', 'total')):
        frame[column] = frame['runs'].apply(lambda runs, key=key: runs.get(key))

    # 1-based position of the delivery inside this over.
    frame['delivery'] = np.arange(1, len(frame) + 1)

    def extra_label(extras):
        # Concatenate the extras keys (e.g. 'wides'); non-dict cells are missing.
        if type(extras) == dict:
            return "".join(list(extras.keys()))
        return np.nan

    frame['extra_type'] = (
        frame['extras'].apply(extra_label) if 'extras' in frame.columns else np.nan
    )

    # No wicket fell in this over: the three wicket columns are simply empty.
    if 'wickets' not in frame.columns:
        frame['wicket_type'] = np.nan
        frame['player_out'] = np.nan
        frame['fielder'] = np.nan
        return frame

    def first_wicket_field(key):
        # Build a reader for `key` of the first wicket recorded on a delivery.
        def read(wickets):
            if type(wickets) == list:
                return wickets[0].get(key)
            return np.nan
        return read

    def fielder_names(wickets):
        # ';'-joined names of the fielders on the first wicket; unnamed
        # fielder entries are skipped.
        if not isinstance(wickets, list):
            return np.nan
        fielders = wickets[0].get('fielders')
        if not fielders:
            return np.nan
        names = [name
                 for name in (fielder.get('name') for fielder in fielders)
                 if name is not None]
        return ';'.join(names) if names else np.nan

    wickets_column = frame['wickets']
    frame['wicket_type'] = wickets_column.apply(first_wicket_field('kind'))
    frame['player_out'] = wickets_column.apply(first_wicket_field('player_out'))
    frame['fielder'] = wickets_column.apply(fielder_names)
    frame.drop(columns=['wickets'], inplace=True)

    return frame

In [64]:
def complete_team_df(team_overs):
    """Stack every over of one innings into a single delivery-level DataFrame.

    Parameters
    ----------
    team_overs
        Iterable of over records, each providing ``'deliveries'`` (passed to
        ``convert_over_to_df``) and ``'over'`` (stored in an ``over`` column).

    Returns
    -------
    pandas.DataFrame
        All overs concatenated in input order with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        Raised by ``pandas.concat`` when ``team_overs`` is empty.
    """
    per_over_frames = [
        convert_over_to_df(entry['deliveries']).assign(over=entry['over'])
        for entry in team_overs
    ]
    return pd.concat(per_over_frames, ignore_index=True)

In [65]:
def _match_info_record(info, match_format, match_id):
    """Flatten the ``info`` section of a match file into one flat dict (one CSV row)."""
    teams = info['teams']
    outcome = info['outcome']

    if 'event' in info:
        event = info['event']['name']
    else:
        # No event recorded: synthesise a tour name for the known formats.
        # Any other format gets an explicit None so the column always exists.
        series = {
            'Test': 'one-off test',
            'ODI': 'ODI Series',
            'T20': 'T20I Series',
        }.get(match_format)
        event = f"{teams[1]} tour of {teams[0]} {series}" if series else None

    record = {
        'match_id': match_id,
        'date': info['dates'][0],
        'event': event,
        'team_type': info['team_type'],
        'gender': info['gender'],
        'venue': info['venue'],
        'team_1': teams[0],
        'team_2': teams[1],
        'toss_winner': info['toss']['winner'],
        'toss_decision': info['toss']['decision'],
        'match_winner': outcome.get('winner', 'No winners'),
        'MOM': info['player_of_match'][0] if 'player_of_match' in info else 'None',
        # Defaults keep the schema identical for ties / draws / no-results.
        'victory_type': None,
        'margin': None,
        'by_innings': None,
    }

    by = outcome.get('by', {})
    if 'runs' in by:
        record['victory_type'] = 'runs'
        record['margin'] = by['runs']
        record['by_innings'] = 'innings' in by
    elif 'wickets' in by:
        record['victory_type'] = 'wickets'
        record['margin'] = by['wickets']
        record['by_innings'] = False

    return record


def _innings_deliveries(inning, inning_number, teams, match_id):
    """Build the delivery-level frame for one innings and add the derived columns."""
    # Dismissal kinds credited to the bowler. Spellings follow the wicket
    # kinds used elsewhere in this notebook ('stumped', 'run out', ...).
    bowler_kinds = ['stumped', 'caught', 'bowled', 'caught and bowled', 'lbw', 'hit wicket']

    df = complete_team_df(inning['overs'])
    df['match_id'] = match_id
    df['extra_type'] = df['extra_type'].fillna('-')
    df['wicket_type'] = df['wicket_type'].fillna(0)
    df['batting_team'] = inning['team']
    # The bowling side is whichever listed team is not batting.
    df['bowling_team'] = next((team for team in teams if team != inning['team']), None)
    df['inning'] = inning_number
    df['bowler_wicket'] = df['wicket_type'].isin(bowler_kinds).astype('int64')
    df['fielder_wicket'] = df['wicket_type'].isin(['run out']).astype('int64')
    # Every row counts as a ball faced here; no adjustment for wides or
    # no-balls is made in this function.
    df['ball_faced'] = 1
    return df.drop(columns=['review'], errors='ignore')


def json_to_csv(match_format, match_file, output_file=False):
    """Parse one match JSON file into delivery-level and match-level DataFrames.

    Parameters
    ----------
    match_format : str
        ``'Test'``, ``'ODI'`` or ``'T20'`` select the wording of the synthetic
        event name used when the file has no ``info['event']``; any other
        value leaves ``event`` as ``None`` in that case.
    match_file : str
        Path to the JSON file. Its base name without extension becomes
        ``match_id``.
    output_file : bool, default False
        When truthy, also writes ``<match_id>_ball_by_ball.csv`` and
        ``<match_id>_match_info.csv`` to the current working directory.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(deliveries, match_info)``. ``deliveries`` is an empty DataFrame
        when the file holds no innings (or no innings with overs);
        ``match_info`` always has exactly one row.

    Raises
    ------
    KeyError
        If a required field of the ``info`` section is missing.
    """
    # JSON text is UTF-8; the platform default encoding can garble
    # non-ASCII names on some systems.
    with open(match_file, 'r', encoding='utf-8') as f:
        match = json.load(f)

    info = match['info']
    match_id = os.path.splitext(os.path.basename(match_file))[0]
    match_info = pd.DataFrame([_match_info_record(info, match_format, match_id)])

    innings = match.get('innings') or []
    if not innings:
        print('No innings data found')
        # Same types as the normal path so callers can concatenate blindly.
        return pd.DataFrame(), match_info

    # Collect per-innings frames and concatenate once (no quadratic growth).
    # Innings without any overs are skipped but keep their numbering.
    frames = [
        _innings_deliveries(inning, number, info['teams'], match_id)
        for number, inning in enumerate(innings, start=1)
        if inning.get('overs')
    ]
    all_innings_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    if output_file:
        all_innings_df.to_csv(f"{match_id}_ball_by_ball.csv", index=False)
        match_info.to_csv(f"{match_id}_match_info.csv", index=False)

    return all_innings_df, match_info

In [66]:
def full_data_creator(match_type):
    """Extract one format's JSON archive and write its two summary CSV files.

    The archive ``Datasets/<match_type>s_json.zip`` (``Datasets/ipl_json.zip``
    for ``'ipl'``) is extracted under ``Datasets/extracted/``. Every ``.json``
    file in the extraction folder is parsed with ``json_to_csv``. Only matches
    whose ``gender`` is ``'male'`` are kept. Two files are written to
    ``Datasets/<Format>/``:

    * ``<match_type>_matches.csv`` - one row per match, sorted by ``date``.
    * ``<match_type>_matches_deliveries.csv`` - one row per delivery, sorted
      by ``match_id``.

    Parameters
    ----------
    match_type : str
        One of ``'test'``, ``'odi'``, ``'t20'``, ``'ipl'``.

    Raises
    ------
    ValueError
        If ``match_type`` is not one of the supported keys.
    FileNotFoundError
        If the archive is missing or contains no ``.json`` files.
    """
    format_names = {'test': 'Test', 'odi': 'ODI', 't20': 'T20', 'ipl': 'IPL'}
    if match_type not in format_names:
        # Fail before touching the file system.
        raise ValueError(
            f"Unknown match_type {match_type!r}; expected one of {sorted(format_names)}"
        )
    format_name = format_names[match_type]

    # os.path.join keeps the same layout on Windows and works elsewhere too.
    archive_stem = 'ipl_json' if match_type == 'ipl' else f'{match_type}s_json'
    zip_path = os.path.join('Datasets', f'{archive_stem}.zip')
    ext_path = os.path.join('Datasets', 'extracted', archive_stem)
    out_dir = os.path.join('Datasets', format_name)

    with zipfile.ZipFile(zip_path, "r") as zip_file:
        zip_file.extractall(ext_path)

    json_files = [os.path.join(ext_path, f) for f in os.listdir(ext_path) if f.endswith('.json')]
    if not json_files:
        raise FileNotFoundError(f"No .json match files found in {ext_path}")

    # Collect frames and concatenate once instead of re-copying per file.
    delivery_frames = []
    info_frames = []
    with tqdm(json_files, desc="Processing files", unit="file", total=len(json_files)) as pbar:
        for position, json_path in enumerate(pbar, start=1):
            pbar.set_postfix_str(
                f"Current file: {os.path.basename(json_path)} ({position} of {len(json_files)})"
            )
            ball_by_ball, match_info = json_to_csv(format_name, json_path)
            # Skip anything that is not a usable frame (e.g. a match with no innings).
            if isinstance(ball_by_ball, pd.DataFrame) and not ball_by_ball.empty:
                delivery_frames.append(ball_by_ball)
            if isinstance(match_info, pd.DataFrame):
                info_frames.append(match_info)

    if not info_frames:
        raise FileNotFoundError(f"No usable match files found in {ext_path}")

    os.makedirs(out_dir, exist_ok=True)

    # Stable sorts keep the output reproducible and, for deliveries, keep the
    # within-match ball order intact.
    matches_info = pd.concat(info_frames, ignore_index=True).sort_values(['date'], kind='mergesort')
    matches_info = matches_info[matches_info['gender'] == 'male']
    matches_info.to_csv(os.path.join(out_dir, f'{match_type}_matches.csv'), index=False)

    if delivery_frames:
        deliveries_data = pd.concat(delivery_frames, ignore_index=True)
    else:
        deliveries_data = pd.DataFrame(columns=['match_id'])
    deliveries_data = deliveries_data[deliveries_data['match_id'].isin(matches_info['match_id'])]
    deliveries_data = deliveries_data.sort_values(['match_id'], kind='mergesort')
    deliveries_data.to_csv(
        os.path.join(out_dir, f'{match_type}_matches_deliveries.csv'), index=False
    )

In [67]:
def batting_order_creator():
    """Extract the IPL archive and print the ``info`` section of every match file.

    Reads ``Datasets/ipl_json.zip``, extracts it to
    ``Datasets/extracted/ipl_json`` and prints ``info`` for each ``.json``
    file found there. Nothing is computed or returned yet.

    NOTE(review): judging by the name this is an unfinished stub for deriving
    batting orders - confirm before relying on it.
    """
    # os.path.join keeps the same layout on Windows and works elsewhere too.
    zip_path = os.path.join("Datasets", "ipl_json.zip")
    ext_path = os.path.join("Datasets", "extracted", "ipl_json")

    with zipfile.ZipFile(zip_path, "r") as zip_file:
        zip_file.extractall(ext_path)

    json_files = [os.path.join(ext_path, f) for f in os.listdir(ext_path) if f.endswith('.json')]

    for file in json_files:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file, 'r', encoding='utf-8') as f:
            cont = json.load(f)
        print(cont['info'])

In [None]:
# Run the IPL extractor defined above: it unpacks the IPL archive and prints
# the `info` section of every extracted match file.
batting_order_creator()

In [68]:
# full_data_creator('ipl')

In [69]:
# batting_order = []
# for (match_id, inning), group in df.groupby(['match_id', 'inning']):
#     # Initialize a list to keep track of batters in this match and inning
#     batters_in_inning = []
    
#     # Iterate through the deliveries in the group
#     for index, row in group.iterrows():
#         # Add the batter to the list if they are not already in it
#         if row['batter'] not in batters_in_inning:
#             batters_in_inning.append(row['batter'])
        
#         # Check if the player is out
#         if row['player_out'] is not None:
#             # If the player is out, we need to add the next batter
#             if len(batters_in_inning) < 11:  # Ensure we don't exceed 11 players
#                 next_batter = row['non_striker']  # Assuming the non-striker comes in next
#                 if next_batter not in batters_in_inning:
#                     batters_in_inning.append(next_batter)

#     # Assign batting positions
#     for position, batter in enumerate(batters_in_inning, start=1):
#         batting_order.append({'match_id': match_id, 'inning': inning, 'batter': batter, 'batting_position': position})

In [70]:
# df_ipl = pd.read_csv(r'Datasets\IPL\ipl_matches_deliveries.csv')


In [71]:
# df_bat_inning = df_ipl.groupby(by=['match_id', 'batter','inning']).agg(
#     Team=('batting_team', 'first'),
#     Opposition=('bowling_team', 'first'),
#     Runs=('runs_by_bat', 'sum'),
#     BF=('ball_faced', 'sum'),
#     Fours=('runs_by_bat', lambda x: (x == 4).sum()),
#     Sixes=('runs_by_bat', lambda x: (x == 6).sum()),
#     NotOut=('player_out', lambda x: 1 if pd.isna(x.iloc[-1]) else 0),
#     caught=('wicket_type',lambda x: (x == 'caught').count()),
#     bowled=('wicket_type',lambda x: (x == 'bowled').count()),
#     run_out=('wicket_type',lambda x: (x == 'run out').count()),
#     lbw=('wicket_type',lambda x: (x == 'lbw').count()),
#     retired_out=('wicket_type',lambda x: (x == 'retired out').count()),
#     stumped=('wicket_type',lambda x: (x == 'stumped').count()),
#     caught_and_bowled=('wicket_type',lambda x: (x == 'caught and bowled').count()),
#     hit_wicket=('wicket_type',lambda x: (x == 'hit wicket').count()),
#     obstructing_the_field=('wicket_type',lambda x: (x == 'obstructing the field').count())).reset_index().sort_values(by=['match_id','inning'],)

In [72]:
# df_bowl_innings = df_ipl.groupby(['match_id', 'bowler','inning']).agg(
#     Team=('bowling_team', 'first'),
#     Opposition=('batting_team', 'first'),
#     Ball_Count=('bowler_extra_type', lambda x: x.isna().sum()),  # Total balls bowled
#     Runs_Conceded=('total', lambda x: x[df_ipl.loc[x.index, 'batsman_extra_type'].isna()].sum()),
#     Wickets=('player_out', lambda x: x.notna().sum()),  
#     Dot_Balls=('total', lambda x: (x == 0).sum()),
#     Fours=('runs_by_bat', lambda x: (x == 4).sum()), 
#     Sixes=('runs_by_bat', lambda x: (x == 6).sum()), 
# ).reset_index().sort_values(by=['match_id','inning'])

In [73]:
# df_bat_inning.to_csv(r'Datasets\IPL\ipl_bat_innings.csv',index=False)
# df_bowl_innings.to_csv(r'Datasets\IPL\ipl_bowl_innings.csv',index=False)

In [74]:
# df_bowl_innings.to_csv(r'Datasets\IPL\ipl_bowl_innings.csv',index=False)

In [75]:
# formats=['odi','t20','ipl','test']

# for match in formats:
#     full_data_creator(match_type=match)

In [76]:
# df_test_balls = pd.read_csv(r'Datasets\Test\test_matches_deliveries.csv')

In [77]:
# df_test_balls.drop(columns=['extras'],inplace=True)

In [78]:
# df_test_balls = df_test_balls.sort_values(by=['match_id','inning'],ascending=True)

In [79]:
# df_test_balls

In [80]:
# df_test_balls.to_csv(r'Datasets\Test\test_matches_deliveries.csv',index=False)

In [81]:
# df_test_matches=pd.read_csv(r'Datasets\Test\test_matches.csv')

In [82]:
# df_test_matches.drop(columns=['team_type','gender'],inplace=True)

In [83]:
# df_test_matches['margin'] = df_test_matches['margin'].replace([np.inf, -np.inf], 0).fillna(0)

# # Convert to integer
# df_test_matches['margin'] = df_test_matches['margin'].astype(int)

In [84]:
# df_test_matches

In [85]:
# df_test_matches.to_csv(r'Datasets\Test\test_matches.csv')

In [86]:
# df_test_matches_balls=pd.read_csv(r'Datasets\Test\test_matches_deliveries.csv')

In [87]:
# df_test_matches_balls

In [88]:
# df_odi_matches=pd.read_csv(r'Datasets\ODI\odi_matches.csv')

In [89]:
# df_odi_matches

In [90]:
# # Filter rows where the 'event' column starts with 'ICC' or contains 'World'
# filtered_events = df_odi_matches[
#     df_odi_matches['event'].str.startswith('ICC', na=False) |
#     df_odi_matches['event'].str.contains('World', na=False)
# ]

# # Display the unique events
# distinct_filtered_events = filtered_events['event'].unique()
# print(distinct_filtered_events)


In [91]:
# # Dictionary of replacements
# replacement_dict = {
#     "ICC World Cup": "ICC Cricket World Cup",
#     "World Cup": "ICC Cricket World Cup",
#     "ICC World Cricket League Championship":"ICC Cricket World Cup Qualifier",
#     "ICC World Cup Qualifiers":"ICC Cricket World Cup Qualifier",
#     "ICC Cricket World Cup Qualifier (ICC Trophy)":"ICC Cricket World Cup Qualifier",
#     "ICC Men's Cricket World Cup League 2":"ICC Cricket World Cup Qualifier",
#     "ICC Men's Cricket World Cup Super League":"ICC Cricket World Cup Qualifier",
#     "ICC Cricket World Cup Qualifier Play-off":"ICC Cricket World Cup Qualifier"
# }

# # Replace values in the 'event' column
# df_odi_matches['event'] = df_odi_matches['event'].replace(replacement_dict)


In [92]:
# df_odi_matches['margin'] = df_odi_matches['margin'].replace([np.inf, -np.inf], 0).fillna(0)

# # Convert to integer
# df_odi_matches['margin'] = df_odi_matches['margin'].astype(int)

In [93]:
# df_odi_matches.drop(columns=['team_type','gender','by_innings'],inplace=True)

In [94]:
# df_odi_matches.to_csv(r'Datasets\ODI\odi_matches.csv',index=False)

In [95]:
# df_t20_matches_ball = pd.read_csv(r'Datasets\T20\t20_matches_deliveries.csv')
# df_t20_matches = pd.read_csv(r'Datasets\T20\t20_matches.csv')

In [96]:
# df_t20_matches_ball=df_t20_matches_ball.sort_values(by=['match_id','inning','over'],ascending=True)

In [97]:
# df_t20_matches_ball.drop(columns=['extras'],inplace=True)

In [98]:
# df_t20_matches_ball

In [99]:
# df_t20_matches_ball.to_csv(r'Datasets\T20\t20_matches_deliveries.csv',index=False)

In [100]:
# df_t20_matches.drop(columns=['by_innings','team_type','gender'],inplace=True)

In [101]:
# df_t20_matches

In [102]:
# filtered_events = df_t20_matches[
#     df_t20_matches['event'].str.startswith('ICC', na=False) |
#     df_t20_matches['event'].str.contains('World', na=False)
# ]

# # Display the unique events
# distinct_filtered_events = filtered_events['event'].unique()
# print(distinct_filtered_events)

In [103]:
# replacement_dict = {
#     "World T20": "ICC Men's T20 World Cup",
#     "ICC World Twenty20": "ICC Men's T20 World Cup",
#     "ICC Men's T20 World Cup": "ICC Men's T20 World Cup",
#     "ICC World Twenty20 Qualifier":"ICC Men's T20 World Cup Qualifier",
#     "ICC Menâ€™s T20 World Cup Qualifier A":"ICC Men's T20 World Cup Qualifier A"
# }

# df_t20_matches['event'] = df_t20_matches['event'].replace(replacement_dict)


In [104]:
# df_t20_matches['margin'] = df_t20_matches['margin'].replace([np.inf, -np.inf], 0).fillna(0)

# # Convert to integer
# df_t20_matches['margin'] = df_t20_matches['margin'].astype(int)

In [105]:
# df_t20_matches.to_csv(r'Datasets\T20\t20_matches.csv')

In [106]:
# df_ipl = pd.read_csv(r'Datasets\IPL\ipl_matches.csv')
# df_ipl_balls = pd.read_csv(r'Datasets\IPL\ipl_matches_deliveries.csv')

In [107]:
# df_ipl.drop(columns=['event','team_type','gender','by_innings'],inplace=True)

In [108]:
# df_ipl['team_1'].unique()

In [109]:
# replacement_dict = {
#     "Royal Challengers Bangalore":"Royal Challengers Bengaluru",
#     "Kings XI Punjab":"Punjab Kings",
#     "Delhi Daredevils":"Delhi Capitals",
#     "Rising Pune Supergiant":"Rising Pune Supergiants",
#     "Gujarat Lions":"Gujarat Titans",
#     "Deccan Chargers":"Sunrisers Hyderabad"
# }
# df_ipl['team_1'] = df_ipl['team_1'].replace(replacement_dict)
# df_ipl['team_2'] = df_ipl['team_2'].replace(replacement_dict)
# df_ipl['toss_winner']=df_ipl['toss_winner'].replace(replacement_dict)
# df_ipl['match_winner']=	df_ipl['match_winner'].replace(replacement_dict)

In [110]:
# df_ipl['venue'].unique()

In [111]:
# stadium_mapping = {
#     'M Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
#     'M.Chinnaswamy Stadium': 'M Chinnaswamy Stadium',
#     'M Chinnaswamy Stadium, Bengaluru': 'M Chinnaswamy Stadium',
#     'Punjab Cricket Association Stadium, Mohali': 'Punjab Cricket Association Stadium',
#     'Punjab Cricket Association IS Bindra Stadium': 'Punjab Cricket Association Stadium',
#     'Punjab Cricket Association IS Bindra Stadium, Mohali': 'Punjab Cricket Association Stadium',
#     'Punjab Cricket Association IS Bindra Stadium, Mohali, Chandigarh': 'Punjab Cricket Association Stadium',
#     'Feroz Shah Kotla': 'Arun Jaitley Stadium',
#     'Arun Jaitley Stadium': 'Arun Jaitley Stadium',
#     'Arun Jaitley Stadium, Delhi': 'Arun Jaitley Stadium',
#     'Wankhede Stadium': 'Wankhede Stadium',
#     'Wankhede Stadium, Mumbai': 'Wankhede Stadium',
#     'Eden Gardens': 'Eden Gardens',
#     'Eden Gardens, Kolkata': 'Eden Gardens',
#     'Sawai Mansingh Stadium': 'Sawai Mansingh Stadium',
#     'Sawai Mansingh Stadium, Jaipur': 'Sawai Mansingh Stadium',
#     'Rajiv Gandhi International Stadium, Uppal': 'Rajiv Gandhi International Stadium',
#     'Rajiv Gandhi International Stadium': 'Rajiv Gandhi International Stadium',
#     'Rajiv Gandhi International Stadium, Uppal, Hyderabad': 'Rajiv Gandhi International Stadium',
#     'MA Chidambaram Stadium, Chepauk': 'MA Chidambaram Stadium',
#     'MA Chidambaram Stadium': 'MA Chidambaram Stadium',
#     'MA Chidambaram Stadium, Chepauk, Chennai': 'MA Chidambaram Stadium',
#     'Dr DY Patil Sports Academy': 'Dr DY Patil Sports Academy',
#     'Dr DY Patil Sports Academy, Mumbai': 'Dr DY Patil Sports Academy',
#     'Himachal Pradesh Cricket Association Stadium': 'Himachal Pradesh Cricket Association Stadium',
#     'Himachal Pradesh Cricket Association Stadium, Dharamsala': 'Himachal Pradesh Cricket Association Stadium',
#     'Maharashtra Cricket Association Stadium': 'Maharashtra Cricket Association Stadium',
#     'Maharashtra Cricket Association Stadium, Pune': 'Maharashtra Cricket Association Stadium',
#     'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
#     'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam': 'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
#     'Sardar Patel Stadium, Motera': 'Narendra Modi Stadium',
#     'Narendra Modi Stadium, Ahmedabad': 'Narendra Modi Stadium',
#     'Barsapara Cricket Stadium, Guwahati': 'Barsapara Cricket Stadium',
#     'Bharat Ratna Shri Atal Bihari Vajpayee Ekana Cricket Stadium, Lucknow': 'Ekana Cricket Stadium',
#     'Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur': 'Mullanpur Cricket Stadium',
# }


In [112]:
# df_ipl['venue'] = df_ipl['venue'].replace(stadium_mapping)

In [113]:
# df_ipl['margin'] = df_ipl['margin'].replace([np.inf, -np.inf], 0).fillna(0)

# # Convert to integer
# df_ipl['margin'] = df_ipl['margin'].astype(int)

In [114]:
# df_ipl

In [115]:
# df_ipl['date'] = pd.to_datetime(df_ipl['date'])

In [116]:
# df_ipl['season']=df_ipl['date'].dt.year

In [117]:
# df_ipl_balls=df_ipl_balls.sort_values(by=['match_id','inning','over','delivery'])

In [118]:
# df_ipl_balls.drop(columns=['extras'],inplace=True)

In [119]:
# replacement_dict = {
#     "Royal Challengers Bangalore":"Royal Challengers Bengaluru",
#     "Kings XI Punjab":"Punjab Kings",
#     "Delhi Daredevils":"Delhi Capitals",
#     "Rising Pune Supergiant":"Rising Pune Supergiants",
#     "Gujarat Lions":"Gujarat Titans",
#     "Deccan Chargers":"Sunrisers Hyderabad"
# }

# df_ipl_balls['batting_team']=df_ipl_balls['batting_team'].replace(replacement_dict)

In [120]:
# df_ipl_balls.columns

In [121]:
# df_ipl.to_csv(r'Datasets\IPL\ipl_matches.csv',index=False)
# df_ipl_balls.to_csv(r'Datasets\IPL\ipl_matches_deliveries.csv',index=False)

#### Converting IPL ball-by-ball data to innings data

In [122]:
# import pandas as pd

In [123]:
# df_ipl=pd.read_csv(r'Datasets\IPL\ipl_matches.csv')
# df_ipl_balls=pd.read_csv(r'Datasets\IPL\ipl_matches_deliveries.csv')

In [124]:
# df_ipl_balls.columns

In [125]:
# df_ipl_balls.loc[df_ipl_balls['extra_type'].isin(['wides', 'penaltywides','byesnoballs','legbyesnoballs','noballs']), 'ball_faced'] = 0

In [126]:
# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'byes', 'batsman_extra_type'] = 'byes'
# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'legbyes', 'batsman_extra_type'] = 'legbyes'
# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'penalty', 'batsman_extra_type'] = 'penalty'

# df_ipl_balls.loc[df_ipl_balls['extra_type'].isin(['byesnoballs', 'legbyesnoballs']), 'batsman_extra_type'] = 'byes'
# df_ipl_balls.loc[df_ipl_balls['extra_type'].isin(['byesnoballs', 'legbyesnoballs']), 'bowler_extra_type'] = 'noballs'

# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'noballs', 'bowler_extra_type'] = 'noballs'
# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'wides', 'bowler_extra_type'] = 'wides'

# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'penaltywides', 'bowler_extra_type'] = 'wides'
# df_ipl_balls.loc[df_ipl_balls['extra_type'] == 'penaltywides', 'batsman_extra_type'] = 'penalty'


In [127]:
# df_ipl_balls.drop(columns=['extra_type'],inplace=True)

In [128]:
# df_ipl_balls

In [129]:
# df_ipl_balls.to_csv(r'Datasets\IPL\ipl_matches_deliveries.csv',index=False)

In [130]:
# df_all = pd.merge(df_ipl,df_ipl_balls,how='outer',on='match_id')

In [131]:
# df_all.columns

In [132]:
# df_all=df_all[['match_id','date','venue','team_1', 'team_2','batter', 'bowler', 'non_striker', 'runs_by_bat','extra_runs', 'total', 'delivery', 'extra_type', 'wicket_type','player_out', 'fielder', 'over', 'batting_team', 'inning',
#        'bowler_wicket', 'fielder_wicket', 'ball_faced']]

In [133]:
# df_all['bowling_team']=df_all.apply(lambda row: row['team_2'] if row['batting_team']==row['team_1'] else row['team_1'],axis=1)

In [134]:
# df_all.columns

In [135]:
# df_all=df_all[['match_id','date','venue','batter', 'bowler', 'non_striker','runs_by_bat', 'extra_runs', 'total', 'delivery', 'extra_type','wicket_type', 'player_out', 'fielder', 'over', 'batting_team','bowling_team','inning', 'bowler_wicket', 'fielder_wicket', 'ball_faced']]

In [136]:
# df_all

In [137]:
# df_all.to_csv(r'Datasets\IPL\ipl_matches_deliveries.csv',index=False)