In [64]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/indian-premier-league-ipl-2008-2024/IPL_BallByBall2008_2024(Updated).csv
/kaggle/input/indian-premier-league-ipl-2008-2024/ipl_teams_2024_info.csv
/kaggle/input/indian-premier-league-ipl-2008-2024/Players_Info_2024.csv
/kaggle/input/indian-premier-league-ipl-2008-2024/team_performance_dataset_2008to2024.csv


In [65]:
%%writefile ipl_utility.py
import logging
import yaml
import pandas as pd
import re

################
# File Reading #
################

def read_config_file(filepath):
    with open(filepath, 'r') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            logging.error(exc)

def replacer(string, char):
    pattern = char + '{2,}'
    string = re.sub(pattern, char, string) 
    return string

def col_header_val(df, table_config):
    '''
    Replace whitespaces in the column
    and standardize column names
    '''
    df.columns = df.columns.str.lower()
    df.columns = df.columns.str.replace('[^\w]', '_', regex=True)
    df.columns = list(map(lambda x: x.strip('_'), list(df.columns)))
    df.columns = list(map(lambda x: replacer(x, '_'), list(df.columns)))
    expected_col = list(map(lambda x: x.lower(), table_config['columns']))
    expected_col.sort()
    df.columns = list(map(lambda x: x.lower(), list(df.columns)))
    df = df.reindex(sorted(df.columns), axis=1)
    if len(df.columns) == len(expected_col) and list(expected_col) == list(df.columns):
        print("Column name and column length validation passed")
        return 1
    else:
        print("Column name and column length validation failed")
        mismatched_columns_file = list(set(df.columns).difference(expected_col))
        print("Following File columns are not in the YAML file", mismatched_columns_file)
        missing_YAML_file = list(set(expected_col).difference(df.columns))
        print("Following YAML columns are not in the file uploaded", missing_YAML_file)
        logging.info(f'df columns: {df.columns}')
        logging.info(f'expected columns: {expected_col}')
        return 0


Overwriting ipl_utility.py


In [66]:
%%writefile ipl_ball_by_ball_config.yaml
file_type: csv
dataset_name: IPL_BallByBall2008_2024_(Updated)
file_name: /kaggle/input/indian-premier-league-ipl-2008-2024/IPL_BallByBall2008_2024(Updated)
table_name: ball_by_ball
inbound_delimiter: ","
outbound_delimiter: "|"
skip_leading_rows: 1
columns: 
    - match_id
    - date
    - season
    - batting_team
    - bowling_team
    - innings_no
    - ball_no
    - bowler
    - striker
    - non_striker
    - score_wicket
    - player_out
    - extras
    - wicket_confirmation
    - wicket_type
    - runs_scored
    - score
    - type_of_extras
    - fielders_involved


Overwriting ipl_ball_by_ball_config.yaml


In [67]:
%%writefile players_info_config.yaml
file_type: csv
dataset_name: Players_Info_2024
file_name: /kaggle/input/indian-premier-league-ipl-2008-2024/Players_Info_2024
table_name: players_info
inbound_delimiter: ","
outbound_delimiter: "|"
skip_leading_rows: 1
columns: 
    - player_name
    - team_name
    - player_nationality
    - date_of_birth
    - player_role
    - ipl_debut
    - about
    - batting_style
    - bowling_style
    - player_salary

Overwriting players_info_config.yaml


In [68]:
%%writefile ipl_teams_info_config.yaml
file_type: csv
dataset_name: ipl_teams_2024_info
file_name: /kaggle/input/indian-premier-league-ipl-2008-2024/ipl_teams_2024_info
table_name: teams_info
inbound_delimiter: ","
outbound_delimiter: "|"
skip_leading_rows: 1
columns: 
    - team_full_name
    - team_captain_current
    - team_coach_current
    - team_home_ground_current
    - number_of_ipl_titles
    - team_owners
    - team_about


Overwriting ipl_teams_info_config.yaml


In [69]:
%%writefile team_performance_config.yaml
file_type: csv
dataset_name: team_performance_dataset_2008_to_2024
file_name: /kaggle/input/indian-premier-league-ipl-2008-2024/team_performance_dataset_2008to2024
table_name: team_performance
inbound_delimiter: ","
outbound_delimiter: "|"
skip_leading_rows: 1
columns: 
    - match_id
    - date
    - teams
    - venue
    - toss_winner
    - toss_decision
    - match_winner
    - win_type
    - win_margin
    - first_innings_score
    - umpire
    - powerplay_scores
    - middle_overs_scores
    - death_overs_scores
    - umpire1
    - umpire2
    - player_of_match
    - second_innings_score


Overwriting team_performance_config.yaml


In [73]:
import ipl_utility as util

# Read configuration files again after updating paths
ball_by_ball_config = util.read_config_file("ipl_ball_by_ball_config.yaml")
players_info_config = util.read_config_file("players_info_config.yaml")
teams_info_config = util.read_config_file("ipl_teams_info_config.yaml")
team_performance_config = util.read_config_file("team_performance_config.yaml")

# Function to read and validate data with updated paths in config files 
def read_and_validate_data(config):
    file_type = config['file_type']
    source_file = config['file_name'] + f'.{file_type}'
    df = pd.read_csv(source_file, delimiter=config['inbound_delimiter'])
    if util.col_header_val(df, config) == 0:
        print(f"Validation failed for {config['dataset_name']}")
        return None
    else:
        print(f"Validation passed for {config['dataset_name']}")
        return df

# Read and validate each dataset with updated paths in config files 
ball_by_ball_df = read_and_validate_data(ball_by_ball_config)
players_info_df = read_and_validate_data(players_info_config)
teams_info_df = read_and_validate_data(teams_info_config)
team_performance_df = read_and_validate_data(team_performance_config)

# Inspect the data with updated paths in config files 
print(ball_by_ball_df.head())

Column name and column length validation passed
Validation passed for IPL_BallByBall2008_2024_(Updated)
Column name and column length validation passed
Validation passed for Players_Info_2024
Column name and column length validation passed
Validation passed for ipl_teams_2024_info
Column name and column length validation passed
Validation passed for team_performance_dataset_2008_to_2024
   match_id        date   season           batting_team  \
0    335982  2008-04-18  2007/08  Kolkata Knight Riders   
1    335982  2008-04-18  2007/08  Kolkata Knight Riders   
2    335982  2008-04-18  2007/08  Kolkata Knight Riders   
3    335982  2008-04-18  2007/08  Kolkata Knight Riders   
4    335982  2008-04-18  2007/08  Kolkata Knight Riders   

                  bowling_team  innings_no  ball_no   bowler      striker  \
0  Royal Challengers Bangalore           1      0.1  P Kumar   SC Ganguly   
1  Royal Challengers Bangalore           1      0.2  P Kumar  BB McCullum   
2  Royal Challengers Ban

  df = pd.read_csv(source_file, delimiter=config['inbound_delimiter'])


In [74]:
print(players_info_df.head())

       player_name team_name player_nationality date_of_birth  \
0         MS Dhoni       CSK             Indian      7-Jul-81   
1     Devon Conway       CSK      South African      8-Jul-91   
2  Ruturaj Gaikwad       CSK             Indian     31-Jan-97   
3   Ajinkya Rahane       CSK             Indian      6-Jun-88   
4    Shaik Rasheed       CSK             Indian     24-Sep-04   

           player_role  ipl_debut  \
0  Wicketkeeper Batter     2008.0   
1  Wicketkeeper Batter     2022.0   
2               Batter     2020.0   
3     Top order Batter     2008.0   
4               Batter        NaN   

                                               about   batting_style  \
0  Mahendra Singh Dhoni, popularly known as MS Dh...  Right hand Bat   
1  Sorry, but I couldn't find any information on ...   Left hand Bat   
2  Certainly! Ruturaj Gaikwad is an emerging tale...  Right hand Bat   
3  Ajinkya Rahane is an Indian professional crick...  Right hand Bat   
4  Sure! Shikhar Dhawan is

In [75]:
print(teams_info_df.head())

          team_full_name team_captain_current  team_coach_current  \
0    Chennai Super Kings             MS Dhoni     Stephen Fleming   
1         Delhi Capitals         Rishabh Pant       Ricky Ponting   
2         Gujarat Titans         Shubman Gill        Ashish Nehra   
3  Kolkata Knight Riders         Shreyas Iyer  Chandrakant Pandit   
4   Lucknow Super Giants             KL Rahul       Justin Langer   

                 team_home_ground_current              number_of_ipl_titles  \
0         MA Chidambaram Stadium, Chennai  5 (2010, 2011, 2018, 2021, 2023)   
1         Arun Jaitley Stadium, New Delhi                                 0   
2        Narendra Modi Stadium, Ahmedabad                          1 (2022)   
3                   Eden Gardens, Kolkata                    2 (2012, 2014)   
4  BR SABV Ekana Cricket Stadium, Lucknow                                 0   

                                         team_owners  \
0  Chennai Super Kings Cricket Ltd (subsidiary of...  

In [76]:
print(team_performance_df.head())

   match_id        date                                              teams  \
0    335982  2008-04-18  Royal Challengers Bangalore vs Kolkata Knight ...   
1    335983  2008-04-19             Kings XI Punjab vs Chennai Super Kings   
2    335984  2008-04-19               Delhi Daredevils vs Rajasthan Royals   
3    335985  2008-04-20      Mumbai Indians vs Royal Challengers Bangalore   
4    335986  2008-04-20           Kolkata Knight Riders vs Deccan Chargers   

                                        venue                  toss_winner  \
0                       M Chinnaswamy Stadium  Royal Challengers Bangalore   
1  Punjab Cricket Association Stadium, Mohali          Chennai Super Kings   
2                            Feroz Shah Kotla             Rajasthan Royals   
3                            Wankhede Stadium               Mumbai Indians   
4                                Eden Gardens              Deccan Chargers   

  toss_decision                 match_winner win_type  win_mar