In [1]:
import os
import pandas as pd

In [2]:
# Folder that holds the input CSV files read by the loading cell below.
# The path is relative, so it is resolved against the kernel's current
# working directory rather than the notebook file's location.
DATA_DIR = "../data"

In [3]:
def _read_data_csv(filename):
    """Read one CSV file located in DATA_DIR and return it as a DataFrame."""
    return pd.read_csv(os.path.join(DATA_DIR, filename))


# Per-player feature table (includes the performance score), ball-by-ball
# deliveries, and one row per match.
players_df = _read_data_csv("player_features_with_score.csv")
deliveries_df = _read_data_csv("deliveries.csv")
matches_df = _read_data_csv("matches.csv")

# Quick sanity check: (rows, columns) of each loaded frame.
tuple(frame.shape for frame in (players_df, deliveries_df, matches_df))

((732, 11), (260920, 17), (1095, 20))

In [4]:
# Distinct, non-null season labels in ascending order.
available_seasons = list(matches_df['season'].dropna().unique())
available_seasons.sort()
available_seasons

['2007/08',
 '2009',
 '2009/10',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019',
 '2020/21',
 '2021',
 '2022',
 '2023',
 '2024']

In [5]:
# available_seasons is sorted ascending, so the last entry is the greatest
# season label (the labels are strings, so the ordering is lexicographic).
# The match filter further down selects rows equal to this value.
SELECTED_SEASON = available_seasons[-1]
SELECTED_SEASON

'2024'

In [6]:
# Keep only the matches whose season label equals the selected season,
# then collect their distinct match ids.
is_selected_season = matches_df['season'].eq(SELECTED_SEASON)
season_matches = matches_df.loc[is_selected_season]
season_match_ids = season_matches['id'].unique()

len(season_match_ids)

71

In [8]:
# Restrict the ball-by-ball data to deliveries from the selected season's matches.
in_selected_season = deliveries_df['match_id'].isin(season_match_ids)
season_deliveries = deliveries_df.loc[in_selected_season]

season_deliveries.shape

(17103, 17)

In [9]:
# Distinct (batter, batting_team) pairs seen in the selected season,
# relabelled to the generic column names 'player' and 'team'.
# NOTE(review): only the 'batter' column is used, so a player who never
# faced a delivery this season gets no team and is dropped by the later
# inner merge — confirm whether such players should be included.
batter_team_pairs = season_deliveries[['batter', 'batting_team']].drop_duplicates()
player_team_map = batter_team_pairs.rename(
    columns={'batter': 'player', 'batting_team': 'team'}
)

# Teams that appear in the mapping, in sorted order.
sorted(player_team_map['team'].unique())

['Chennai Super Kings',
 'Delhi Capitals',
 'Gujarat Titans',
 'Kolkata Knight Riders',
 'Lucknow Super Giants',
 'Mumbai Indians',
 'Punjab Kings',
 'Rajasthan Royals',
 'Royal Challengers Bengaluru',
 'Sunrisers Hyderabad']

In [10]:
# Attach the season team to each player's feature row. The inner join keeps
# only players present in both frames, i.e. those with a team in the mapping.
players_season_df = pd.merge(
    players_df,
    player_team_map,
    how='inner',
    on='player',
)

players_season_df.loc[:, ['player', 'team']].head()

Unnamed: 0,player,team
0,A Badoni,Lucknow Super Giants
1,A Kamboj,Mumbai Indians
2,A Manohar,Gujarat Titans
3,A Nortje,Delhi Capitals
4,A Raghuvanshi,Kolkata Knight Riders


In [11]:
# Distinct player names available for the selected season, in sorted order.
all_players = list(players_season_df['player'].unique())
all_players.sort()
len(all_players)

171

In [15]:
# The two players to compare, chosen by position in the sorted all_players
# list; change the indices to compare a different pair.
# NOTE(review): raises IndexError when all_players has fewer than four entries.
PLAYER_1 = all_players[0]
PLAYER_2 = all_players[3]

PLAYER_1, PLAYER_2

('A Badoni', 'A Nortje')

In [16]:
def _first_row_for(player_name):
    """Return the first row of players_season_df whose 'player' equals player_name.

    Raises IndexError (from .iloc[0]) when no row matches.
    """
    matching_rows = players_season_df.loc[
        players_season_df['player'] == player_name
    ]
    return matching_rows.iloc[0]


# One feature row (a Series) per selected player.
p1 = _first_row_for(PLAYER_1)
p2 = _first_row_for(PLAYER_2)

In [17]:
# Row labels of the side-by-side table, in display order.
metric_labels = [
    "Team",
    "Performance Score",
    "Runs",
    "Wickets",
    "Matches Played",
]


def _metric_values(player_row):
    """Return one player's values in the same order as metric_labels."""
    # NOTE(review): the 'runs' and 'wickets' values shown for these players
    # are fractions (e.g. 0.079), so they look like scaled features rather
    # than raw counts — confirm that the "Runs"/"Wickets" labels are intended.
    return [
        player_row['team'],
        round(player_row['performance_score'], 3),
        player_row['runs'],
        player_row['wickets'],
        player_row['matches'],
    ]


# One column of labels plus one column per selected player.
comparison_df = pd.DataFrame({
    "Metric": metric_labels,
    PLAYER_1: _metric_values(p1),
    PLAYER_2: _metric_values(p2),
})

comparison_df

Unnamed: 0,Metric,A Badoni,A Nortje
0,Team,Lucknow Super Giants,Delhi Capitals
1,Performance Score,0.132,0.146
2,Runs,0.079112,0.006114
3,Wickets,0.00939,0.29108
4,Matches Played,35.0,13.0


In [18]:
# Pick the player with the strictly higher performance score.
# A plain if/else would hand the win to PLAYER_2 on an exact tie and also
# whenever either score is NaN (every comparison with NaN is False), so
# both situations are reported explicitly instead of naming a winner.
score_1 = p1['performance_score']
score_2 = p2['performance_score']

if pd.isna(score_1) or pd.isna(score_2):
    # At least one score is missing: the players cannot be ranked.
    winner = "Undecided (missing performance score)"
elif score_1 == score_2:
    winner = "Tie"
elif score_1 > score_2:
    winner = PLAYER_1
else:
    winner = PLAYER_2

winner

'A Nortje'