In [None]:
#Code to get the load_csv_database function.
#We pass this function as context so there's no need to include it when we parse the notebook
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..")))
from spider2_utils import load_csv_database

-setup-

In [None]:
import pandas as pd

# Load every table of the IPL database once, then bind the five tables used
# below to module-level names in a single unpacking step.
_database = load_csv_database("IPL", rows_limit=-1)
ball_by_ball, match, batsman_scored, player_match, player = [
    _database[table_name]
    for table_name in ("ball_by_ball", "match", "batsman_scored", "player_match", "player")
]

# Question
Retrieve the names of players who scored no less than 100 runs in a match while playing for the team that lost that match.

# Step 1: Merge ball_by_ball and batsman_scored
Combine the ball_by_ball and batsman_scored tables on match_id, over_id, ball_id, and innings_no.

In [None]:
# Inner-join each delivery with its batsman-run record; the four columns together
# are the join key shared by both tables.
merged_balls = pd.merge(
    ball_by_ball,
    batsman_scored,
    how='inner',
    on=['match_id', 'over_id', 'ball_id', 'innings_no'],
)

# Step 2: Group by Player and Match to Calculate Total Runs
Sum `runs_scored` per striker and match to get the total runs each batsman scored in each match.

In [None]:
# One row per (striker, match_id) holding the sum of runs_scored, exposed as total_runs.
player_runs = (
    merged_balls
    .groupby(['striker', 'match_id'], as_index=False)['runs_scored']
    .sum()
    .rename(columns={'runs_scored': 'total_runs'})
)

# Step 3: Filter Players with 100 or More Runs
Keep only those player-match pairs where the player scored at least 100 runs.

In [None]:
# Keep only the striker/match rows whose total reaches the 100-run threshold.
player_runs_100 = player_runs.loc[player_runs['total_runs'].ge(100)]

# Step 4: Identify Losing Teams for Each Match
Determine the losing team for each match based on the match winner.

In [None]:
# Work out which side lost each match, vectorised instead of a row-wise apply.
#
# A loser is only defined when match_winner equals one of the two teams. Rows where
# match_winner is missing or matches neither side are dropped, so an undecided match
# can no longer be reported as a loss for team_1 (the previous fallback branch did
# exactly that for every row whose winner was not team_1).
_team_1_won = match['match_winner'] == match['team_1']
_team_2_won = match['match_winner'] == match['team_2']

losing_teams = (
    match[['match_id']]
    # team_2 lost when team_1 won; otherwise take team_1 (rows with no valid
    # winner are removed by the filter below, so this default never leaks out).
    .assign(loser=match['team_2'].where(_team_1_won, match['team_1']))
    .loc[_team_1_won | _team_2_won]
)

# Step 5: Merge Player Runs with Losing Teams
Combine the player_runs_100 and losing_teams tables on match_id.

In [None]:
# Attach the losing team of the match to every 100+ run innings (inner join on match_id).
players_and_losers = pd.merge(
    player_runs_100,
    losing_teams,
    how='inner',
    on='match_id',
)

# Step 6: Merge with player_match to Get Only Players on Losing Teams
Join with player_match to ensure the player was on the losing team in that match.

In [None]:
# Keep an innings only if player_match lists that striker, in that match, under the
# team recorded as the loser; the inner join discards every other row.
players_in_losing_teams = pd.merge(
    players_and_losers,
    player_match,
    how='inner',
    left_on=['striker', 'match_id', 'loser'],
    right_on=['player_id', 'match_id', 'team_id'],
)

# Step 7: Merge with player Table to Get Player Names
Join with the player table to get the player names.

In [None]:
# Look up each striker in the player table to pick up player_name.
players_with_names = pd.merge(
    players_in_losing_teams,
    player,
    how='inner',
    left_on='striker',
    right_on='player_id',
)

# Step 8: Select Distinct Player Names and Sort
Get the unique player names who scored 100+ runs in a match for a losing team, sorted alphabetically.

In [None]:
# Final answer: one row per distinct player_name, in alphabetical order, with a
# fresh 0..n-1 index.
result = (
    players_with_names
    .drop_duplicates(subset='player_name')
    .loc[:, ['player_name']]
    .sort_values(by='player_name')
    .reset_index(drop=True)
)
result

In [None]:
# player_name
# A Symonds
# SPD Smith
# SR Tendulkar
# SR Watson
# V Kohli
# WP Saha
# YK Pathan