In [None]:
# Make the load_csv_database helper importable.
# This function is passed to the notebook parser as context, so its definition
# does not need to be included in the notebook itself.
import sys
import os
# Add the directory two levels above the current working directory to the
# module search path (presumably where spider2_utils lives - confirm).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..")))
from spider2_utils import load_csv_database

-setup-

In [None]:
import pandas as pd

# Load the EU_soccer tables once, then bind the four tables this analysis uses.
# NOTE(review): rows_limit=-1 presumably means "no row limit" - confirm against
# load_csv_database.
_database = load_csv_database("EU_soccer", rows_limit=-1)
matches, leagues, teams, players = (
    _database[table] for table in ("Match", "League", "Team", "Player")
)

## Question

In each league, considering all seasons, which single team has the fewest total match wins based on comparing home and away goals, including teams with zero wins, ensuring that if multiple teams tie for the fewest wins, only one team is returned for each league?

Step 1: User Intent: Build the base match_view by joining matches, leagues, and team names

In [None]:
# Attach the league name to every match. The league table's own 'id' would
# collide with the match 'id', so it is suffixed to 'id_league' during the
# merge and dropped straight afterwards.
league_names = leagues[['id', 'name']]
mv = (
    matches
    .merge(league_names, left_on='league_id', right_on='id', suffixes=('', '_league'))
    .drop(columns=['id_league'])
    .rename(columns={'name': 'league'})
)

# Attach the long team name for the home side, then for the away side.
# Both merges use the default inner join, as in a plain SQL JOIN.
team_names = teams[['team_api_id', 'team_long_name']]
for side in ('home', 'away'):
    mv = (
        mv.merge(team_names, left_on=f'{side}_team_api_id', right_on='team_api_id')
        .rename(columns={'team_long_name': f'{side}_team'})
        .drop(columns=['team_api_id'])
    )

Step 2: User Intent: Attach player names for all home and away positions

In [None]:
# Map every lineup-slot column (which holds a player_api_id) to the column that
# will hold that player's name, e.g. 'home_player_1' -> 'home_1'.
# Order is home 1..11 followed by away 1..11.
pos_cols = {
    f'{side}_player_{i}': f'{side}_{i}'
    for side in ('home', 'away')
    for i in range(1, 12)
}

# Single id -> name lookup shared by all 22 slots. Repeated ids are dropped so
# the lookup index is unique; a lookup can then never multiply match rows,
# which would otherwise inflate the win counts computed from mv later on.
player_names = (
    players.drop_duplicates(subset='player_api_id')
    .set_index('player_api_id')['player_name']
)

# Series.map keeps mv's row count and order; ids with no matching player
# (including missing ids) become NaN, like the unmatched side of a left join.
# assign() overwrites existing columns, so re-running this cell is harmless.
mv = mv.assign(**{
    name_col: mv[id_col].map(player_names)
    for id_col, name_col in pos_cols.items()
})

Step 3: User Intent: Create match_score by listing each team's result per match

In [None]:
# A side wins a match only when it scores strictly more goals than the
# opponent, so draws and losses are both recorded as 0.
home_won = mv['home_team_goal'] > mv['away_team_goal']
away_won = mv['away_team_goal'] > mv['home_team_goal']

# One row per match from the home team's point of view
home = pd.DataFrame({
    'id': mv['id'],
    'team': mv['home_team'],
    'winning_match': home_won.astype(int),
})

# One row per match from the away team's point of view
away = pd.DataFrame({
    'id': mv['id'],
    'team': mv['away_team'],
    'winning_match': away_won.astype(int),
})

# Stack both views: every match contributes two rows (home rows first)
match_score = pd.concat([home, away], ignore_index=True)

Step 4: User Intent: Compute total wins per team and rank within each league

In [None]:
# Bring the league of each match back onto the per-team result rows
league_by_match = mv[['id', 'league']]
ms = pd.merge(match_score, league_by_match, on='id')

# Total wins per (league, team); teams that never won still appear with 0
win_counts = (
    ms.groupby(['league', 'team'], as_index=False)['winning_match']
    .sum()
    .rename(columns={'winning_match': 'wins'})
)

# Rank teams inside each league by wins, ascending (1 = fewest wins).
# method='first' gives tied teams distinct ranks in their row order within
# win_counts, so exactly one team per league ends up with rank 1.
wins_by_league = win_counts.groupby('league')['wins']
win_counts['rank'] = wins_by_league.rank(method='first', ascending=True).astype(int)

Step 5: User Intent: Select the team with the least wins in each league

In [None]:
# Keep only the rank-1 (fewest wins) team of every league, ordered by league
is_bottom = win_counts['rank'] == 1
least_wins = win_counts.loc[is_bottom, ['league', 'team']].sort_values('league')
least_wins