In [None]:
import sys
import os
# Add the directory two levels above the current working directory to the import
# search path; presumably that is where `spider2_utils` lives — TODO confirm.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..", "..")))
from spider2_utils import load_csv_database

-setup-

In [None]:
import pandas as pd
# Load the "f1" CSV database once, then bind the four tables this notebook uses.
# NOTE(review): rows_limit=-1 presumably means "no row limit" — confirm in spider2_utils.
_database = load_csv_database("f1", rows_limit=-1)
results, drivers, races, constructors = (
    _database[table_name]
    for table_name in ('results', 'drivers', 'races', 'constructors')
)

### Question

For each year, which driver and which constructor scored the most points? I want the full name of each driver.

Step 1: Merge `results` with `races`, `drivers`, and `constructors` to get full context per result

In [None]:
# `constructors.name` is exposed as `constructor` before joining so it is easy to
# tell apart from the `name` column contributed by other tables.
constructors_named = constructors.rename(columns={"name": "constructor"})

# Left joins keep every row of `results`, even when a lookup table has no match.
results_with_races = results.merge(races, on='race_id', how='left')
results_with_drivers = results_with_races.merge(drivers, on='driver_id', how='left')
merged = results_with_drivers.merge(constructors_named, on='constructor_id', how='left')

Step 2: Create full driver name as a new column

In [None]:
# Full name is "<forename> <surname>", stored on `merged` as the `driver` column.
first_name, last_name = merged['forename'], merged['surname']
merged['driver'] = first_name + ' ' + last_name

Step 3: Aggregate total points per year, driver, and constructor

In [None]:
# Total points per (year, driver).
# The grouping deliberately leaves `constructor` out: including it would split the
# season total of a driver who raced for more than one constructor in the same year,
# under-counting that driver when looking for the yearly maximum. It also keeps
# `constructor` empty on driver-level rows once they are stacked with the
# constructor-level rows, so the two kinds of row stay distinguishable.
grouped_driver = (
    merged
    .groupby(['year', 'driver'], as_index=False)['points']
    .sum()
)

Step 4: Aggregate total points per year and constructor (driver = None)

In [None]:
# Total points per (year, constructor). `driver` is filled with None so these rows
# share the driver-level table's columns and can be recognised as constructor-level.
grouped_constructor = (
    merged
    .groupby(['year', 'constructor'], as_index=False)
    .agg(points=('points', 'sum'))
    .assign(driver=None)
)

Step 5: Concatenate both aggregations into one table, simulating a SQL `UNION` (no de-duplication is performed, so strictly this is a `UNION ALL`)

In [None]:
# Stack the driver-level rows on top of the constructor-level rows and renumber
# the index from 0 so every row has a unique label.
year_points = pd.concat(
    (grouped_driver, grouped_constructor),
    axis=0,
    ignore_index=True,
)

Step 6: Compute max points per year for drivers and constructors separately

In [None]:
# Row masks over the stacked table, built once and reused for every year group.
has_driver = year_points['driver'].notnull()
has_constructor = year_points['constructor'].notnull()

# Each lambda receives one year's `points`; the full-length mask is matched to that
# group's index labels before filtering, and the maximum of what remains is kept.
max_points = (
    year_points
    .groupby('year')
    .agg(
        max_driver_points=('points', lambda pts: pts[has_driver].max()),
        max_constructor_points=('points', lambda pts: pts[has_constructor].max()),
    )
    .reset_index()
)

Step 7: Join back with year_points to find drivers with max_driver_points

In [None]:
# Keep only the rows that carry a driver name, then attach to each year the driver
# row(s) whose points equal that year's driver maximum. Ties yield one row per driver.
drivers_year_points = year_points.loc[year_points['driver'].notna()]
max_driver = pd.merge(
    max_points,
    drivers_year_points,
    how='left',
    left_on=['year', 'max_driver_points'],
    right_on=['year', 'points'],
)

Step 8: Join back with year_points to find constructors with max_constructor_points

In [None]:
# Constructor-level rows are the ones with a constructor name and *no* driver.
# Filtering on `constructor` alone is not enough: driver-level rows may carry a
# constructor too, and any such row whose points happen to equal the year's
# constructor maximum would join as well and duplicate the output rows.
is_constructor_row = year_points['driver'].isnull() & year_points['constructor'].notnull()
constructors_year_points = year_points[is_constructor_row]

# Attach to each (year, top driver) row the constructor row(s) whose points equal
# that year's constructor maximum. Columns present on both sides get the
# `_driver` / `_constructor` suffixes.
max_full = max_driver.merge(
    constructors_year_points,
    left_on=['year', 'max_constructor_points'],
    right_on=['year', 'points'],
    how='left',
    suffixes=('_driver', '_constructor')
)

Step 9: Select final output columns and sort by year

In [None]:
# `driver_driver` comes from the driver join and `constructor_constructor` from the
# constructor join (the suffixes were added by the previous merge).
output_columns = ['year', 'driver_driver', 'constructor_constructor']
final_result = (
    max_full.loc[:, output_columns]
    .sort_values('year')
    .reset_index(drop=True)
)
final_result