In [27]:
import pandas as pd
import os

# Paths
RAW_DIR = 'D:/F1_Proj/data/raw'
PROCESSED_DIR = 'D:/F1_Proj/data/processed'
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Load datasets
results = pd.read_csv(f'{RAW_DIR}/results.csv')
races = pd.read_csv(f'{RAW_DIR}/races.csv')
drivers = pd.read_csv(f'{RAW_DIR}/drivers.csv')
constructors = pd.read_csv(f'{RAW_DIR}/constructors.csv')
circuits = pd.read_csv(f'{RAW_DIR}/circuits.csv')


# Merge results with race data to get year and round info
merged = results.merge(races[['raceId', 'year', 'round', 'circuitId', 'name']], on='raceId', how='left')
merged = merged.rename(columns={'name': 'race_name'})

merged = merged.merge(circuits[['circuitId', 'name']], on='circuitId', how='left')
merged = merged.rename(columns={'name': 'circuit_name'})
# Merge with driver info only if not already present
driver_cols = ['forename', 'surname', 'nationality']
missing_driver_cols = [col for col in driver_cols if col not in merged.columns]

if missing_driver_cols:
    merged = merged.merge(
        drivers[['driverId'] + missing_driver_cols],
        on='driverId',
        how='left'
    )

# Merge with constructor info, rename team
merged = merged.merge(
    constructors[['constructorId', 'name']],
    on='constructorId',
    how='left'
).rename(columns={'name': 'team'})

# Add full name
merged['driver_name'] = merged['forename'] + ' ' + merged['surname']

# Build target label
merged['top_10_finish'] = merged['positionOrder'].apply(lambda x: 1 if x <= 10 else 0)

# Select useful columns for ML
selected = merged[[
    'raceId', 'driverId', 'constructorId', 'year', 'round', 'circuitId',
    'grid', 'positionOrder', 'points', 'fastestLap', 'fastestLapSpeed',
    'driver_name', 'team', 'nationality', 'top_10_finish'
]]

# Save cleaned dataset
selected.to_csv(f'{PROCESSED_DIR}/base_results.csv', index=False)
print("✅ Merged and labeled dataset saved to: base_results.csv")


✅ Merged and labeled dataset saved to: base_results.csv


In [None]:
import pandas as pd

df=pd.read_csv('D:/F1_Proj/data/processed/base_results.csv')
circuits = pd.read_csv(f'{RAW_DIR}/circuits.csv')

# Merge with circuit info
merged = merged.merge(circuits[['circuitId', 'name']], on='circuitId', how='left')
merged = merged.rename(columns={'name': 'circuit_name'})

results=df[(df['year']==2024) & (df['round']==15)]
top10=results[results['positionOrder']<=10]
if not top10.empty:
    circuit = top10['circuit_name'].iloc[0]
    race = top10['race_name'].iloc[0]
    print(f"\n🏁 {race} at {circuit} - Top 10 Results:\n")
    print(top10[['positionOrder', 'driver_name', 'team']].sort_values(by='positionOrder'))
else:
    print("😢 No data found for that round.")



KeyError: 'circuit_name'