# In [5]:
import pandas as pd

# --- Load Master Tables ---
# Read the cleaned master tables for race results, qualifying and pit stops.
# pit_stops is loaded here but is not referenced by the visible
# feature-engineering statements in this cell.
race_results, qualifying, pit_stops = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Cleaned_Dataset/master_race_results.csv',
        'Cleaned_Dataset/master_qualifying.csv',
        'Cleaned_Dataset/master_pit_stops.csv',
    )
)

# --- Driver Consistency Features ---
# Per-driver summaries of 'positionOrder' across every row of race_results:
# the mean is the typical finishing order, the standard deviation is how much
# it varies (NaN for a driver with a single row).
finishing_order_by_driver = race_results.groupby('driverId')['positionOrder']
driver_avg_pos = finishing_order_by_driver.mean().reset_index(name='avg_finishing_position')
driver_std_pos = finishing_order_by_driver.std().reset_index(name='std_finishing_position')

# Alias 'position' as 'qualifying_position' on the qualifying table itself
# (assumes 'position' holds the qualifying result — TODO confirm), then
# average it per driver.
qualifying['qualifying_position'] = qualifying['position']
driver_avg_qual = (
    qualifying.groupby('driverId')['qualifying_position']
    .mean()
    .reset_index(name='avg_qualifying_position')
)

# --- Team Strength Features ---
# Two-step aggregation: total the points within each (constructorId, year)
# pair, then average those yearly totals per constructor.
points_per_constructor_year = race_results.groupby(['constructorId', 'year'])['points'].sum()
constructor_points = (
    points_per_constructor_year.groupby(level='constructorId')
    .mean()
    .reset_index(name='avg_constructor_points')
)

# Share of each constructor's rows in race_results whose 'status' text contains
# 'Did not finish' (case-insensitive).
# NOTE(review): confirm the cleaned dataset really uses that phrase in 'status';
# if it stores specific retirement reasons instead, nothing matches and every
# rate will be 0.
# na=False counts a missing status as "not a DNF", which keeps the mask strictly
# boolean so it can be used for row selection.
dnf_mask = race_results['status'].str.contains('Did not finish', case=False, na=False)
constructor_dnf = race_results[dnf_mask]

entries_per_constructor = race_results.groupby('constructorId').size()
# Constructors without any DNF row are absent from the DNF counts; reindex them
# in as 0 so their rate is 0.0 instead of NaN after the division.
dnfs_per_constructor = (
    constructor_dnf.groupby('constructorId')
    .size()
    .reindex(entries_per_constructor.index, fill_value=0)
)
constructor_dnf_rate = (dnfs_per_constructor / entries_per_constructor).reset_index(
    name='constructor_dnf_rate'
)

# --- Track Complexity Features ---
# For every (raceId, circuitId) pair take the standard deviation of
# 'positionOrder', then average those per-race values for each circuit.
# Despite the column name, this measures the spread of finishing order within
# a race; no start-versus-finish difference is computed here.
finishing_spread_per_race = race_results.groupby(['raceId', 'circuitId'])['positionOrder'].std()
position_changes = (
    finishing_spread_per_race.groupby(level='circuitId')
    .mean()
    .reset_index(name='avg_position_change')
)

# --- Merge Features ---
# Start from the identifier columns plus the finishing order of each result
# row, then attach every lookup table on its own key. Left joins keep all
# result rows; a key with no match in a lookup table gets NaN for that feature.
engineered_features = (
    race_results[['raceId', 'driverId', 'constructorId', 'circuitId', 'positionOrder']]
    .merge(driver_avg_pos, how='left', on='driverId')
    .merge(driver_std_pos, how='left', on='driverId')
    .merge(driver_avg_qual, how='left', on='driverId')
    .merge(constructor_points, how='left', on='constructorId')
    .merge(constructor_dnf_rate, how='left', on='constructorId')
    .merge(position_changes, how='left', on='circuitId')
)

# --- Save Engineered Features ---
# Write the combined table without the positional index column.
engineered_features.to_csv('Cleaned_Dataset/engineered_features.csv', index=False)

  # NOTE(review): this repeats the master_race_results load from the top of the
  # cell and is indented with no enclosing block in view — presumably leftover
  # notebook output or a paste artifact; confirm whether it should be removed.
  race_results = pd.read_csv('Cleaned_Dataset/master_race_results.csv')
