In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [9]:
# Input CSV locations. Judging by the file names: two race-result exports
# (one with DNF rows removed) and the per-stop pit-stop table.
NO_DNF_RESULTS_CSV = r"/home/winter-storm/f1-data-project/erdos_ds_f1/cleaned_data/f1db-races-race-results-NO-DNF.csv"
RESULTS_CSV = r"/home/winter-storm/f1-data-project/erdos_ds_f1/cleaned_data/f1db-races-race-results-CLEANED.csv"
PIT_STOPS_CSV = r"/home/winter-storm/f1-data-project/erdos_ds_f1/data_f1db/f1db-races-pit-stops.csv"

# Load each file into its own frame
no_DNF_results_data_file = pd.read_csv(NO_DNF_RESULTS_CSV)
results_data_file = pd.read_csv(RESULTS_CSV)
pit_stop_data = pd.read_csv(PIT_STOPS_CSV)

# Distinct race ids present in the NO-DNF results
# (the value returned by Series.unique(), not a Python list, despite the name)
race_id_list = no_DNF_results_data_file['raceId'].unique()

In [10]:
# Every (raceId, driverId) pair listed in the results file ...
prevdf = results_data_file[['raceId', 'driverId']].copy()

# ... left-joined with its pit stops, so there is one row per stop and pairs
# without a matching stop are kept with NaN in lap / stop / timeMillis.
df = (
    prevdf
    .merge(
        pit_stop_data[['raceId', 'driverId', 'lap', 'stop', 'timeMillis']].copy(),
        on=['raceId', 'driverId'],
        how='left',
    )
    # Order each driver's rows by race id, then by lap, so that "earlier rows"
    # is well defined for the running means computed afterwards.
    # NOTE(review): this treats ascending raceId as chronological - confirm.
    .sort_values(by=['driverId', 'raceId', 'lap'])
)

# Running mean over the rows that come *before* each row
def avg_prior_values(series):
    """Return, for every position, the mean of the values that precede it.

    The series is shifted down by one so a row never contributes to its own
    average, then an expanding mean is taken. The first position has no
    predecessor and is therefore NaN; NaN values are skipped by the mean.

    The calculation is row-based: it knows nothing about races, so what
    counts as "prior" depends entirely on how the caller ordered the series.
    """
    earlier_values = series.shift()
    return earlier_values.expanding().mean()

# For each driver, attach the running mean of all earlier rows' values.
# Pairs are (new column, source column); columns are added in this order.
# NOTE(review): 'avgStop' averages the raw `stop` values of earlier rows;
# if `stop` is the stop's ordinal within a race, this is not the same as the
# mean number of stops per race - confirm this is the intended feature.
df = df.assign(**{
    target: df.groupby('driverId')[source].transform(avg_prior_values)
    for target, source in (
        ('avgStopLap', 'lap'),
        ('avgStopTime', 'timeMillis'),
        ('avgStop', 'stop'),
    )
})

# Keep only races listed in race_id_list and collapse to a single row per
# (driverId, raceId). drop_duplicates keeps the first row it meets; with df
# sorted by driverId, raceId, lap that is the driver's lowest-lap row in the
# race, whose running means were built only from rows sorted before that race.
# NOTE(review): reset_index runs before sort_values, so the resulting index is
# not in row order - harmless for value lookups, but confirm it is intended.
filtered_df = (
    df.loc[df['raceId'].isin(race_id_list)]
      .drop_duplicates(subset=['driverId', 'raceId'], keep='first')
      .reset_index(drop=True)
      .sort_values(by='raceId')
)

In [6]:
def get_avg_pit_stop(input_race_id_list, pit_features=None):
    """Average the per-driver pit-stop features over all drivers of each race.

    Parameters
    ----------
    input_race_id_list : iterable
        Race ids to report. The output has one row per entry, in the same
        order (repeated ids yield repeated rows).
    pit_features : pandas.DataFrame, optional
        Frame with the columns 'raceId', 'avgStopLap', 'avgStopTime' and
        'avgStop'. Defaults to the notebook-level ``filtered_df`` so existing
        one-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        Columns 'raceId', 'avgDriverLapStop', 'avgDriverPSTime' and
        'avgDriverStops'. Each value is the mean of the matching input column
        over the race's rows, ignoring NaN. A race id with no rows (or only
        NaN values) gets NaN. An empty id list yields an empty frame that
        still carries all four columns.
    """
    if pit_features is None:
        # Backward-compatible default: fall back to the notebook global.
        pit_features = filtered_df

    # input column -> output column, in output order
    feature_names = {
        'avgStopLap': 'avgDriverLapStop',
        'avgStopTime': 'avgDriverPSTime',
        'avgStop': 'avgDriverStops',
    }

    # Naming the target index guarantees reset_index() emits a 'raceId' column
    # regardless of the type of iterable that was passed in.
    race_ids = pd.Index(list(input_race_id_list), name='raceId')

    # One grouped pass instead of re-scanning the whole frame for every race.
    per_race = (
        pit_features.groupby('raceId')[list(feature_names)]
        .mean()
        .rename(columns=feature_names)
    )

    # reindex restores the caller's order and inserts NaN rows for unknown ids.
    return per_race.reindex(race_ids).reset_index()
# Write the per-race averages for every id in race_id_list to CSV, without
# the row index.
# NOTE(review): this targets "Feature Testing Data Files/pits.csv", whereas the
# weighted-sum cell reads "Feature Data Files/pit stop data.csv" - confirm how
# that second file is produced.
PITS_OUTPUT_CSV = '/home/winter-storm/f1-data-project/erdos_ds_f1/Patrick/Feature Testing Data Files/pits.csv'
get_avg_pit_stop(race_id_list).to_csv(PITS_OUTPUT_CSV, index=False)

In [11]:
# Feature columns and their weights, paired by position. The weight vector
# has unit Euclidean length; how it was derived is not shown in this notebook.
columns = ['avgDriverPSTime', 'avgDriverLapStop', 'avgDriverStops']
weights = np.array([-0.24215237,  0.31939236, -0.91616087])  # normalized weights

# The feature file is read, extended with one column, and written back to the
# same path, so re-running this cell simply recomputes that column.
PIT_FEATURES_CSV = "/home/winter-storm/f1-data-project/erdos_ds_f1/Patrick/Feature Data Files/pit stop data.csv"
pits_df = pd.read_csv(PIT_FEATURES_CSV)

# Put the three features on a common scale before combining them
scaler = StandardScaler()
X_standardized = scaler.fit_transform(pits_df[columns])

# Weighted sum of the standardized features: one score per row
pits_df = pits_df.assign(pitsWeightedSum=X_standardized @ weights)

pits_df.to_csv(PIT_FEATURES_CSV, index=False)
