In [1]:
import pandas as pd

In [2]:
# Load the two inputs: the full race-results table and the "NO-DNF" variant
# (presumably the same results with DNF rows removed, judging by the file name).
# NOTE(review): both paths are absolute and machine-specific.
all_race_results_data = pd.read_csv(
    r"/home/winter-storm/f1-data-project/erdos_ds_f1/data_f1db/f1db-races-race-results.csv"
)
no_DNF_results_data_file = pd.read_csv(
    r"/home/winter-storm/f1-data-project/erdos_ds_f1/cleaned_data/f1db-races-race-results-NO-DNF.csv"
)

# Distinct race ids found in the NO-DNF table
race_id_list = no_DNF_results_data_file['raceId'].unique()

  all_race_results_data = pd.read_csv(r"/home/winter-storm/f1-data-project/erdos_ds_f1/data_f1db/f1db-races-race-results.csv")


In [3]:
# Builds `final_df`: for every (raceId, constructorId) the sum of the constructor's
# per-race average points over the EARLIER races of the same season.
# Output columns: raceId, constructorId, cumConstAvgPoints.

# Step 0: Ensure data is sorted properly
all_race_results_data = all_race_results_data.sort_values(by=['year', 'raceId'])

# Step 1: Calculate constAvgPointsEarned for each constructor per race.
# First total the points of every (race, constructor, driver) combination ...
driver_points = (
    all_race_results_data
    .groupby(['raceId', 'constructorId', 'driverId'])['points']
    .sum()
    .reset_index()
)

# ... then average those driver totals within each (race, constructor).
constructor_avg_points = (
    driver_points
    .groupby(['raceId', 'constructorId'])['points']
    .mean()
    .reset_index()
    .rename(columns={'points': 'constAvgPointsEarned'})
)

# Add year to this dataframe so the running totals restart every season
race_years = all_race_results_data[['raceId', 'year']].drop_duplicates()
constructor_avg_points = constructor_avg_points.merge(race_years, on='raceId', how='left')

# Step 2: Compute cumulative sum of constAvgPointsEarned up to each race (excluding current race).
# Done with grouped operations instead of a Python loop over races: within each
# (year, constructor) group, ordered by raceId, shift the points down by one race and
# accumulate. As in the loop version, "earlier" means a smaller raceId in the same year.
season_keys = ['year', 'constructorId']
season_ordered = constructor_avg_points.sort_values(by=season_keys + ['raceId'])

# The shift excludes the current race; a constructor's first race of a season gets NaN
# because it has no earlier race to draw from.
season_ordered = season_ordered.assign(
    priorRacePoints=season_ordered.groupby(season_keys)['constAvgPointsEarned'].shift(1)
)

# The grouped cumulative sum skips NaN, so only that first race keeps a NaN total.
season_ordered = season_ordered.assign(
    cumConstAvgPoints=season_ordered.groupby(season_keys)['priorRacePoints'].cumsum()
)

# Step 3: Keep only rows that have at least one earlier same-season race, then sort.
# When no such row exists this yields an empty frame with the three expected columns
# rather than failing while concatenating an empty list.
final_df = (
    season_ordered
    .dropna(subset=['cumConstAvgPoints'])
    [['raceId', 'constructorId', 'cumConstAvgPoints']]
    .sort_values(by=['raceId', 'constructorId'])
    .reset_index(drop=True)
)

In [4]:
# Write function to generate dataframe with Total Cumulative Constructor Average Points Earned (TCCAPE)

def get_total_cum_const_avg_points(input_race_id_list, cum_points_df=None):
    """Return the Total Cumulative Constructor Average Points Earned (TCCAPE) per race.

    For each requested race id, TCCAPE is the sum of ``cumConstAvgPoints`` over all
    rows of ``cum_points_df`` that belong to that race.

    Parameters
    ----------
    input_race_id_list : iterable
        Race ids to report. Order and duplicates are preserved in the output.
    cum_points_df : pandas.DataFrame, optional
        Frame with at least the columns ``raceId`` and ``cumConstAvgPoints``.
        Defaults to the notebook-level ``final_df`` so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per requested race id with columns ``raceId`` and ``TCCAPE``.
        Race ids without any matching row get a TCCAPE of 0. The two columns are
        present even when ``input_race_id_list`` is empty.
    """
    if cum_points_df is None:
        # Backward-compatible default: use the frame built earlier in the notebook.
        cum_points_df = final_df

    race_ids = list(input_race_id_list)

    # Aggregate once per race instead of re-filtering the whole frame for every id.
    totals_by_race = cum_points_df.groupby('raceId')['cumConstAvgPoints'].sum()

    # reindex keeps the requested order/duplicates and fills unknown races with 0.
    requested_totals = totals_by_race.reindex(race_ids, fill_value=0)

    return pd.DataFrame({'raceId': race_ids, 'TCCAPE': requested_totals.to_numpy()})
# Compute TCCAPE for every race id in race_id_list and save it as a CSV file
# (the DataFrame index is not written).
get_total_cum_const_avg_points(race_id_list).to_csv(
    path_or_buf='/home/winter-storm/f1-data-project/erdos_ds_f1/Patrick/Feature Data Files/TCCAPE.csv',
    index=False,
)