In [1]:
import pandas as pd
import os
import re
from glob import glob

In [3]:
# Parameters
# Years whose lap and car-data CSV files are processed below; edit this list to change them.
years = [2018, 2019, 2021, 2022, 2023, 2024]

# Helper function to extract driver number from filename
def extract_driver_number(filename):
    """Return the digits that follow the first ``driver_`` in ``filename``.

    The digits are returned as a string; ``None`` is returned when the name
    contains no ``driver_<digits>`` fragment.
    """
    found = re.search(r'driver_(\d+)', filename)
    if found is None:
        return None
    return found.group(1)

# Process each year: tag every car-data sample with the lap whose time window
# contains it, then write one annotated CSV per year.
for year in years:
    # Load the lap table, keep only the columns used for matching, and convert
    # Time once (the frame is sorted so each driver's laps are in order).
    lap_file = f"monaco_laps_{year}.csv"
    df_lap = pd.read_csv(lap_file)[['DriverNumber', 'LapNumber', 'Time']].copy()
    df_lap['Time'] = pd.to_timedelta(df_lap['Time'])
    df_lap = df_lap.sort_values(['DriverNumber', 'LapNumber'])

    # Collect the per-driver car data files for this year.
    car_files = sorted(glob(f"car_data_{year}_driver_*.csv"))
    car_data_frames = []

    for car_file in car_files:
        driver_number = extract_driver_number(car_file)
        if driver_number is None:
            # The glob wildcard also matches non-numeric suffixes; int(None)
            # would raise, so such files are skipped explicitly.
            print(f"Skipping {car_file}: no driver number in file name")
            continue
        df_driver = pd.read_csv(car_file)
        df_driver['Time'] = pd.to_timedelta(df_driver['Time'])
        df_driver['DriverNumber'] = int(driver_number)
        car_data_frames.append(df_driver)

    if not car_data_frames:
        # pd.concat raises ValueError on an empty list; skip the year instead.
        print(f"No car data files found for {year}. Skipping...")
        continue

    # Combine all car data for the year; LapNumber stays None until matched.
    df_car = pd.concat(car_data_frames, ignore_index=True)
    df_car['LapNumber'] = None
    car_time = df_car['Time']

    # NOTE(review): each lap's 'Time' is used as the START of its window. If
    # the lap table's 'Time' marks the moment a lap was completed, samples are
    # attributed one lap late -- confirm against the lap data's documentation.
    # groupby skips rows with a missing DriverNumber.
    for driver, driver_laps in df_lap.groupby('DriverNumber'):
        driver_laps = driver_laps.sort_values('LapNumber')
        driver_rows = df_car['DriverNumber'] == driver
        lap_numbers = driver_laps['LapNumber'].tolist()
        lap_times = driver_laps['Time'].tolist()

        for idx, (lap_number, lap_start) in enumerate(zip(lap_numbers, lap_times)):
            # Window is [this lap's Time, next lap's Time); the driver's last
            # lap has no upper bound.
            in_lap = driver_rows & (car_time >= lap_start)
            if idx + 1 < len(lap_times):
                in_lap &= car_time < lap_times[idx + 1]
            df_car.loc[in_lap, 'LapNumber'] = lap_number

    # Samples outside every window (or for drivers without laps) are dropped.
    df_car = df_car.dropna(subset=['LapNumber'])

    # Save the annotated file for this year
    df_car.to_csv(f"car_data_with_lap_{year}.csv", index=False)

In [None]:
years = [2018, 2019, 2021, 2022, 2023, 2024]


def aggregate_car_telemetry(df, gear_limit=9, drs_active_codes=(10, 12, 14)):
    """Collapse car-data samples to one row per (DriverNumber, LapNumber).

    Parameters
    ----------
    df : DataFrame with columns DriverNumber, LapNumber, RPM, nGear,
        Throttle, Brake and DRS. It is not modified.
    gear_limit : rows are kept only when ``nGear < gear_limit``.
    drs_active_codes : DRS values counted per lap
        (presumably the codes meaning "DRS open" -- TODO confirm).

    Returns
    -------
    DataFrame with the mean of RPM, nGear and Throttle, the number of samples
    with Brake set, and the number of samples whose DRS value is in
    ``drs_active_codes``, for each driver and lap.
    """
    # Ensure correct dtypes
    typed = df.astype({'Brake': bool, 'LapNumber': int})
    valid = typed[typed['nGear'] < gear_limit]
    drs_codes = list(drs_active_codes)

    return valid.groupby(['DriverNumber', 'LapNumber']).agg({
        'RPM': 'mean',
        'nGear': 'mean',
        'Throttle': 'mean',
        'Brake': lambda x: x.sum(),  # True count
        'DRS': lambda x: x.isin(drs_codes).sum(),
    }).reset_index()


all_dfs = []

for year in years:
    print(f"Processing year {year}...")
    file_path = f"car_data_with_lap_{year}.csv"

    # Guard the read of the INPUT file: that is the file that can be missing.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"File for {year} not found. Skipping...")
        continue

    agg_df = aggregate_car_telemetry(df)

    # Saving as CSV
    output_file = f"aggregated_car_data_{year}.csv"
    agg_df.to_csv(output_file, index=False)

    # Use the in-memory result directly instead of re-reading the CSV that was
    # just written; Year is needed later as a merge key.
    all_dfs.append(agg_df.assign(Year=year))

if all_dfs:
    df_all_years_car = pd.concat(all_dfs, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the expected columns available.
    print("No car data could be aggregated for any year.")
    df_all_years_car = pd.DataFrame(
        columns=['DriverNumber', 'LapNumber', 'RPM', 'nGear', 'Throttle', 'Brake', 'DRS', 'Year']
    )

In [None]:
# Display the per-driver, per-lap car aggregates concatenated across all years.
df_all_years_car

In [None]:
def _read_monaco_laps(season):
    """Read the lap CSV for one season from the working directory."""
    return pd.read_csv(f'monaco_laps_{season}.csv')


# One lap table per processed season.
df_lap_2018 = _read_monaco_laps(2018)
df_lap_2019 = _read_monaco_laps(2019)
df_lap_2021 = _read_monaco_laps(2021)
df_lap_2022 = _read_monaco_laps(2022)
df_lap_2023 = _read_monaco_laps(2023)
df_lap_2024 = _read_monaco_laps(2024)

In [None]:
# Stack the per-year lap tables; the original row indexes are kept as-is.
yearly_lap_frames = [
    df_lap_2018,
    df_lap_2019,
    df_lap_2021,
    df_lap_2022,
    df_lap_2023,
    df_lap_2024,
]
df_laps = pd.concat(yearly_lap_frames)

In [None]:
# Display the lap tables of all years stacked into one frame.
df_laps

In [None]:
# Right join: every lap row is kept, and the car aggregates are attached where
# a matching (DriverNumber, LapNumber, Year) exists.
# NOTE(review): this requires df_laps to carry a 'Year' column -- confirm the lap CSVs provide it.
lap_merge_keys = ['DriverNumber', 'LapNumber', 'Year']
merged_df = df_all_years_car.merge(df_laps, how='right', on=lap_merge_keys)

In [None]:
# Display the lap table joined with the per-lap car aggregates.
merged_df

In [None]:
# List the column names of the car + lap table.
merged_df.columns

In [None]:
# Persist the car + lap table to CSV (row index is not written).
merged_df.to_csv('cars_laps_monaco_all_years.csv', index=False)

In [None]:
# Reload the car + lap table from disk, replacing the in-memory frame.
# Columns are re-parsed from text, so 'Time' must be converted back to timedelta before use.
merged_df = pd.read_csv('cars_laps_monaco_all_years.csv')

In [None]:
# Display only the rows of the car + lap table whose Year is 2018.
merged_df[merged_df['Year']==2018]

In [None]:
import pandas as pd

years = [2018, 2019, 2021, 2022, 2023, 2024]


def annotate_weather_with_laps(df_weather, df_lap):
    """Label each weather sample with the lap whose time window contains it.

    Lap boundaries are the minimum ``Time`` per ``LapNumber`` in ``df_lap``.
    Lap *k* covers ``[boundary_k, boundary_{k+1})`` and the last lap is
    open-ended. Samples earlier than the first boundary are dropped.

    NOTE(review): the boundary is treated as the lap's start. If the lap
    table's ``Time`` marks lap completion, labels are one lap late -- confirm.

    Parameters
    ----------
    df_weather : DataFrame with a timedelta ``Time`` column. Not modified.
    df_lap : DataFrame with ``LapNumber`` and timedelta ``Time`` columns.

    Returns
    -------
    A copy of ``df_weather`` with a ``LapNumber`` column, restricted to the
    rows that fell inside some lap window.

    Raises
    ------
    ValueError
        If ``df_lap`` yields no lap boundaries.
    """
    # Step 1: Create lap boundaries from min Time per LapNumber
    lap_boundaries = df_lap.groupby("LapNumber")["Time"].min().reset_index().sort_values("LapNumber")
    if lap_boundaries.empty:
        raise ValueError("lap data contains no laps to derive boundaries from")

    lap_numbers = lap_boundaries["LapNumber"].tolist()
    lap_starts = lap_boundaries["Time"].tolist()

    annotated = df_weather.copy()
    annotated["LapNumber"] = None
    weather_time = annotated["Time"]

    # Step 2: Assign each window in lap order; the final lap has no upper bound.
    for idx, (lap_num, lap_start) in enumerate(zip(lap_numbers, lap_starts)):
        mask = weather_time >= lap_start
        if idx + 1 < len(lap_starts):
            mask &= weather_time < lap_starts[idx + 1]
        annotated.loc[mask, "LapNumber"] = lap_num

    # Drop rows without LapNumber
    return annotated.dropna(subset=["LapNumber"])


for year in years:
    try:
        # Load data
        lap_file = f"monaco_laps_{year}.csv"
        weather_file = f"weather_{year}.csv"

        df_lap = pd.read_csv(lap_file)
        df_weather = pd.read_csv(weather_file)

        df_lap["Time"] = pd.to_timedelta(df_lap["Time"])
        df_weather["Time"] = pd.to_timedelta(df_weather["Time"])

        df_weather = annotate_weather_with_laps(df_weather, df_lap)

        # Save output
        output_file = f"weather_with_lap_{year}.csv"
        df_weather.to_csv(output_file, index=False)
        print(f"✅ Annotated weather for {year} → {output_file}")

    except Exception as e:
        # Best effort: report the failing year and continue with the next one.
        print(f"❌ Error processing {year}: {e}")

In [None]:
# Years to process
years = [2018, 2019, 2021, 2022, 2023, 2024]

# Assume merged_df is already in memory
# Ensure Time columns are in timedelta format
merged_df["Time"] = pd.to_timedelta(merged_df["Time"])

for year in years:
    try:
        weather_file = f"weather_{year}.csv"
        df_weather = pd.read_csv(weather_file)
        df_weather["Time"] = pd.to_timedelta(df_weather["Time"])
        df_weather["LapNumber"] = None

        year_laps = merged_df[merged_df["Year"] == year]

        # The previous mask compared a sample against the SAME timestamp on
        # both sides (Time >= t and Time < t), so it never matched anything.
        # Use the same windows as the lap-file based annotation: lap k spans
        # [min Time of lap k, min Time of lap k+1), last lap open-ended.
        lap_boundaries = year_laps.groupby("LapNumber")["Time"].min().sort_index()
        if lap_boundaries.empty:
            raise ValueError(f"no laps found in merged_df for {year}")

        lap_numbers = lap_boundaries.index.tolist()
        lap_starts = lap_boundaries.tolist()
        weather_time = df_weather["Time"]

        for idx, (lap_num, lap_start) in enumerate(zip(lap_numbers, lap_starts)):
            mask = weather_time >= lap_start
            if idx + 1 < len(lap_starts):
                mask &= weather_time < lap_starts[idx + 1]
            df_weather.loc[mask, "LapNumber"] = lap_num

        df_weather = df_weather.dropna(subset=["LapNumber"])

        # File name now carries the underscore used by every reader of these
        # files (weather_with_lap_<year>.csv).
        output_file = f"weather_with_lap_{year}.csv"
        df_weather.to_csv(output_file, index=False)
        print(f"✅ Annotated weather data saved: {output_file}")

    except Exception as e:
        # Best effort: report the failing year and continue with the next one.
        print(f"❌ Error processing weather data for {year}: {e}")


In [None]:
# Sum of the Rainfall column in the 2018 lap-annotated weather file.
pd.read_csv('weather_with_lap_2018.csv')['Rainfall'].sum()

In [None]:

years = [2018, 2019, 2021, 2022, 2023, 2024]


def aggregate_weather_by_lap(df):
    """Reduce lap-annotated weather samples to one row per LapNumber.

    The ``Time`` column is ignored if present. ``Rainfall`` (if present) is
    summed per lap; every other column is averaged. ``df`` is not modified.

    Returns a DataFrame with ``LapNumber`` first, then the averaged columns,
    then ``Rainfall``.
    """
    # Drop Time column if it exists
    if "Time" in df.columns:
        df = df.drop(columns=["Time"])

    # Define aggregation rules
    agg_funcs = {col: 'mean' for col in df.columns if col not in ['LapNumber', 'Rainfall']}
    if 'Rainfall' in df.columns:
        agg_funcs['Rainfall'] = 'sum'

    # Group by LapNumber and aggregate
    return df.groupby("LapNumber").agg(agg_funcs).reset_index()


aggregated_weather_dfs = []

for year in years:
    file_path = f"weather_with_lap_{year}.csv"
    try:
        df_agg = aggregate_weather_by_lap(pd.read_csv(file_path))
        df_agg["Year"] = year
        aggregated_weather_dfs.append(df_agg)

    except Exception as e:
        # Best effort: report the failing year and continue with the next one.
        print(f"❌ Could not process year {year}: {e}")

if aggregated_weather_dfs:
    # Concatenate all years
    df_weather_all_years = pd.concat(aggregated_weather_dfs, ignore_index=True)

    # Optional: Save to CSV
    df_weather_all_years.to_csv("aggregated_weather_all_years.csv", index=False)
    print("✅ Aggregation complete and saved!")
else:
    # pd.concat raises on an empty list; keep the merge keys available and do
    # not report success when nothing was aggregated.
    df_weather_all_years = pd.DataFrame(columns=["LapNumber", "Year"])
    print("❌ No weather data could be aggregated; nothing was saved.")


In [None]:
# Display the per-lap weather aggregates concatenated across all years.
df_weather_all_years

In [None]:
# Display the car + lap table that the weather aggregates are about to be joined onto.
merged_df

In [None]:
# Right join: every car + lap row is kept, and the per-lap weather aggregates
# are attached where a matching (LapNumber, Year) exists.
weather_merge_keys = ['LapNumber', 'Year']
merged_df_1 = df_weather_all_years.merge(merged_df, how='right', on=weather_merge_keys)

In [None]:
# Display the combined weather + car + lap table.
merged_df_1

In [None]:
# Row count of the car + lap table (presumably to compare against merged_df_1 after the weather join).
len(merged_df)

In [None]:
# Persist the combined weather + car + lap table to CSV (row index is not written).
merged_df_1.to_csv('regression_w_l_c.csv', index=False)

In [None]:
# List the column names of the combined weather + car + lap table.
merged_df_1.columns