First step load the data

In [1]:
import fastf1
import pandas as pd
import os
import logging
import warnings

logging.getLogger('fastf1').setLevel(logging.CRITICAL)
warnings.filterwarnings("ignore", category=FutureWarning)
cache_folder = "cache_folder"
if not os.path.exists(cache_folder):
    os.makedirs(cache_folder)

fastf1.Cache.enable_cache(cache_folder)

track_types = {"Melbourne": "Street", "Shanghai": "Permanent" , "Suzuka": "Permanent", 
               "Sakhir": "Permanent", "Jeddah": "Street", "Miami": "Street", "Imola": "Permanent",
                "Monaco": "Street", "Barcelona": "Permanent", "Montréal": "Street", "Spielberg": "Permanent", 
               "Silverstone": "Permanent", "Spa-Francorchamps": "Permanent", "Budapest": "Permanent",
                "Zandvoort": "Permanent", "Monza": "Permanent", "Baku": "Street", "Marina Bay": "Street",
                "Austin": "Permanent", "Mexico City": "Permanent", "São Paulo": "Permanent",
                "Las Vegas": "Street", "Lusail": "Permanent", "Yas Island": "Permanent", "Le Castellet": "Permanent"}


TeamId_map = {"red_bull": "red_bull",
              "mercedes": "mercedes",
              "ferrari": "ferrari",
               "mclaren": "mclaren",
               "alpine": "alpine",
               "aston_martin": "aston_martin",
               "williams": "williams",
               "haas": "haas",
               "alphatauri": "rb", "rb": "rb",
               "alfa": "sauber", "sauber": "sauber", 
               }

# Pirellis classification of the tracks for 2024 season
track_characteristics = {"Sakhir": {"Traction": 4, "Asphalt Grip": 4, "Asphalt Abrasion": 5,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 4,
                                    "Lateral": 3, "Downforce": 3},
                        "Jeddah": {"Traction": 2, "Asphalt Grip": 4, "Asphalt Abrasion": 2,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 2,
                                    "Lateral": 3, "Downforce": 2},
                        "Melbourne": {"Traction": 2, "Asphalt Grip": 3, "Asphalt Abrasion": 2,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 2,
                                    "Lateral": 3, "Downforce": 3},
                        "Suzuka": {"Traction": 3, "Asphalt Grip": 3, "Asphalt Abrasion": 4,
                                    "Track Evolution": 3, "Tyre Stress": 5, "Braking": 2,
                                    "Lateral": 5, "Downforce": 4},
                        "Shanghai": {"Traction": 3, "Asphalt Grip": 2, "Asphalt Abrasion": 4,
                                    "Track Evolution": 5, "Tyre Stress": 4, "Braking": 4,
                                    "Lateral": 4, "Downforce": 3},
                        "Miami": {"Traction": 3, "Asphalt Grip": 3, "Asphalt Abrasion": 2,
                                    "Track Evolution": 5, "Tyre Stress": 3, "Braking": 3,
                                    "Lateral": 3, "Downforce": 2},
                        "Imola": {"Traction": 2, "Asphalt Grip": 2, "Asphalt Abrasion": 3,
                                    "Track Evolution": 2, "Tyre Stress": 3, "Braking": 3,
                                    "Lateral": 3, "Downforce": 3},
                        "Monaco": {"Traction": 5, "Asphalt Grip": 1, "Asphalt Abrasion": 1,
                                    "Track Evolution": 5, "Tyre Stress": 1, "Braking": 2,
                                    "Lateral": 1, "Downforce": 5},
                        "Montréal": {"Traction": 5, "Asphalt Grip": 1, "Asphalt Abrasion": 1,
                                    "Track Evolution": 5, "Tyre Stress": 3, "Braking": 5,
                                    "Lateral": 1, "Downforce": 1},
                        "Barcelona": {"Traction": 3, "Asphalt Grip": 3, "Asphalt Abrasion": 4,
                                    "Track Evolution": 3, "Tyre Stress": 5, "Braking": 3,
                                    "Lateral": 5, "Downforce": 4}, 
                        "Spielberg": {"Traction": 2, "Asphalt Grip": 3, "Asphalt Abrasion": 4,
                                    "Track Evolution": 3, "Tyre Stress": 3, "Braking": 3,
                                    "Lateral": 3, "Downforce": 3},
                        "Silverstone": {"Traction": 3, "Asphalt Grip": 4, "Asphalt Abrasion": 2,
                                    "Track Evolution": 2, "Tyre Stress": 5, "Braking": 2,
                                    "Lateral": 5, "Downforce": 4},
                        "Budapest": {"Traction": 4, "Asphalt Grip": 2, "Asphalt Abrasion": 2,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 3,
                                    "Lateral": 3, "Downforce": 5},
                        "Spa-Francorchamps": {"Traction": 4, "Asphalt Grip": 4, "Asphalt Abrasion": 4,
                                    "Track Evolution": 3, "Tyre Stress": 5, "Braking": 4,
                                    "Lateral": 5, "Downforce": 2},
                        "Zandvoort": {"Traction": 4, "Asphalt Grip": 3, "Asphalt Abrasion": 3,
                                    "Track Evolution": 4, "Tyre Stress": 5, "Braking": 3,
                                    "Lateral": 4, "Downforce": 4},
                        "Monza": {"Traction": 3, "Asphalt Grip": 4, "Asphalt Abrasion": 2,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 4,
                                    "Lateral": 2, "Downforce": 1},
                        "Baku": {"Traction": 5, "Asphalt Grip": 2, "Asphalt Abrasion": 1,
                                    "Track Evolution": 5, "Tyre Stress": 3, "Braking": 4,
                                    "Lateral": 1, "Downforce": 2},
                        "Marina Bay": {"Traction": 4, "Asphalt Grip": 3, "Asphalt Abrasion": 3,
                                    "Track Evolution": 5, "Tyre Stress": 2, "Braking": 5,
                                    "Lateral": 2, "Downforce": 5},
                        "Austin": {"Traction": 4, "Asphalt Grip": 3, "Asphalt Abrasion": 3,
                                    "Track Evolution": 4, "Tyre Stress": 4, "Braking": 3,
                                    "Lateral": 4, "Downforce": 4},
                        "Mexico City": {"Traction": 3, "Asphalt Grip": 2, "Asphalt Abrasion": 2,
                                    "Track Evolution": 5, "Tyre Stress": 2, "Braking": 3,
                                    "Lateral": 2, "Downforce": 5},
                        "São Paulo": {"Traction": 3, "Asphalt Grip": 3, "Asphalt Abrasion": 2,
                                    "Track Evolution": 5, "Tyre Stress": 3, "Braking": 3,
                                    "Lateral": 3, "Downforce": 4},
                        "Las Vegas": {"Traction": 2, "Asphalt Grip": 1, "Asphalt Abrasion": 2,
                                    "Track Evolution": 5, "Tyre Stress": 4, "Braking": 3,
                                    "Lateral": 2, "Downforce": 1},
                        "Lusail": {"Traction": 3, "Asphalt Grip": 3, "Asphalt Abrasion": 2,
                                    "Track Evolution": 4, "Tyre Stress": 5, "Braking": 3,
                                    "Lateral": 5, "Downforce": 4},
                        "Yas Island": {"Traction": 4, "Asphalt Grip": 3, "Asphalt Abrasion": 3,
                                    "Track Evolution": 4, "Tyre Stress": 3, "Braking": 4,
                                    "Lateral": 3, "Downforce": 3},
                        "Le Castellet": {"Traction": 4, "Asphalt Grip": 3, "Asphalt Abrasion": 3,
                                    "Track Evolution": 3, "Tyre Stress": 4, "Braking": 2,
                                    "Lateral": 4, "Downforce": 3} # 2022 season
                        
                        }

In [2]:

races = fastf1.get_event_schedule(2024)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for rnd, race in races.iterrows():
    
    session = fastf1.get_session(2024, rnd, "r")
    session.load()  

    results = session.results[["Abbreviation", "Points"]].copy()
    
    results["round"] = rnd 
    
    all_results.append(results)


season_results_2024 = pd.concat(all_results, ignore_index=True)




In [3]:
from sklearn.preprocessing import StandardScaler
races = fastf1.get_event_schedule(2024)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for rnd, race in races.iterrows():
    

    race_session = fastf1.get_session(2024, rnd, "r")
    race_session.load()


    data = race_session.results[["Abbreviation", "ClassifiedPosition", "GridPosition", "TeamId"]].copy()
    
    location = race_session.event.get("Location", None)
    data["Track_location"] = location
    data["Round"] = rnd
    data["Season"] = 2024

    data["Track_characteristics"] = data["Track_location"].map(track_characteristics)

    track_features = data["Track_characteristics"].apply(pd.Series)

    data = data.join(track_features)
    data.drop(columns=("Track_characteristics"), inplace=True)


    try:
        fp2_session = fastf1.get_session(2024, rnd, "fp2")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.3)

            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()



            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))
    except:
        fp2_session = fastf1.get_session(2024, rnd, "s")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.5)

            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()



            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))

    if rnd - 5 >= 1:
        start_idx = rnd - 5
    else:
        start_idx = 1
    end_idx = rnd - 1 

    wanted = season_results_2024[(season_results_2024["round"] >= start_idx) & (season_results_2024["round"] <= end_idx)]

    weights = [0.2, 0.4, 0.6, 0.8, 1]

    for driver in wanted["Abbreviation"].unique():
        driver_result =  wanted[wanted["Abbreviation"] == driver].sort_values("round")

        weighted_form = 0

        for i, (_, row) in enumerate(driver_result.iterrows()):
            if i < len(weights):
                weighted_form += row["Points"] * weights[i]
        data.loc[data["Abbreviation"] == driver, "Form-score"] = weighted_form
    all_results.append(data)

season_results_2024 = pd.concat(all_results, ignore_index=True)

season_results_2024["Form-score"] = season_results_2024["Form-score"].fillna(0) #fix first race and other errors that could exist

season_results_2024["Track_type"] = season_results_2024["Track_location"].map(track_types)
season_results_2024["TeamId"] = season_results_2024["TeamId"].map(TeamId_map)


In [4]:
races = fastf1.get_event_schedule(2023)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for rnd, race in races.iterrows():
    
    
    session = fastf1.get_session(2023, rnd, "r")
    session.load()  

    rnd = int(race["RoundNumber"])
    if rnd == 0:
        continue

    results = session.results[["Abbreviation", "Points"]].copy()
    
    results["round"] = rnd 
    
    all_results.append(results)


season_results_2023 = pd.concat(all_results, ignore_index=True)


In [5]:
races = fastf1.get_event_schedule(2023)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for _, race in races.iterrows():
    

    race_session = fastf1.get_session(2023, _, "r")
    race_session.load()

    rnd = int(race["RoundNumber"])
    if rnd == 0:
        continue

    data = race_session.results[["Abbreviation", "ClassifiedPosition", "GridPosition", "TeamId"]].copy()
    
    location = race_session.event.get("Location", None)
    data["Track_location"] = location
    data["Round"] = rnd
    data["Season"] = 2023

    data["Track_characteristics"] = data["Track_location"].map(track_characteristics)

    track_features = data["Track_characteristics"].apply(pd.Series)

    data = data.join(track_features)
    data.drop(columns=("Track_characteristics"), inplace=True)


    try:
        fp2_session = fastf1.get_session(2023, rnd, "fp2")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.3)

            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()

            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))
    except:
        fp2_session = fastf1.get_session(2023, rnd, "s")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.5)

            clean_laps = laps[(laps["Sector1Time"] <= fast_s1 + delta) & 
                            (laps["Sector2Time"] <= fast_s2 + delta) &
                            (laps["Sector3Time"] <= fast_s3 + delta)]

            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()

            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))

    if rnd - 5 >= 1:
        start_idx = rnd - 5
    else:
        start_idx = 1
    end_idx = rnd - 1 

    wanted = season_results_2023[(season_results_2023["round"] >= start_idx) & 
                                 (season_results_2023["round"] <= end_idx)]

    weights = [0.2, 0.4, 0.6, 0.8, 1]

    for driver in wanted["Abbreviation"].unique():
        driver_result =  wanted[wanted["Abbreviation"] == driver].sort_values("round")

        weighted_form = 0


        for i, (_, row) in enumerate(driver_result.iterrows()):
            if i < len(weights):
                weighted_form += row["Points"] * weights[i]
        data.loc[data["Abbreviation"] == driver, "Form-score"] = weighted_form
    all_results.append(data)

season_results_2023 = pd.concat(all_results, ignore_index=True)

season_results_2023["Form-score"] = season_results_2023["Form-score"].fillna(0) #fix first race and other errors that could exist

season_results_2023["Track_type"] = season_results_2023["Track_location"].map(track_types)
season_results_2023["TeamId"] = season_results_2023["TeamId"].map(TeamId_map)


In [None]:
races = fastf1.get_event_schedule(2022)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for rnd, race in races.iterrows():
    
    rnd = int(race["RoundNumber"])
    if rnd == 0:
        continue
    
    session = fastf1.get_session(2022, rnd, "r")
    session.load()  

    

    results = session.results[["Abbreviation", "Points"]].copy()
    
    results["round"] = rnd 
    
    all_results.append(results)


season_results_2022 = pd.concat(all_results, ignore_index=True)

In [None]:
races = fastf1.get_event_schedule(2022)
races = races[races["RoundNumber"] > 0].copy() #skip pre-season testing

all_results = []
for _, race in races.iterrows():
    
    rnd = int(race["RoundNumber"])
    if rnd == 0:
        continue
    race_session = fastf1.get_session(2022, rnd, "r")
    race_session.load()

    
    data = race_session.results[["Abbreviation", "ClassifiedPosition", "GridPosition", "TeamId"]].copy()
    
    location = race_session.event.get("Location", None)
    data["Track_location"] = location
    data["Round"] = rnd
    data["Season"] = 2022

    data["Track_characteristics"] = data["Track_location"].map(track_characteristics)

    track_features = data["Track_characteristics"].apply(pd.Series)

    data = data.join(track_features)
    data.drop(columns=("Track_characteristics"), inplace=True)


    try:
        fp2_session = fastf1.get_session(2022, rnd, "fp2")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.3)
            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()

            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))
    except:
        fp2_session = fastf1.get_session(2022, rnd, "s")
        fp2_session.load()

        all_laps = fp2_session.laps
        all_laps = all_laps[all_laps["IsAccurate"]]

        race_sim_dict = {}
        for driver in data["Abbreviation"]:
            

            laps = all_laps[all_laps["Driver"] == driver]
            fast_s1 = laps["Sector1Time"].min()
            fast_s2 = laps["Sector2Time"].min()
            fast_s3 = laps["Sector3Time"].min() 

            delta = pd.Timedelta(seconds=0.5)

            clean_laps = laps[(laps["Sector1Time"] <= fast_s1 + delta) & 
                            (laps["Sector2Time"] <= fast_s2 + delta) &
                            (laps["Sector3Time"] <= fast_s3 + delta)]

            clean_sector_1 = laps[laps["Sector1Time"] <= fast_s1 + delta]
            avg_s1 = clean_sector_1["Sector1Time"].mean()

            # Sector 2
            clean_sector_2 = laps[laps["Sector2Time"] <= fast_s2 + delta]
            avg_s2 = clean_sector_2["Sector2Time"].mean()

            # Sector 3
            clean_sector_3 = laps[laps["Sector3Time"] <= fast_s3 + delta]
            avg_s3 = clean_sector_3["Sector3Time"].mean()

            avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
            race_sim_dict[driver] = avg_laptime    

        data["race_sim"] = data["Abbreviation"].map(race_sim_dict)
        scaler = StandardScaler()
        data["race_sim"] = scaler.fit_transform(data["race_sim"].values.reshape(-1, 1))

    if rnd - 5 >= 1:
        start_idx = rnd - 5
    else:
        start_idx = 1
    end_idx = rnd - 1 

    wanted = season_results_2022[(season_results_2022["round"] >= start_idx) & 
                                 (season_results_2022["round"] <= end_idx)]

    weights = [0.2, 0.4, 0.6, 0.8, 1]

    for driver in wanted["Abbreviation"].unique():
        driver_result =  wanted[wanted["Abbreviation"] == driver].sort_values("round")

        weighted_form = 0

        for i, (_, row) in enumerate(driver_result.iterrows()):
            if i < len(weights):
                weighted_form += row["Points"] * weights[i]
        data.loc[data["Abbreviation"] == driver, "Form-score"] = weighted_form
    all_results.append(data)

season_results_2022 = pd.concat(all_results, ignore_index=True)

season_results_2022["Form-score"] = season_results_2022["Form-score"].fillna(0) #fix first race and other errors that could exist

season_results_2022["Track_type"] = season_results_2022["Track_location"].map(track_types)
season_results_2022["TeamId"] = season_results_2022["TeamId"].map(TeamId_map)
print(season_results_2022)

    Abbreviation ClassifiedPosition  GridPosition    TeamId Track_location  \
0            LEC                  1           1.0   ferrari         Sakhir   
1            SAI                  2           3.0   ferrari         Sakhir   
2            HAM                  3           5.0  mercedes         Sakhir   
3            RUS                  4           9.0  mercedes         Sakhir   
4            MAG                  5           7.0      haas         Sakhir   
..           ...                ...           ...       ...            ...   
435          MSC                 16          12.0      haas     Yas Island   
436          MAG                 17          16.0      haas     Yas Island   
437          HAM                 18           5.0  mercedes     Yas Island   
438          LAT                 19          20.0  williams     Yas Island   
439          ALO                  R          10.0    alpine     Yas Island   

     Round  Season  Traction  Asphalt Grip  Asphalt Abrasion  T

In [None]:
training_data = pd.concat([season_results_2022, season_results_2023, season_results_2024], ignore_index=True)

print(training_data)
training_data.to_parquet("2022_to_2024_data.parquet", index=False)

     Abbreviation ClassifiedPosition  GridPosition    TeamId Track_location  \
0             LEC                  1           1.0   ferrari         Sakhir   
1             SAI                  2           3.0   ferrari         Sakhir   
2             HAM                  3           5.0  mercedes         Sakhir   
3             RUS                  4           9.0  mercedes         Sakhir   
4             MAG                  5           7.0      haas         Sakhir   
...           ...                ...           ...       ...            ...   
1354          MAG                 16          14.0      haas     Yas Island   
1355          LAW                 17          12.0        rb     Yas Island   
1356          BOT                  R           9.0    sauber     Yas Island   
1357          COL                  R          20.0  williams     Yas Island   
1358          PER                  R          10.0  red_bull     Yas Island   

      Round  Season  Traction  Asphalt Grip  Asphal

In [53]:
fp2_session = fastf1.get_session(2024, "Spa-franc", "fp2")
fp2_session.load()

all_laps = fp2_session.laps
all_laps = all_laps[all_laps["IsAccurate"]]

race_sim_dict = {}
for driver in data["Abbreviation"]:
    

    laps = all_laps[all_laps["Driver"] == driver]
    fast_s1 = laps["Sector1Time"].min()
    fast_s2 = laps["Sector2Time"].min()
    fast_s3 = laps["Sector3Time"].min() 

    delta = pd.Timedelta(seconds=2)

    clean_laps = laps[(laps["Sector1Time"] <= fast_s1 + delta) & 
                    (laps["Sector2Time"] <= fast_s2 + delta) &
                    (laps["Sector3Time"] <= fast_s3 + delta)]

    avg_s1 = clean_laps["Sector1Time"].mean()
    avg_s2 = clean_laps["Sector2Time"].mean()
    avg_s3 = clean_laps["Sector3Time"].mean()

    avg_laptime = (avg_s1 + avg_s2 + avg_s3).total_seconds()
    race_sim_dict[driver] = avg_laptime    

data["race_sim"] = data["Abbreviation"].map(race_sim_dict)

print(data)

core           INFO 	Loading data for Belgian Grand Prix - Practice 2 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '2', '3', '4', '10', '11', '14', '16', '18', '20', '22', '23', '24', '27', '31', '44', '55', '63', '77', '81']


   Abbreviation ClassifiedPosition  GridPosition     Track_location  \
44          HAM                  1           3.0  Spa-Francorchamps   
81          PIA                  2           5.0  Spa-Francorchamps   
16          LEC                  3           1.0  Spa-Francorchamps   
1           VER                  4          11.0  Spa-Francorchamps   
4           NOR                  5           4.0  Spa-Francorchamps   
55          SAI                  6           7.0  Spa-Francorchamps   
11          PER                  7           2.0  Spa-Francorchamps   
14          ALO                  8           8.0  Spa-Francorchamps   
31          OCO                  9           9.0  Spa-Francorchamps   
3           RIC                 10          13.0  Spa-Francorchamps   
18          STR                 11          15.0  Spa-Francorchamps   
23          ALB                 12          10.0  Spa-Francorchamps   
10          GAS                 13          12.0  Spa-Francorchamps   
20    

In [None]:
session = fastf1.get_session(2024, "Spa-franc", "r")
session.load()
rnd = int(session.event[["RoundNumber"]])

if rnd - 5 >= 1:
    start_idx = rnd - 5
else:
    start_idx = 1
end_idx = rnd - 1 

wanted = season_results_2024[(season_results_2024["round"] >= start_idx) & (season_results_2024["round"] <= end_idx)]

weights = [0.2, 0.4, 0.6, 0.8, 1]

for driver in wanted["Abbreviation"].unique():
    driver_result =  wanted[wanted["Abbreviation"] == driver].sort_values("round")

    weighted_form = 0

    for i, (_, row) in enumerate(driver_result.iterrows()):
        if i < len(weights):
            weighted_form += row["Points"] * weights[i]
    data.loc[data["Abbreviation"] == driver, "Form-score"] = weighted_form

print(data)

    Abbreviation  Points  round
159          VER    25.0      9
179          VER    25.0     10
203          VER    10.0     11
220          VER    18.0     12
243          VER    10.0     13
   Abbreviation ClassifiedPosition  GridPosition     Track_location  \
44          HAM                  1           3.0  Spa-Francorchamps   
81          PIA                  2           5.0  Spa-Francorchamps   
16          LEC                  3           1.0  Spa-Francorchamps   
1           VER                  4          11.0  Spa-Francorchamps   
4           NOR                  5           4.0  Spa-Francorchamps   
55          SAI                  6           7.0  Spa-Francorchamps   
11          PER                  7           2.0  Spa-Francorchamps   
14          ALO                  8           8.0  Spa-Francorchamps   
31          OCO                  9           9.0  Spa-Francorchamps   
3           RIC                 10          13.0  Spa-Francorchamps   
18          STR            

  rnd = int(session.event[["RoundNumber"]])
