### Formula 1 Machine Learning Model

** Please be sure to run `pip install fastf1` before running**


In [30]:
import fastf1
import pandas as pd
from IPython.display import display
from datetime import datetime, timezone

years = [2024, 2025]
required_sessions = ['Session4DateUtc', 'Session5DateUtc']  # Qualifying & Race
cache_dir = 'cache'

fastf1.Cache.enable_cache(cache_dir)
fastf1.set_log_level('WARNING')

now = datetime.now(timezone.utc)

# This will hold all merged result DataFrames
all_combined = []

for year in years:
    print(f"\n=== Processing season {year} ===")
    try:
        schedule = fastf1.get_event_schedule(year, include_testing=True)
    except Exception as e:
        print(f"Could not fetch schedule for {year}: {e}")
        continue

    # Filter to only those events where both Quali & Race have completed
    completed = []
    for _, evt in schedule.iterrows():
        ok = True
        for key in required_sessions:
            if key not in evt or pd.isna(evt[key]):
                ok = False
                break
            # ensure timezone-aware
            sess_time = evt[key]
            if sess_time.tzinfo is None:
                sess_time = sess_time.replace(tzinfo=timezone.utc)
            if sess_time >= now:
                ok = False
                break
        if ok:
            completed.append(evt)

    if not completed:
        print(f"No fully completed events found for {year}.")
        continue

    df_events = pd.DataFrame(completed)

    # Pull Qualifying & Race results for each completed event
    for _, evt in df_events.iterrows():
        name = evt['EventName']
        print(f"\n— {year} {name} —")
        try:
            # Qualifying
            q = fastf1.get_session(year, name, 'Q')
            q.load()
            q_res = q.results[['Abbreviation', 'Position', 'Q1', 'Q2', 'Q3']].copy()
            q_res.rename(columns={
                'Position':    'QualiPosition',
                'Q1':          'QualiQ1',
                'Q2':          'QualiQ2',
                'Q3':          'QualiQ3'
            }, inplace=True)
        except Exception as e:
            print(f"Qualifying failed for {name}: {e}")
            continue

        try:
            # Race
            r = fastf1.get_session(year, name, 'R')
            r.load()
            r_res = r.results[['Abbreviation', 'Position', 'GridPosition', 'Time', 'Status', 'Points']].copy()
            r_res.rename(columns={
                'Position':   'RacePosition',
                'Time':       'RaceTime'
            }, inplace=True)
        except Exception as e:
            print(f"Race failed for {name}: {e}")
            continue

        # Merge on driver abbreviation
        merged = pd.merge(q_res, r_res, on='Abbreviation', how='inner')
        merged['Year']      = year
        merged['EventName'] = name
        all_combined.append(merged)

# Final merge
if all_combined:
    final_df = pd.concat(all_combined, ignore_index=True)
    display(
        final_df[[
            'Year','EventName','Abbreviation',
            'QualiPosition','QualiQ1','QualiQ2','QualiQ3',
            'RacePosition','GridPosition','RaceTime','Status','Points'
        ]]
        .sort_values(['Year','EventName','QualiPosition'])
    )
else:
    print("No data could be pulled for any of the specified seasons.")


=== Processing season 2024 ===

— 2024 Bahrain Grand Prix —

— 2024 Saudi Arabian Grand Prix —

— 2024 Australian Grand Prix —

— 2024 Japanese Grand Prix —

— 2024 Chinese Grand Prix —





— 2024 Miami Grand Prix —

— 2024 Emilia Romagna Grand Prix —





— 2024 Monaco Grand Prix —





— 2024 Canadian Grand Prix —

— 2024 Spanish Grand Prix —





— 2024 Austrian Grand Prix —

— 2024 British Grand Prix —

— 2024 Hungarian Grand Prix —

— 2024 Belgian Grand Prix —

— 2024 Dutch Grand Prix —





— 2024 Italian Grand Prix —

— 2024 Azerbaijan Grand Prix —

— 2024 Singapore Grand Prix —





— 2024 United States Grand Prix —





— 2024 Mexico City Grand Prix —

— 2024 São Paulo Grand Prix —





— 2024 Las Vegas Grand Prix —





— 2024 Qatar Grand Prix —





— 2024 Abu Dhabi Grand Prix —

=== Processing season 2025 ===

— 2025 Australian Grand Prix —





— 2025 Chinese Grand Prix —

— 2025 Japanese Grand Prix —

— 2025 Bahrain Grand Prix —





— 2025 Saudi Arabian Grand Prix —

— 2025 Miami Grand Prix —




Unnamed: 0,Year,EventName,Abbreviation,QualiPosition,QualiQ1,QualiQ2,QualiQ3,RacePosition,GridPosition,RaceTime,Status,Points
459,2024,Abu Dhabi Grand Prix,NOR,1.0,0 days 00:01:23.682000,0 days 00:01:23.098000,0 days 00:01:22.595000,1.0,1.0,0 days 01:26:33.291000,Finished,25.0
460,2024,Abu Dhabi Grand Prix,PIA,2.0,0 days 00:01:23.640000,0 days 00:01:23.199000,0 days 00:01:22.804000,10.0,2.0,0 days 00:01:23.821000,Finished,1.0
461,2024,Abu Dhabi Grand Prix,SAI,3.0,0 days 00:01:23.487000,0 days 00:01:22.985000,0 days 00:01:22.824000,2.0,3.0,0 days 00:00:05.832000,Finished,18.0
462,2024,Abu Dhabi Grand Prix,HUL,4.0,0 days 00:01:23.722000,0 days 00:01:23.040000,0 days 00:01:22.886000,8.0,7.0,0 days 00:01:15.554000,Finished,4.0
463,2024,Abu Dhabi Grand Prix,VER,5.0,0 days 00:01:23.516000,0 days 00:01:22.998000,0 days 00:01:22.945000,6.0,4.0,0 days 00:00:49.847000,Finished,8.0
464,2024,Abu Dhabi Grand Prix,GAS,6.0,0 days 00:01:23.548000,0 days 00:01:23.086000,0 days 00:01:22.984000,7.0,5.0,0 days 00:01:12.560000,Finished,6.0
465,2024,Abu Dhabi Grand Prix,RUS,7.0,0 days 00:01:23.678000,0 days 00:01:23.283000,0 days 00:01:23.132000,5.0,6.0,0 days 00:00:37.538000,Finished,10.0
466,2024,Abu Dhabi Grand Prix,ALO,8.0,0 days 00:01:23.794000,0 days 00:01:23.268000,0 days 00:01:23.196000,9.0,8.0,0 days 00:01:22.373000,Finished,2.0
467,2024,Abu Dhabi Grand Prix,BOT,9.0,0 days 00:01:23.481000,0 days 00:01:23.341000,0 days 00:01:23.204000,18.0,9.0,NaT,Retired,0.0
468,2024,Abu Dhabi Grand Prix,PER,10.0,0 days 00:01:23.559000,0 days 00:01:23.379000,0 days 00:01:23.264000,20.0,10.0,NaT,Retired,0.0


In [25]:
#monkey patch
import fastf1.req
from fastf1.req import _MinIntervalLimitDelay, _CallsPerIntervalLimitRaise

# 1) Keep only the soft-delay limiter; drop the "raise" limiter entirely
new_limits = {}

for pattern, limiters in fastf1.req._SessionWithRateLimiting._RATE_LIMITS.items():
    # Filter to only the MinIntervalDelay instances
    new_limits[pattern] = [
        lim for lim in limiters if isinstance(lim, _MinIntervalLimitDelay)
    ]

# Apply the patched rate limits
fastf1.req._SessionWithRateLimiting._RATE_LIMITS = new_limits

In [31]:
pd.set_option('display.max_columns', None)
final_df.head()

Unnamed: 0,Abbreviation,QualiPosition,QualiQ1,QualiQ2,QualiQ3,RacePosition,GridPosition,RaceTime,Status,Points,Year,EventName
0,VER,1.0,0 days 00:01:30.031000,0 days 00:01:29.374000,0 days 00:01:29.179000,1.0,1.0,0 days 01:31:44.742000,Finished,26.0,2024,Bahrain Grand Prix
1,LEC,2.0,0 days 00:01:30.243000,0 days 00:01:29.165000,0 days 00:01:29.407000,4.0,2.0,0 days 00:00:39.669000,Finished,12.0,2024,Bahrain Grand Prix
2,RUS,3.0,0 days 00:01:30.350000,0 days 00:01:29.922000,0 days 00:01:29.485000,5.0,3.0,0 days 00:00:46.788000,Finished,10.0,2024,Bahrain Grand Prix
3,SAI,4.0,0 days 00:01:29.909000,0 days 00:01:29.573000,0 days 00:01:29.507000,3.0,4.0,0 days 00:00:25.110000,Finished,15.0,2024,Bahrain Grand Prix
4,PER,5.0,0 days 00:01:30.221000,0 days 00:01:29.932000,0 days 00:01:29.537000,2.0,5.0,0 days 00:00:22.457000,Finished,18.0,2024,Bahrain Grand Prix


In [32]:
#adding GridPosDiff col
final_df['GridPosDiff'] = final_df['GridPosition'] - final_df['RacePosition']

In [34]:
final_df = pd.get_dummies(final_df, columns=['EventName', 'Abbreviation', 'Status'], drop_first=True)

In [36]:
final_df.shape

(599, 63)

In [38]:
final_df['QualiQ1_sec'] = pd.to_timedelta(final_df['QualiQ1']).dt.total_seconds()
final_df['QualiQ2_sec'] = pd.to_timedelta(final_df['QualiQ2']).dt.total_seconds()
final_df['QualiQ3_sec'] = pd.to_timedelta(final_df['QualiQ3']).dt.total_seconds()
final_df['RaceTime_sec'] = pd.to_timedelta(final_df['RaceTime']).dt.total_seconds()

In [41]:
final_df.drop(columns=['QualiQ1', 'QualiQ2', 'QualiQ3', 'RaceTime'], inplace=True)

In [48]:
from sklearn.preprocessing import MinMaxScaler

cols_to_normalize = [
    'QualiQ1_sec', 'QualiQ2_sec', 'QualiQ3_sec', 'QualiPosition', 'GridPosDiff',
    'RaceTime_sec', 'GridPosition', 'RacePosition', 'Points'
]

scaler = MinMaxScaler()
final_df_scaled = final_df.copy()
final_df_scaled[cols_to_normalize] = scaler.fit_transform(final_df_scaled[cols_to_normalize].fillna(0))

In [49]:
final_df_scaled.head()

Unnamed: 0,QualiPosition,RacePosition,GridPosition,Points,Year,GridPosDiff,EventName_Australian Grand Prix,EventName_Austrian Grand Prix,EventName_Azerbaijan Grand Prix,EventName_Bahrain Grand Prix,EventName_Belgian Grand Prix,EventName_British Grand Prix,EventName_Canadian Grand Prix,EventName_Chinese Grand Prix,EventName_Dutch Grand Prix,EventName_Emilia Romagna Grand Prix,EventName_Hungarian Grand Prix,EventName_Italian Grand Prix,EventName_Japanese Grand Prix,EventName_Las Vegas Grand Prix,EventName_Mexico City Grand Prix,EventName_Miami Grand Prix,EventName_Monaco Grand Prix,EventName_Qatar Grand Prix,EventName_Saudi Arabian Grand Prix,EventName_Singapore Grand Prix,EventName_Spanish Grand Prix,EventName_São Paulo Grand Prix,EventName_United States Grand Prix,Abbreviation_ALO,Abbreviation_ANT,Abbreviation_BEA,Abbreviation_BOR,Abbreviation_BOT,Abbreviation_COL,Abbreviation_DOO,Abbreviation_GAS,Abbreviation_HAD,Abbreviation_HAM,Abbreviation_HUL,Abbreviation_LAW,Abbreviation_LEC,Abbreviation_MAG,Abbreviation_NOR,Abbreviation_OCO,Abbreviation_PER,Abbreviation_PIA,Abbreviation_RIC,Abbreviation_RUS,Abbreviation_SAI,Abbreviation_SAR,Abbreviation_STR,Abbreviation_TSU,Abbreviation_VER,Abbreviation_ZHO,Status_Disqualified,Status_Finished,Status_Lapped,Status_Retired,QualiQ1_sec,QualiQ2_sec,QualiQ3_sec,RaceTime_sec
0,0.0,0.0,0.05,1.0,2024,0.529412,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,0.764432,0.772356,0.776753,0.640417
1,0.052632,0.157895,0.1,0.461538,2024,0.470588,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,0.766232,0.77055,0.778739,0.004615
2,0.105263,0.210526,0.15,0.384615,2024,0.470588,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,0.767141,0.777092,0.779418,0.005443
3,0.157895,0.105263,0.2,0.576923,2024,0.558824,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,0.763396,0.774076,0.77961,0.002921
4,0.210526,0.052632,0.25,0.692308,2024,0.617647,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,0.766045,0.777179,0.779871,0.002613
