In [3]:
# Import all the required libraries
from pathlib import Path

import fastf1
import pandas as pd

# Enable caching for faster data loading.
# FastF1 refuses to enable the cache when the directory does not exist, so
# create it first; this keeps the notebook runnable on a fresh checkout.
Path('cache').mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache('cache')

In [5]:
# Load the 2024 F1 event schedule, keeping only the columns used below
schedule_columns = ['RoundNumber', 'EventDate', 'EventName']
schedule = fastf1.get_event_schedule(2024)[schedule_columns]
schedule

Unnamed: 0,RoundNumber,EventDate,EventName
0,0,2024-02-23,Pre-Season Testing
1,1,2024-03-02,Bahrain Grand Prix
2,2,2024-03-09,Saudi Arabian Grand Prix
3,3,2024-03-24,Australian Grand Prix
4,4,2024-04-07,Japanese Grand Prix
5,5,2024-04-21,Chinese Grand Prix
6,6,2024-05-05,Miami Grand Prix
7,7,2024-05-19,Emilia Romagna Grand Prix
8,8,2024-05-26,Monaco Grand Prix
9,9,2024-06-09,Canadian Grand Prix


In [6]:
# Fetch all 2024 race results
# Take the round numbers from the schedule itself instead of deriving them
# from its length: rows with RoundNumber 0 (e.g. pre-season testing) have no
# race session, and counting rows only works when there is exactly one of them.
race_rounds = schedule.loc[schedule['RoundNumber'] > 0, 'RoundNumber']

results = []
for i in race_rounds:
    i = int(i)    # plain Python int for get_session and the Round column
    session = fastf1.get_session(2024, i, 'R')
    # NOTE(review): load() also fetches laps/telemetry/weather, which this
    # cell never reads -- consider disabling them if results stay complete.
    session.load()

    # assign() returns a copy, so session.results itself is left unmodified
    df = session.results.assign(Round=i)    # Add race round number

    results.append(df)

# Combine all race results into a single DataFrame
results_df = pd.concat(results, ignore_index=True)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix

In [8]:
# Load the race results for the first two rounds of the 2025 season
results = []
for round_number in (1, 2):
    session = fastf1.get_session(2025, round_number, 'R')
    session.load()

    race_results = session.results
    race_results['Round'] = round_number    # tag each row with its round
    results.append(race_results)

# Stack the per-race frames into one 2025 DataFrame
results2025_df = pd.concat(results, ignore_index=True)

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Chinese Grand Prix - R

In [9]:
# Append the 2025 results to the 2024 results in one DataFrame.
# NOTE: this rebinds results_df, so re-running the cell appends the 2025 rows
# again. Round numbers restart each season, so 'Round' alone does not
# identify a race in the combined frame.
season_frames = [results_df, results2025_df]
results_df = pd.concat(season_frames, ignore_index=True)

In [10]:
# Summarise the combined frame: row count, column dtypes and non-null counts
results_df.info()

<class 'fastf1.core.SessionResults'>
RangeIndex: 519 entries, 0 to 518
Data columns (total 22 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   DriverNumber        519 non-null    object         
 1   BroadcastName       519 non-null    object         
 2   Abbreviation        519 non-null    object         
 3   DriverId            519 non-null    object         
 4   TeamName            519 non-null    object         
 5   TeamColor           519 non-null    object         
 6   TeamId              519 non-null    object         
 7   FirstName           519 non-null    object         
 8   LastName            519 non-null    object         
 9   FullName            519 non-null    object         
 10  HeadshotUrl         519 non-null    object         
 11  CountryCode         519 non-null    object         
 12  Position            519 non-null    float64        
 13  ClassifiedPosition  519 non-null    

In [11]:
# results_df already contains both seasons here (the 2025 rows were appended
# when the seasons were combined), so every 2025 driver is already present.
# A "drivers to add" set difference against it is therefore always empty;
# only drivers who no longer appear in the 2025 results need to be dropped.
all_drivers = set(results_df['Abbreviation'].unique())
new_drivers = set(results2025_df['Abbreviation'].unique())

# Fail fast if this cell runs before the 2025 results were merged in
missing_drivers = new_drivers - all_drivers
assert not missing_drivers, f"2025 drivers missing from results_df: {missing_drivers}"

# Drivers to REMOVE (present in the combined data but NOT in the 2025 results)
drivers_to_remove = all_drivers - new_drivers

# Keep only rows for drivers seen in the 2025 results and renumber the index
results_df = results_df[results_df['Abbreviation'].isin(new_drivers)].reset_index(drop=True)

results_df

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,...,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points,Round
0,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671c6,red_bull,Max,Verstappen,Max Verstappen,...,1.0,1,1.0,NaT,NaT,NaT,0 days 01:31:44.742000,Finished,26.0,1
1,55,C SAINZ,SAI,sainz,Ferrari,e8002d,ferrari,Carlos,Sainz,Carlos Sainz,...,3.0,3,4.0,NaT,NaT,NaT,0 days 00:00:25.110000,Finished,15.0,1
2,16,C LECLERC,LEC,leclerc,Ferrari,e8002d,ferrari,Charles,Leclerc,Charles Leclerc,...,4.0,4,2.0,NaT,NaT,NaT,0 days 00:00:39.669000,Finished,12.0,1
3,63,G RUSSELL,RUS,russell,Mercedes,27f4d2,mercedes,George,Russell,George Russell,...,5.0,5,3.0,NaT,NaT,NaT,0 days 00:00:46.788000,Finished,10.0,1
4,4,L NORRIS,NOR,norris,McLaren,ff8000,mclaren,Lando,Norris,Lando Norris,...,6.0,6,7.0,NaT,NaT,NaT,0 days 00:00:48.458000,Finished,8.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,22,Y TSUNODA,TSU,tsunoda,Racing Bulls,6692FF,rb,Yuki,Tsunoda,Yuki Tsunoda,...,16.0,16,9.0,NaT,NaT,NaT,0 days 00:00:23.511000,Lapped,0.0,2
380,14,F ALONSO,ALO,alonso,Aston Martin,229971,aston_martin,Fernando,Alonso,Fernando Alonso,...,17.0,R,13.0,NaT,NaT,NaT,NaT,Retired,0.0,2
381,16,C LECLERC,LEC,leclerc,Ferrari,E80020,ferrari,Charles,Leclerc,Charles Leclerc,...,18.0,D,6.0,NaT,NaT,NaT,NaT,Disqualified,0.0,2
382,44,L HAMILTON,HAM,hamilton,Ferrari,E80020,ferrari,Lewis,Hamilton,Lewis Hamilton,...,19.0,D,5.0,NaT,NaT,NaT,NaT,Disqualified,0.0,2


In [12]:
# Write the filtered results to disk for the model.
# to_csv does not create missing directories, so make sure the target
# folder exists before writing.
output_path = Path('model/DATA/f1_results_2024_2025.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_path, index=False)