# F1 AUS GP Podium Predictor — Data Collection
## Goal
Pull 2023 and 2024 Australian GP race and qualifying data using FastF1.
Save a clean dataframe for use in feature engineering.

## Data Source
- FastF1 Python library (wraps official F1 timing data)
- Sessions: Race (R) and Qualifying (Q) for 2023 and 2024

# Core libraries

In [3]:
import fastf1
import pandas as pd 
import os

# FastF1 caches downloaded data locally so we don't re-download every run

In [4]:
# Directory where FastF1 stores downloaded timing data between runs
CACHE_DIR = 'cache'

# The directory must exist before FastF1 is pointed at it
os.makedirs(CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(CACHE_DIR)

# Load 2023 Australian GP race and qualifying sessions
# 'R' = Race, 'Q' = Qualifying

In [5]:
# Fetch and load each 2023 session in turn: race first, then qualifying
loaded_2023 = []
for session_code in ('R', 'Q'):
    session = fastf1.get_session(2023, 'Australia', session_code)
    session.load()
    loaded_2023.append(session)

race_2023, quali_2023 = loaded_2023

core           INFO 	Loading data for Australian Grand Prix - Race [v3.8.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No

# Load 2024 Australian GP race and qualifying sessions

In [6]:
# Fetch and load each 2024 session in turn: race first, then qualifying
loaded_2024 = []
for session_code in ('R', 'Q'):
    session = fastf1.get_session(2024, 'Australia', session_code)
    session.load()
    loaded_2024.append(session)

race_2024, quali_2024 = loaded_2024

core           INFO 	Loading data for Australian Grand Prix - Race [v3.8.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No

# Preview the race results to confirm data loaded correctly
# `session.results` contains finishing position, driver, constructor, grid position, etc.

In [7]:
# Columns shown in the sanity-check preview
preview_columns = ['DriverNumber', 'Abbreviation', 'TeamName', 'GridPosition', 'Position']

# Print a heading followed by the first five result rows for each race;
# the second heading carries a leading newline to separate the two tables
for heading, race in (
    ("2023 Race Results:", race_2023),
    ("\n2024 Race Results:", race_2024),
):
    print(heading)
    print(race.results[preview_columns].head())

2023 Race Results:
   DriverNumber Abbreviation         TeamName  GridPosition  Position
1             1          VER  Red Bull Racing           1.0       1.0
44           44          HAM         Mercedes           3.0       2.0
14           14          ALO     Aston Martin           4.0       3.0
18           18          STR     Aston Martin           6.0       4.0
11           11          PER  Red Bull Racing          20.0       5.0

2024 Race Results:
   DriverNumber Abbreviation         TeamName  GridPosition  Position
55           55          SAI          Ferrari           2.0       1.0
16           16          LEC          Ferrari           4.0       2.0
4             4          NOR          McLaren           3.0       3.0
81           81          PIA          McLaren           5.0       4.0
11           11          PER  Red Bull Racing           6.0       5.0


# Extract race results for both years and add a 'Year' column

In [8]:
# Columns carried forward from each race's results table
result_columns = ['DriverNumber', 'Abbreviation', 'TeamName', 'GridPosition', 'Position']

# assign() returns a new frame, so the session's own results are left unmodified
results_2023 = race_2023.results[result_columns].assign(Year=2023)
results_2024 = race_2024.results[result_columns].assign(Year=2024)

# Combine into one dataframe

In [9]:
# Stack the per-year tables in chronological order and renumber the rows from 0
yearly_frames = [results_2023, results_2024]
all_results = pd.concat(yearly_frames, ignore_index=True)
print(all_results)

   DriverNumber Abbreviation         TeamName  GridPosition  Position  Year
0             1          VER  Red Bull Racing           1.0       1.0  2023
1            44          HAM         Mercedes           3.0       2.0  2023
2            14          ALO     Aston Martin           4.0       3.0  2023
3            18          STR     Aston Martin           6.0       4.0  2023
4            11          PER  Red Bull Racing          20.0       5.0  2023
5             4          NOR          McLaren          13.0       6.0  2023
6            27          HUL     Haas F1 Team          10.0       7.0  2023
7            81          PIA          McLaren          16.0       8.0  2023
8            24          ZHO       Alfa Romeo          17.0       9.0  2023
9            22          TSU       AlphaTauri          12.0      10.0  2023
10           77          BOT       Alfa Romeo          19.0      11.0  2023
11           55          SAI          Ferrari           5.0      12.0  2023
12          

# Create binary target: 1 if driver finished in top 3, 0 otherwise

In [10]:
# Boolean mask of top-three finishers, stored as a 0/1 integer target column
finished_top_three = all_results['Position'].le(3)
all_results['Podium'] = finished_top_three.astype(int)

# Show the target next to the columns it was derived from
print(all_results.loc[:, ['Abbreviation', 'Year', 'Position', 'Podium']])

   Abbreviation  Year  Position  Podium
0           VER  2023       1.0       1
1           HAM  2023       2.0       1
2           ALO  2023       3.0       1
3           STR  2023       4.0       0
4           PER  2023       5.0       0
5           NOR  2023       6.0       0
6           HUL  2023       7.0       0
7           PIA  2023       8.0       0
8           ZHO  2023       9.0       0
9           TSU  2023      10.0       0
10          BOT  2023      11.0       0
11          SAI  2023      12.0       0
12          GAS  2023      13.0       0
13          OCO  2023      14.0       0
14          DEV  2023      15.0       0
15          SAR  2023      16.0       0
16          MAG  2023      17.0       0
17          RUS  2023      18.0       0
18          ALB  2023      19.0       0
19          LEC  2023      20.0       0
20          SAI  2024       1.0       1
21          LEC  2024       2.0       1
22          NOR  2024       3.0       1
23          PIA  2024       4.0       0


# Save clean dataframe for use in feature engineering

In [11]:
# Write the combined table without the row index, then report its dimensions
output_csv = 'aus_gp_data.csv'
all_results.to_csv(output_csv, index=False)
print("Saved! Shape:", all_results.shape)

Saved! Shape: (39, 7)
