In [1]:
import pandas
import fastf1
import os

# Builds and stores a pandas DataFrame indexed by driver abbreviation, with one row per driver per race.
# Each row holds the name of the race the driver took part in,
# the finishing position, the starting (grid) position, and the team id.
# Additionally, each race, driver, and constructor is given a unique integer identifier, for use in Stan.

# Example entry:
# Abbreviation          Position     GridPosition     TeamId     Race     RaceNumber     TeamNumber     DriverNumber
# MAG                   10           4                haas  Miami Grand Prix   5         9              13

n = 22 # number of races
year = 2023 # year to get results from


def load_race_results(season, round_number):
    """Load one race and return its results indexed by driver abbreviation.

    Parameters
    ----------
    season : int
        Championship year passed to ``fastf1.get_session``.
    round_number : int
        Round of the season to load; also stored in the ``RaceNumber`` column.

    Returns
    -------
    DataFrame with the columns Position, GridPosition, TeamId, Race and
    RaceNumber, one row per entry in the session results.
    """
    session = fastf1.get_session(season, round_number, "Race")
    session.load()
    # Work on an explicit copy: the session's own results frame stays untouched and
    # adding columns below is not a chained assignment on a slice.
    results = session.results.set_index("Abbreviation")[["Position", "GridPosition", "TeamId"]].copy()
    results["Race"] = session.event["EventName"]
    results["RaceNumber"] = round_number
    return results


# dataframe to be stored is called master
# every round is loaded through the same helper and concatenated once,
# instead of growing the frame with a concat per loop iteration
master = pandas.concat(
    [load_race_results(year, round_number) for round_number in range(1, n + 1)],
    ignore_index=False,
)

# in 2023 one index label is the string "nan"; those rows belong to Stroll
if year == 2023:
    master = master.rename(index={"nan": "STR"})

# unique integer identifiers for drivers and constructors,
# numbered from 1 in order of first appearance in master
driver_dictionary = {
    abbreviation: number
    for number, abbreviation in enumerate(master.index.unique(), start=1)
}
constructor_dictionary = {
    team: number
    for number, team in enumerate(master["TeamId"].unique(), start=1)
}
master["TeamNumber"] = master["TeamId"].map(constructor_dictionary)
master["DriverNumber"] = master.index.map(driver_dictionary)

# dictionary that tells if a circuit is a street or race circuit, keyed by event name
# NOTE: the event name in the loaded data is spelled "São Paulo Grand Prix" (with the tilde),
# so the accented key is the one the lookup needs; the ASCII spelling is kept as an alias.
circuit_types = {"Bahrain Grand Prix": "Race",
                 "Saudi Arabian Grand Prix": "Street",
                 "Australian Grand Prix": "Street",
                 "Azerbaijan Grand Prix": "Street",
                 "Miami Grand Prix": "Street",
                 "Monaco Grand Prix": "Street",
                 "Spanish Grand Prix": "Race",
                 "Canadian Grand Prix": "Street",
                 "Austrian Grand Prix": "Race",
                 "British Grand Prix": "Race",
                 "Hungarian Grand Prix": "Race",
                 "Belgian Grand Prix": "Race",
                 "Dutch Grand Prix": "Race",
                 "Italian Grand Prix": "Race",
                 "Singapore Grand Prix": "Street",
                 "Japanese Grand Prix": "Race",
                 "Qatar Grand Prix": "Race",
                 "United States Grand Prix": "Race",
                 "Mexico City Grand Prix": "Race",
                 "Las Vegas Grand Prix": "Street",
                 "Abu Dhabi Grand Prix": "Race",
                 "German Grand Prix": "Race",
                 "Brazilian Grand Prix": "Race",
                 "São Paulo Grand Prix": "Race",
                 "Sao Paulo Grand Prix": "Race",
                 "Styrian Grand Prix": "Race",
                 "San Marino Grand Prix": "Race",
                 "Emilia Romagna Grand Prix": "Race",
                 "Mexican Grand Prix": "Race",
                 "Argentine Grand Prix": "Race",
                 "Sakhir Grand Prix": "Race",
                 "South African Grand Prix": "Race",
                 "Malaysian Grand Prix": "Race",
                 "French Grand Prix": "Race",
                 "Chinese Grand Prix": "Race",
                 "European Grand Prix": "Race",
                 "Portuguese Grand Prix": "Race",
                 "Turkish Grand Prix": "Race"}



# cast the columns that should hold whole numbers to int; some values were stored as e.g. 7.0
for integer_column in ("Position", "GridPosition", "TeamNumber", "DriverNumber"):
    master[integer_column] = master[integer_column].astype(int)


DEFAULT CACHE ENABLED!
	Cache directory: /home/john/.cache/fastf1.
	Size: 4.87 GB
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.1.6]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '

In [18]:
# Every event name the lookup covers, in the order the keys are inserted.
_known_events = (
    "Bahrain Grand Prix",
    "Saudi Arabian Grand Prix",
    "Australian Grand Prix",
    "Azerbaijan Grand Prix",
    "Miami Grand Prix",
    "Monaco Grand Prix",
    "Spanish Grand Prix",
    "Canadian Grand Prix",
    "Austrian Grand Prix",
    "British Grand Prix",
    "Hungarian Grand Prix",
    "Belgian Grand Prix",
    "Dutch Grand Prix",
    "Italian Grand Prix",
    "Singapore Grand Prix",
    "Japanese Grand Prix",
    "Qatar Grand Prix",
    "United States Grand Prix",
    "Mexico City Grand Prix",
    "Las Vegas Grand Prix",
    "Abu Dhabi Grand Prix",
    "German Grand Prix",
    "Brazilian Grand Prix",
    "São Paulo Grand Prix",
    "Styrian Grand Prix",
    "San Marino Grand Prix",
    "Emilia Romagna Grand Prix",
    "Mexican Grand Prix",
    "Argentine Grand Prix",
    "Sakhir Grand Prix",
    "South African Grand Prix",
    "Malaysian Grand Prix",
    "French Grand Prix",
    "Chinese Grand Prix",
    "European Grand Prix",
    "Portuguese Grand Prix",
    "Turkish Grand Prix",
)

# Events labelled "Street"; every other known event is labelled "Race".
_street_events = frozenset({
    "Saudi Arabian Grand Prix",
    "Australian Grand Prix",
    "Azerbaijan Grand Prix",
    "Miami Grand Prix",
    "Monaco Grand Prix",
    "Canadian Grand Prix",
    "Singapore Grand Prix",
    "Las Vegas Grand Prix",
})

# Maps event name -> "Street" or "Race".
circuit_types = {
    event: ("Street" if event in _street_events else "Race")
    for event in _known_events
}

In [19]:
# Take an independent (deep) copy of master to work on.
df = master.copy(deep=True)

In [20]:
# Display the working copy; the rendered table elides the middle rows with "...".
df

Unnamed: 0_level_0,Position,GridPosition,TeamId,Race,RaceNumber,TeamNumber,DriverNumber
Abbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
VER,1,1,red_bull,Bahrain Grand Prix,1,1,1
PER,2,2,red_bull,Bahrain Grand Prix,1,1,2
ALO,3,5,aston_martin,Bahrain Grand Prix,1,2,3
SAI,4,4,ferrari,Bahrain Grand Prix,1,3,4
HAM,5,7,mercedes,Bahrain Grand Prix,1,4,5
...,...,...,...,...,...,...,...
SAR,16,20,williams,Abu Dhabi Grand Prix,22,7,12
ZHO,17,19,alfa,Abu Dhabi Grand Prix,22,5,16
SAI,18,16,ferrari,Abu Dhabi Grand Prix,22,3,4
BOT,19,18,alfa,Abu Dhabi Grand Prix,22,5,8


In [21]:
# Label every row with its circuit type by looking up the race name in circuit_types.
df["CircuitType"] = df["Race"].map(circuit_types)

# Series.map leaves NaN for any race name that is not a key of the dict, which would
# otherwise pass through silently; fail here and name the missing races instead.
unmapped_races = df.loc[df["CircuitType"].isna(), "Race"].unique()
assert len(unmapped_races) == 0, f"races missing from circuit_types: {list(unmapped_races)}"

df

Unnamed: 0_level_0,Position,GridPosition,TeamId,Race,RaceNumber,TeamNumber,DriverNumber,CircuitType
Abbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
VER,1,1,red_bull,Bahrain Grand Prix,1,1,1,Race
PER,2,2,red_bull,Bahrain Grand Prix,1,1,2,Race
ALO,3,5,aston_martin,Bahrain Grand Prix,1,2,3,Race
SAI,4,4,ferrari,Bahrain Grand Prix,1,3,4,Race
HAM,5,7,mercedes,Bahrain Grand Prix,1,4,5,Race
...,...,...,...,...,...,...,...,...
SAR,16,20,williams,Abu Dhabi Grand Prix,22,7,12,Race
ZHO,17,19,alfa,Abu Dhabi Grand Prix,22,5,16,Race
SAI,18,16,ferrari,Abu Dhabi Grand Prix,22,3,4,Race
BOT,19,18,alfa,Abu Dhabi Grand Prix,22,5,8,Race


In [22]:
# Integer code for each circuit type label: "Race" -> 0, "Street" -> 1.
circuit_type_index = dict(zip(("Race", "Street"), range(2)))
circuit_type_index

{'Race': 0, 'Street': 1}

In [23]:
# Translate each row's circuit type label into its integer code and store it as a new column.
circuit_codes = df["CircuitType"].map(circuit_type_index)
df["CircuitTypeIndex"] = circuit_codes

# Print every row's code, one per line.
for _, circuit_code in circuit_codes.items():
    print(circuit_code)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [24]:
# Show the final 60 rows of the frame.
df.tail(60)

Unnamed: 0_level_0,Position,GridPosition,TeamId,Race,RaceNumber,TeamNumber,DriverNumber,CircuitType,CircuitTypeIndex
Abbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
VER,1,1,red_bull,São Paulo Grand Prix,20,1,1,Race,0
NOR,2,6,mclaren,São Paulo Grand Prix,20,10,17,Race,0
ALO,3,4,aston_martin,São Paulo Grand Prix,20,2,3,Race,0
PER,4,9,red_bull,São Paulo Grand Prix,20,1,2,Race,0
STR,5,3,aston_martin,São Paulo Grand Prix,20,2,6,Race,0
SAI,6,7,ferrari,São Paulo Grand Prix,20,3,4,Race,0
GAS,7,15,alpine,São Paulo Grand Prix,20,6,9,Race,0
HAM,8,5,mercedes,São Paulo Grand Prix,20,4,5,Race,0
TSU,9,16,alphatauri,São Paulo Grand Prix,20,8,11,Race,0
OCO,10,14,alpine,São Paulo Grand Prix,20,6,18,Race,0


In [25]:
# Display the frame again, now including the CircuitType and CircuitTypeIndex columns.
df

Unnamed: 0_level_0,Position,GridPosition,TeamId,Race,RaceNumber,TeamNumber,DriverNumber,CircuitType,CircuitTypeIndex
Abbreviation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
VER,1,1,red_bull,Bahrain Grand Prix,1,1,1,Race,0
PER,2,2,red_bull,Bahrain Grand Prix,1,1,2,Race,0
ALO,3,5,aston_martin,Bahrain Grand Prix,1,2,3,Race,0
SAI,4,4,ferrari,Bahrain Grand Prix,1,3,4,Race,0
HAM,5,7,mercedes,Bahrain Grand Prix,1,4,5,Race,0
...,...,...,...,...,...,...,...,...,...
SAR,16,20,williams,Abu Dhabi Grand Prix,22,7,12,Race,0
ZHO,17,19,alfa,Abu Dhabi Grand Prix,22,5,16,Race,0
SAI,18,16,ferrari,Abu Dhabi Grand Prix,22,3,4,Race,0
BOT,19,18,alfa,Abu Dhabi Grand Prix,22,5,8,Race,0


In [33]:
# Keep the first row of each distinct (RaceNumber, CircuitTypeIndex) pair,
# then export the remaining circuit type codes as a NumPy array.
(
    df.loc[:, ["RaceNumber", "CircuitTypeIndex"]]
    .drop_duplicates()
    .loc[:, "CircuitTypeIndex"]
    .to_numpy()
)

array([0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0])