### Set Up Packages and Global Settings

In [1]:
import pytz
import random
import time

from datetime import datetime

import fastf1 as ff1
import pandas as pd

# Show every column when a wide DataFrame is displayed (None removes the limit).
pd.options.display.max_columns = None

### Explore Schedule

In [None]:
# Fetch the 1994 event schedule and list the round numbers it contains.
schedule = ff1.get_event_schedule(1994)
schedule["RoundNumber"].tolist()

### Explore Session Object

In [None]:
# Qualifying session of round 1 of the 2020 season.
session_quali = ff1.get_session(2020, 1, "Q")
session_quali.load()  # must be called before session data is accessed

# Display the event this session belongs to.
session_quali.event

In [31]:
# Race session of round 1 of the 2020 season.
# NOTE(review): the variable is still called `session_quali` although it now
# holds the race ("R") session; the name is kept so other cells keep working.
session_quali = ff1.get_session(2020, 1, "R")
session_quali.load()

# Display the per-driver result table of the session.
session_quali.results

core           INFO 	Loading data for Austrian Grand Prix - Race [v3.1.6]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['77', '16', '4', '44', '55', '11', '10', '31', '99', '5', '6', '26', '23', '7', '63', '8', '20', '18', '3', '33']


Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,HeadshotUrl,CountryCode,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points
77,77,V BOTTAS,BOT,bottas,Mercedes,00D2BE,mercedes,Valtteri,Bottas,Valtteri Bottas,https://www.formula1.com/content/dam/fom-websi...,,1.0,1,1.0,NaT,NaT,NaT,0 days 01:30:55.739000,Finished,25.0
16,16,C LECLERC,LEC,leclerc,Ferrari,DC0000,ferrari,Charles,Leclerc,Charles Leclerc,https://www.formula1.com/content/dam/fom-websi...,,2.0,2,7.0,NaT,NaT,NaT,0 days 00:00:02.700000,Finished,18.0
4,4,L NORRIS,NOR,norris,McLaren,FF8700,mclaren,Lando,Norris,Lando Norris,https://www.formula1.com/content/dam/fom-websi...,,3.0,3,3.0,NaT,NaT,NaT,0 days 00:00:05.491000,Finished,16.0
44,44,L HAMILTON,HAM,hamilton,Mercedes,00D2BE,mercedes,Lewis,Hamilton,Lewis Hamilton,https://www.formula1.com/content/dam/fom-websi...,,4.0,4,5.0,NaT,NaT,NaT,0 days 00:00:05.689000,Finished,12.0
55,55,C SAINZ,SAI,sainz,McLaren,FF8700,mclaren,Carlos,Sainz,Carlos Sainz,https://www.formula1.com/content/dam/fom-websi...,,5.0,5,8.0,NaT,NaT,NaT,0 days 00:00:08.903000,Finished,10.0
11,11,S PEREZ,PER,perez,Racing Point,F596C8,racing_point,Sergio,Perez,Sergio Perez,https://www.formula1.com/content/dam/fom-websi...,,6.0,6,6.0,NaT,NaT,NaT,0 days 00:00:15.092000,Finished,8.0
10,10,P GASLY,GAS,gasly,AlphaTauri,ffffff,alphatauri,Pierre,Gasly,Pierre Gasly,https://www.formula1.com/content/dam/fom-websi...,,7.0,7,12.0,NaT,NaT,NaT,0 days 00:00:16.682000,Finished,6.0
31,31,E OCON,OCO,ocon,Renault,FFF500,renault,Esteban,Ocon,Esteban Ocon,https://www.formula1.com/content/dam/fom-websi...,,8.0,8,14.0,NaT,NaT,NaT,0 days 00:00:17.456000,Finished,4.0
99,99,A GIOVINAZZI,GIO,giovinazzi,Alfa Romeo Racing,9B0000,alfa,Antonio,Giovinazzi,Antonio Giovinazzi,https://www.formula1.com/content/dam/fom-websi...,,9.0,9,18.0,NaT,NaT,NaT,0 days 00:00:21.146000,Finished,2.0
5,5,S VETTEL,VET,vettel,Ferrari,DC0000,ferrari,Sebastian,Vettel,Sebastian Vettel,https://www.formula1.com/content/dam/fom-websi...,,10.0,10,11.0,NaT,NaT,NaT,0 days 00:00:24.545000,Finished,1.0


### Explore Laps

In [None]:
# Laps of the session loaded in the "Explore Session Object" section.
# The original cell read `session_number`, a name that is never defined in this
# notebook, so it raised NameError on a fresh kernel; `session_quali` is the
# session object the preceding cells actually create and load.
lap_data = session_quali.laps

# A bare last expression gives the rich table display instead of plain text.
lap_data

In [None]:
# Fastest lap set by either McLaren driver.
# Uses the plural `pick_teams` selector, matching the `pick_drivers` call used
# elsewhere in this notebook (the singular `pick_team` is the deprecated form).
lap_data.pick_teams("McLaren").pick_fastest()

In [None]:
# Keep only the laps of the listed driver abbreviations.
# NOTE(review): whether both abbreviations exist depends on which session
# `lap_data` was taken from - confirm against the loaded session.
drivers_of_interest = ["PIA", "NOR"]
lap_data.pick_drivers(drivers_of_interest)

### Compare DriverId Field to Driver Names

In [None]:
# Collect every distinct (DriverId, LastName, FirstName) combination that
# appears in the qualifying and race results of the selected seasons.
#
# Each session's rows are gathered in a list and concatenated once at the end:
# `DataFrame.append` no longer exists in pandas 2.x, and because the resulting
# AttributeError was caught by the broad `except` below, the old code retried
# every round and never collected anything.
list_years = list(range(1994, 1995 + 1))

max_retries = 5
name_columns = ["DriverId", "LastName", "FirstName"]

collected_frames = []

for year in list_years:
    print(f"Pulling data for {year}")

    schedule = ff1.get_event_schedule(year)
    rounds_all = schedule.RoundNumber.to_list()
    # Only rounds numbered above 0 are pulled.
    rounds_races = [round_number for round_number in rounds_all if round_number > 0]

    # `round_number` instead of `round` so the builtin is not shadowed.
    for round_number in rounds_races:
        for attempt in range(max_retries):
            try:
                now_utc = datetime.now(pytz.timezone("UTC"))
                now_cst = now_utc.astimezone(pytz.timezone("America/Chicago"))

                print(f"Pulling data for round {round_number} at {now_cst}...")

                session_quali = ff1.get_session(year, round_number, "Q")
                session_quali.load()
                df_quali = session_quali.results[name_columns]
                # Stored immediately so a later failure of the race request
                # does not discard the qualifying names; duplicates caused by
                # a retry are removed after the loop.
                if not df_quali.empty:
                    collected_frames.append(df_quali)
                time.sleep(5)  # pause between requests

                session_race = ff1.get_session(year, round_number, "R")
                session_race.load()
                df_race = session_race.results[name_columns]
                if not df_race.empty:
                    collected_frames.append(df_race)
                time.sleep(5)

                # If no exception was raised, stop retrying this round
                break
            except Exception as e:
                if attempt == max_retries - 1:
                    # Report the skipped round instead of failing silently.
                    print(
                        f"Could not load data for round {round_number} due to {e}. "
                        f"Giving up after {max_retries} attempts."
                    )
                    break

                # Exponential backoff with a random jitter of up to one second.
                wait_time = (2**attempt) + random.random()
                print(
                    f"Could not load data for round {round_number} due to {e}. Retrying in {wait_time} seconds."
                )
                time.sleep(wait_time)

if collected_frames:
    df_all = (
        pd.concat(collected_frames, ignore_index=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )
else:
    # Nothing could be loaded: keep the expected columns so the sort below works.
    df_all = pd.DataFrame(columns=name_columns)

df_unique = df_all.sort_values(by="DriverId")

### Develop Approach for Quali Results Object

In [27]:
# Reshape the qualifying results of round 1, 2023 into one row per driver and
# qualifying segment (Q1/Q2/Q3), with a position ranked within each segment.
session_quali = ff1.get_session(2023, 1, "Q")
session_quali.load()

segment_columns = ["Q1", "Q2", "Q3"]
driver_columns = ["DriverId", "LastName", "FirstName", "TeamName", "Position"]

df_raw = session_quali.results[segment_columns + driver_columns]

# Wide -> long: one row per (driver, segment), then lower-case every column name.
df_processed = df_raw.melt(
    id_vars=driver_columns,
    value_vars=segment_columns,
    var_name="session",
    value_name="time",
).rename(columns=str.lower)

# Rank the times inside each segment (ties share the lowest rank). Rows whose
# time could not be ranked keep the position reported in the results table.
rank_in_segment = df_processed.groupby("session")["time"].rank(
    method="min", ascending=True
)
df_processed["position"] = rank_in_segment.fillna(df_processed["position"])

df_processed = df_processed.rename(
    columns={
        "driverid": "id_driver",
        "lastname": "name_driver_last",
        "firstname": "name_driver_first",
        "teamname": "name_team",
    }
)

output_columns = [
    "id_driver",
    "name_driver_last",
    "name_driver_first",
    "name_team",
    "session",
    "position",
    "time",
]
df_final = df_processed[output_columns]

df_final

core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.1.6]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '55', '14', '63', '44', '18', '31', '27', '4', '77', '24', '22', '23', '2', '20', '81', '21', '10']


Unnamed: 0,id_driver,name_driver_last,name_driver_first,name_team,session,position,time
0,max_verstappen,Verstappen,Max,Red Bull Racing,Q1,7.0,0 days 00:01:31.295000
1,perez,Perez,Sergio,Red Bull Racing,Q1,10.0,0 days 00:01:31.479000
2,leclerc,Leclerc,Charles,Ferrari,Q1,3.0,0 days 00:01:31.094000
3,sainz,Sainz,Carlos,Ferrari,Q1,1.0,0 days 00:01:30.993000
4,alonso,Alonso,Fernando,Aston Martin,Q1,4.0,0 days 00:01:31.158000
5,russell,Russell,George,Mercedes,Q1,2.0,0 days 00:01:31.057000
6,hamilton,Hamilton,Lewis,Mercedes,Q1,13.0,0 days 00:01:31.543000
7,stroll,Stroll,Lance,Aston Martin,Q1,5.0,0 days 00:01:31.184000
8,ocon,Ocon,Esteban,Alpine,Q1,12.0,0 days 00:01:31.508000
9,hulkenberg,Hulkenberg,Nico,Haas F1 Team,Q1,6.0,0 days 00:01:31.204000


### Develop Approach for Race Results Object

In [39]:
# Build the race result table of round 1, 2023 in the same column layout as the
# qualifying table.
# NOTE(review): the race session is stored in `session_quali`; the name is kept
# because the following cell reads `session_quali.event`.
session_quali = ff1.get_session(2023, 1, "R")
session_quali.load()

race_columns = [
    "DriverId",
    "LastName",
    "FirstName",
    "TeamName",
    "ClassifiedPosition",
    "Time",
]
df_raw = session_quali.results[race_columns]

# Work on a copy with a fresh 0..n-1 index so row 0 can be addressed by label.
df_processed = df_raw.copy().reset_index(drop=True)
df_processed["session"] = "Race"

df_processed = df_processed.rename(columns=str.lower).rename(
    columns={
        "classifiedposition": "position",
        "driverid": "id_driver",
        "lastname": "name_driver_last",
        "firstname": "name_driver_first",
        "teamname": "name_team",
    }
)

# Row 0 holds a full race time while the later rows hold gaps to it (see the
# race results output earlier in the notebook), so the first row's time is
# added to every later row to obtain absolute times.
time_first_row = df_processed.loc[0, "time"]
df_processed.loc[1:, "time"] = df_processed.loc[1:, "time"] + time_first_row

output_columns = [
    "id_driver",
    "name_driver_last",
    "name_driver_first",
    "name_team",
    "session",
    "position",
    "time",
]
df_final = df_processed[output_columns]

df_final

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.1.6]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']


Unnamed: 0,id_driver,name_driver_last,name_driver_first,name_team,session,position,time
0,max_verstappen,Verstappen,Max,Red Bull Racing,Race,1,0 days 01:33:56.736000
1,perez,Perez,Sergio,Red Bull Racing,Race,2,0 days 01:34:08.723000
2,alonso,Alonso,Fernando,Aston Martin,Race,3,0 days 01:34:35.373000
3,sainz,Sainz,Carlos,Ferrari,Race,4,0 days 01:34:44.788000
4,hamilton,Hamilton,Lewis,Mercedes,Race,5,0 days 01:34:47.713000
5,stroll,Stroll,Lance,Aston Martin,Race,6,0 days 01:34:51.238000
6,russell,Russell,George,Mercedes,Race,7,0 days 01:34:52.609000
7,bottas,Bottas,Valtteri,Alfa Romeo,Race,8,0 days 01:35:09.383000
8,gasly,Gasly,Pierre,Alpine,Race,9,0 days 01:35:10.489000
9,albon,Albon,Alexander,Williams,Race,10,0 days 01:35:26.510000


In [21]:
# Turn the event metadata of the currently loaded session into a one-row table.
# NOTE(review): `session_quali` is whichever session an earlier cell loaded
# last - confirm the intended session before relying on this output.
event_fields = ["RoundNumber", "Location", "EventDate", "Country"]
df_raw = pd.DataFrame(session_quali.event[event_fields])

# The selected fields arrive as rows of a single column; transpose them into
# columns of a single row and lower-case the resulting column names.
df_processed = df_raw.T.reset_index(drop=True).rename(columns=str.lower)
df_processed = df_processed.rename(
    columns={
        "roundnumber": "round",
        "location": "circuit_name",
        "country": "circuit_country",
    }
)

# Derive the season year from the event date.
df_processed["year"] = df_processed["eventdate"].dt.year

output_columns = ["year", "round", "circuit_name", "circuit_country"]
df_final = df_processed[output_columns]

df_final

Unnamed: 0,year,round,circuit_name,circuit_country
0,2023,1,Sakhir,Bahrain
