In [None]:
import os

import fastf1
import fastf1.api
import pandas as pd
from dfply import *
#Kept after the star import so names exported by dfply cannot shadow datetime/timedelta
from datetime import datetime
from datetime import timedelta

#FastF1 needs an existing folder to store cached data, and git does not track empty folders,
#so the folder is created here instead of relying on a manual step after cloning
os.makedirs('FastF1Cache', exist_ok = True)
fastf1.Cache.enable_cache('FastF1Cache')

In [None]:
def _isMissing(value):
    """
    Input: a single cell value taken from a scraped dataframe
    Returns: True when the cell holds no value (None, NaN/NaT, or an empty string)
    """
    if value is None:
        return True
    if isinstance(value, str):
        return value == ""
    #A plain truthiness test would not catch NaN/NaT, so ask pandas explicitly
    return bool(pd.isna(value))


def _cleanTimingApp(timingApp):
    """
    Input: timing app dataframe as returned by fastf1.api.timing_app_data
    Returns: dataframe with Stint, Driver, TotalLaps, Compound, New and Time (rounded to the minute),
             with tire compound carried forward within each driver and doubled driver-time rows removed
    """
    #Round session time to nearest minute (for merging purposes) and order by driver, then time
    timingApp >>= mutate(Time = X.Time.round('60s')) >> arrange(X.Driver, X.Time)
    timingApp = timingApp.reset_index()

    #In timing app dataframe, tire compound is only noted when new set is put on
    #Since dataframe is organized by driver and then time, we can replace empty entries with the previous tire compound
    #Also, dataframe sometimes doubles on driver-time combo, so the unnecessary rows are marked for removal
    rowCount = len(timingApp)
    keepRow = [True] * rowCount
    for index in range(rowCount):
        #The previous row is only a valid reference when it exists and belongs to the same driver;
        #this avoids looking up a row before the first one and borrowing data from another driver
        sameDriverAsPrevious = index > 0 and \
            timingApp.at[index, "Driver"] == timingApp.at[index-1, "Driver"]

        if sameDriverAsPrevious and _isMissing(timingApp.at[index, "Compound"]):
            timingApp.at[index, "Compound"] = timingApp.at[index-1, "Compound"]
        if _isMissing(timingApp.at[index, "New"]):
            timingApp.at[index, "New"] = False

        if not sameDriverAsPrevious:
            continue
        if timingApp.at[index, "Time"] == timingApp.at[index-1, "Time"]:
            #Of two rows doubling on driver-time, drop this one if it has no lap time,
            #otherwise drop the previous one
            if _isMissing(timingApp.at[index, "LapTime"]):
                keepRow[index] = False
            else:
                keepRow[index-1] = False

    #Rows without a driver cannot be merged on driver, so they are removed as well
    keepMask = pd.Series(keepRow, index = timingApp.index) & timingApp.Driver.notnull()
    timingApp = timingApp[keepMask]

    #Selecting relevant columns
    timingApp >>= select(X.Stint, X.Driver, X.TotalLaps, X.Compound, X.New, X.Time)
    return timingApp


def _scrapeSessionLaps(sessionPath):
    """
    Input: session path built with fastf1.api.make_path
    Returns: (timing, compiledTiming) where timing holds every timed lap with session time rounded to the minute,
             and compiledTiming is timing merged with tire data (on Time and Driver) and weather data (on Time)
    """
    #Scraping timing data, timing app data (for info on tire compound), and weather data
    timing = fastf1.api.timing_data(sessionPath)[0]
    timingApp = fastf1.api.timing_app_data(sessionPath)
    weatherData = pd.DataFrame.from_dict(fastf1.api.weather_data(sessionPath))

    #Remove entries with no laptimes
    timing = timing[timing.LapTime.notnull()]
    timingApp = _cleanTimingApp(timingApp)

    #Round session time to nearest minute (for merging purposes) and drop columns that are not needed
    timing = timing >> mutate(Time = X.Time.round('60s')) >> arrange(X.Driver, X.Time) >> \
              drop(contains("Session"), X.PitOutTime, X.PitInTime)

    #Merge dataframes, keyed by time and driver, then attach the weather reading for the same minute
    compiledTiming = pd.merge(timing, timingApp, on=['Time', 'Driver'])
    weatherData >>= mutate(Time = X.Time.round('60s'))
    compiledTiming = pd.merge(compiledTiming, weatherData, on=['Time'])
    return timing, compiledTiming


def QualiScrape(grandPrix, raceDate, qualiDate):
    """
    Input: grandPrix, Grand Prix's Race Date, Date of Qualifying
    Returns: data frame with each driver matched to best qualifying time, along with tire and weather conditions
             (every column except Driver carries a 'Qualifying' prefix)
    """
    #Build path to scrape data from
    Quali = fastf1.api.make_path(grandPrix, raceDate, 'Qualifying', qualiDate)

    QualiTiming, QualiCompiledTiming = _scrapeSessionLaps(Quali)
    QualiCompiledTiming >>= arrange(X.Driver, X.Time)
    QualiCompiledTiming = QualiCompiledTiming.reset_index()

    #Creating List of all drivers, then finding the best lap time for them
    driverList = QualiCompiledTiming.Driver.unique()
    QualiDF = pd.DataFrame({
        'Driver': driverList,
        'LapTime': [QualiTiming[QualiTiming.Driver == drv].LapTime.min() for drv in driverList],
    })

    #Merge weather and tire data with appropriate laptime data
    #Note: this is an inner merge, so a driver whose best lap has no row in the compiled data is left out
    QualiDF = pd.merge(QualiDF, QualiCompiledTiming, on = ["LapTime", "Driver"])

    #Removing variables regarded as unnecessary, adding Qualifying prefix to Data so merging stays clean
    #Note: Qualifying and Practice are merged by driver, so QualifyingDriver is renamed back to driver
    QualiDF >>= drop(X.Time, X.NumberOfLaps, X.NumberOfPitStops, X.IsPersonalBest, X.Stint, X.TotalLaps, X.New, X.WindDirection)
    QualiDF = QualiDF.add_prefix('Qualifying') >> rename(Driver = X.QualifyingDriver)
    QualiDF >>= drop(X.Qualifyingindex)

    return QualiDF

def practiceScrape(grandPrix, raceDate, pracNum, pracDate):
    """
    Input: Grand Prix, Grand Prix Date, Which Practice, Which Date
    Output: Dataframe with each driver's timed lap matched with tire compound and weather conditions,
            plus a Practice column holding pracNum
    """
    #Make Path to scrape practice data
    FP = fastf1.api.make_path(grandPrix, raceDate, f'Practice {pracNum}', pracDate)

    #Only the merged table is needed for practice sessions
    _, FPCompiledTiming = _scrapeSessionLaps(FP)

    #Tag each lap with its practice session and drop irrelevant columns
    FPCompiledTiming >>= arrange(X.Driver, X.Time) >> mutate(Practice = pracNum) >> drop(X.WindDirection, X.Stint, X.TotalLaps)
    FPCompiledTiming = FPCompiledTiming.reset_index()
    return FPCompiledTiming

In [None]:
def weekendCompiler(grandPrix, raceDate, sprint = False):
    """
    Input: Grand Prix name, its race date as a "YYYY-MM-DD" string, and whether it was a sprint weekend
    Output: Dataframe with all timed practice laps, each merged (by driver) with info about
            that driver's best qualifying lap, plus a Weekend column holding the Grand Prix name
    """
    #Work out the calendar days of the weekend, all as "YYYY-MM-DD" strings
    raceDay = datetime.strptime(raceDate, "%Y-%m-%d").date()
    friDate = (raceDay - timedelta(days = 2)).isoformat()
    satDate = (raceDay - timedelta(days = 1)).isoformat()
    raceDate = raceDay.isoformat()

    #Session layout: on sprint weekends only FP1 runs before qualifying, and qualifying moves to Friday;
    #on regular weekends FP1/FP2 run on Friday, FP3 and qualifying on Saturday
    if sprint:
        practiceSessions = [(1, friDate)]
        qualiDate = friDate
    else:
        practiceSessions = [(1, friDate), (2, friDate), (3, satDate)]
        qualiDate = satDate

    #Practice sessions are collected first (in order), then qualifying
    practiceFrames = [practiceScrape(grandPrix, raceDate, pracNum, pracDate)
                      for pracNum, pracDate in practiceSessions]
    QualiDF = QualiScrape(grandPrix, raceDate, qualiDate)

    #Stack the practice dataframes; a sprint weekend has a single one, which is used as is
    if sprint:
        practiceCompiled = practiceFrames[0]
    else:
        practiceCompiled = pd.concat(practiceFrames)

    #Renumber the rows; the old row labels land in a level_0 column, which is not needed
    practiceCompiled = practiceCompiled.reset_index()
    practiceCompiled >>= drop(X.level_0)

    #Attach the qualifying info to every practice lap of the same driver and label the weekend
    practiceCompiled = pd.merge(practiceCompiled, QualiDF, on=['Driver']) >> mutate(Weekend = grandPrix)

    return practiceCompiled

In [None]:
#Race calendar keyed by round number: [Grand Prix name, race date]
#NOTE(review): the 2022 season finale (Abu Dhabi) does not appear here - confirm whether that is intentional
raceCalendar = {
    1: ["Bahrain Grand Prix", "2022-03-20"],
    2: ["Saudi Arabian Grand Prix", "2022-03-27"],
    3: ["Australian Grand Prix", "2022-04-10"],
    4: ["Emilia Romagna Grand Prix", "2022-04-24"],
    5: ["Miami Grand Prix", "2022-05-08"],
    6: ["Spanish Grand Prix", "2022-05-22"],
    7: ["Monaco Grand Prix", "2022-05-29"],
    8: ["Azerbaijan Grand Prix", "2022-06-12"],
    9: ["Canadian Grand Prix", "2022-06-19"],
    10: ["British Grand Prix", "2022-07-03"],
    11: ["Austrian Grand Prix", "2022-07-10"],
    12: ["French Grand Prix", "2022-07-24"],
    13: ["Hungarian Grand Prix", "2022-07-31"],
    14: ["Belgian Grand Prix", "2022-08-28"],
    15: ["Dutch Grand Prix", "2022-09-04"],
    16: ["Italian Grand Prix", "2022-09-11"],
    17: ["Singapore Grand Prix", "2022-10-02"],
    18: ["Japanese Grand Prix", "2022-10-09"],
    19: ["United States Grand Prix", "2022-10-23"],
    20: ["Mexico City Grand Prix", "2022-10-30"],
    21: ["São Paulo Grand Prix", "2022-11-13"],
}

#Imola, Austria, and Brazil are sprint weekends, so they are compiled with sprint = True
sprintRounds = (4, 11, 21)

#Compile every weekend in calendar order
dfList = []
for key, (grandPrix, raceDate) in raceCalendar.items():
    print(f"Collecting Data for {grandPrix}")
    dfList.append(weekendCompiler(grandPrix, raceDate, sprint = key in sprintRounds))

#Stack all weekends into one table; the rounded session time is not needed any further
masterList = pd.concat(dfList)
masterList >>= drop(X.Time)

In [None]:
#Write the combined table to disk, keeping the dataframe index as the first CSV column
masterList.to_csv(path_or_buf = "masterList.csv", index = True)