In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import lru_cache

In [3]:
@lru_cache(maxsize=None)
def fetch_data_from_source(parquet_file):
    """Download a parquet file from the data3001-racing S3 bucket.

    Parameters
    ----------
    parquet_file : str
        Object name inside the bucket, e.g. 'f1sim-data-2022.parquet'.

    Returns
    -------
    pandas.DataFrame
        The parsed parquet file.

    Notes
    -----
    Results are memoised per file name with an unbounded ``lru_cache``, so
    repeated calls with the same name return the *same* DataFrame object.
    Callers should not modify the returned frame in place.
    """
    url = f"https://data3001-racing.s3.ap-southeast-2.amazonaws.com/{parquet_file}"
    return pd.read_parquet(url)

def fetch_data_csv(parquet_file):
    """Download a CSV file from the data3001-racing S3 bucket.

    Parameters
    ----------
    parquet_file : str
        Object name inside the bucket. Despite the parameter name, the file
        is read with ``pd.read_csv`` and is expected to be a CSV.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV file. Unlike ``fetch_data_from_source`` this function
        is not memoised, so every call downloads the file again.
    """
    url = f"https://data3001-racing.s3.ap-southeast-2.amazonaws.com/{parquet_file}"
    return pd.read_csv(url)

In [9]:
# Inputs: N/A
# Outputs: df with both datasets concatenated, rows containing any NaN dropped, sorted by "SESSION_IDENTIFIER" then "FRAME" (index reset)
# NOTE: 'SECTOR' has been changed from {0,1,2} to {1,2,3}

def fetch_and_merge():
    """Load the 2022 and 2023 datasets and combine them into one frame.

    Returns
    -------
    pandas.DataFrame
        Both datasets stacked, with every row containing a missing value
        removed, ordered by "SESSION_IDENTIFIER" then "FRAME", re-indexed
        from 0, and with "SECTOR" shifted up by one.
    """
    yearly_frames = [
        fetch_data_from_source('f1sim-data-2022.parquet'),
        fetch_data_from_source('f1sim-data-2023.parquet'),
    ]
    # pd.concat builds a new frame, so the cached source frames are untouched
    # by the steps that follow.
    return (
        pd.concat(yearly_frames, ignore_index=True)
        .dropna()
        .sort_values(by=["SESSION_IDENTIFIER", "FRAME"])
        .reset_index(drop=True)
        .assign(SECTOR=lambda merged: merged["SECTOR"] + 1)
    )

In [5]:
# Inputs: Sector {1,2,3}
# Outputs: df of records with data from that sector

def split_sector(s):
    """Return the merged telemetry restricted to a single sector.

    Parameters
    ----------
    s : int
        Sector value to keep, compared for equality against the "SECTOR"
        column produced by ``fetch_and_merge``.

    Returns
    -------
    pandas.DataFrame
        Rows of the merged data whose "SECTOR" equals ``s`` (empty if none).
    """
    merged = fetch_and_merge()
    in_sector = merged["SECTOR"] == s
    return merged[in_sector]

In [6]:
# Inputs: df, time
# Outputs: df with records where lap time is under time given
def lap_under(df, t):
    """Keep only the records whose lap time is strictly below a threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a "LAP_TIME_MS" column.
    t : number
        Exclusive upper bound, compared against "LAP_TIME_MS".

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` with ``LAP_TIME_MS < t``; ``df`` itself is not
        modified and the original index labels are kept.
    """
    is_fast_enough = df["LAP_TIME_MS"] < t
    return df[is_fast_enough]

In [17]:
# Select the sector-1 records, then keep only those whose LAP_TIME_MS is below 86000.
# `df` is rebound twice here; re-running this cell reloads and re-filters from scratch.
df = split_sector(1)
df = lap_under(df, 86000)

In [18]:
# Display the filtered sector-1 frame (the rendered output below is truncated by pandas).
df

Unnamed: 0,SESSION_IDENTIFIER,FRAME,LAP_NUM,SECTOR,LAP_DISTANCE,CURRENT_LAP_TIME_MS,SECTOR_1_TIME_MS,SECTOR_2_TIME_MS,LAP_TIME_MS,SECTOR_3_MS,...,ROLL,WORLDPOSX,WORLDPOSY,WORLDPOSZ,WORLDFORWARDDIRX,WORLDFORWARDDIRY,WORLDFORWARDDIRZ,WORLDRIGHTDIRX,WORLDRIGHTDIRY,WORLDRIGHTDIRZ
1580,5.892042e+16,2666,2,1,1.326660,0,28680,18786,84470.0,37004.0,...,-0.025334,112.468544,465.200775,2.921500,-23593.0,-130.0,-22738.0,22728.0,830.0,-23587.0
1581,5.892042e+16,2667,2,1,5.485840,50,28680,18786,84470.0,37004.0,...,-0.025585,115.461723,462.311951,2.898344,-23672.0,-193.0,-22654.0,22644.0,838.0,-23668.0
1582,5.892042e+16,2668,2,1,9.651855,100,28680,18786,84470.0,37004.0,...,-0.024558,118.469940,459.429138,2.866479,-23763.0,-203.0,-22559.0,22549.0,804.0,-23759.0
1583,5.892042e+16,2670,2,1,15.217285,166,28680,18786,84470.0,37004.0,...,-0.024841,122.504723,455.593292,2.832547,-23858.0,-176.0,-22459.0,22449.0,813.0,-23854.0
1584,5.892042e+16,2671,2,1,19.392578,216,28680,18786,84470.0,37004.0,...,-0.025154,125.541367,452.726166,2.811142,-23904.0,-148.0,-22410.0,22401.0,824.0,-23899.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1825706,1.796401e+19,7467,4,1,1472.234375,24599,28647,18649,83229.0,35933.0,...,-0.029635,597.250793,-654.393982,3.555425,13834.0,2.0,-29703.0,29690.0,970.0,13828.0
1825707,1.796401e+19,7477,4,1,1506.816406,25100,28647,18649,83229.0,35933.0,...,0.021930,581.917175,-682.617188,3.757118,20251.0,360.0,-25756.0,25757.0,-718.0,20241.0
1825708,1.796401e+19,7484,4,1,1520.402344,25317,28647,18649,83229.0,35933.0,...,0.021148,573.107361,-693.491089,3.907554,23176.0,250.0,-23161.0,23160.0,-692.0,23168.0
1825709,1.796401e+19,7519,4,1,1626.878906,26868,28647,18649,83229.0,35933.0,...,0.012057,485.888641,-754.579956,3.612338,28981.0,-102.0,-15288.0,15285.0,-395.0,28980.0


In [None]:
# Inputs: sector {1,2,3}, time (in ms)
# Outputs: df where the time set in the sector is less than the time given

# Column holding the time set in each sector. Note that the sector-3 column
# is named 'SECTOR_3_MS' (no "TIME"), unlike the columns for sectors 1 and 2.
_SECTOR_TIME_COLUMNS = {
    1: 'SECTOR_1_TIME_MS',
    2: 'SECTOR_2_TIME_MS',
    3: 'SECTOR_3_MS',
}


def sector_under(s, t):
    """Return the records of a sector whose sector time is below a threshold.

    Parameters
    ----------
    s : int
        Sector number; must be 1, 2 or 3.
    t : number
        Exclusive upper bound, compared against that sector's time column.

    Returns
    -------
    pandas.DataFrame
        Rows from ``split_sector(s)`` where the sector's time column is
        strictly less than ``t``.

    Raises
    ------
    ValueError
        If ``s`` is not 1, 2 or 3. The check runs before any data is loaded,
        so an invalid sector fails immediately instead of after the download
        and merge.
    """
    try:
        time_column = _SECTOR_TIME_COLUMNS[s]
    except (KeyError, TypeError):
        # TypeError covers unhashable values such as lists.
        raise ValueError(f"sector must be 1, 2 or 3, got {s!r}") from None

    sector_df = split_sector(s)
    return sector_df[sector_df[time_column] < t]

In [None]:
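# Inputs: turn number (1-based; turn 1 is the first row of 'f1sim-ref-turns.csv')
# Outputs: df of records whose WORLDPOSX/WORLDPOSY lie strictly inside that turn's corner bounds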

def split_turns(t):
    """Return the merged telemetry recorded inside a given turn.

    Parameters
    ----------
    t : int
        1-based turn number; turn ``t`` is looked up at row label ``t - 1``
        of 'f1sim-ref-turns.csv'.

    Returns
    -------
    pandas.DataFrame
        Rows of the merged data with ``CORNER_X1 < WORLDPOSX < CORNER_X2``
        and ``CORNER_Y1 < WORLDPOSY < CORNER_Y2`` (all bounds exclusive).

    Raises
    ------
    KeyError
        If the turn number has no matching row in the reference file.

    Notes
    -----
    NOTE(review): the filter assumes CORNER_X1 < CORNER_X2 and
    CORNER_Y1 < CORNER_Y2 for every turn; a row with the bounds reversed
    yields an empty result. Confirm against the reference file.
    """
    # Resolve the turn first: the reference file is small, so an invalid turn
    # fails here rather than after loading and merging the full telemetry.
    turns = fetch_data_csv('f1sim-ref-turns.csv')
    row = t - 1
    if row not in turns.index:
        raise KeyError(f"turn {t} is not in the turns reference file")

    x1 = turns.loc[row, "CORNER_X1"]
    x2 = turns.loc[row, "CORNER_X2"]
    y1 = turns.loc[row, "CORNER_Y1"]
    y2 = turns.loc[row, "CORNER_Y2"]

    df = fetch_and_merge()
    # One combined mask instead of two sequential filters avoids building an
    # intermediate frame; the selected rows are the same.
    inside_turn = (
        (df["WORLDPOSX"] > x1) & (df["WORLDPOSX"] < x2)
        & (df["WORLDPOSY"] > y1) & (df["WORLDPOSY"] < y2)
    )
    return df[inside_turn]