In [167]:
import pandas as pd
import numpy as np
from functools import lru_cache
import ssl
import certifi

In [168]:
# Set the CA bundle file
ssl._create_default_https_context = ssl._create_unverified_context
ssl._create_default_https_context().load_verify_locations(certifi.where())

@lru_cache(maxsize=None)  
def fetch_data_from_source(file):
    '''
    Files:
        f1sim-data-2022.parquet
        f1sim-data-2023.parquet
        
        f1sim-data-dictionary.xlsx
        
        f1sim-ref-left.csv
        f1sim-ref-line.csv
        f1sim-ref-right.csv
        f1sim-ref-turns.csv
    '''
    if file.split(".")[1] == "parquet":
        df = pd.read_parquet(f"https://data3001-racing.s3.ap-southeast-2.amazonaws.com/{file}")
    elif file.split(".")[1] == "csv":
        df = pd.read_csv(f"https://data3001-racing.s3.ap-southeast-2.amazonaws.com/{file}")
    elif file.split(".")[1] == "xlsx":
        df = pd.read_excel(f"https://data3001-racing.s3.ap-southeast-2.amazonaws.com/{file}") 
    return df

In [169]:
# Load data files
f1_2022_df = fetch_data_from_source('f1sim-data-2022.parquet')
f1_2023_df = fetch_data_from_source("f1sim-data-2023.parquet")
ref_left = fetch_data_from_source("f1sim-ref-left.csv")
ref_line = fetch_data_from_source("f1sim-ref-line.csv")
ref_right = fetch_data_from_source("f1sim-ref-right.csv")
ref_turns = fetch_data_from_source("f1sim-ref-turns.csv")

In [170]:
sessions = f1_2022_df['SESSION_IDENTIFIER'].tolist()
sessions = list(set(sessions))
session = f1_2022_df[f1_2022_df['SESSION_IDENTIFIER'] == sessions[0]]
columns_to_print = ['SESSION_IDENTIFIER', 'FRAME', 'LAP_NUM', 'SECTOR', 'LAP_DISTANCE', 'CURRENT_LAP_TIME_MS', 'LAP_TIME_MS', 'SPEED_KPH']
session = session[columns_to_print]

session

Unnamed: 0,SESSION_IDENTIFIER,FRAME,LAP_NUM,SECTOR,LAP_DISTANCE,CURRENT_LAP_TIME_MS,LAP_TIME_MS,SPEED_KPH
376558,1.924204e+18,1012,1,0,27.757099,333,170118,295
376559,1.924204e+18,1017,1,0,49.735920,600,170118,297
376560,1.924204e+18,1027,1,0,91.468849,1101,170118,301
376561,1.924204e+18,1042,1,0,133.682373,1601,170118,305
376562,1.924204e+18,1096,1,0,310.165649,3812,170118,270
...,...,...,...,...,...,...,...,...
377516,1.924204e+18,19635,6,2,4841.431641,114545,122310,98
377517,1.924204e+18,19652,6,2,4858.050781,115146,122310,106
377518,1.924204e+18,19697,6,2,4936.539062,117198,122310,177
377519,1.924204e+18,19718,6,2,4990.382812,118199,122310,208


In [176]:
for col in f1_2022_df.columns:
    print(col)
    

SESSION_IDENTIFIER
FRAME
LAP_NUM
SECTOR
LAP_DISTANCE
CURRENT_LAP_TIME_MS
SECTOR_1_TIME_MS
SECTOR_2_TIME_MS
LAP_TIME_MS
SECTOR_3_MS
SPEED_KPH
ACCELERATION
THROTTLE
BRAKE
GEAR
ENGINE_RPM
STEERING
YAW
PITCH
ROLL
WORLDPOSX
WORLDPOSY
WORLDPOSZ
WORLDFORWARDDIRX
WORLDFORWARDDIRY
WORLDFORWARDDIRZ
WORLDRIGHTDIRX
WORLDRIGHTDIRY
WORLDRIGHTDIRZ


In [177]:
def calc_acceleration(data):
    # time in milliseconds
    times = []

    # speed in km per hour

    # acceleration is in meters per second^2
    accelerations = []

    for count, row in data.iterrows():
        sessionid1 = row['SESSION_IDENTIFIER']
        time1 = row['CURRENT_LAP_TIME_MS']
        speed1 = row['SPEED_KPH']
        try:
            sessionid2 = f1_2022_df['SESSION_IDENTIFIER'].iloc[count-1]
            time2 = f1_2022_df['CURRENT_LAP_TIME_MS'].iloc[count-1]
            speed2 = f1_2022_df['SPEED_KPH'].iloc[count-1]
            
            if sessionid1 == sessionid2:
                # convert speed from km/h to m/s
                diff_speed = (speed1 - speed2) / 3.6
                
                # convert time to seconds
                time = (time1 - time2) / 1000 
                times.append(time)
                
                if time > 0:
                    acceleration = diff_speed / time 
                else:
                    acceleration = np.nan
                
                accelerations.append(acceleration)
            else:
                times.append(time1)
                
                if time1 > 0:
                    accelerations.append( (speed1/ 3.6) / (time1 / 1000) )
                else:
                    accelerations.append(0)
                
        except KeyError:
            times.append(np.nan)
            accelerations.append(np.nan)
    
    if 'ACCELERATION' in data.columns:
        data.drop('ACCELERATION', axis = 1, inplace = True)
    
    index = data.columns.get_loc('SPEED_KPH')
    return data.insert(index + 1, 'ACCELERATION', accelerations)

In [178]:
calc_acceleration(f1_2022_df)

In [None]:
f1_2022_df['ACCELERATION'].describe

<bound method NDFrame.describe of 0         30.766526
1         -1.040366
2        -26.666667
3        -33.238367
4        -10.986816
            ...    
721248     4.208754
721249     5.446623
721250     5.555556
721251     4.208754
721252     5.555556
Name: ACCELERATION, Length: 721253, dtype: float64>