In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Directory that holds the raw telemetry exports
telemetry_data_fp = "./Telemetry Data"
# One telemetry file backs one DataFrame for the time being
df = pd.read_csv(f"{telemetry_data_fp}/T1_Baku_FP2.csv", sep="\t")
# Preview the first 5 rows (5 is the default for head())
df.head()

Unnamed: 0,carId,trackId,trackLength,lapIndex,lapNum,lapFlag,binIndex,validBin,lap_number,lap_distance,...,front_left_wing_damage,front_right_wing_damage,rear_wing_damage,engine_damage,gear_box_damage,ers_store,ers_deployMode,ers_harv_mguk,ers_harv_mguh,ers_deployed
0,Renault,Baku,5994,0,0,0,0,1,0,1.41686,...,0.32,0.0,0,0.66,0.4,3502060.0,2,202249.0,933189.0,1622660.0
1,Renault,Baku,5994,0,0,0,1,1,0,1.41686,...,0.32,0.0,0,0.66,0.4,3502060.0,2,202249.0,933189.0,1622660.0
2,Renault,Baku,5994,0,0,0,2,1,0,2.39192,...,0.32,0.0,0,0.66,0.4,3501810.0,2,161799.0,746551.0,1298190.0
3,Renault,Baku,5994,0,0,0,3,1,0,3.36697,...,0.32,0.0,0,0.66,0.4,3501560.0,2,121349.0,559913.0,973724.0
4,Renault,Baku,5994,0,0,0,4,1,0,4.34202,...,0.32,0.0,0,0.66,0.4,3501310.0,2,80899.6,373276.0,649254.0


In [4]:
# Collect every column name (data category) and print them eight per row
data_categories = df.columns.tolist()
size = len(data_categories)
for start in range(0, size, 8):
    row_of_names = data_categories[start:start + 8]
    print(", ".join(str(name) for name in row_of_names))

carId, trackId, trackLength, lapIndex, lapNum, lapFlag, binIndex, validBin
lap_number, lap_distance, lap_time, lap_time_invalid, world_position_X, world_position_Y, world_position_Z, world_forward_X
world_forward_Y, world_forward_Z, world_right_X, world_right_Y, world_right_Z, velocity_X, velocity_Y, velocity_Z
gforce_X, gforce_Y, gforce_Z, pit_status, race_position, flags_status, throttle, brake
clutch, steering, gear, rpm, rpm_perc, fuel, tyre_compound_0, tyre_compound_1
tyre_compound_2, tyre_compound_3, yaw, pitch, roll, susp_pos_0, susp_pos_1, susp_pos_2
susp_pos_3, susp_vel_0, susp_vel_1, susp_vel_2, susp_vel_3, susp_acc_0, susp_acc_1, susp_acc_2
susp_acc_3, wheel_speed_0, wheel_speed_1, wheel_speed_2, wheel_speed_3, wheel_slip_ratio_0, wheel_slip_ratio_1, wheel_slip_ratio_2
wheel_slip_ratio_3, wheel_slip_angle_0, wheel_slip_angle_1, wheel_slip_angle_2, wheel_slip_angle_3, angular_vel_X, angular_vel_Y, angular_vel_Z
angular_acc_X, angular_acc_Y, angular_acc_Z, weather, track_temp,

Note that this data can include invalid laps (caused by track infractions), so we should exclude them from our calculations.

In [44]:
# Initial Descriptive Analysis

# Velocity
# Mean velocity in m/s
def mps_to_kph(v):
    """Convert a speed from metres per second to kilometres per hour."""
    kph = v * 3.6
    return kph
def kph_to_mps(v):
    """Convert a speed from kilometres per hour to metres per second."""
    mps = v / 3.6
    return mps

# Keep only samples recorded on valid laps. The previous filter (`> -1`) matched
# every row of a 0/1 flag, so laps invalidated by track infractions were never
# actually excluded from the statistics below.
# NOTE(review): assumes the F1 2019 convention (0 = valid, 1 = invalid) - confirm
# against the spec sheet referenced in the README.
valid_df = df[df["lap_time_invalid"] == 0]
if valid_df.empty:
    raise ValueError("No valid-lap samples found; check the 'lap_time_invalid' column")

# Mean of each velocity component, in m/s and formatted in km/h.
mean_velocity_3D = valid_df.aggregate({"velocity_X": "mean", "velocity_Y": "mean", "velocity_Z": "mean"})
mean_velocity_3D_kph = mean_velocity_3D.map(lambda x: f"{round(mps_to_kph(x), ndigits=1)} km/h")

# Build the text outside of a nested multi-line f-string: that form only parses on
# Python >= 3.12, and printing the raw Series also leaked a "dtype: object" footer.
print(f"Mean velocities 3D (mps):\n{mean_velocity_3D.to_string()}")
print(f"\nMean velocities 3D (kph):\n{mean_velocity_3D_kph.to_string()}")

# Peak Velocity (we also want to know where on the lap it was reached).
# This information can later be drawn on a track map to show where the peaks occur.
# Only velocity_X is considered here since we want the forward line speed.
peak_forward_velocity_mps = valid_df.aggregate({"velocity_X": "max"})
peak_forward_velocity = peak_forward_velocity_mps.map(mps_to_kph)

# idxmax returns the label of the first sample at the maximum, so the lookup stays
# a single row even when several samples tie for the peak, and it is taken from
# valid_df itself rather than from a mask built on the unfiltered df.
peak_row_label = valid_df["velocity_X"].idxmax()
peak_forward_velocity_data = valid_df.loc[[peak_row_label]]

# lap_distance // trackLength is always 0 inside a lap, so it cannot identify a
# sector. Approximate instead by splitting the lap into equal-length sectors.
# NOTE(review): real sector boundaries are not equal thirds; if the telemetry
# exposes a sector column, prefer it over this approximation.
NUM_SECTORS = 3
peak_lap_distance = peak_forward_velocity_data["lap_distance"].item()
peak_track_length = peak_forward_velocity_data["trackLength"].item()
raw_sector = int(peak_lap_distance * NUM_SECTORS // peak_track_length)
# Clamp so a distance slightly outside [0, trackLength) still maps to a sector (0-based).
peak_forward_velocity_sector = min(max(raw_sector, 0), NUM_SECTORS - 1)

peak_forward_velocity_world_pos = peak_forward_velocity_data[["world_position_X", "world_position_Y", "world_position_Z"]]
# Keep the original (misspelled) name so any later cell that uses it still works.
peak_forward_velcity_world_pos = peak_forward_velocity_world_pos

print(f"\nPeak forward velocity: {round(peak_forward_velocity.item(), ndigits=1)} km/h")
print(f"Peak forward velocity: {peak_forward_velocity_mps.item()} mps")
print(f"Peak forward velocity sector: {peak_forward_velocity_sector}")
# We can use these world positions later on in the map
print(f"Peak forward velocity world positions: \n{peak_forward_velocity_world_pos.to_string(index=False)}")


Mean velocities 3D (mps):
velocity_X    58.675385
velocity_Y     0.034427
velocity_Z     0.098109 

Mean velocities 3D (kph):
velocity_X    211.2 km/h
velocity_Y      0.1 km/h
velocity_Z      0.4 km/h
dtype: object

Peak forward velocity: 325.9 km/h
Peak forward velocity: 90.5152 mps
Peak forward velocity sector: 0
Peak forward velocity world positions: 
 world_position_X  world_position_Y  world_position_Z
         -720.076          -494.285          0.831284


This continuous data is interesting, and it lets us find the exact points where peak velocities were reached. For a faster summary, however, we can look at completed-lap data instead.

In [60]:
# High-frequency telemetry streams are usually discretized into bins, each one
# representing a segment of time or distance; this keeps the continuous data
# organized and cheaper to process. We use binIndex to pick out completed laps.

# The largest bin index tells us how many bins there are (around 5994 here).
max_bin_index = valid_df["binIndex"].max()
is_last_bin = valid_df["binIndex"] == max_bin_index
completed_laps = valid_df.loc[is_last_bin].reset_index(drop=True)
print(completed_laps.to_string())
print()

velocity_description = completed_laps["velocity_X"].describe()
print(velocity_description)
print()

# Note that these velocities are not the complete picture: they are sampled at lap
# completion, which is usually on a straight (so often high), but not always.
# Here in Baku, for example, a slightly higher velocity was reached elsewhere on the
# track because speed was carried over from the previous flying lap during the 1st lap.

# Completed-lap summaries are more useful for parameters such as lap time, i.e.
# parameters that only matter on a lap-by-lap basis.
lap_time_description = completed_laps["lap_time"].describe()
print(lap_time_description)
print()

# Rank the laps from fastest to slowest by sorting on lap_time
sorted_laps = completed_laps.sort_values(by="lap_time")
sorted_laps[["lapNum", "lap_time", "race_position"]]

     carId trackId  trackLength  lapIndex  lapNum  lapFlag  binIndex  validBin  lap_number  lap_distance  lap_time  lap_time_invalid  world_position_X  world_position_Y  world_position_Z  world_forward_X  world_forward_Y  world_forward_Z  world_right_X  world_right_Y  world_right_Z  velocity_X  velocity_Y  velocity_Z  gforce_X  gforce_Y  gforce_Z  pit_status  race_position  flags_status  throttle  brake  clutch  steering  gear      rpm  rpm_perc      fuel  tyre_compound_0  tyre_compound_1  tyre_compound_2  tyre_compound_3      yaw     pitch      roll  susp_pos_0  susp_pos_1  susp_pos_2  susp_pos_3  susp_vel_0  susp_vel_1  susp_vel_2  susp_vel_3  susp_acc_0  susp_acc_1  susp_acc_2  susp_acc_3  wheel_speed_0  wheel_speed_1  wheel_speed_2  wheel_speed_3  wheel_slip_ratio_0  wheel_slip_ratio_1  wheel_slip_ratio_2  wheel_slip_ratio_3  wheel_slip_angle_0  wheel_slip_angle_1  wheel_slip_angle_2  wheel_slip_angle_3  angular_vel_X  angular_vel_Y  angular_vel_Z  angular_acc_X  angular_acc_Y  ang

Unnamed: 0,lapNum,lap_time,race_position
3,3,113.398,15
2,2,114.212,14
1,1,117.871,2
4,4,125.878,12
0,0,137.969,5


In [None]:
# Lap times are also influenced by tire compounds, so we can group the sorted times by tire compound and output them.
# Rather than outputting the tire compound number, it is best to translate it to the corresponding compound name.
# See the F1 2019 Spec sheet in the README for the specific translations.