# Intro to Formula 1 Data Analysis with Python

Jupyter notebooks have a lot of useful shortcuts (press `Esc` first to leave the cell editor, then use the single-letter keys):
- 'M' will convert a cell to Markdown
- 'A' will insert a cell Above the current one
- 'B' will insert a cell Below the current one 
- 'DD' will delete a cell
- 'C' will copy a cell
- 'X' will cut a cell
- 'V' will paste a cell

- 'Shift + Enter' will run a cell and move to the next one
- 'Control + Enter' will run a cell and stay in the current one

## 0. Setting everything up

In [None]:
import fastf1 as ff1
from fastf1 import plotting
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from matplotlib.colors import ListedColormap
from matplotlib.collections import LineCollection


In [None]:
# Set the cache directory: a folder called 'cache', resolved relative to the
# current working directory (normally the SAME folder as this script).
# The folder has to exist before the cache is enabled, so create it here
# instead of relying on a manual step; exist_ok keeps re-runs harmless.
import os

os.makedirs('cache', exist_ok=True)
ff1.Cache.enable_cache('cache')

# Enable the fastf1 matplotlib settings
ff1.plotting.setup_mpl()

In [None]:
# Choose which session to analyse: season, event and session identifier
# ('Q' is the identifier handed to get_session for this session).
season, grand_prix, session_identifier = 2022, 'Zandvoort', 'Q'

# Build the session object, then load its data (laps are used further down)
session = ff1.get_session(season, grand_prix, session_identifier)
session.load()

## 1. Exploring the data

In [None]:
# Explore the lap data of the loaded session.
# Leaving the expression as the last line of the cell makes the notebook display it.
session.laps

In [None]:
# Find the laps of a specific driver, selected by the abbreviation 'LAT',
# keep them in `laps_lat` and display the result.
# NOTE(review): newer FastF1 releases appear to deprecate `pick_driver` in
# favour of `pick_drivers` — confirm against the installed version.
laps_lat = session.laps.pick_driver('LAT')
laps_lat

In [None]:
# Find the fastest lap among this driver's laps and display it
laps_lat.pick_fastest()

In [None]:
# Display the telemetry that belongs to this driver's fastest lap
laps_lat.pick_fastest().get_telemetry()

In [None]:
# The lap is about 73 seconds long and its telemetry has 573 rows,
# so dividing rows by seconds gives the sampling rate:
# that's almost 8 data points per second!
573/73

## 2. Let's build some plots

In [None]:
# Specify the drivers we want to compare (three-letter abbreviations).
# driver_1 is used as the reference in the comparisons below.
driver_1 = 'VET'
driver_2 = 'NOR'

In [None]:
# For both drivers: all of their laps -> their fastest lap -> that lap's telemetry.
# Each step handles driver 1 first and driver 2 second.
all_laps = session.laps

laps_driver_1, laps_driver_2 = (
    all_laps.pick_driver(code) for code in (driver_1, driver_2)
)

fastest_driver_1, fastest_driver_2 = (
    driver_laps.pick_fastest() for driver_laps in (laps_driver_1, laps_driver_2)
)

telemetry_driver_1, telemetry_driver_2 = (
    fastest_lap.get_telemetry() for fastest_lap in (fastest_driver_1, fastest_driver_2)
)

See the FastF1 `utils` documentation for the `delta_time` helper used in the next cell: https://theoehrly.github.io/Fast-F1/utils.html

In [None]:
# Get the gap (delta time) between driver 1 and driver 2.
# The call receives both fastest laps (driver 1 first) and returns three
# values: the delta itself plus one telemetry object per lap.
# NOTE(review): presumably the first lap acts as the reference and the second
# as the lap being compared; the sign convention of the delta should be
# confirmed against the FastF1 utils documentation.
delta_time, ref_tel, compare_tel = ff1.utils.delta_time(fastest_driver_1, fastest_driver_2)

In [None]:
# Identify the team of each driver: read the 'Team' value from the first of
# their laps.
team_driver_1, team_driver_2 = (
    driver_laps['Team'].iloc[0] for driver_laps in (laps_driver_1, laps_driver_2)
)

# Fastf1 has a built-in function for the team colors, so look both up by team
color_1, color_2 = (
    ff1.plotting.team_color(team) for team in (team_driver_1, team_driver_2)
)

### 2.1 Telemetry comparison

In [None]:
# Set the size of the plot
plt.rcParams['figure.figsize'] = [20, 15]

# The figure is a stack of 7 panels: the delta on top, followed by one panel
# per telemetry channel. Every channel is described as
# (telemetry column, y-axis label, relative panel height).
delta_panel_height = 1
channel_panels = [
    ('Speed', 'Speed', 3),
    ('Throttle', 'Throttle', 2),
    ('Brake', 'Brake', 1),
    ('nGear', 'Gear', 1),
    ('RPM', 'RPM', 2),
    ('DRS', 'DRS', 1),
]
panel_heights = [delta_panel_height] + [height for _, _, height in channel_panels]

fig, ax = plt.subplots(len(panel_heights), gridspec_kw={'height_ratios': panel_heights})

# Top panel: title of the whole figure plus the delta, with a zero line as reference
delta_panel = ax[0]
delta_panel.title.set_text(f"Telemetry comparison {driver_1} vs. {driver_2}")
delta_panel.plot(ref_tel['Distance'], delta_time, color=color_1)
delta_panel.axhline(0)
delta_panel.set(ylabel=f"Gap to {driver_2} (s)")

# Remaining panels: the same channel for both drivers, plotted against distance
drivers_to_plot = (
    (driver_1, telemetry_driver_1, color_1),
    (driver_2, telemetry_driver_2, color_2),
)
for panel, (column, y_label, _) in zip(ax[1:], channel_panels):
    for drv_code, drv_telemetry, drv_color in drivers_to_plot:
        panel.plot(drv_telemetry['Distance'], drv_telemetry[column], label=drv_code, color=drv_color)
    panel.set(ylabel=y_label)

# Only the speed panel carries the legend; only the bottom panel names the x-axis
ax[1].legend(loc="lower right")
ax[-1].set(xlabel='Lap distance (meters)')

# Hide x labels and tick labels for top plots and y ticks for right plots.
for panel in ax.flat:
    panel.label_outer()

### 2.2 Minisector comparison

In [None]:
# Tag each driver's telemetry with the driver abbreviation (the column is added
# to the existing dataframes in place), so the rows stay distinguishable...
for drv_telemetry, drv_code in (
    (telemetry_driver_1, driver_1),
    (telemetry_driver_2, driver_2),
):
    drv_telemetry['Driver'] = drv_code

# ...once both are stacked into one dataframe
telemetry = pd.concat([telemetry_driver_1, telemetry_driver_2])

In [None]:
# Calculate minisectors: split the lap into equally long stretches of track
num_minisectors = 25
total_distance = telemetry['Distance'].max()
minisector_length = total_distance / num_minisectors

# Start distance of every minisector: 0, 1 * length, ..., (n - 1) * length
minisectors = [0] + [minisector_length * i for i in range(1, num_minisectors)]

# Assign a minisector number (1 .. num_minisectors) to every row in the
# telemetry dataframe. The sample(s) at exactly `total_distance` would land in
# a non-existent minisector `num_minisectors + 1`, so clamp them into the last
# real one instead of creating a spurious extra group.
telemetry['Minisector'] = (
    (telemetry['Distance'] // minisector_length).astype(int) + 1
).clip(upper=num_minisectors)

In [None]:
# Mean speed of each driver within each minisector, as a flat table
speed_per_group = telemetry.groupby(['Minisector', 'Driver'])['Speed'].mean()
average_speed = speed_per_group.reset_index()

# Per minisector, keep the row with the highest mean speed: that row's driver
# is the fastest one there
fastest_rows = average_speed.groupby(['Minisector'])['Speed'].idxmax()
fastest_driver = average_speed.loc[fastest_rows, ['Minisector', 'Driver']].rename(
    columns={'Driver': 'Fastest_driver'}
)

# Attach the fastest driver to every telemetry row of the same minisector,
# then put the rows back in order of distance
telemetry = telemetry.merge(fastest_driver, on=['Minisector']).sort_values(by=['Distance'])

# Since our plot can only work with numbers, encode the driver abbreviations
# as 1 (driver_1) and 2 (driver_2)
for driver_number, drv_code in enumerate((driver_1, driver_2), start=1):
    telemetry.loc[telemetry['Fastest_driver'] == drv_code, 'Fastest_driver_int'] = driver_number

In [None]:
# Get the x and y coordinates as plain numpy arrays
x = np.array(telemetry['X'])
y = np.array(telemetry['Y'])

# Pair the coordinates up as points of shape (N, 1, 2), then join every point
# with its successor to obtain N - 1 line segments of shape (N - 1, 2, 2)
points = np.column_stack((x, y)).reshape(-1, 1, 2)
segments = np.concatenate((points[:-1], points[1:]), axis=1)

# One value per telemetry row (1.0 or 2.0) saying who is fastest there.
# Note this holds N values for N - 1 segments, i.e. one more than there are segments.
fastest_driver_array = telemetry['Fastest_driver_int'].to_numpy().astype(float)

In [None]:
# Two-color map: first entry for driver 1, second entry for driver 2
cmap = ListedColormap([color_1, color_2])

# Color every segment according to the fastest driver in its minisector.
# The normalisation spans 1 .. cmap.N + 1, so the values 1 and 2 each fall
# into their own color of the map.
driver_norm = plt.Normalize(vmin=1, vmax=cmap.N+1)
lc_comp = LineCollection(segments, cmap=cmap, norm=driver_norm, linewidths=5)
lc_comp.set_array(fastest_driver_array)

In [None]:
# Create the plot
plt.rcParams['figure.figsize'] = [18, 10]

# Plot the line collection and style the plot: equal axis scaling keeps the
# track shape undistorted; frame, ticks and tick labels are hidden because the
# coordinates themselves are not of interest
plt.gca().add_collection(lc_comp)
plt.axis('equal')
plt.box(False)
plt.tick_params(labelleft=False, left=False, labelbottom=False, bottom=False)

# Add a colorbar as legend. The boundaries 1, 2, 3 delimit one band per driver;
# the ticks sit in the middle of each band (1.5 and 2.5), so there are exactly
# as many ticks as there are labels.
color_boundaries = np.arange(1, 4)
cbar = plt.colorbar(mappable=lc_comp, boundaries=color_boundaries)
cbar.set_ticks(color_boundaries[:-1] + 0.5)
cbar.set_ticklabels([driver_1, driver_2])