# Formula 1 Race Weekend Analytics

### An end-to-end, interactive data analytics project analyzing a complete Formula 1 race weekend using FastF1 telemetry data, Python.

### This project covers Free Practice, Qualifying, and Race sessions, transforming raw lap-level data into actionable performance, strategy, and consistency insights.

## Project Highlights

#### End-to-end data pipeline
#### Telemetry-grade motorsport data
#### Interactive dashboard
#### Time-series & performance analytics
#### Strong business & strategy storytelling

## Goal: Demonstrate real-world data analyst skills using a complex, domain-rich dataset.

## Install necessary packages

In [None]:
pip install fastf1 dash plotly pandas numpy scipy

## Import the packages

In [23]:
import fastf1
import pandas as pd
import numpy as np

from scipy.stats import linregress

from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import plotly.graph_objects as go

## Configure FastF1

In [24]:
YEAR = 2025
EVENT = "Bahrain"   # change to any completed 2025 race

## Load all Race Weekend Sessions

In [25]:
sessions = {
    "FP1": fastf1.get_session(YEAR, EVENT, "FP1"),
    "FP2": fastf1.get_session(YEAR, EVENT, "FP2"),
    "FP3": fastf1.get_session(YEAR, EVENT, "FP3"),
    "Q":   fastf1.get_session(YEAR, EVENT, "Q"),
    "R":   fastf1.get_session(YEAR, EVENT, "R"),
}

for s in sessions.values():
    s.load()

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v3.7.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '5', '6', '7', '10', '12', '18', '22', '23', '27', '30', '31', '34', '37', '38', '44', '46', '50', '72', '81']
core           INFO 	Loading data for Bahrain Grand Prix - Practice 2 [v3.7.0]
req            INFO 	Using cac

## Build unified laps dataset

In [26]:
laps_all = []

for name, session in sessions.items():
    laps = session.laps.pick_quicklaps()
    laps = laps.dropna(subset=["LapTime"])
    laps["LapTime_s"] = laps["LapTime"].dt.total_seconds()
    laps["Session"] = name
    laps_all.append(laps)

laps_df = pd.concat(laps_all, ignore_index=True)

In [27]:
laps_df

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,LapTime_s,Session
0,0 days 00:18:44.577000,NOR,4,0 days 00:01:35.674000,2.0,1.0,NaT,NaT,0 days 00:00:30.195000,0 days 00:00:41.604000,...,0 days 00:17:08.903000,2025-04-11 11:34:02.823,1,,False,,False,True,95.674,FP1
1,0 days 00:23:13.251000,NOR,4,0 days 00:01:35.249000,4.0,1.0,NaT,NaT,0 days 00:00:30.109000,0 days 00:00:41.408000,...,0 days 00:21:38.002000,2025-04-11 11:38:31.922,1,,False,,False,True,95.249,FP1
2,0 days 00:36:35.324000,NOR,4,0 days 00:01:35.973000,7.0,2.0,NaT,NaT,0 days 00:00:30.128000,0 days 00:00:42.156000,...,0 days 00:34:59.351000,2025-04-11 11:51:53.271,1,,False,,False,True,95.973,FP1
3,0 days 00:41:04.072000,NOR,4,0 days 00:01:35.246000,9.0,2.0,NaT,NaT,0 days 00:00:30.193000,0 days 00:00:41.324000,...,0 days 00:39:28.826000,2025-04-11 11:56:22.746,1,,False,,False,True,95.246,FP1
4,0 days 00:58:37.147000,NOR,4,0 days 00:01:33.204000,12.0,3.0,NaT,NaT,0 days 00:00:29.546000,0 days 00:00:40.369000,...,0 days 00:57:03.943000,2025-04-11 12:13:57.863,1,,False,,False,True,93.204,FP1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1330,0 days 02:25:33.298000,HUL,27,0 days 00:01:38.260000,53.0,3.0,NaT,NaT,0 days 00:00:31.255000,0 days 00:00:42.821000,...,0 days 02:23:55.038000,2025-04-13 16:32:09.305,1,15.0,False,,False,True,98.260,R
1331,0 days 02:27:11.796000,HUL,27,0 days 00:01:38.498000,54.0,3.0,NaT,NaT,0 days 00:00:31.388000,0 days 00:00:42.992000,...,0 days 02:25:33.298000,2025-04-13 16:33:47.565,1,15.0,False,,False,True,98.498,R
1332,0 days 02:28:50.220000,HUL,27,0 days 00:01:38.424000,55.0,3.0,NaT,NaT,0 days 00:00:31.415000,0 days 00:00:42.892000,...,0 days 02:27:11.796000,2025-04-13 16:35:26.063,1,15.0,False,,False,True,98.424,R
1333,0 days 02:30:29.218000,HUL,27,0 days 00:01:38.998000,56.0,3.0,NaT,NaT,0 days 00:00:31.369000,0 days 00:00:43.236000,...,0 days 02:28:50.220000,2025-04-13 16:37:04.487,1,15.0,False,,False,True,98.998,R


## PREPARE TEAMMATE METADATA

In [28]:
# Driver → Team mapping
driver_team = (
    laps_df[["Driver", "Team"]]
    .drop_duplicates()
    .set_index("Driver")
)

In [29]:
driver_team

Unnamed: 0_level_0,Team
Driver,Unnamed: 1_level_1
NOR,McLaren
BOR,Kick Sauber
HAD,Racing Bulls
DOO,Alpine
GAS,Alpine
ANT,Mercedes
STR,Aston Martin
TSU,Red Bull Racing
ALB,Williams
HUL,Kick Sauber


## Feature Engineering

In [30]:
# Tyre age per stint
laps_df["TyreAge"] = (
    laps_df.groupby(["Session", "Driver", "Stint"])
    .cumcount() + 1
)

# Race phase
laps_df["RacePhase"] = pd.cut(
    laps_df["LapNumber"],
    bins=[0, 15, 40, 70],
    labels=["Early", "Mid", "Late"]
)

In [31]:
laps_df

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,LapTime_s,Session,TyreAge,RacePhase
0,0 days 00:18:44.577000,NOR,4,0 days 00:01:35.674000,2.0,1.0,NaT,NaT,0 days 00:00:30.195000,0 days 00:00:41.604000,...,1,,False,,False,True,95.674,FP1,1,Early
1,0 days 00:23:13.251000,NOR,4,0 days 00:01:35.249000,4.0,1.0,NaT,NaT,0 days 00:00:30.109000,0 days 00:00:41.408000,...,1,,False,,False,True,95.249,FP1,2,Early
2,0 days 00:36:35.324000,NOR,4,0 days 00:01:35.973000,7.0,2.0,NaT,NaT,0 days 00:00:30.128000,0 days 00:00:42.156000,...,1,,False,,False,True,95.973,FP1,1,Early
3,0 days 00:41:04.072000,NOR,4,0 days 00:01:35.246000,9.0,2.0,NaT,NaT,0 days 00:00:30.193000,0 days 00:00:41.324000,...,1,,False,,False,True,95.246,FP1,2,Early
4,0 days 00:58:37.147000,NOR,4,0 days 00:01:33.204000,12.0,3.0,NaT,NaT,0 days 00:00:29.546000,0 days 00:00:40.369000,...,1,,False,,False,True,93.204,FP1,1,Early
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1330,0 days 02:25:33.298000,HUL,27,0 days 00:01:38.260000,53.0,3.0,NaT,NaT,0 days 00:00:31.255000,0 days 00:00:42.821000,...,1,15.0,False,,False,True,98.260,R,21,Late
1331,0 days 02:27:11.796000,HUL,27,0 days 00:01:38.498000,54.0,3.0,NaT,NaT,0 days 00:00:31.388000,0 days 00:00:42.992000,...,1,15.0,False,,False,True,98.498,R,22,Late
1332,0 days 02:28:50.220000,HUL,27,0 days 00:01:38.424000,55.0,3.0,NaT,NaT,0 days 00:00:31.415000,0 days 00:00:42.892000,...,1,15.0,False,,False,True,98.424,R,23,Late
1333,0 days 02:30:29.218000,HUL,27,0 days 00:01:38.998000,56.0,3.0,NaT,NaT,0 days 00:00:31.369000,0 days 00:00:43.236000,...,1,15.0,False,,False,True,98.998,R,24,Late


## Qualifying vs Race Metrics

In [32]:
quali_fastest = (
    laps_df[laps_df["Session"] == "Q"]
    .groupby("Driver")["LapTime_s"]
    .min()
)

race_avg = (
    laps_df[laps_df["Session"] == "R"]
    .groupby("Driver")["LapTime_s"]
    .mean()
)

comparison = pd.concat(
    [quali_fastest, race_avg],
    axis=1,
    keys=["QualiFastest", "RaceAvg"]
).dropna()

comparison["Delta"] = comparison["RaceAvg"] - comparison["QualiFastest"]
comparison = comparison.reset_index()

In [33]:
comparison

Unnamed: 0,Driver,QualiFastest,RaceAvg,Delta
0,ALB,92.04,98.65652,6.61652
1,ALO,91.634,99.04068,7.40668
2,ANT,90.213,98.432143,8.219143
3,BEA,92.373,98.69532,6.32232
4,BOR,92.186,99.3414,7.1554
5,DOO,91.245,98.605292,7.360292
6,GAS,90.216,98.169306,7.953306
7,HAD,91.271,98.801667,7.530667
8,HAM,90.772,98.062765,7.290765
9,HUL,91.693,98.82,7.127


## Race Consistency vs Pace

In [34]:
race_consistency = (
    laps_df[laps_df["Session"] == "R"]
    .groupby("Driver")["LapTime_s"]
    .std()
)

race_avg_pace = (
    laps_df[laps_df["Session"] == "R"]
    .groupby("Driver")["LapTime_s"]
    .mean()
)

consistency_vs_pace = pd.concat(
    [race_avg_pace, race_consistency],
    axis=1
)
consistency_vs_pace.columns = ["AvgRaceLap_s", "RaceLapStd_s"]
consistency_vs_pace = consistency_vs_pace.dropna()

In [35]:
consistency_vs_pace

Unnamed: 0_level_0,AvgRaceLap_s,RaceLapStd_s
Driver,Unnamed: 1_level_1,Unnamed: 2_level_1
ALB,98.65652,0.933445
ALO,99.04068,0.845631
ANT,98.432143,0.932404
BEA,98.69532,0.850611
BOR,99.3414,0.749557
DOO,98.605292,0.73626
GAS,98.169306,0.921241
HAD,98.801667,0.859098
HAM,98.062765,1.119395
HUL,98.82,0.842449


## Dash App Initialization

In [36]:
app = Dash(__name__)

## Dashboard Controls

In [37]:
drivers = sorted(laps_df["Driver"].unique())
sessions_list = sorted(laps_df["Session"].unique())

## Dashboard Layout

In [38]:
app.layout = html.Div([

    html.H1(
        f"F1 {YEAR} Race Weekend Analysis – {EVENT}",
        style={"textAlign": "center"}
    ),

    html.Hr(),

    # ---------------- CONTROLS ----------------
    html.Div([

        html.Div([
            html.Label("Driver"),
            dcc.Dropdown(
                id="driver_dropdown",
                options=[{"label": d, "value": d} for d in sorted(laps_df["Driver"].unique())],
                value=sorted(laps_df["Driver"].unique())[0],
                clearable=False
            )
        ], style={"width": "24%", "display": "inline-block"}),

        html.Div([
            html.Label("Session"),
            dcc.Dropdown(
                id="session_dropdown",
                options=[{"label": s, "value": s} for s in sorted(laps_df["Session"].unique())],
                value="R",
                clearable=False
            )
        ], style={"width": "24%", "display": "inline-block"}),

        html.Div([
            html.Label("Telemetry Metric"),
            dcc.Dropdown(
                id="metric_dropdown",
                options=[
                    {"label": "Speed", "value": "Speed"},
                    {"label": "Throttle", "value": "Throttle"},
                    {"label": "Brake", "value": "Brake"},
                    {"label": "RPM", "value": "RPM"}
                ],
                value="Speed",
                clearable=False
            )
        ], style={"width": "24%", "display": "inline-block"}),

        html.Div([
            html.Label("Track Sector"),
            dcc.Dropdown(
                id="sector_dropdown",
                options=[
                    {"label": "Full Lap", "value": "ALL"},
                    {"label": "Sector 1", "value": "S1"},
                    {"label": "Sector 2", "value": "S2"},
                    {"label": "Sector 3", "value": "S3"}
                ],
                value="ALL",
                clearable=False
            )
        ], style={"width": "24%", "display": "inline-block"}),

    ]),

    html.Br(),

    dcc.Checklist(
        id="teammate_toggle",
        options=[{"label": " Compare with Teammate", "value": "TEAMMATE"}],
        value=[],
        inline=True
    ),

    html.Hr(),

    # ---------------- PLOTS ----------------
    dcc.Graph(id="lap_time_trend"),
    dcc.Graph(id="quali_vs_race"),
    dcc.Graph(id="consistency_vs_pace"),
    dcc.Graph(id="tyre_degradation"),
    dcc.Graph(id="telemetry_plot"),

    html.Hr(),
    html.P("Data Source: FastF1 | Built with Python, Plotly & Dash",
           style={"textAlign": "center", "color": "gray"})

],
style={"maxWidth": "1500px", "margin": "auto", "padding": "20px"})


## Callbacks

In [39]:
@app.callback(
    Output("lap_time_trend", "figure"),
    Input("driver_dropdown", "value"),
    Input("session_dropdown", "value")
)
def update_lap_trend(driver, session):
    df = laps_df[(laps_df["Driver"] == driver) & (laps_df["Session"] == session)]
    return px.line(df, x="LapNumber", y="LapTime_s",
                   title=f"Lap Time Trend – {driver} ({session})",
                   markers=True)


## Qualifying vs Race Pace Visuals

In [40]:
@app.callback(
    Output("quali_vs_race", "figure"),
    Input("driver_dropdown", "value")
)
def update_quali_vs_race(driver):

    fig = px.scatter(
        comparison,
        x="QualiFastest",
        y="RaceAvg",
        text="Driver",
        title="Qualifying vs Race Pace"
    )

    selected = comparison[comparison["Driver"] == driver]

    fig.add_scatter(
        x=selected["QualiFastest"],
        y=selected["RaceAvg"],
        mode="markers",
        marker=dict(size=14, color="red"),
        name="Selected Driver"
    )

    fig.update_traces(textposition="top center")
    return fig

## Consistency vs Average Race Pace Visuals

In [41]:
@app.callback(
    Output("consistency_vs_pace", "figure"),
    Input("driver_dropdown", "value")
)
def update_consistency_plot(driver):

    fig = px.scatter(
        consistency_vs_pace,
        x="RaceLapStd_s",
        y="AvgRaceLap_s",
        text=consistency_vs_pace.index,
        title="Race Consistency vs Average Pace"
    )

    fig.add_scatter(
        x=[consistency_vs_pace.loc[driver, "RaceLapStd_s"]],
        y=[consistency_vs_pace.loc[driver, "AvgRaceLap_s"]],
        mode="markers",
        marker=dict(size=14, color="red"),
        name="Selected Driver"
    )

    fig.update_traces(textposition="top center")
    return fig

## Tyre Degradation (Race)

In [42]:
@app.callback(
    Output("tyre_degradation", "figure"),
    Input("session_dropdown", "value")
)
def update_tyre_plot(session):

    df = laps_df[laps_df["Session"] == "R"]

    fig = px.line(
        df,
        x="TyreAge",
        y="LapTime_s",
        color="Compound",
        title="Tyre Degradation Trends (Race)"
    )
    return fig

## Telemetry

In [43]:
@app.callback(
    Output("telemetry_plot", "figure"),
    Input("driver_dropdown", "value"),
    Input("session_dropdown", "value"),
    Input("metric_dropdown", "value"),
    Input("teammate_toggle", "value")
)
def update_telemetry(driver, session, metric, teammate_toggle):

    if session not in ["Q", "R"]:
        return go.Figure(layout=go.Layout(
            title="Telemetry available only for Qualifying or Race"
        ))

    s = sessions[session]
    fig = go.Figure()

    # ---- MAIN DRIVER ----
    lap_main = s.laps.pick_driver(driver).pick_fastest()
    if lap_main is None:
        fig.add_annotation(text="No telemetry available",
                           x=0.5, y=0.5,
                           xref="paper", yref="paper",
                           showarrow=False)
        return fig

    tel_main = lap_main.get_telemetry()
    fig.add_trace(go.Scatter(
        x=tel_main["Distance"],
        y=tel_main[metric],
        mode="lines",
        name=f"{driver} {metric}"
    ))

    # ---- TEAMMATE (SAFE) ----
    if "TEAMMATE" in teammate_toggle:

        team = driver_team.loc[driver, "Team"]
        teammates = (
            driver_team[driver_team["Team"] == team]
            .index.difference([driver])
            .tolist()
        )

        if teammates:
            tm = teammates[0]
            laps_tm = s.laps.pick_driver(tm)

            if not laps_tm.empty:
                lap_tm = laps_tm.pick_fastest()

                if lap_tm is not None:
                    try:
                        tel_tm = lap_tm.get_telemetry()
                        fig.add_trace(go.Scatter(
                            x=tel_tm["Distance"],
                            y=tel_tm[metric],
                            mode="lines",
                            line=dict(dash="dash"),
                            name=f"{tm} {metric}"
                        ))
                    except Exception:
                        fig.add_annotation(
                            text="Teammate telemetry unavailable",
                            x=0.5, y=0.4,
                            xref="paper", yref="paper",
                            showarrow=False
                        )

    fig.update_layout(
        title=f"Telemetry Comparison – {metric} ({session})",
        xaxis_title="Distance (m)",
        yaxis_title=metric,
        template="plotly_white"
    )

    return fig

## Run the Dashboard

In [44]:
if __name__ == "__main__":
    app.run(debug=True)


pick_driver is deprecated and will be removed in a future release. Use pick_drivers instead.


pick_driver is deprecated and will be removed in a future release. Use pick_drivers instead.


pick_driver is deprecated and will be removed in a future release. Use pick_drivers instead.


pick_driver is deprecated and will be removed in a future release. Use pick_drivers instead.

