In [1]:
# 1. Imports & basic config

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

pd.set_option("display.max_columns", 50)

DATA_DIR = "data"  # adjust if needed

# Era mapping based on year
def map_era(year: int) -> str:
    if year >= 1987 and year <= 2005:
        return "High-downforce NA (1987–2005)"
    elif year >= 2006 and year <= 2013:
        return "V8 frozen-engine (2006–2013)"
    elif year >= 2014 and year <= 2021:
        return "Hybrid V6 turbo (2014–2021)"
    elif year >= 2022:
        return "Ground-effect hybrid (2022–present)"
    else:
        return "Pre-era (before 1987)"  # will be filtered out later

ERA_ORDER = [
    "High-downforce NA (1987–2005)",
    "V8 frozen-engine (2006–2013)",
    "Hybrid V6 turbo (2014–2021)",
    "Ground-effect hybrid (2022–present)",
]


In [2]:
# 2. Load core CSVs from the Kaggle dataset

races = pd.read_csv(f"{DATA_DIR}/races.csv")
results = pd.read_csv(f"{DATA_DIR}/results.csv")
constructors = pd.read_csv(f"{DATA_DIR}/constructors.csv")
constructor_standings = pd.read_csv(f"{DATA_DIR}/constructor_standings.csv")
seasons = pd.read_csv(f"{DATA_DIR}/seasons.csv")

# Basic sanity checks
print("races:", races.shape)
print("results:", results.shape)
print("constructors:", constructors.shape)
print("constructor_standings:", constructor_standings.shape)
print("seasons:", seasons.shape)

# Keep only necessary columns to simplify
races_small = races[["raceId", "year", "round", "name", "circuitId", "date"]].copy()
constructors_small = constructors[["constructorId", "name", "nationality"]].copy()
constructor_standings_small = constructor_standings[
    ["constructorStandingsId", "raceId", "constructorId", "points", "position", "wins"]
].copy()

# Map era to seasons via race year
races_small["era"] = races_small["year"].apply(map_era)

# Filter to eras of interest (exclude "Pre-era")
races_small = races_small[races_small["era"].isin(ERA_ORDER)].copy()

print(races_small["era"].value_counts())


races: (1125, 18)
results: (26499, 18)
constructors: (212, 5)
constructor_standings: (13261, 7)
seasons: (75, 2)
era
High-downforce NA (1987–2005)          314
Hybrid V6 turbo (2014–2021)            160
V8 frozen-engine (2006–2013)           147
Ground-effect hybrid (2022–present)     68
Name: count, dtype: int64


In [3]:
# 3. Aggregate dominance metrics (wins & points) by constructor and era

# Join results to races to get year & era for each result row
results_races = results.merge(
    races_small[["raceId", "year", "era"]],
    on="raceId",
    how="inner",
)

# Keep only official race results (exclude sprints etc. – results.csv is race results in this dataset)
# Define a "win" as position == 1 in results
results_races["is_win"] = (results_races["positionOrder"] == 1).astype(int)

# Aggregate wins per constructor-season
wins_per_constructor_season = (
    results_races.groupby(["year", "era", "constructorId"], as_index=False)["is_win"]
    .sum()
    .rename(columns={"is_win": "season_wins"})
)

# Aggregate total wins per constructor-era
wins_per_constructor_era = (
    wins_per_constructor_season.groupby(["era", "constructorId"], as_index=False)["season_wins"]
    .sum()
    .rename(columns={"season_wins": "total_wins"})
)

# Use constructor standings as complementary points-based dominance measure
# constructor_standings is per race; aggregate to season by summing points over races in a season
cs_join = constructor_standings_small.merge(
    races_small[["raceId", "year", "era"]],
    on="raceId",
    how="inner",
)

points_per_constructor_season = (
    cs_join.groupby(["year", "era", "constructorId"], as_index=False)["points"]
    .sum()
    .rename(columns={"points": "season_points"})
)

points_per_constructor_era = (
    points_per_constructor_season.groupby(["era", "constructorId"], as_index=False)["season_points"]
    .sum()
    .rename(columns={"season_points": "total_points"})
)

# Merge wins and points together
era_constructor_dom = wins_per_constructor_era.merge(
    points_per_constructor_era,
    on=["era", "constructorId"],
    how="outer",
).fillna(0)

# Attach constructor names
era_constructor_dom = era_constructor_dom.merge(
    constructors_small,
    on="constructorId",
    how="left",
)

# Add dominance share within each era
era_constructor_dom["wins_share"] = era_constructor_dom.groupby("era")["total_wins"].transform(
    lambda x: x / x.sum() if x.sum() > 0 else 0
)
era_constructor_dom["points_share"] = era_constructor_dom.groupby("era")["total_points"].transform(
    lambda x: x / x.sum() if x.sum() > 0 else 0
)

# Sort eras by custom order
era_constructor_dom["era"] = pd.Categorical(
    era_constructor_dom["era"], categories=ERA_ORDER, ordered=True
)
era_constructor_dom = era_constructor_dom.sort_values(["era", "total_wins"], ascending=[True, False])

era_constructor_dom.head()


Unnamed: 0,era,constructorId,total_wins,total_points,name,nationality,wins_share,points_share
12,High-downforce NA (1987–2005),1,96,17681.0,McLaren,British,0.305732,0.227915
15,High-downforce NA (1987–2005),6,92,18313.5,Ferrari,Italian,0.292994,0.236069
13,High-downforce NA (1987–2005),3,82,16115.0,Williams,British,0.261146,0.207729
25,High-downforce NA (1987–2005),22,26,7206.5,Benetton,Italian,0.082803,0.092895
14,High-downforce NA (1987–2005),4,10,4019.0,Renault,French,0.031847,0.051807


In [4]:
# 4. Era-level dominance metric (concentration of wins)

def herfindahl_index(shares: pd.Series) -> float:
    shares = shares.fillna(0)
    return float((shares ** 2).sum())

era_dominance = (
    era_constructor_dom.groupby("era", as_index=False)
    .agg(
        total_era_wins=("total_wins", "sum"),
        total_era_points=("total_points", "sum"),
        top_team_wins=("total_wins", "max"),
        n_teams=("constructorId", "nunique"),
    )
)

# Compute HHI for wins and points within each era
hhi_wins_list = []
hhi_points_list = []

for era_name, group in era_constructor_dom.groupby("era"):
    hhi_wins_list.append(
        {"era": era_name, "hhi_wins": herfindahl_index(group["wins_share"])}
    )
    hhi_points_list.append(
        {"era": era_name, "hhi_points": herfindahl_index(group["points_share"])}
    )

hhi_wins_df = pd.DataFrame(hhi_wins_list)
hhi_points_df = pd.DataFrame(hhi_points_list)

era_dominance = (
    era_dominance.merge(hhi_wins_df, on="era", how="left")
    .merge(hhi_points_df, on="era", how="left")
)

era_dominance


  era_constructor_dom.groupby("era", as_index=False)
  for era_name, group in era_constructor_dom.groupby("era"):


Unnamed: 0,era,total_era_wins,total_era_points,top_team_wins,n_teams,hhi_wins,hhi_points
0,High-downforce NA (1987–2005),314,77577.0,96,39,0.255609,0.165341
1,V8 frozen-engine (2006–2013),147,103695.0,47,23,0.231246,0.148299
2,Hybrid V6 turbo (2014–2021),160,170146.5,111,19,0.523359,0.198597
3,Ground-effect hybrid (2022–present),55,62203.0,45,12,0.687273,0.207471


In [5]:
# 5. Per-season points time series for selected constructors (e.g., Ferrari, McLaren, Mercedes, Red Bull)

# Pick a set of well-known teams
focus_teams = ["Ferrari", "McLaren", "Mercedes", "Red Bull"]

season_constructor_points = points_per_constructor_season.merge(
    constructors_small, on="constructorId", how="left"
)

season_constructor_points["era"] = season_constructor_points["year"].apply(map_era)
season_constructor_points = season_constructor_points[
    season_constructor_points["era"].isin(ERA_ORDER)
].copy()

# Filter to focus teams
season_focus = season_constructor_points[
    season_constructor_points["name"].isin(focus_teams)
].copy()

# Sort for plotting
season_focus = season_focus.sort_values(["year", "name"])

season_focus.head()


Unnamed: 0,year,era,constructorId,season_points,name,nationality
2,1987,High-downforce NA (1987–2005),6,322.0,Ferrari,Italian
0,1987,High-downforce NA (1987–2005),1,757.0,McLaren,British
18,1988,High-downforce NA (1987–2005),6,628.0,Ferrari,Italian
16,1988,High-downforce NA (1987–2005),1,1702.0,McLaren,British
36,1989,High-downforce NA (1987–2005),6,461.0,Ferrari,Italian


In [6]:
# 6. Faceted bar charts: wins per constructor per era

# To avoid tiny bars cluttering, keep top N per era
TOP_N = 10

def top_n_per_era(df, n=10):
    tops = []
    for era_name, group in df.groupby("era"):
        g_sorted = group.sort_values("total_wins", ascending=False).head(n)
        tops.append(g_sorted)
    return pd.concat(tops, axis=0)

era_top_constructors = top_n_per_era(era_constructor_dom, TOP_N)

fig_era_bars = px.bar(
    era_top_constructors,
    x="total_wins",
    y="name",
    color="name",
    facet_col="era",
    facet_col_spacing=0.04,
    orientation="h",
    category_orders={
        "era": ERA_ORDER,
    },
    title="Top constructor wins by era (small multiples)",
    labels={"total_wins": "Total wins in era", "name": "Constructor"},
)

# Make y-axis order descending within facets
fig_era_bars.update_yaxes(categoryorder="total ascending")
fig_era_bars.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

fig_era_bars.update_layout(
    showlegend=False,
    height=500,
    margin=dict(l=60, r=20, t=60, b=40),
)

fig_era_bars.show()


  for era_name, group in df.groupby("era"):


In [7]:
# 7. Line chart of season points for selected constructors with era shading

fig_season_lines = px.line(
    season_focus,
    x="year",
    y="season_points",
    color="name",
    markers=True,
    title="Constructor season points across eras (selected teams)",
    labels={"year": "Season", "season_points": "Constructor points", "name": "Constructor"},
)

# Add vertical bands to show eras
era_bands = [
    ("High-downforce NA (1987–2005)", 1987, 2005),
    ("V8 frozen-engine (2006–2013)", 2006, 2013),
    ("Hybrid V6 turbo (2014–2021)", 2014, 2021),
    ("Ground-effect hybrid (2022–present)", 2022, season_focus["year"].max()),
]

for era_name, start_year, end_year in era_bands:
    fig_season_lines.add_vrect(
        x0=start_year - 0.5,
        x1=end_year + 0.5,
        fillcolor="LightGrey",
        opacity=0.15,
        layer="below",
        line_width=0,
        annotation_text=era_name.split(" (")[0],
        annotation_position="top left",
    )

fig_season_lines.update_layout(
    height=400,
    margin=dict(l=60, r=20, t=60, b=40),
)

fig_season_lines.show()


In [8]:
# 8. Era dominance summary chart using HHI based on wins

fig_era_hhi = go.Figure()

fig_era_hhi.add_trace(
    go.Bar(
        x=era_dominance["era"],
        y=era_dominance["hhi_wins"],
        name="Wins HHI",
        marker_color="crimson",
    )
)

fig_era_hhi.update_layout(
    title="Dominance concentration by era (Herfindahl index on wins share)",
    xaxis_title="Era",
    yaxis_title="Herfindahl index (0–1, higher = more concentrated)",
    height=350,
    margin=dict(l=60, r=20, t=60, b=40),
)

fig_era_hhi.show()


In [9]:
# 9. Combine key views into one HTML artefact

# Convert the existing figures to traces inside a master subplot figure

from plotly.io import to_json

# Rebuild as subplots to control layout
combined_fig = make_subplots(
    rows=3,
    cols=1,
    row_heights=[0.45, 0.35, 0.20],
    shared_xaxes=False,
    vertical_spacing=0.06,
    specs=[
        [{"type": "xy"}],
        [{"type": "xy"}],
        [{"type": "xy"}],
    ],
    subplot_titles=(
        "Top constructor wins by era (small multiples rendered sequentially)",
        "Constructor season points across eras (selected teams)",
        "Dominance concentration by era (wins Herfindahl index)",
    ),
)

# 9.1 Era bars as separate traces per era stacked vertically (not faceted to stay in one figure)
row_idx = 1
for i, era_name in enumerate(ERA_ORDER, start=1):
    data_era = era_top_constructors[era_top_constructors["era"] == era_name]
    # Slight vertical offset in y (hack via text) is not necessary; we can just show them in one grouped bar chart
    # To keep it simple, stack them by using constructor name with era in label
    combined_fig.add_trace(
        go.Bar(
            x=data_era["total_wins"],
            y=data_era["name"] + " (" + era_name.split()[0] + ")",
            orientation="h",
            name=era_name,
            showlegend=(i == 1),
        ),
        row=row_idx,
        col=1,
    )

combined_fig.update_yaxes(
    row=1, col=1, title_text="Constructors by era", automargin=True
)
combined_fig.update_xaxes(
    row=1, col=1, title_text="Total wins in era"
)

# 9.2 Add season lines (row 2)
for trace in fig_season_lines.data:
    combined_fig.add_trace(trace, row=2, col=1)

# Re-add vrects for eras on row 2
for era_name, start_year, end_year in era_bands:
    combined_fig.add_vrect(
        x0=start_year - 0.5,
        x1=end_year + 0.5,
        fillcolor="LightGrey",
        opacity=0.15,
        layer="below",
        line_width=0,
        row=2,
        col=1,
    )

combined_fig.update_yaxes(
    row=2, col=1, title_text="Constructor points"
)
combined_fig.update_xaxes(
    row=2, col=1, title_text="Season"
)

# 9.3 Add HHI bars (row 3)
combined_fig.add_trace(
    go.Bar(
        x=era_dominance["era"],
        y=era_dominance["hhi_wins"],
        marker_color="crimson",
        name="Wins HHI",
        showlegend=False,
    ),
    row=3,
    col=1,
)

combined_fig.update_yaxes(
    row=3, col=1, title_text="Herfindahl index"
)
combined_fig.update_xaxes(
    row=3, col=1, title_text="Era"
)

combined_fig.update_layout(
    height=900,
    title_text="F1 Constructor Dominance Across Technical Eras",
    legend_title="Era / Constructor",
    margin=dict(l=80, r=40, t=80, b=60),
)

combined_fig.show()

# 9.4 Export to HTML for submission artefact
combined_fig.write_html("f1_era_dominance_dashboard.html", include_plotlyjs="cdn")
print("Saved HTML to f1_era_dominance_dashboard.html")


Saved HTML to f1_era_dominance_dashboard.html
