In [13]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Relative path of the processed team-season table produced upstream.
dominance_csv = "../Data/processed/team_dominance_index.csv"

# Read the table and report its (rows, columns) shape as a quick sanity check.
df = pd.read_csv(dominance_csv)
print("✅ Loaded processed dataset:", df.shape)

# Preview the first rows; shown because it is the cell's last expression.
df.head()

✅ Loaded processed dataset: (1065, 12)


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI
0,1950,adams,Adams,1,0,0,29.5,0.0,0.0,0.0,0.0,0.00339
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,89.0,1.0,1.833333,14.833333,0.972938
2,1950,alta,Alta,2,0,0,13.333333,0.0,0.0,0.0,0.0,0.0075
3,1950,cooper,Cooper,1,0,0,15.0,0.0,0.0,0.0,0.0,0.006667
4,1950,deidt,Deidt,1,0,2,13.0,10.0,0.0,2.0,10.0,0.57526


### Normalize Metrics (Scaling 0–1)
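This step converts each metric (`win_rate`, `podium_rate`, `total_points`) to a 0–1 scale within each year: the best team in each category gets 1.0, and every other team is expressed as a fraction of that best value. The original columns are overwritten with the scaled values.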

In [14]:
# Columns rescaled so that, within each year, the best value becomes 1.0.
metrics = ["win_rate", "podium_rate", "total_points"]

# Per-year maximum of every metric, broadcast back onto each row of that year.
# One vectorized pass replaces a Python loop over years with repeated masks.
season_max = df.groupby("year")[metrics].transform("max")

# Divide by the yearly maximum, but keep the original values wherever that
# maximum is not positive (zero, negative or missing), so no division by zero
# leaks NaN/inf into the table. Whole-column assignment also avoids writing
# floats into slices of a column that may have been parsed as integers.
# Re-running this cell is harmless: once scaled, each year's maximum is 1.0.
scaled = df[metrics] / season_max
df[metrics] = scaled.where(season_max > 0, df[metrics])

print("✅ Normalized metrics (0–1 scale)")
df.head()

✅ Normalized metrics (0–1 scale)


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI
0,1950,adams,Adams,1,0,0,29.5,0.0,0.0,0.0,0.0,0.00339
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.972938
2,1950,alta,Alta,2,0,0,13.333333,0.0,0.0,0.0,0.0,0.0075
3,1950,cooper,Cooper,1,0,0,15.0,0.0,0.0,0.0,0.0,0.006667
4,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.57526


### Compute / Refine Team Dominance Index (TDI)
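Combine the three normalized metrics (`win_rate`, `podium_rate`, `total_points`) into a single dominance score for each team-season, computed as a weighted sum whose weights are set in the cell below.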

In [15]:
# Weights of the TDI components, in order: win_rate, podium_rate, total_points.
# Edit these values to re-weight the index.
# NOTE(review): this expects the three columns to have been rescaled per year
# by the normalization cell above; at that point total_points is relative to
# the season's top scorer rather than a share of all points. Confirm run order.
w1, w2, w3 = 0.35, 0.25, 0.40

# Build each weighted term separately, then add them left to right.
win_part = df["win_rate"].mul(w1)
podium_part = df["podium_rate"].mul(w2)
points_part = df["total_points"].mul(w3)

# Overwrites the TDI column that came with the loaded file.
df["TDI"] = win_part.add(podium_part).add(points_part)

print("✅ TDI recalculated with updated weights")
df.head()

✅ TDI recalculated with updated weights


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI
0,1950,adams,Adams,1,0,0,29.5,0.0,0.0,0.0,0.0,0.0
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.979167
2,1950,alta,Alta,2,0,0,13.333333,0.0,0.0,0.0,0.0,0.0
3,1950,cooper,Cooper,1,0,0,15.0,0.0,0.0,0.0,0.0,0.0
4,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.294944


In [16]:
# Write df, as left by the cells above, to the processed-data folder.
# index=False keeps the row index out of the file.
summary_csv = "../Data/processed/team_year_summary.csv"
df.to_csv(summary_csv, index=False)
print("✅ Saved team_year_summary.csv")

✅ Saved team_year_summary.csv
