In [8]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Read the processed team-dominance table from a path relative to the working directory.
source_csv = "../Data/processed/team_dominance_index.csv"
df = pd.read_csv(source_csv)

# Report the (rows, columns) shape, then preview the first rows as the cell output.
print("Loaded processed dataset:", df.shape)
df.head()

Loaded processed dataset: (1065, 15)


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI,inv_avg_finish,TDI_normalized,TDI_rank
0,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.90851,0.862286,1.0,1.0
1,1950,kurtis-kraft,Kurtis Kraft,1,1,1,17.133333,0.157303,1.0,0.5,14.0,0.60557,0.660726,0.648753,2.0
2,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.383858,0.742574,0.391687,3.0
3,1950,ferrari,Ferrari,5,0,4,9.8,0.235955,0.0,0.4,4.2,0.268082,0.805941,0.25745,4.0
4,1950,talbot-lago,Talbot-Lago,6,0,2,10.947368,0.224719,0.0,0.166667,3.333333,0.204094,0.78322,0.183257,5.0


### Normalize Metrics (Scaling 0–1)
This step converts each metric to a 0–1 scale within each year: the best team in each category gets 1.0, and every other team's value is expressed as a fraction of that year's maximum.

In [7]:
# Columns rescaled so that, within each year, the largest value becomes 1.0.
metrics = ["win_rate", "podium_rate", "total_points"]

# For every row, the maximum of each metric among rows sharing that row's year
# (missing values are skipped), computed in a single vectorized pass.
year_max = df.groupby("year")[metrics].transform("max")

# Divide only where the yearly maximum is strictly positive; rows whose yearly
# maximum is zero, negative, or missing keep their current values.
# Whole-column assignment (rather than writing into masked slices) lets an
# integer-typed column become float without pandas' incompatible-dtype warning.
df[metrics] = (df[metrics] / year_max).where(year_max > 0, df[metrics])

print("Normalized metrics (0–1 scale)")
df.head()

Normalized metrics (0–1 scale)


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI,inv_avg_finish,TDI_normalized,TDI_rank
0,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.979167,0.862286,1.0,1.0
1,1950,kurtis-kraft,Kurtis Kraft,1,1,1,17.133333,0.157303,1.0,0.5,14.0,0.537921,0.660726,0.648753,2.0
2,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.294944,0.742574,0.391687,3.0
3,1950,ferrari,Ferrari,5,0,4,9.8,0.235955,0.0,0.4,4.2,0.194382,0.805941,0.25745,4.0
4,1950,talbot-lago,Talbot-Lago,6,0,2,10.947368,0.224719,0.0,0.166667,3.333333,0.131554,0.78322,0.183257,5.0


### Compute / Refine Team Dominance Index (TDI)
Combine all normalized metrics into a single dominance score for each team-season.

In [9]:
# Weights applied to win rate, podium rate and total points, in that order.
w1, w2, w3 = 0.35, 0.25, 0.40

# Scale each metric column by its weight, then add the terms left to right.
win_term = df["win_rate"].mul(w1)
podium_term = df["podium_rate"].mul(w2)
points_term = df["total_points"].mul(w3)
df["TDI"] = win_term.add(podium_term).add(points_term)

# NOTE(review): only "TDI" is reassigned here; the TDI_normalized and TDI_rank
# columns keep whatever values they had when loaded — confirm whether they
# should be recomputed from the new TDI.
print("TDI recalculated with updated weights")
df.head()

TDI recalculated with updated weights


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI,inv_avg_finish,TDI_normalized,TDI_rank
0,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.979167,0.862286,1.0,1.0
1,1950,kurtis-kraft,Kurtis Kraft,1,1,1,17.133333,0.157303,1.0,0.5,14.0,0.537921,0.660726,0.648753,2.0
2,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.294944,0.742574,0.391687,3.0
3,1950,ferrari,Ferrari,5,0,4,9.8,0.235955,0.0,0.4,4.2,0.194382,0.805941,0.25745,4.0
4,1950,talbot-lago,Talbot-Lago,6,0,2,10.947368,0.224719,0.0,0.166667,3.333333,0.131554,0.78322,0.183257,5.0


In [6]:
# Write the frame to CSV without the row index, then confirm on stdout.
df.to_csv(path_or_buf="../Data/processed/team_year_summary.csv", index=False)
print(" Saved team_year_summary.csv")

 Saved team_year_summary.csv
