In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the per-team, per-season summary table that the rest of the notebook builds on.
input_path = "../Data/processed/team_year_summary.csv"
df = pd.read_csv(input_path)

# Report (rows, columns) as a quick sanity check, then preview the first rows.
print("Data loaded successfully:", df.shape)
df.head()

Data loaded successfully: (1065, 12)


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI
0,1950,adams,Adams,1,0,0,29.5,0.0,0.0,0.0,0.0,0.0
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.979167
2,1950,alta,Alta,2,0,0,13.333333,0.0,0.0,0.0,0.0,0.0
3,1950,cooper,Cooper,1,0,0,15.0,0.0,0.0,0.0,0.0,0.0
4,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.294944


### Compute Weighted TDI
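Combine key metrics (win rate, podium rate, total points, average finish position — inverted so that a lower finish scores higher — and the 1–2 finish rate when that column is present in the data) into a single Team Dominance Index (TDI) using fixed weights.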

In [11]:
# Relative importance of each metric in the Team Dominance Index (weights sum to 1.0).
weights = {
    'win_rate': 0.35,
    'podium_rate': 0.25,
    'total_points': 0.20,
    'avg_finish': 0.15,   # applied to inv_avg_finish, where higher is better
    'one_two_rate': 0.05
}

# avg_finish is "lower is better". A blanket fillna(0) would turn a missing
# average finish into 0, i.e. the best possible inverted score. Fill those gaps
# with the worst observed average finish instead, so they contribute nothing.
worst_avg_finish = df['avg_finish'].max()
df['avg_finish'] = df['avg_finish'].fillna(worst_avg_finish)

# Any other missing numeric value counts as zero. Text/identifier columns are
# deliberately left untouched rather than being overwritten with 0.
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(0)

# Invert average finish against the worst value in the whole table so that a
# better (lower) finish yields a higher score. Guard the divisor: if there is
# no positive average finish at all, the component is simply zero.
if worst_avg_finish > 0:
    df['inv_avg_finish'] = 1 - (df['avg_finish'] / worst_avg_finish)
else:
    df['inv_avg_finish'] = 0.0

# one_two_rate is optional: when the column is absent its term contributes 0,
# which caps the attainable raw TDI at 0.95 instead of 1.0.
one_two_rate = df['one_two_rate'] if 'one_two_rate' in df.columns else 0

# Weighted sum of the components; overwrites any TDI column already present.
df['TDI'] = (
    weights['win_rate'] * df['win_rate'] +
    weights['podium_rate'] * df['podium_rate'] +
    weights['total_points'] * df['total_points'] +
    weights['avg_finish'] * df['inv_avg_finish'] +
    weights['one_two_rate'] * one_two_rate
)

print("TDI calculated successfully!")
df.head()

TDI calculated successfully!


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI,inv_avg_finish
0,1950,adams,Adams,1,0,0,29.5,0.0,0.0,0.0,0.0,0.062376,0.415842
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.90851,0.862286
2,1950,alta,Alta,2,0,0,13.333333,0.0,0.0,0.0,0.0,0.110396,0.735974
3,1950,cooper,Cooper,1,0,0,15.0,0.0,0.0,0.0,0.0,0.105446,0.70297
4,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.383858,0.742574


### Normalize & Rank
Min-max normalize TDI values within each season (0–1 range) so that different years are comparable, then assign each team a dense rank per season (tied teams share a rank; rank 1 is the most dominant).

In [12]:
# Per-season min-max scaling of TDI to the 0-1 range.
season_tdi = df.groupby('year')['TDI']
season_min = season_tdi.transform('min')
season_spread = season_tdi.transform('max') - season_min

# A season with a single team (or with identical TDI for every team) has zero
# spread, so the plain formula would evaluate 0/0 and leave NaN in both the
# normalized score and the rank. Those teams all tie for the season's best,
# so they are assigned 1.0, matching the value the top team gets elsewhere.
scaled = (df['TDI'] - season_min) / season_spread
df['TDI_normalized'] = scaled.mask(season_spread == 0, 1.0)

# Dense ranking within each season: highest normalized TDI gets rank 1 and
# tied teams share a rank without leaving gaps.
df['TDI_rank'] = df.groupby('year')['TDI_normalized'].rank(
    ascending=False, method='dense'
)

# Order the table chronologically, best team first within each season.
df = df.sort_values(['year', 'TDI_rank'])

print("TDI normalized and ranked successfully!")
df.head(10)

TDI normalized and ranked successfully!


Unnamed: 0,year,constructorId,name,races,wins,podiums,avg_finish,total_points,win_rate,podium_rate,avg_points,TDI,inv_avg_finish,TDI_normalized,TDI_rank
1,1950,alfa-romeo,Alfa Romeo,6,6,11,6.954545,1.0,1.0,0.916667,14.833333,0.90851,0.862286,1.0,1.0
8,1950,kurtis-kraft,Kurtis Kraft,1,1,1,17.133333,0.157303,1.0,0.5,14.0,0.60557,0.660726,0.648753,2.0
4,1950,deidt,Deidt,1,0,2,13.0,0.11236,0.0,1.0,10.0,0.383858,0.742574,0.391687,3.0
7,1950,ferrari,Ferrari,5,0,4,9.8,0.235955,0.0,0.4,4.2,0.268082,0.805941,0.25745,4.0
20,1950,talbot-lago,Talbot-Lago,6,0,2,10.947368,0.224719,0.0,0.166667,3.333333,0.204094,0.78322,0.183257,5.0
12,1950,maserati,Maserati,7,0,1,14.972973,0.123596,0.0,0.071429,1.571429,0.148102,0.703505,0.118337,6.0
13,1950,moore,Moore,1,0,0,7.0,0.0,0.0,0.0,0.0,0.129208,0.861386,0.09643,7.0
14,1950,nichels,Nichels,1,0,0,10.0,0.0,0.0,0.0,0.0,0.120297,0.80198,0.086099,8.0
11,1950,marchese,Marchese,1,0,0,12.0,0.0,0.0,0.0,0.0,0.114356,0.762376,0.079211,9.0
10,1950,lesovsky,Lesovsky,1,0,0,12.5,0.0,0.0,0.0,0.0,0.112871,0.752475,0.077489,10.0


In [13]:
# Destination for the final table (TDI, normalized TDI and per-season rank).
output_path = "../Data/processed/team_dominance_index.csv"

# Write without the DataFrame index so the CSV holds only the data columns.
df.to_csv(path_or_buf=output_path, index=False)

print(f"Final TDI data saved at: {output_path}")

Final TDI data saved at: ../Data/processed/team_dominance_index.csv
