In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# 1. Defensive Linemen Combined Stats 

In [2]:
pass_rush_summary = pd.read_csv('../data/pass_rush_summary.csv')

In [39]:
grouped_DL_stats = pass_rush_summary.groupby("team_name").agg({
    "true_pass_set_total_pressures": "sum",
    "true_pass_set_sacks": "sum",
    "true_pass_set_pass_rush_win_rate": "mean",
    "true_pass_set_hits": "sum",
    "true_pass_set_hurries": "sum",
    "batted_passes": "sum",
    "penalties": "sum"
}).reset_index()

numeric_columns = grouped_DL_stats.select_dtypes(include=["number"]).columns

scaler = StandardScaler()
scaled_data = scaler.fit_transform(grouped_DL_stats[numeric_columns])

grouped_DL_stats_scaled = pd.DataFrame(scaled_data, columns=numeric_columns)

grouped_DL_stats_scaled["team_name"] = grouped_DL_stats["team_name"]

grouped_DL_stats_scaled["pass_rush_score"] = (
    (1.5 * grouped_DL_stats_scaled["true_pass_set_sacks"]) +
    (1.2 * grouped_DL_stats_scaled["true_pass_set_total_pressures"]) +
    (1.0 * grouped_DL_stats_scaled["true_pass_set_hits"]) +
    (0.8 * grouped_DL_stats_scaled["true_pass_set_hurries"]) +
    (0.6 * grouped_DL_stats_scaled["batted_passes"]) +
    (2.0 * grouped_DL_stats_scaled["true_pass_set_pass_rush_win_rate"]) -
    (0.5 * grouped_DL_stats_scaled["penalties"])
)

grouped_DL_stats_scaled = grouped_DL_stats_scaled.sort_values(by="pass_rush_score", ascending=False)


In [40]:
grouped_DL_stats_scaled.head(20)

Unnamed: 0,true_pass_set_total_pressures,true_pass_set_sacks,true_pass_set_pass_rush_win_rate,true_pass_set_hits,true_pass_set_hurries,batted_passes,penalties,team_name,pass_rush_score
25,2.07593,0.976984,1.834485,0.471276,2.478065,1.796143,-0.716188,PHI,11.515069
9,1.665422,2.960115,0.112592,0.569204,1.286381,0.127468,1.255254,DEN,7.711026
16,1.49639,1.793567,1.671192,-1.29142,2.035439,-0.243348,0.844537,LA,7.597056
5,0.119983,-0.189564,2.765154,1.744335,-0.381977,-0.243348,-1.537622,CHI,7.451497
26,0.458048,0.627019,0.065266,0.765059,0.196841,2.352369,-1.373335,PIT,4.641339
15,1.30321,1.326948,-0.675067,1.94019,0.775659,0.127468,1.090967,KC,4.295854
10,1.762012,0.1604,-0.270554,1.94019,1.763054,-0.984982,1.583828,DET,3.781639
3,0.506343,-0.422874,1.317407,1.352625,0.367081,-0.428757,1.255254,BUF,3.369523
2,0.844408,0.743674,-0.37174,1.352625,0.503274,0.498285,1.748115,BLT,2.565477
20,0.4339,0.39371,-0.585105,0.862987,0.196841,1.796143,-0.798331,MIN,2.438347


# 2. Cornerbacks/Safetys Combined Stats

In [11]:
cb_summary = pd.read_csv('../data/defense_summary.csv')

In [12]:
higher_is_better = [
    "catch_rate", "grades_defense", "grades_defense_penalty", 
    "grades_pass_rush_defense", "interceptions", "tackles", 
    "tackles_for_loss", "total_pressures"
]

lower_is_better = ["qb_rating_against", "declined_penalties", "targets"]

cb_metrics = higher_is_better + lower_is_better

# Aggregate stats by tea,
grouped_cb = cb_summary.groupby("team_name")[cb_metrics].sum().reset_index()

numeric_columns = grouped_cb.select_dtypes(include=["number"]).columns

# Standardizing numeric columns
scaler = StandardScaler()
scaled_data = scaler.fit_transform(grouped_cb[numeric_columns])

grouped_cb_scaled = pd.DataFrame(scaled_data, columns=numeric_columns)

grouped_cb_scaled["team_name"] = grouped_cb["team_name"]

# Reversing the direction for metrics where lower is better
for col in lower_is_better:
    grouped_cb_scaled[col] *= -1  

weights = {
    "catch_rate": 1.2,
    "grades_defense": 1.0,
    "grades_defense_penalty": 0.8,
    "grades_pass_rush_defense": 1.1,
    "interceptions": 1.3,
    "tackles": 1.0,
    "tackles_for_loss": 1.2,
    "total_pressures": 1.5,
    "qb_rating_against": 1.4,  
    "declined_penalties": 1.0,  
    "targets": 0.9  
}

grouped_cb_scaled["cb_safety_rating"] = sum(
    grouped_cb_scaled[col] * weight for col, weight in weights.items()
)


grouped_cb_scaled = grouped_cb_scaled.sort_values(by="cb_safety_rating", ascending=False)
grouped_cb_scaled = grouped_cb_scaled.reset_index(drop=True)

In [14]:
grouped_cb_scaled[['team_name', 'cb_safety_rating']]

Unnamed: 0,team_name,cb_safety_rating
0,BUF,9.263707
1,DET,6.845653
2,PHI,6.818587
3,HST,5.389951
4,NYG,3.349994
5,MIN,2.290193
6,CLV,2.05926
7,ATL,1.803055
8,DEN,1.601254
9,ARZ,1.251197
