In [None]:
# 🚀 Auto-setup: installs deps + configures CFBD access
%run ./_auto_setup.py


In [None]:
# 02_build_simple_rankings.ipynb
# 📊 Building Simple Team Power Rankings

# 🛠 Requirements:
# - pandas, matplotlib, seaborn (install via `pip install pandas matplotlib seaborn`)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import sys
from pathlib import Path

# Put the notebook's working directory on sys.path so the local `config`
# package (looked up at ./config) is importable.
_config_dir = Path().resolve() / "config"
_project_root = str(_config_dir.parent)
if _project_root not in sys.path:
    sys.path.insert(0, _project_root)
from config.data_config import get_starter_pack_config

# Resolve the starter-pack settings; DATA_DIR is kept as a plain string
config = get_starter_pack_config()
DATA_DIR = str(config.data_dir)

# Plot defaults: fivethirtyeight theme, 15x8 figures
plt.style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = [15, 8]


In [None]:
# 🔢 Load advanced season stats for the configured season (config.current_year)
season_file = f"{config.current_year}.csv"
stats_path = os.path.join(DATA_DIR, "advanced_season_stats", season_file)
stats = pd.read_csv(stats_path)

print(f"{len(stats)} records loaded")
stats.head()


In [None]:
# 📂 List every column name available in the loaded stats table
stats.columns.to_list()

In [None]:
# 🧼 Keep only rows with a conference (used here as the FBS marker) and
# with both PPA metrics present
has_conference = stats['conference'].notna()
fbs_stats = (
    stats.loc[has_conference]
    .dropna(subset=['offense_ppa', 'defense_ppa'])
    .copy()
)

print(f"{len(fbs_stats)} FBS teams with valid data")

In [None]:
# 🥇 Build simple rankings:
# Offensive PPA is better when higher, defensive PPA is better when lower.
# Both are min-max scaled so that a higher score always means a better unit.
off_ppa = fbs_stats['offense_ppa']
def_ppa = fbs_stats['defense_ppa']

# Offense: distance above the worst (minimum) value, as a fraction of the range
fbs_stats['offense_score'] = (off_ppa - off_ppa.min()) / (off_ppa.max() - off_ppa.min())
# Defense: distance below the worst (maximum) value, as a fraction of the range
fbs_stats['defense_score'] = (def_ppa.max() - def_ppa) / (def_ppa.max() - def_ppa.min())

# Weighted average — you can tweak these weights
fbs_stats['rating'] = 0.5 * fbs_stats['offense_score'] + 0.5 * fbs_stats['defense_score']

In [None]:
# 🏆 Top 25 teams by rating (best first)
ranked_by_rating = fbs_stats.sort_values(by='rating', ascending=False)
top_teams = ranked_by_rating.head(25)
top_teams[['team', 'conference', 'offense_ppa', 'defense_ppa', 'rating']]

In [None]:
# 📊 Visualize top 10 teams
top_ten = top_teams.head(10)

fig, ax = plt.subplots()
sns.barplot(
    data=top_ten,
    x='rating',
    y='team',
    hue='team',  # colour each bar by team; the redundant legend is turned off
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title(f"Top 10 Teams by Simple Efficiency Rating ({config.current_year})")
ax.set_xlabel("Combined Normalized Rating")
ax.set_ylabel("Team")
plt.show()

In [None]:
# 🥇 Build less simple rankings:
# We'll use offensive success rate (higher is better) and defensive success rate (lower is better) in addition to PPA
# Normalize all so higher = better

# Create a helper function to normalize metrics
def normalize_metric(series, invert=False):
    """Min-max scale ``series`` onto the [0, 1] interval.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale (anything exposing ``min``/``max`` and
        element-wise arithmetic behaves the same way).
    invert : bool, default False
        When True the scale is flipped: the smallest input maps to 1 and the
        largest to 0. Use this for metrics where lower is better.

    Returns
    -------
    Same type as ``series``
        Scaled values. If every value is identical (max == min) there is no
        spread to rank on, so 0.0 is returned for each entry instead of the
        NaN that dividing by a zero range would produce.
    """
    low = series.min()
    high = series.max()
    span = high - low

    if span == 0:
        # Constant input: 0/0 would yield NaN and poison any rating built
        # from this column. Multiplying by 0.0 keeps the index and leaves
        # genuinely missing entries as NaN.
        return series * 0.0

    if invert:
        return (high - series) / span
    return (series - low) / span

# Normalize metrics (min-max); every *_score column is oriented so higher = better
fbs_stats['offense_ppa_score'] = normalize_metric(fbs_stats['offense_ppa'])
fbs_stats['defense_ppa_score'] = normalize_metric(fbs_stats['defense_ppa'], invert=True)
fbs_stats['offense_success_rate_score'] = normalize_metric(fbs_stats['offense_successRate'])
fbs_stats['defense_success_rate_score'] = normalize_metric(fbs_stats['defense_successRate'], invert=True)

# Weighted average — you can tweak these weights.
# Uses only the four score columns computed in this cell, so the rating does not
# rely on the 'offense_score'/'defense_score' columns left behind by the
# simple-ranking cell.
fbs_stats['rating'] = (
    0.25 * fbs_stats['offense_ppa_score']
    + 0.25 * fbs_stats['defense_ppa_score']
    + 0.25 * fbs_stats['offense_success_rate_score']
    + 0.25 * fbs_stats['defense_success_rate_score']
)

# Reprint top 25 teams by new rating
# 🏆 Top 25 teams by rating
top_teams = fbs_stats.sort_values('rating', ascending=False).head(25)
top_teams[['team', 'conference', 'offense_ppa', 'defense_ppa', 'offense_successRate', 'defense_successRate', 'rating']]

In [None]:
# 📊 Visualize new top 10 teams
top_ten = top_teams.head(10)

fig, ax = plt.subplots()
sns.barplot(
    data=top_ten,
    x='rating',
    y='team',
    hue='team',  # colour each bar by team; the redundant legend is turned off
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title(f"Top 10 Teams by Less Simple Efficiency Rating ({config.current_year})")
ax.set_xlabel("Combined Normalized Rating")
ax.set_ylabel("Team")
plt.show()

In [None]:
# 💡 What next?
# - Add more metrics
# - Adjust the rating weights
# - Compare to final AP or CFP rankings to measure predictive power

# 🧪 You can also join this data with games.csv to track rating evolution over the season.