In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path

# File paths. DATA_DIR is a relative path, so it is resolved against the
# kernel's current working directory.
DATA_DIR = Path("app")
FG_FILE = DATA_DIR / "field_goal_attempts.csv"  # input: passed to load_data as the attempts CSV
PLAYERS_FILE = DATA_DIR / "kickers.csv"  # input: passed to load_data as the players CSV
OUTPUT_FILE = DATA_DIR / "leaderboard.csv"  # output: written by save_output

In [6]:
def load_data(fg_path: Path, players_path: Path):
    """Read the two input CSV files into DataFrames.

    Parameters
    ----------
    fg_path : Path
        Location of the field-goal attempts CSV.
    players_path : Path
        Location of the players CSV.

    Returns
    -------
    tuple
        ``(fg, players)`` -- the parsed frames, in the same order as the
        arguments.
    """
    # Attempts file is read first, players file second.
    return tuple(pd.read_csv(csv_path) for csv_path in (fg_path, players_path))

# Read both input CSVs; the trailing `.head()` expression previews the
# first rows of the attempts frame.
fg_raw, players_raw = load_data(FG_FILE, PLAYERS_FILE)
fg_raw.head()

Unnamed: 0,season,season_type,week,game_date,game_key,play_id,play_sequence,player_id,field_goal_result,attempt_yards,make
232,2010,Reg,1,9/9/2010,54863,1423,53,22912,Made,41,1
233,2010,Reg,1,9/9/2010,54863,1793,67,33337,Missed,46,0
234,2010,Reg,1,9/9/2010,54863,3295,127,33337,Missed,32,0
235,2010,Reg,1,9/12/2010,54866,2073,84,21213,Made,20,1
236,2010,Reg,1,9/12/2010,54870,1564,65,27091,Missed,46,0


In [9]:
def preprocess_fg_data(fg, max_week=6):
    """Select the attempts used for modelling and encode their outcome.

    Keeps rows where ``season_type`` is ``'Reg'``, ``week`` is at most
    ``max_week`` and ``field_goal_result`` is ``'Made'`` or ``'Missed'``,
    then (re)writes ``make`` as 1 for ``'Made'`` and 0 for ``'Missed'``.

    Parameters
    ----------
    fg : pandas.DataFrame
        Attempts with ``season_type``, ``week`` and ``field_goal_result``
        columns. The frame passed in is not modified.
    max_week : int, default 6
        Highest ``week`` value to keep (inclusive).

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows, original index preserved,
        with the ``make`` column set from ``field_goal_result``.
    """
    in_window = (fg['season_type'] == 'Reg') & (fg['week'] <= max_week)
    has_outcome = fg['field_goal_result'].isin(['Made', 'Missed'])

    # Copy *before* adding the column: assigning into a filtered slice raises
    # pandas' SettingWithCopyWarning and makes it ambiguous which object is
    # actually being written to.
    selected = fg.loc[in_window & has_outcome].copy()
    selected['make'] = selected['field_goal_result'].map({'Made': 1, 'Missed': 0})
    return selected

# Filter the raw attempts into a new frame (fg_raw itself is not rebound),
# then preview the first rows.
fg_clean = preprocess_fg_data(fg_raw)
fg_clean.head()

In [14]:
def train_logistic_model(fg):
    """Fit a logistic regression of ``make`` on ``attempt_yards``.

    Parameters
    ----------
    fg : pandas.DataFrame
        Frame providing the ``attempt_yards`` feature column and the
        ``make`` target column.

    Returns
    -------
    LogisticRegression
        The estimator (default hyper-parameters) after fitting.
    """
    # Double brackets keep the single feature as a 2-D frame for the estimator.
    features = fg[['attempt_yards']]
    target = fg['make']

    classifier = LogisticRegression()
    classifier.fit(features, target)
    return classifier

# Fit the model on the filtered attempts.
model = train_logistic_model(fg_clean)

def apply_model(fg, model):
    """Attach model probabilities and over-expectation values to each row.

    Parameters
    ----------
    fg : pandas.DataFrame
        Frame with ``attempt_yards`` and ``make`` columns. It is not
        modified; a new frame is returned.
    model : object
        Fitted estimator exposing ``predict_proba``; column 1 of its output
        is used as the probability of a make.

    Returns
    -------
    pandas.DataFrame
        Copy of ``fg`` with two extra columns: ``expected_make_prob`` and
        ``fgoe`` (``make`` minus ``expected_make_prob``).
    """
    make_probability = model.predict_proba(fg[['attempt_yards']])[:, 1]
    # `assign` returns a new frame; the lambda sees the column added just before it.
    return fg.assign(
        expected_make_prob=make_probability,
        fgoe=lambda scored: scored['make'] - scored['expected_make_prob'],
    )

# Score the filtered attempts, then preview only the model-related columns.
fg_scored = apply_model(fg_clean, model)
fg_scored[['attempt_yards', 'make', 'expected_make_prob', 'fgoe']].head()

Unnamed: 0,player_id,player_name,rating,rank
29,33469,STEVEN HAUSCHKA,8.089239,1
23,30403,ROBBIE GOULD,7.856362,2
13,27091,MATT BRYANT,5.911992,3
38,38701,GREG ZUERLEIN,5.398462,4
40,39470,JUSTIN TUCKER,4.40385,5


In [15]:
def build_leaderboard(fg, players, min_attempts=10):
    """Aggregate scored attempts per kicker and rank kickers by total FGOE.

    Parameters
    ----------
    fg : pandas.DataFrame
        Scored attempts with ``player_id``, ``make`` and ``fgoe`` columns.
    players : pandas.DataFrame
        Lookup table with ``player_id`` and ``player_name`` columns.
    min_attempts : int, default 10
        Minimum number of attempts (non-null ``make`` values) a kicker needs
        to appear on the leaderboard.

    Returns
    -------
    pandas.DataFrame
        One row per qualifying kicker with columns ``player_id``,
        ``attempts``, ``fgoe_total``, ``fgoe_per_attempt``, ``rating``,
        ``rank`` and ``player_name``. ``rating`` equals ``fgoe_total``;
        ``rank`` 1 is the highest rating and ties share the lowest rank
        (``method='min'``). Rows are not sorted by rank.
    """
    per_kicker = (
        fg.groupby('player_id')
        .agg(
            attempts=('make', 'count'),
            fgoe_total=('fgoe', 'sum'),
            fgoe_per_attempt=('fgoe', 'mean'),
        )
        .reset_index()
    )

    # Explicit copy so the new columns are written to an independent frame.
    qualified = per_kicker[per_kicker['attempts'] >= min_attempts].copy()
    qualified['rating'] = qualified['fgoe_total']
    qualified['rank'] = qualified['rating'].rank(method='min', ascending=False).astype(int)

    # NOTE(review): this is pandas' default inner merge and it runs after the
    # ranks are assigned, so a kicker missing from `players` is dropped and
    # leaves a gap in the rank sequence -- confirm that is intended.
    return qualified.merge(players[['player_id', 'player_name']], on='player_id')

# Build the leaderboard, then display the best-ranked rows (rank ascending).
leaderboard_df = build_leaderboard(fg_scored, players_raw)
leaderboard_df.sort_values('rank').head()

In [None]:
def save_output(df, path):
    """Write the leaderboard to ``path`` as CSV, ordered by ascending rank.

    Only the ``player_id``, ``player_name``, ``rating`` and ``rank`` columns
    are exported, in that order, and the DataFrame index is not written.

    Parameters
    ----------
    df : pandas.DataFrame
        Leaderboard containing at least the four exported columns.
    path : str or Path
        Destination of the CSV file.
    """
    export_columns = ['player_id', 'player_name', 'rating', 'rank']
    exported = df.loc[:, export_columns]
    ordered = exported.sort_values(by='rank')
    ordered.to_csv(path, index=False)

# Persist the leaderboard built above to OUTPUT_FILE.
save_output(leaderboard_df, OUTPUT_FILE)

In [None]:
def run_pipeline():
    """Run every stage end to end: load, filter, fit, score, rank, save.

    Reads from the module-level ``FG_FILE`` and ``PLAYERS_FILE`` paths,
    writes the result to ``OUTPUT_FILE`` and prints a completion message.

    Returns
    -------
    pandas.DataFrame
        The leaderboard produced by ``build_leaderboard``.
    """
    attempts_raw, kickers = load_data(FG_FILE, PLAYERS_FILE)
    attempts = preprocess_fg_data(attempts_raw)

    # The model is fitted on, and then applied to, the same filtered attempts.
    fitted = train_logistic_model(attempts)
    scored = apply_model(attempts, fitted)

    board = build_leaderboard(scored, kickers)
    save_output(board, OUTPUT_FILE)
    print("🏁 Pipeline completed successfully!")
    return board

# Run the full pipeline. This re-reads the input CSVs, overwrites OUTPUT_FILE
# and rebinds `leaderboard_df` to the freshly computed result.
leaderboard_df = run_pipeline()
leaderboard_df.sort_values("rank").head()