In [22]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from pathlib import Path

# File paths (all relative, so they resolve against the kernel's working directory)
# NOTE(review): DATA_DIR is defined but none of the paths below are built from
# it in the visible cells -- confirm whether the CSVs are meant to live under it.
DATA_DIR = Path("app")
FG_FILE = Path("field_goal_attempts.csv")  # input: attempts table read by load_data
PLAYERS_FILE = Path("kickers.csv")  # input: player_id -> player_name lookup
OUTPUT_FILE = Path("leaderboard.csv")  # output: CSV written by save_output

In [23]:
def load_data(fg_path: Path, players_path: Path):
    """Read the two input CSV files into DataFrames.

    Args:
        fg_path: Location of the field-goal attempts CSV.
        players_path: Location of the players CSV.

    Returns:
        A ``(attempts, players)`` tuple of DataFrames, in that order, each
        parsed with the default ``pd.read_csv`` settings.
    """
    # The attempts file is read first, then the players file.
    return pd.read_csv(fg_path), pd.read_csv(players_path)

# Read both CSVs from the configured paths, then preview the first rows of the
# attempts table.
fg_raw, players_raw = load_data(FG_FILE, PLAYERS_FILE)
fg_raw.head()

Unnamed: 0,season,season_type,week,game_date,game_key,play_id,play_sequence,player_id,field_goal_result,attempt_yards
0,2010,Pre,1,8/8/2010,55073,433,17,34623,Made,20
1,2010,Pre,1,8/8/2010,55073,2661,104,34623,Missed,49
2,2010,Pre,1,8/8/2010,55073,2772,109,34623,Made,23
3,2010,Pre,1,8/8/2010,55073,1604,64,34623,Made,34
4,2010,Pre,2,8/12/2010,55076,3086,123,34623,Made,28


In [24]:
def preprocess_fg_data(fg, season_type='Reg', max_week=6):
    """Filter field-goal attempts and add a binary ``make`` outcome column.

    A row is kept when its ``season_type`` equals ``season_type``, its ``week``
    is at most ``max_week``, and its ``field_goal_result`` is ``'Made'`` or
    ``'Missed'``. Rows with any other result value are dropped.

    Args:
        fg: DataFrame with ``season_type``, ``week`` and ``field_goal_result``
            columns.
        season_type: Value of ``season_type`` to keep. Defaults to ``'Reg'``.
        max_week: Largest ``week`` value (inclusive) to keep. Defaults to 6.

    Returns:
        A new DataFrame holding the kept rows under their original index
        labels, plus a ``make`` column (1 for ``'Made'``, 0 for ``'Missed'``).
        The input frame is not modified.
    """
    outcome_codes = {'Made': 1, 'Missed': 0}
    keep = (
        (fg['season_type'] == season_type)
        & (fg['week'] <= max_week)
        & fg['field_goal_result'].isin(list(outcome_codes))
    )
    # Copy the selection *before* adding the column: assigning into a filtered
    # slice is what triggers pandas' SettingWithCopyWarning.
    cleaned = fg.loc[keep].copy()
    cleaned['make'] = cleaned['field_goal_result'].map(outcome_codes)
    return cleaned

# Build the filtered, labelled attempts table from the raw load and preview it.
fg_clean = preprocess_fg_data(fg_raw)
fg_clean.head()

Unnamed: 0,season,season_type,week,game_date,game_key,play_id,play_sequence,player_id,field_goal_result,attempt_yards,make
232,2010,Reg,1,9/9/2010,54863,1423,53,22912,Made,41,1
233,2010,Reg,1,9/9/2010,54863,1793,67,33337,Missed,46,0
234,2010,Reg,1,9/9/2010,54863,3295,127,33337,Missed,32,0
235,2010,Reg,1,9/12/2010,54866,2073,84,21213,Made,20,1
236,2010,Reg,1,9/12/2010,54870,1564,65,27091,Missed,46,0


In [25]:
def train_logistic_model(fg):
    """Fit a logistic regression of ``make`` on ``attempt_yards``.

    Args:
        fg: DataFrame with an ``attempt_yards`` feature column and a ``make``
            target column.

    Returns:
        The fitted ``LogisticRegression`` estimator, built with the library's
        default settings.
    """
    features = fg[['attempt_yards']]  # kept 2-D: a single-column frame
    target = fg['make']
    return LogisticRegression().fit(features, target)

# Fit the distance-only make-probability model on the cleaned attempts.
model = train_logistic_model(fg_clean)

def apply_model(fg, model):
    """Score attempts with a fitted model and compute field goals over expected.

    Args:
        fg: DataFrame with ``attempt_yards`` and ``make`` columns.
        model: Fitted classifier exposing ``predict_proba``; column 1 of its
            output is used as the probability of a make.

    Returns:
        A copy of ``fg`` with two added columns: ``expected_make_prob`` and
        ``fgoe`` (``make`` minus ``expected_make_prob``). The input frame is
        left untouched.
    """
    make_probability = model.predict_proba(fg[['attempt_yards']])[:, 1]
    return fg.assign(
        expected_make_prob=make_probability,
        fgoe=lambda scored: scored['make'] - scored['expected_make_prob'],
    )

# Score every cleaned attempt and preview the columns behind the FGOE figure.
# NOTE: the model is applied to the same rows it was fitted on (in-sample).
fg_scored = apply_model(fg_clean, model)
fg_scored[['attempt_yards', 'make', 'expected_make_prob', 'fgoe']].head()

Unnamed: 0,attempt_yards,make,expected_make_prob,fgoe
232,41,1,0.871834,0.128166
233,46,0,0.793961,-0.793961
234,32,0,0.949799,-0.949799
235,20,1,0.986667,0.013333
236,46,0,0.793961,-0.793961


In [26]:
def build_leaderboard(fg, players, min_attempts=10):
    """Aggregate scored attempts per kicker and rank kickers by total FGOE.

    Args:
        fg: Scored attempts with ``player_id``, ``make`` and ``fgoe`` columns.
        players: Lookup table with ``player_id`` and ``player_name`` columns.
        min_attempts: Minimum number of attempts a kicker needs to be ranked.
            Defaults to 10.

    Returns:
        DataFrame with one row per qualifying kicker and the columns
        ``player_id``, ``attempts``, ``fgoe_total``, ``fgoe_per_attempt``,
        ``rating``, ``rank`` and ``player_name``. ``rating`` equals
        ``fgoe_total``; rank 1 is the highest rating and tied ratings share
        the lowest rank of their group. Rows are not sorted by rank. A kicker
        with no entry in ``players`` is kept with a missing ``player_name``.
    """
    stats = fg.groupby('player_id').agg(
        attempts=('make', 'count'),
        fgoe_total=('fgoe', 'sum'),
        fgoe_per_attempt=('fgoe', 'mean'),
    ).reset_index()

    stats = stats[stats['attempts'] >= min_attempts].copy()
    stats['rating'] = stats['fgoe_total']
    stats['rank'] = stats['rating'].rank(method='min', ascending=False).astype(int)

    # One name per player_id: a repeated id in the lookup would otherwise
    # duplicate that kicker's leaderboard row.
    names = players[['player_id', 'player_name']].drop_duplicates(subset='player_id')

    # Left join: ranks are already assigned, so dropping kickers that are
    # missing from the lookup would leave gaps in the ranking.
    return stats.merge(names, on='player_id', how='left')

# Aggregate per-kicker results into the leaderboard and show the top-ranked rows.
leaderboard_df = build_leaderboard(fg_scored, players_raw)
leaderboard_df.sort_values('rank').head()

Unnamed: 0,player_id,attempts,fgoe_total,fgoe_per_attempt,rating,rank,player_name
29,33469,87,8.089239,0.09298,8.089239,1,STEVEN HAUSCHKA
23,30403,100,7.856362,0.078564,7.856362,2,ROBBIE GOULD
13,27091,97,5.911992,0.060948,5.911992,3,MATT BRYANT
38,38701,77,5.398462,0.07011,5.398462,4,GREG ZUERLEIN
40,39470,97,4.40385,0.045401,4.40385,5,JUSTIN TUCKER


In [27]:
def save_output(df, path):
    """Write the leaderboard to a CSV file, ordered by rank.

    Only the ``player_id``, ``player_name``, ``rating`` and ``rank`` columns
    are written, sorted by ascending ``rank`` and without the index.

    Args:
        df: Leaderboard DataFrame containing at least the four columns above.
        path: Destination of the CSV file.
    """
    export_columns = ['player_id', 'player_name', 'rating', 'rank']
    export_frame = df[export_columns]
    ranked = export_frame.sort_values('rank')
    ranked.to_csv(path, index=False)

# Persist the leaderboard to OUTPUT_FILE.
save_output(leaderboard_df, OUTPUT_FILE)

In [28]:
def run_pipeline():
    """Run every stage end to end: load, clean, fit, score, rank and save.

    Reads from the module-level ``FG_FILE`` and ``PLAYERS_FILE`` paths, writes
    the result to ``OUTPUT_FILE`` and prints a completion message.

    Returns:
        The leaderboard DataFrame produced by ``build_leaderboard``.
    """
    attempts_raw, kickers = load_data(FG_FILE, PLAYERS_FILE)
    attempts = preprocess_fg_data(attempts_raw)
    # The model is fitted on, and then applied to, the same cleaned attempts.
    scored = apply_model(attempts, train_logistic_model(attempts))
    board = build_leaderboard(scored, kickers)
    save_output(board, OUTPUT_FILE)
    print("🏁 Pipeline completed successfully!")
    return board

# Run the full pipeline end to end (re-reads the CSVs, refits the model and
# rewrites OUTPUT_FILE), then show the top-ranked rows.
leaderboard_df = run_pipeline()
leaderboard_df.sort_values("rank").head()

🏁 Pipeline completed successfully!


Unnamed: 0,player_id,attempts,fgoe_total,fgoe_per_attempt,rating,rank,player_name
29,33469,87,8.089239,0.09298,8.089239,1,STEVEN HAUSCHKA
23,30403,100,7.856362,0.078564,7.856362,2,ROBBIE GOULD
13,27091,97,5.911992,0.060948,5.911992,3,MATT BRYANT
38,38701,77,5.398462,0.07011,5.398462,4,GREG ZUERLEIN
40,39470,97,4.40385,0.045401,4.40385,5,JUSTIN TUCKER
