In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the stats table. Later cells read its 'season' and
# 'player_display_name' columns and average most of the remaining columns.
# NOTE(review): presumably one row per player per week — confirm against the file.
stats = pd.read_csv("../data/stats.csv")

In [3]:
# Load the roster table. Later cells read its 'full_name' and 'season' columns.
rosters = pd.read_csv("../data/rosters.csv")

In [4]:
# Drop roster rows with no 'full_name'; the join key built later is derived
# from that column.
rosters = rosters[rosters['full_name'].notna()].copy()

# Treat zeros as missing so the means computed later skip them.
# `DataFrame.mask` replaces the matching cells with NaN column by column, so
# numeric columns keep a numeric dtype. The previous
# `np.where(stats == 0, np.nan, stats)` round-trip went through a single NumPy
# array, which turns every column of a mixed-dtype frame into `object`.
stats = stats.mask(stats == 0, np.nan)

# Constant 1 per row: summing it in a groupby gives the number of rows pooled.
stats['cnt'] = 1


def name_clean(x):
    """Return ``x`` with every character that is not a letter or a space removed.

    Example: ``"A.J. Brown"`` becomes ``"AJ Brown"``. A "letter" is whatever
    ``str.isalpha`` accepts, so accented characters are kept, while digits,
    punctuation and whitespace other than a plain space are dropped.

    Parameters
    ----------
    x : str
        The raw name. Non-string values (for example ``None`` or the float NaN
        that pandas uses for a missing name) are returned unchanged instead of
        raising ``TypeError``.

    Returns
    -------
    str
        The filtered name, or ``x`` itself when ``x`` is not a string.
    """
    # Missing names cannot be iterated character by character; hand them back
    # untouched so the caller can decide how to treat them.
    if not isinstance(x, str):
        return x
    return ''.join(ch for ch in x if ch.isalpha() or ch == ' ')

# Derive the shared join key 'name' on both frames by passing each frame's
# player-name column through `name_clean`.
stats['name'] = stats['player_display_name'].map(name_clean)
rosters['name'] = rosters['full_name'].map(name_clean)

# Columns of `stats` that are identifiers, context, or helper columns and must
# not be averaged.
non_feature_cols = {
    'player_id', 'player_name', 'player_display_name', 'position',
    'position_group', 'headshot_url', 'recent_team', 'season', 'week',
    'season_type', 'opponent_team', 'cnt', 'name',
}

# Every other column, in its original order, is treated as a feature.
agg_cols = [col for col in stats.columns if col not in non_feature_cols]

# Aggregation spec used by the groupbys below: sum the row counter and take the
# mean of each feature column.
agg_dict = {'cnt': 'sum', **{col: 'mean' for col in agg_cols}}

In [5]:
# How many seasons before the roster season contribute to its features.
# With integer seasons the window is current_season-5 .. current_season-1;
# the roster season itself is excluded.
LOOKBACK_SEASONS = 5

# Collect one merged frame per season and concatenate once after the loop,
# instead of re-copying an ever-growing frame on every iteration.
season_frames = []

for current_season in range(2017, 2025):
    # Strict bounds on both sides: only stats from the lookback window before
    # this season are used.
    in_window = (
        (stats['season'] < current_season)
        & (stats['season'] > current_season - LOOKBACK_SEASONS - 1)
    )
    # One row per cleaned player name: summed row count plus feature means.
    agg_stats = stats[in_window].groupby(['name']).agg(agg_dict).reset_index()

    roster_season = rosters[rosters['season'] == current_season].copy()

    # Inner join: roster rows with no stats in the window are dropped.
    # NOTE(review): the key is the cleaned name alone, so two different players
    # whose cleaned names match are pooled in `agg_stats` and the pooled values
    # are attached to each of their roster rows — confirm this is acceptable.
    roster_season_stats = roster_season.merge(agg_stats, on=['name'], how='inner')

    season_frames.append(roster_season_stats)

# As before, the index is not reset, so index values repeat across seasons.
all_rss = pd.concat(season_frames)

In [6]:
# Sanity check: number of non-null 'first_name' values per (season, team) in
# the merged frame.
all_rss.groupby(['season','team'])['first_name'].count()

season  team
2017    ARI     30
        ATL     19
        BAL     33
        BUF     29
        CAR     19
                ..
2024    SEA     16
        SF      20
        TB      17
        TEN     23
        WAS     22
Name: first_name, Length: 256, dtype: int64

In [7]:
# Write the merged per-season frame (roster columns plus aggregated stats) to
# disk, without the index.
all_rss.to_csv("../data/player_feats.csv",index=False)

In [8]:
# Roll the merged player rows up to one row per (season, team) using the same
# aggregation spec: 'cnt' is summed and every feature column is averaged
# across the team's rows.
team_keys = ['season', 'team']
team_feats = all_rss.groupby(team_keys).agg(agg_dict).reset_index()


In [9]:
# Write the (season, team) aggregates to disk, without the index.
team_feats.to_csv("../data/team_feats.csv",index=False)

In [10]:
# Show the team-level frame as this cell's output.
team_feats

Unnamed: 0,season,team,cnt,completions,attempts,passing_yards,passing_tds,interceptions,sacks,sack_yards,...,receiving_first_downs,receiving_epa,receiving_2pt_conversions,racr,target_share,air_yards_share,wopr,special_teams_tds,fantasy_points,fantasy_points_ppr
0,2017,ARI,733,13.157143,15.641602,156.058571,1.676459,1.881746,2.184757,14.418485,...,1.584688,0.120258,1.0,1.004994,0.074038,0.065225,0.154486,1.2,5.381914,6.770719
1,2017,ATL,598,9.159926,13.913235,104.035784,1.394537,1.352031,1.896635,12.927632,...,1.885596,0.689923,1.0,0.639588,0.086667,0.077431,0.183482,1.0,5.648232,7.24072
2,2017,BAL,711,14.284936,10.922222,151.567788,1.406566,1.221434,1.982692,11.410897,...,1.667101,-0.03192,1.0,1.497929,0.086732,0.080565,0.185999,1.0,4.671847,6.30085
3,2017,BUF,809,9.510417,12.3,151.989583,1.253968,1.083333,2.666667,17.12,...,1.843229,0.588333,1.0,0.730224,0.096685,0.093041,0.209547,1.0,4.649553,6.441404
4,2017,CAR,569,14.050931,22.299197,172.347755,1.578571,1.572464,2.311688,14.905405,...,1.419304,0.727956,1.0,0.391524,0.072732,0.08082,0.165619,1.0,4.902468,6.222306
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,2024,SEA,421,21.820175,33.396199,231.331871,1.736779,1.396825,3.271242,23.747475,...,1.903724,0.530054,1.0,2.307878,0.091102,0.077682,0.18986,,5.38883,7.317606
252,2024,SF,579,10.356609,15.695936,122.067672,1.339394,1.456277,2.509921,17.972583,...,1.7273,0.474776,1.0,1.75479,0.095819,0.09318,0.208697,1.0,6.442166,7.950973
253,2024,TB,455,11.735021,13.956751,126.506329,1.390625,1.340909,2.547761,17.326493,...,1.645248,0.40152,1.0,0.731652,0.085945,0.071629,0.17518,,4.817556,6.360406
254,2024,TEN,528,7.841111,12.922381,90.184444,1.5,1.166667,2.313492,16.397817,...,1.880512,0.721951,1.0,0.499783,0.108696,0.106534,0.237617,1.0,4.536923,6.363807
