### Author: Rodolfo Elenes

Date Created: 8/5/2025

Change log:
8/5/2025 - Initialized

# Notebook to-do list
    1.) Download all roster CSV files for every season from NFLverse
            - API only supports seasons going back to 1999
    2.) Insert new rows and only keep latest record of a player so each player has one row in the final table
    
# Enhancements
    -

In [None]:
import pandas as pd
import datetime
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Get Running Backs

In [None]:
# Set up the starting dataframe from the 2024 roster file

players = pd.read_csv("../src/rosters/roster_2024.csv")

# Keep only players whose depth-chart position is RB, and drop rows that are
# missing the birth date or PFR id (both columns are used by later cells).
# The explicit .copy() makes df an independent frame, so column assignments
# made on df later never target a filtered slice of `players`.
is_rb = players['depth_chart_position'] == 'RB'
df = players[is_rb].dropna(subset=['birth_date', 'pfr_id']).copy()
display(df)

# Exploratory Data Work

In [None]:
# Quick structural checks on the running-back dataframe
display(df.head())  # visualize the first rows of the table

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
df.info()  # check data structure
print("\n")
print(df.isnull().any())  # check each column for nulls

# Report whether any fully duplicated rows exist
if df.duplicated().any():
    print('\nDuplicates found.')
else:
    print('\nNo duplicates found.')

In [None]:
# Count how many players fall into each years_exp value

df_numeric = (
    df.select_dtypes(include=['number'])['years_exp']
    .value_counts()
    .rename_axis('years_exp')      # label the index so it becomes the first column
    .reset_index(name='count')     # counts become the second column
)
display(df_numeric.sort_values("years_exp"))

# Create and Export Final Table

In [None]:
# Create the age column and drop unnecessary columns

birth_dates = pd.to_datetime(df['birth_date'])
opening_day = datetime.datetime(2024, 9, 5, 0, 0, 0) # September 5, 2024, 12:00 AM, opening day

# Age in completed years on opening day: the calendar-year difference, minus
# one when the player's birthday falls after opening day within the year.
# Dividing the elapsed days by 365 ignores leap days, which reports a player
# whose birthday is a few days after opening day as one year too old.
birthday_pending = (birth_dates.dt.month > opening_day.month) | (
    (birth_dates.dt.month == opening_day.month)
    & (birth_dates.dt.day > opening_day.day)
)
age = (opening_day.year - birth_dates.dt.year - birthday_pending.astype(int)).astype(int)

# assign() returns a new frame instead of writing columns into df in place;
# gm_log_rtrvd is initialized to 0 for every row.
df = df.assign(birth_date=birth_dates, age=age, gm_log_rtrvd=0)
df = df[['season', 'team', 'position', 'full_name', 'height', 'weight', 'age', 'years_exp', 'pfr_id', 'gm_log_rtrvd']].reset_index(drop=True)
display(df)

In [None]:
# Write the final table to CSV, leaving the dataframe index out of the file

df.to_csv(path_or_buf='../tables/players_xref.csv', index=False)