In [None]:
import pandas as pd
import numpy as np
import re

# Load the raw WR game-log CSV.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type dtype warnings on load.
path = "/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_WR.csv"
df = pd.read_csv(path, low_memory=False)

# Unparseable dates become NaT and unparseable weeks become <NA>; rows without
# a usable week number are dropped.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
df['Week'] = pd.to_numeric(df['Week'], errors='coerce').astype('Int64')
df = df[df['Week'].notna()].copy()

# The unnamed location column holds '@' for games played at the opponent's
# venue and is blank otherwise, so a game is a home game only when '@' is
# absent. (The previous code set home=1 when '@' was present, which flagged
# road games as home games.)
df = df.rename(columns={'Unnamed: 6_level_0': 'home'})
is_away = df['home'].fillna('').astype(str).str.contains('@', regex=False)
df['home'] = (~is_away).astype(int)

def split_result(res):
    """Split a game-result string into win flag, points for, points against.

    Accepts results with or without a comma between the outcome letter and
    the score (e.g. ``"W, 27-20"`` and ``"W 27-20"``), and tolerates
    surrounding whitespace or trailing text such as ``"(OT)"``.

    Args:
        res: The raw result value. Anything that is not a ``str`` (for
            example NaN) is treated as "no result".

    Returns:
        A three-element ``pd.Series`` ``[win, score_for, score_against]``.
        ``win`` is 1 when the text starts with ``'W'`` and 0 otherwise; the
        two scores are NaN when no ``<digits>-<digits>`` pair is found.
    """
    if not isinstance(res, str):
        return pd.Series([0, np.nan, np.nan])

    text = res.strip()
    win = 1 if text.startswith('W') else 0

    # The first "<digits>-<digits>" pair is the score; no comma is required.
    score = re.search(r'(\d+)\s*-\s*(\d+)', text)
    if score is None:
        return pd.Series([win, np.nan, np.nan])

    team_score = int(score.group(1))
    opp_score = int(score.group(2))
    return pd.Series([win, team_score, opp_score])

# Expand the Result text into win / score_for / score_against, then drop it.
df[['win','score_for','score_against']] = df['Result'].apply(split_result)
df = df.drop(columns=['Result'], errors='ignore')

# Games dated September-December belong to that calendar year's season;
# games dated January-August are assigned to the previous year's season.
# Rows with a missing Date end up with season <NA>.
df['season'] = (
    df['Date'].dt.year
      .where(df['Date'].dt.month >= 9, df['Date'].dt.year - 1)
      .astype('Int64')
)

# The raw GS column doubles as a status field: it may contain the text
# "Inactive" or "Did Not Play", and '*' marks a game started.
df['Did Not Play'] = 0
df['Inactive']     = 0
if 'GS' in df.columns:
    gs_raw = df['GS'].astype('string').fillna('')
else:
    # Fallback must share df's index; an empty Series cannot be used as a
    # boolean mask for df.loc below.
    gs_raw = pd.Series('', index=df.index, dtype='string')

df.loc[gs_raw.str.contains('Inactive', na=False), 'Inactive']     = 1
df.loc[gs_raw.str.contains('Did Not Play', na=False), 'Did Not Play'] = 1

# '*' is matched literally (regex=False), not as a regex quantifier.
df['GS'] = gs_raw.str.contains('*', regex=False).astype(int)

# Any cell anywhere in the frame whose value is exactly one of these status
# strings is replaced with 0. Column dtypes are not otherwise converted here.
df = df.replace({'Inactive': 0, 'Did Not Play': 0})

# Keep only the columns of interest that are actually present in the frame.
keep_cols = [
    'Gcar','Week','Date','Team','home','Opp','GS',
    'Passing Cmp','Passing Att','Passing Cmp%','Passing Yds','Passing TD',
    'Passing Int','Passing Y/A','Passing AY/A','Passing Rate','Passing Sk',
    'Rushing Att','Rushing Yds','Rushing TD','Rushing Y/A',
    'Snap Counts OffSnp','Snap Counts Off%','Snap Counts DefSnp','Snap Counts Def%',
    'Snap Counts STSnp','Snap Counts ST%',
    'Player','Position','Year',
    'Receiving Tgt','Receiving Rec','Receiving Yds','Receiving TD',
    'Receiving Ctch%','Receiving Y/Tgt',
    'Fumbles Fmb','Fumbles FL','Fumbles FF','Fumbles FR','Fumbles Yds','Fumbles FRTD',
    'Did Not Play','Inactive','win','season','score_for','score_against'
]
df = df.loc[:, df.columns.intersection(keep_cols)].copy()

def fill_missing_seasons(df):
    """Insert placeholder "Inactive" rows for seasons a player skipped.

    For each player, every season strictly between their first and last
    recorded season that has no rows receives one placeholder row per week.
    The week range (1..max ``Week``), the ``Gcar`` value (max ``Gcar``, or 0
    when the column is absent or entirely missing) and the ``Position`` are
    taken from the most recent earlier season that does have rows, so gaps
    spanning several consecutive seasons are filled completely.

    Placeholder rows have ``Inactive`` set to 1; ``Date``, ``Team``, ``home``,
    ``Opp``, ``win``, ``score_for`` and ``score_against`` set to NaN; and every
    other column of ``df`` set to 0.

    Args:
        df: Game-log frame with at least ``Player``, ``season``, ``Week`` and
            ``Position`` columns.

    Returns:
        ``df`` unchanged when nothing is missing; otherwise a new frame with
        the placeholder rows appended, sorted by ``Player``, ``season`` and
        ``Week`` with a fresh index.
    """
    blank_cols = ['Date','Team','home','Opp','win','score_for','score_against']
    new_rows = []
    for player, grp in df.groupby('Player'):
        seasons = sorted({int(s) for s in grp['season'].dropna().unique()})
        if len(seasons) < 2:
            continue
        played = set(seasons)
        prev = None
        # The range starts on a played season, so `prev` is always populated
        # before the first missing season is reached.
        for s in range(seasons[0], seasons[-1] + 1):
            if s in played:
                prev = grp[grp['season'] == s]
                continue

            last_week = prev['Week'].max()
            if pd.isna(last_week):
                # No usable week count to copy from; skip rather than crash.
                continue
            last_week = int(last_week)

            last_gcar = 0
            if 'Gcar' in prev:
                gcar_max = prev['Gcar'].max()
                if not pd.isna(gcar_max):
                    last_gcar = int(gcar_max)

            pos = prev.iloc[0]['Position']
            for w in range(1, last_week + 1):
                row = dict.fromkeys(df.columns, 0)
                row.update({
                    'Player': player,
                    'Position': pos,
                    'season': s,
                    'Week': w,
                    'Gcar': last_gcar,
                    'Inactive': 1
                })
                row.update(dict.fromkeys(blank_cols, np.nan))
                new_rows.append(row)
    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True, sort=False)
        df = df.sort_values(['Player','season','Week']).reset_index(drop=True)
    return df

# Add placeholder rows for seasons a player skipped entirely.
df = fill_missing_seasons(df)

df['win'] = df['win'].fillna(0).astype(int)

# Zero-fill remaining gaps everywhere except the text columns and Date.
# Date is excluded because filling it with integer 0 would be parsed by the
# to_datetime call below as the Unix epoch (1970-01-01) instead of staying
# missing.
string_cols = ['Team','Opp','Player','Position']
non_string_cols = df.columns.difference(string_cols + ['Date'])
df[non_string_cols] = df[non_string_cols].fillna(0)

df['home'] = df['home'].fillna(0).astype(int)
# Reduce timestamps to plain dates; missing dates remain NaT.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce').dt.date


pd.set_option('display.max_columns', None)
print(df.dtypes)
print(df.head())

export_path = "/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_WR_cleaned.csv"
df.to_csv(export_path, index=False)
print(f"Exported cleaned WR gamelogs to: {export_path}")


  df = pd.read_csv(path)
  df = df.replace({'Inactive': 0, 'Did Not Play': 0})
  df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True, sort=False)


Gcar                  float64
Week                    Int64
Date                   object
Team                   object
home                    int64
Opp                    object
GS                      int64
Receiving Tgt          object
Receiving Rec          object
Receiving Yds          object
Receiving TD           object
Receiving Ctch%        object
Receiving Y/Tgt        object
Fumbles Fmb            object
Fumbles FL             object
Fumbles FF             object
Fumbles FR             object
Fumbles Yds            object
Fumbles FRTD           object
Snap Counts OffSnp     object
Snap Counts Off%       object
Snap Counts DefSnp     object
Snap Counts Def%       object
Snap Counts STSnp      object
Snap Counts ST%        object
Player                 object
Position               object
Year                    int64
Rushing Att            object
Rushing Yds            object
Rushing TD             object
Rushing Y/A            object
Passing Cmp            object
Passing At