In [None]:
import pandas as pd
import numpy as np
import re


path = "/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_TE.csv"
df = pd.read_csv(path)


df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')
df['Week'] = pd.to_numeric(df['Week'], errors='coerce').astype('Int64')
df = df[df['Week'].notna()].copy()

df = df.rename(columns={'Unnamed: 6_level_0': 'home'})
df['home'] = (
    df['home']
      .fillna('')
      .apply(lambda x: 1 if '@' in str(x) else 0)
      .astype('int')
)

def split_result(res):
    if not isinstance(res, str):
        return pd.Series([0, np.nan, np.nan])
    win = 1 if res.startswith('W') else 0
    m = re.search(r',\s*(\d+)-(\d+)', res)
    if m:
        return pd.Series([win, int(m.group(1)), int(m.group(2))])
    else:
        return pd.Series([win, np.nan, np.nan])

df[['win','score_for','score_against']] = df['Result'].apply(split_result)
df = df.drop(columns=['Result'])

df['season'] = (
    df['Date'].dt.year
      .where(df['Date'].dt.month >= 9,
             df['Date'].dt.year - 1)
      .astype('Int64')
)

df['Did Not Play'] = 0
df['Inactive']     = 0

gs_raw = df['GS'].astype('string').fillna('')

mask_inactive = gs_raw.str.contains('Inactive', na=False)
mask_dnp      = gs_raw.str.contains('Did Not Play', na=False)
df.loc[mask_inactive, 'Inactive']     = 1
df.loc[mask_dnp,      'Did Not Play'] = 1

df['GS'] = gs_raw.apply(lambda x: 1 if '*' in x else 0).astype('int')

df = df.replace({'Did Not Play': 0, 'Inactive': 0})

keep_cols = [
    'Gcar','Week','Date','Team','home','Opp','GS',
    'Passing Cmp','Passing Att','Passing Cmp%','Passing Yds','Passing TD',
    'Passing Int','Passing Y/A','Passing AY/A','Passing Rate','Passing Sk',
    'Rushing Att','Rushing Yds','Rushing TD','Rushing Y/A',
    'Snap Counts OffSnp','Snap Counts Off%','Snap Counts DefSnp','Snap Counts Def%',
    'Snap Counts STSnp','Snap Counts ST%',
    'Player','Position','Year',
    'Receiving Tgt','Receiving Rec','Receiving Yds','Receiving TD',
    'Receiving Ctch%','Receiving Y/Tgt',
    'Fumbles Fmb','Fumbles FL','Fumbles FF','Fumbles FR','Fumbles Yds','Fumbles FRTD',
    'Did Not Play','Inactive','win','season','score_for','score_against'
]
df = df.loc[:, df.columns.intersection(keep_cols)].copy()


for c in ['Team','Opp','Player','Position']:
    if c in df:
        df[c] = df[c].astype('string')
int_cols = [
    'Gcar','Week','GS','Season','win','score_for','score_against','home'
    'Passing Cmp','Passing Att','Passing TD','Passing Int','Passing Sk',
    'Rushing Att','Rushing TD','Snap Counts OffSnp','Snap Counts DefSnp','Snap Counts STSnp',
    'Receiving Tgt','Receiving Rec','Receiving TD','Fumbles Fmb','Fumbles FL','Fumbles FF','Fumbles FR','Fumbles FRTD',
    'Did Not Play','Inactive'
]
for c in int_cols:
    if c in df:
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0).astype('Int64')
float_cols = [
    'Passing Cmp%','Passing Yds','Passing Y/A','Passing AY/A','Passing Rate',
    'Rushing Yds','Rushing Y/A','Snap Counts Off%','Snap Counts Def%','Snap Counts ST%',
    'Receiving Yds','Receiving Ctch%','Receiving Y/Tgt','Fumbles Yds'
]
for c in float_cols:
    if c in df:
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0.0).astype('float')

pd.set_option('display.max_columns', None)
print(df.dtypes)
print(df.head())



  df = pd.read_csv(path)
  df = df.replace({'Did Not Play': 0, 'Inactive': 0})


Gcar                           Int64
Week                           Int64
Date                  datetime64[ns]
Team                  string[python]
home                           int64
Opp                   string[python]
GS                             Int64
Receiving Tgt                  Int64
Receiving Rec                  Int64
Receiving Yds                float64
Receiving TD                   Int64
Receiving Ctch%              float64
Receiving Y/Tgt              float64
Fumbles Fmb                    Int64
Fumbles FL                     Int64
Fumbles FF                     Int64
Fumbles FR                     Int64
Fumbles Yds                  float64
Fumbles FRTD                   Int64
Snap Counts OffSnp             Int64
Snap Counts Off%             float64
Snap Counts DefSnp             Int64
Snap Counts Def%             float64
Snap Counts STSnp              Int64
Snap Counts ST%              float64
Player                string[python]
Position              string[python]
Y

In [19]:
import numpy as np
import pandas as pd

def fill_missing_seasons(df):
    """
    For each player, detect any completely missing seasons between their first
    and last recorded seasons, and insert dummy rows for each missing week.
    Assumes df has columns:
      ['Player','Position','season','Week','Gcar','Inactive',
       'Date','Team','home','Opp','win','score_for','score_against', ...]
    """
    new_rows = []
    for player, grp in df.groupby('Player'):
        # get all real seasons as ints
        seasons = sorted(int(s) for s in grp['season'].dropna().unique())
        if len(seasons) < 2:
            continue
        # find the gap seasons
        full_range = range(seasons[0], seasons[-1] + 1)
        missing = [s for s in full_range if s not in seasons]

        for s in missing:
            prev = grp[grp['season'] == s - 1]
            if prev.empty:
                continue
            last_week = int(prev['Week'].max())
            last_gcar = int(prev.sort_values('Week').iloc[-1]['Gcar'])
            pos       = prev.iloc[0]['Position']

            # one dummy row per week in the missing season
            for w in range(1, last_week + 1):
                row = {col: 0 for col in df.columns}
                row.update({
                    'Player': player,
                    'Position': pos,
                    'season': s,
                    'Week': w,
                    'Gcar': last_gcar,
                    'Inactive': 1
                })
                # these fields stay NaN for context
                for c in ['Date','Team','home','Opp','win','score_for','score_against']:
                    row[c] = np.nan
                new_rows.append(row)

    if new_rows:
        df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True, sort=False)
        df = df.sort_values(['Player','season','Week']).reset_index(drop=True)
    return df

# --- Usage (after your cleaning steps) ---
# df = your cleaned TE DataFrame

df = fill_missing_seasons(df)

# Ensure 'home' is integer (no NaNs)
df['home'] = df['home'].fillna(0).astype(int)

# (Optional) Export final version
df.to_csv(
    "/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_TE_filled_seasons.csv",
    index=False
)


  df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True, sort=False)
