In [None]:
import pandas as pd
import numpy as np
import re

# Load the raw kicker game logs. low_memory=False makes pandas infer each
# column's dtype from the whole file instead of chunk by chunk, so a column
# cannot end up with mixed types.
# NOTE(review): the absolute path is machine-specific — confirm before sharing.
df = pd.read_csv(
    "/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_K.csv",
    low_memory=False,
)

# Columns removed up front; only the ones actually present are dropped, so a
# file missing some of them does not raise.
drop_cols = [
    'Rk', 'Gcar', 'Gtm',
    'GS', 'Sk',
    'Tackles Comb', 'Tackles Solo', 'Tackles Ast',
    'Tackles TFL', 'Tackles QBHits', 'Sfty'
]
df = df.drop(columns=[c for c in drop_cols if c in df.columns])

# Week becomes a nullable integer; rows whose Week does not parse as a
# positive number are discarded.
df['Week'] = pd.to_numeric(df['Week'], errors='coerce').astype('Int64')
df = df[df['Week'].notna() & (df['Week'] > 0)].copy()

# Dates that do not match YYYY-MM-DD become NaT rather than raising.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d', errors='coerce')

for col in ['Team', 'Opp', 'Player', 'Position']:
    df[col] = df[col].astype('string')

# The unnamed location column holds '@' for road games. The flag is therefore
# 1 for every row that is NOT marked '@' and 0 for rows that are.
# (The previous version set 1 on '@', which labelled away games as home.)
# NOTE(review): any other marker (e.g. a neutral-site code) counts as home
# here — confirm whether the source data ever contains one.
df = df.rename(columns={'Unnamed: 6_level_0': 'home'})
is_away = df['home'].fillna('').astype(str).str.strip().eq('@')
df['home'] = (~is_away).astype('int')

# Final score inside a result string. The score may follow a comma
# ("W, 31-24") or plain whitespace ("W 31-24").
_SCORE_RE = re.compile(r'(?:,\s*|\s+)([0-9]+)-([0-9]+)')


def split_result(res):
    """Split a game-result string into a win flag and the two scores.

    Parameters
    ----------
    res : object
        Result text such as ``"W, 31-24"`` or ``"L 17-20"``. The first number
        is returned as the team score and the second as the opponent score.
        Anything that is not a ``str`` (e.g. NaN) is treated as missing.

    Returns
    -------
    pandas.Series
        Three positional values ``[win, team_score, opp_score]``. ``win`` is 1
        only when the text starts with ``'W'`` and 0 otherwise (including
        missing input). The scores are ints when a score is found and NaN
        when it is not.
    """
    if not isinstance(res, str):
        return pd.Series([0, np.nan, np.nan])

    win = 1 if res.startswith('W') else 0

    match = _SCORE_RE.search(res)
    if match is None:
        return pd.Series([win, np.nan, np.nan])

    team_score, opp_score = int(match.group(1)), int(match.group(2))
    return pd.Series([win, team_score, opp_score])

# Expand the 'Result' text into a win flag plus both scores, then drop it.
df[['Win', 'Team Score', 'Opp Score']] = df['Result'].apply(split_result)
df = df.drop(columns=['Result'], errors='ignore')

# Scores come back as NaN when a result cannot be parsed, which would turn the
# whole column into floats; a nullable integer keeps them as whole numbers.
for score_col in ('Team Score', 'Opp Score'):
    df[score_col] = df[score_col].astype('Int64')

df['Year'] = pd.to_numeric(df['Year'], errors='coerce').astype('Int64')

# Count-style columns: unparseable or missing values become 0 and the column
# is stored as a nullable integer.
int_cols = [
    '0-19 FGA', '0-19 FGM', '20-29 FGA', '20-29 FGM', '30-39 FGA', '30-39 FGM',
    '40-49 FGA', '40-49 FGM', '50+ FGA', '50+ FGM',
    'Kicking Totals FGA', 'Kicking Totals FGM', 'Kicking Totals XPA', 'Kicking Totals XPM',
    'Kickoffs KO', 'Kickoffs TB', 'Snap Counts OffSnp', 'Snap Counts STSnp',
    'Receiving Tgt', 'Receiving Rec', 'Receiving TD',
    'Passing Cmp', 'Passing Att', 'Passing TD', 'Passing Int', 'Passing Sk',
    'Rushing Att', 'Rushing TD',
    'Fumbles Fmb', 'Fumbles FL', 'Fumbles FF', 'Fumbles FR', 'Fumbles FRTD',
    'Punting Pnt', 'Punting TB', 'Punting Pnt20', 'Punting Blck'
]
for c in int_cols:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0).astype('Int64')

# Float columns: unparseable or missing values become 0.0.
# The per-distance 'FGM' columns are deliberately NOT listed here: they are
# already in int_cols, and listing them in both places made this second loop
# overwrite their integer dtype with float.
float_cols = [
    'Kicking Totals FG%', 'Kicking Totals XP%', 'Kickoffs KOYds', 'Kickoffs TB%', 'Kickoffs KOAvg',
    'Snap Counts Off%', 'Snap Counts ST%', 'Receiving Yds', 'Receiving Ctch%', 'Receiving Y/Tgt',
    'Passing Cmp%', 'Passing Yds', 'Passing Y/A', 'Passing AY/A', 'Passing Rate',
    'Rushing Yds', 'Rushing Y/A',
    'Punting Yds', 'Punting Y/P', 'Punting RetYds', 'Punting NetYds', 'Punting NY/P',
    'Punting TB%', 'Punting In20%'
]
for c in float_cols:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors='coerce').fillna(0.0).astype('float')

# Restrict the frame to the known columns that are actually present.
keep = [
    'Week', 'Date', 'Team', 'home', 'Opp', 'Win', 'Team Score', 'Opp Score', 'Year',
    'Player', 'Position'
] + int_cols + float_cols
df = df.loc[:, df.columns.intersection(keep)]

pd.set_option('display.max_columns', None)
print(df.dtypes)
print(df.head())

# Written relative to the current working directory.
df.to_csv("cleaned_k_logs.csv", index=False)


  df = pd.read_csv("/Users/sethfried/Fantasy Football/data/player_game_logs/pfr_gamelogs_K.csv")


Week                        Int64
Date               datetime64[ns]
Team               string[python]
home                        int64
Opp                string[python]
                        ...      
Receiving Ctch%           float64
Receiving Y/Tgt           float64
Win                         int64
Team Score                  int64
Opp Score                   int64
Length: 72, dtype: object
   Week       Date Team  home  Opp  0-19 FGA  0-19 FGM  20-29 FGA  20-29 FGM  \
0     1 2016-09-11  TAM     1  ATL         0       0.0          0        0.0   
1     2 2016-09-18  TAM     1  ARI         0       0.0          0        0.0   
2     3 2016-09-25  TAM     0  LAR         0       0.0          0        0.0   
3     4 2016-10-02  TAM     0  DEN         0       0.0          0        0.0   
4     5 2016-10-10  TAM     1  CAR         0       0.0          0        0.0   

   30-39 FGA  30-39 FGM  40-49 FGA  40-49 FGM  50+ FGA  50+ FGM  \
0          0        0.0          0        0.0       