In [1]:
import pandas as pd

df_DST = pd.read_csv("../data/DST.csv")
df_K   = pd.read_csv("../data/K.csv")
df_QB  = pd.read_csv("../data/QB.csv")
df_RB  = pd.read_csv("../data/RB.csv")
df_TE  = pd.read_csv("../data/TE.csv")
df_WR  = pd.read_csv("../data/WR.csv")


df_DST["Position"] = "DST"
df_K["Position"]   = "K"
df_QB["Position"]  = "QB"
df_RB["Position"]  = "RB"
df_TE["Position"]  = "TE"
df_WR["Position"]  = "WR"


df = pd.concat([df_DST, df_K, df_QB, df_RB, df_TE, df_WR], ignore_index=True)

print("Shape inicial:", df.shape)

Shape inicial: (983, 39)


In [3]:
df.head()

Unnamed: 0,Rank,Player,SACK,INT,FR,FF,DEF TD,SFTY,SPC TD,G,...,TD,SACKS,ATT.1,YDS.1,TD.1,FL,20+,TGT,REC,Y/R
0,1.0,Houston Texans (HOU),33.0,12.0,7.0,8.0,2.0,0.0,0.0,11.0,...,,,,,,,,,,
1,2.0,Los Angeles Rams (LAR),31.0,12.0,7.0,10.0,1.0,0.0,0.0,11.0,...,,,,,,,,,,
2,3.0,Seattle Seahawks (SEA),36.0,9.0,4.0,3.0,2.0,0.0,2.0,11.0,...,,,,,,,,,,
3,4.0,Cleveland Browns (CLE),42.0,9.0,6.0,10.0,2.0,0.0,0.0,11.0,...,,,,,,,,,,
4,5.0,Pittsburgh Steelers (PIT),34.0,9.0,11.0,13.0,3.0,0.0,0.0,11.0,...,,,,,,,,,,


In [2]:
df.to_csv("../data/df.csv", index=False)

In [None]:
drop_cols = [
    "Player",
    "Team",
    "FPTS",
    "FPTS/G"
]

df = df.drop(columns=[c for c in drop_cols if c in df.columns])
print("Después del drop:", df.shape)

Después del drop: (983, 36)


In [None]:
cols_to_convert = []

for col in df.columns:
    if col == "Position":
        continue
    numeric_ratio = pd.to_numeric(df[col], errors="coerce").notna().mean()
    if numeric_ratio > 0.50:
        cols_to_convert.append(col)

print("\nColumnas convertidas a numéricas (>50%):")
print(cols_to_convert)

for col in cols_to_convert:
    df[col] = pd.to_numeric(df[col], errors="coerce")



Columnas convertidas a numéricas (>50%):
['Rank', 'G', 'LG', 'ATT', 'YDS', 'TD', 'YDS.1', 'TD.1', 'FL', '20+', 'TGT', 'REC', 'Y/R']


In [4]:
num_cols = df.select_dtypes(include=["float64", "int64"]).columns
df[num_cols] = df[num_cols].fillna(df[num_cols].median())

print("\nColumnas rellenadas con la mediana:")
print(num_cols)


Columnas rellenadas con la mediana:
Index(['Rank', 'SACK', 'INT', 'FR', 'FF', 'DEF TD', 'SFTY', 'SPC TD', 'G',
       'FG', 'FGA', 'PCT', 'LG', '1-19', '20-29', '30-39', '40-49', '50+',
       'XPT', 'XPA', 'CMP', 'ATT', 'YDS', 'Y/A', 'TD', 'SACKS', 'ATT.1',
       'YDS.1', 'TD.1', 'FL', '20+', 'TGT', 'REC', 'Y/R'],
      dtype='object')


In [5]:
df = pd.get_dummies(df, drop_first=True)
print("Shape después del OHE:", df.shape)


Shape después del OHE: (983, 317)


In [6]:
df.head()

Unnamed: 0,Rank,SACK,INT,FR,FF,DEF TD,SFTY,SPC TD,G,FG,...,ROST_99.4%,ROST_99.6%,ROST_99.7%,ROST_99.8%,ROST_99.9%,Position_K,Position_QB,Position_RB,Position_TE,Position_WR
0,1.0,33.0,12.0,7.0,8.0,2.0,0.0,0.0,11.0,9.5,...,False,False,False,False,False,False,False,False,False,False
1,2.0,31.0,12.0,7.0,10.0,1.0,0.0,0.0,11.0,9.5,...,False,False,False,False,False,False,False,False,False,False
2,3.0,36.0,9.0,4.0,3.0,2.0,0.0,2.0,11.0,9.5,...,False,False,False,False,False,False,False,False,False,False
3,4.0,42.0,9.0,6.0,10.0,2.0,0.0,0.0,11.0,9.5,...,False,False,False,False,False,False,False,False,False,False
4,5.0,34.0,9.0,11.0,13.0,3.0,0.0,0.0,11.0,9.5,...,False,False,False,False,False,False,False,False,False,False


In [7]:
df.to_csv("../data/df_processed_eg.csv", index=False)