In [42]:
# general
import pandas as pd
import numpy as np

# visualize
import seaborn as sns
import matplotlib.pyplot as plt

# models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

import setuptools.dist
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# data
import nfl_data_py as nfl

In [21]:
columns = ['player_id', 'player_display_name', 'position', 'recent_team', 'season', 'week', 'season_type', 'opponent_team', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_epa', 'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share', 'wopr', 'fantasy_points_ppr']

In [22]:
# drop season_type not reg
df_weekly = nfl.import_weekly_data(years=range(1999,2024), columns=columns)
# df_weekly = nfl.import_weekly_data(years=range(1999,2024))

Downcasting floats.


In [28]:
df_weekly_wr = df_weekly[(df_weekly['position']=='WR') & (df_weekly['season_type'] == 'REG')]

In [32]:
df_weekly_wr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46903 entries, 32 to 5588
Data columns (total 27 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   player_id                    46903 non-null  object 
 1   player_display_name          46903 non-null  object 
 2   position                     46903 non-null  object 
 3   recent_team                  46903 non-null  object 
 4   season                       46903 non-null  int32  
 5   week                         46903 non-null  int32  
 6   season_type                  46903 non-null  object 
 7   opponent_team                46903 non-null  object 
 8   carries                      46903 non-null  int32  
 9   rushing_yards                46903 non-null  float32
 10  rushing_tds                  46903 non-null  int32  
 11  rushing_epa                  5800 non-null   float32
 12  rushing_2pt_conversions      46903 non-null  int32  
 13  receptions      

In [59]:
df_test = df_weekly_wr.copy()

In [60]:
columns_to_drop = [
    "player_display_name",
    "position",
    "season_type",
    "carries",
    "rushing_yards",
    "rushing_tds",
    "rushing_epa",
    "rushing_2pt_conversions",
    "receiving_air_yards",
    "receiving_yards_after_catch",
    "receiving_first_downs",
    "receiving_epa",
    "racr",
    "air_yards_share",
    "wopr",
    "fantasy_points_ppr"
]

df_test = df_test.drop(columns=columns_to_drop)

In [68]:
df_test['receiving_points'] = df_test['receiving_tds'] * 6 + df_test['receiving_2pt_conversions'] * 2

In [71]:
df_test = df_test.drop(columns=['receiving_2pt_conversions', 'receiving_tds'])
df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46903 entries, 32 to 5588
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   player_id         46903 non-null  object 
 1   recent_team       46903 non-null  object 
 2   season            46903 non-null  int32  
 3   week              46903 non-null  int32  
 4   opponent_team     46903 non-null  object 
 5   receptions        46903 non-null  int32  
 6   targets           46903 non-null  int32  
 7   receiving_yards   46903 non-null  float32
 8   target_share      46117 non-null  float32
 9   receiving_points  46903 non-null  int32  
dtypes: float32(2), int32(5), object(3)
memory usage: 3.7+ MB


In [58]:
# scaler = MinMaxScaler()
# df_test[['week', 'season']] = scaler.fit_transform(df_test[['week', 'season']])

In [66]:
# Preprocess the data
def preprocess_data(df):
    # Extract target columns and features
    target_columns = ['targets', 'receptions', 'receiving_yards', 'receiving_points', 'target_share']
    categorical_columns = ['recent_team', 'opponent_team']
    numerical_columns = ['season', 'week']  # Can also extract season and week separately

    # Encode categorical variables
    encoder = OneHotEncoder(sparse_output=False)
    categorical_data = encoder.fit_transform(df[categorical_columns])

    # Normalize numerical variables
    scaler = MinMaxScaler()
    numerical_data = scaler.fit_transform(df[numerical_columns + target_columns])

    # Combine all features
    features = np.hstack([categorical_data, numerical_data])

    # Create time-series sequences
    sequence_length = 5  # For example, use a 5-week history
    X, y = [], []
    for i in range(len(features) - sequence_length):
        X.append(features[i:i + sequence_length])
        y.append(features[i + sequence_length, -len(target_columns):])  # Predict targets
    X, y = np.array(X), np.array(y)
    return X, y, scaler

In [72]:
X, y, scaler = preprocess_data(df_test)

# Split the data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [78]:
# Build the LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(32, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(y_train.shape[1])  # Output layer matches the number of targets
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

  super().__init__(**kwargs)


In [79]:
# Train the model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1
)

Epoch 1/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 2/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 3/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 4/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 5/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 6/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 7/50
[1m1173/1173[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: nan - mae: nan - val_loss: nan - val_mae: nan
Epoch 

KeyboardInterrupt: 