In [2]:
import pandas as pd
import numpy as np
from nfl_data_py import import_weekly_data, import_players, import_rosters, import_schedules
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import matplotlib.pyplot as plt


# 1. Fetch data
seasons = list(range(2018, 2024))  # Adjust the range as needed
player_stats = pd.concat([import_weekly_data(seasons)])
players = import_players()
latest_rosters = import_rosters([2024])  # For 2024 predictions
schedule_2024 = import_schedules([2024])

# 2. Preprocess data
df = pd.merge(player_stats, players, left_on='player_id', right_on='gsis_id', how='left')
df = df.sort_values(['player_id', 'season', 'week'])

# Create target variables
target_cols = ['passing_yards', 'attempts', 'completions', 
               'rushing_yards', 'carries', 
               'receiving_yards', 'targets', 'receptions']

for col in target_cols:
    df[f'next_week_{col}'] = df.groupby(['player_id', 'season'])[col].shift(-1)

# Remove last week of each season for each player
df = df.groupby(['player_id', 'season']).apply(lambda x: x.iloc[:-1]).reset_index(drop=True)

# Identify features
categorical_features = ['position_x', 'recent_team']
numerical_features = target_cols

# Create preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(sparse=False, handle_unknown='ignore'), categorical_features)
    ])

# Prepare features
X = df[categorical_features + numerical_features]

# Fit preprocessor and transform data
X_transformed = preprocessor.fit_transform(X)

# Get feature names after preprocessing
feature_names = (numerical_features + 
                 preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features).tolist())

# 3. Build and train models
input_dim = X_transformed.shape[1]

# Function to load and preprocess data
def load_and_preprocess_data(seasons):
    player_stats = pd.concat([import_weekly_data(seasons)])
    players = import_players()
    df = pd.merge(player_stats, players, left_on='player_id', right_on='gsis_id', how='left')
    df = df.sort_values(['player_id', 'season', 'week'])

    for col in target_cols:
        df[f'next_week_{col}'] = df.groupby(['player_id', 'season'])[col].shift(-1)

    df = df.groupby(['player_id', 'season']).apply(lambda x: x.iloc[:-1]).reset_index(drop=True)
    return df

# Function to create and train models
def create_and_train_models(df):
    X = df[categorical_features + numerical_features]
    
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', OneHotEncoder(sparse=False, handle_unknown='ignore'), categorical_features)
        ])
    
    X_transformed = preprocessor.fit_transform(X)
    input_dim = X_transformed.shape[1]
    
    models = {}
    for stat in target_cols:
        y = df[f'next_week_{stat}'].values
        mask = ~np.isnan(y)
        X_stat = X_transformed[mask]
        y_stat = y[mask]
        
        model = Sequential([
            Input(shape=(input_dim,)),
            Dense(128, activation='relu'),
            Dense(64, activation='relu'),
            Dense(32, activation='relu'),
            Dense(1, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse')
        model.fit(X_stat, y_stat, epochs=100, batch_size=32, validation_split=0.2, verbose=0)
        models[stat] = model
    
    return models, preprocessor

def create_model(input_dim):
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Create and train models for each stat
models = {}
for stat in target_cols:
    y = df[f'next_week_{stat}'].values
    mask = ~np.isnan(y)
    X_stat = X_transformed[mask]
    y_stat = y[mask]
    
    models[stat] = create_model(input_dim)
    models[stat].fit(X_stat, y_stat, epochs=100, batch_size=32, validation_split=0.2, verbose=0)

def predict_next_week(player_name, df, models, preprocessor, current_week, current_season):
    try:
        latest_rosters = import_rosters([current_season])
        player_matches = latest_rosters[latest_rosters['player_name'].str.contains(player_name, case=False, na=False)]
        
        if player_matches.empty:
            print(f"Error: Player '{player_name}' not found in the latest roster.")
            return None
        
        if len(player_matches) > 1:
            print(f"Multiple matches found for '{player_name}'. Using the first match.")
        
        player_info = player_matches.iloc[0]
        
        # Check player status
        if player_info['status'] not in ['ACT', 'Active']:
            print(f"Warning: {player_name} is not active. Status: {player_info['status']}")
            return None
        
        player_stats = df[(df['player_id'] == player_info['gsis_id']) & 
                          (df['season'] == current_season) & 
                          (df['week'] == current_week)]
        
        if player_stats.empty:
            print(f"Error: No stats found for '{player_name}' in week {current_week}.")
            return None
        
        last_stats = player_stats.iloc[0]
        
        input_data = pd.DataFrame({col: [last_stats[col]] for col in numerical_features})
        input_data['position_x'] = [player_info['position']]
        input_data['recent_team'] = [player_info['team']]
        
        input_transformed = preprocessor.transform(input_data)
        
        predictions = {stat: models[stat].predict(input_transformed)[0][0] for stat in target_cols}
        
        print(f"Week {current_week + 1} Prediction for {player_name}:")
        print(f"Team: {player_info['team']}")
        print(f"Position: {player_info['position']}")
        
        if player_info['position'] in ['QB']:
            print(f"Predicted passing yards: {predictions['passing_yards']:.2f}")
            print(f"Predicted passing attempts: {predictions['attempts']:.2f}")
            print(f"Predicted passing completions: {predictions['completions']:.2f}")
            print(f"Predicted rushing yards: {predictions['rushing_yards']:.2f}")
            print(f"Predicted rushing attempts: {predictions['carries']:.2f}")
        elif player_info['position'] in ['RB']:
            print(f"Predicted rushing yards: {predictions['rushing_yards']:.2f}")
            print(f"Predicted rushing attempts: {predictions['carries']:.2f}")
            print(f"Predicted receiving yards: {predictions['receiving_yards']:.2f}")
            print(f"Predicted targets: {predictions['targets']:.2f}")
            print(f"Predicted receptions: {predictions['receptions']:.2f}")
        elif player_info['position'] in ['WR', 'TE']:
            print(f"Predicted receiving yards: {predictions['receiving_yards']:.2f}")
            print(f"Predicted targets: {predictions['targets']:.2f}")
            print(f"Predicted receptions: {predictions['receptions']:.2f}")
        
        return predictions
    
    except Exception as e:
        print(f"An error occurred for {player_name}: {str(e)}")
        return None

def get_all_predictions(position, df, models, preprocessor, current_week, current_season):
    predictions = []
    latest_rosters = import_rosters([current_season])
    
    if position == 'QB':
        players = latest_rosters[(latest_rosters['position'] == position) & 
                                 (latest_rosters['depth_chart_position'] == '1')]
    elif position == 'RB':
        players = latest_rosters[(latest_rosters['position'] == position) & 
                                 (latest_rosters['depth_chart_position'].isin(['1', '2']))]
    elif position == 'WR':
        players = latest_rosters[(latest_rosters['position'] == position) & 
                                 (latest_rosters['depth_chart_position'].isin(['1', '2', '3']))]
    elif position == 'TE':
        players = latest_rosters[(latest_rosters['position'] == position) & 
                                 (latest_rosters['depth_chart_position'].isin(['1', '2']))]
    else:
        players = latest_rosters[latest_rosters['position'] == position]

    for _, player in players.iterrows():
        pred = predict_next_week(player['player_name'], df, models, preprocessor, current_week, current_season)
        if pred:
            if position == 'QB':
                predictions.append((player['player_name'], pred['passing_yards'], pred['attempts'], pred['completions']))
            elif position == 'RB':
                predictions.append((player['player_name'], pred['rushing_yards'], pred['carries'], pred['receiving_yards']))
            elif position in ['WR', 'TE']:
                predictions.append((player['player_name'], pred['receiving_yards'], pred['targets'], pred['receptions']))
    
    if not predictions:
        print(f"No valid predictions for {position}. Check if there are active players for this position.")
    
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions

def plot_top_predictions(position, top_n, df, models, preprocessor, current_week, current_season):
    all_predictions = get_all_predictions(position, df, models, preprocessor, current_week, current_season)
    
    if not all_predictions:
        print(f"No valid predictions for {position}")
        return
    
    top_predictions = all_predictions[:top_n]
    
    if position == 'QB':
        names, yards, attempts, completions = zip(*top_predictions)
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
        
        ax1.bar(names, yards)
        ax1.set_ylabel('Predicted Passing Yards')
        ax1.set_title(f'Week {current_week + 1} Predictions for Top {top_n} {position}s')
        plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
        
        ax2.bar(names, attempts, label='Attempts')
        ax2.bar(names, completions, label='Completions')
        ax2.set_ylabel('Predicted Attempts/Completions')
        ax2.legend()
        plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')
        
    elif position == 'RB':
        names, rush_yards, carries, rec_yards = zip(*top_predictions)
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
        
        ax1.bar(names, rush_yards)
        ax1.set_ylabel('Predicted Rushing Yards')
        ax1.set_title(f'Week {current_week + 1} Predictions for Top {top_n} {position}s')
        plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
        
        ax2.bar(names, carries, label='Carries')
        ax2.bar(names, rec_yards, label='Receiving Yards')
        ax2.set_ylabel('Predicted Carries/Receiving Yards')
        ax2.legend()
        plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')
        
    elif position in ['WR', 'TE']:
        names, rec_yards, targets, receptions = zip(*top_predictions)
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))
        
        ax1.bar(names, rec_yards)
        ax1.set_ylabel('Predicted Receiving Yards')
        ax1.set_title(f'Week {current_week + 1} Predictions for Top {top_n} {position}s')
        plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')
        
        ax2.bar(names, targets, label='Targets')
        ax2.bar(names, receptions, label='Receptions')
        ax2.set_ylabel('Predicted Targets/Receptions')
        ax2.legend()
        plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')
    
    plt.tight_layout()
    plt.show()

# Example usage
current_season = 2024
current_week = 1  # Start with week 0 (preseason)

# Initial data load and model training
df = load_and_preprocess_data(seasons)
models, preprocessor = create_and_train_models(df)

plot_top_predictions('QB', 32, df, models, preprocessor, current_week, current_season)
plot_top_predictions('RB', 24, df, models, preprocessor, current_week, current_season)
plot_top_predictions('WR', 36, df, models, preprocessor, current_week, current_season)
plot_top_predictions('TE', 12, df, models, preprocessor, current_week, current_season)

Downcasting floats.


  df = df.groupby(['player_id', 'season']).apply(lambda x: x.iloc[:-1]).reset_index(drop=True)


: 