# Basic Data Preprocessing

In [1]:
import pandas as pd

# Input CSVs: one with team-level game results, one with player stat lines.
# The variable names (nba_data, player_mvp_stats) are kept because later cells
# refer to them, even though both files hold NFL data.
games_csv_path = 'nfl_data.csv'
players_csv_path = 'nfl_player_data.csv'

nba_data = pd.read_csv(games_csv_path)
player_mvp_stats = pd.read_csv(players_csv_path)


In [2]:
# Recognised NFL team abbreviations. Every code maps to itself, so the mapping
# leaves known codes unchanged when it is applied to a column.
_team_codes = (
    'ARI ATL BAL BUF CAR '
    'CHI CIN CLE DAL DEN '
    'DET GB HOU IND JAX '
    'KC LV LAC LAR MIA '
    'MIN NE NO NYG NYJ '
    'PHI PIT SEA SF TB '
    'TEN WAS'
).split()

team_mapping = dict(zip(_team_codes, _team_codes))



In [3]:
# Pass the player table's team codes through team_mapping. The mapping sends
# every listed code to itself, so known codes are unchanged; per pandas'
# Series.map semantics, any code that is not a key in the dict becomes NaN.
player_mvp_stats['Tm'] = player_mvp_stats['Tm'].map(team_mapping)


In [4]:
# Inner-join game rows to player rows on the team abbreviation only
# (team == Tm). No date or game key takes part in the join, so each game row is
# repeated once per matching player row; the preview below shows the same
# 2020-09-14 game paired with several players whose 'Game Date' is 9/9/2019.
combined_data = pd.merge(nba_data, player_mvp_stats, left_on='team', right_on='Tm', how='inner')


In [5]:
# Preview the first rows of the merged game/player frame.
combined_data.head()


Unnamed: 0,date,team,team_opp,points_scored,points_scored_opp,yards_gained,yards_gained_opp,turnovers,turnovers_opp,won,...,Pass Yds,Pass TD,Pass Int,Rush Att,Rush Yds,Rush TD,Receptions,Rec Yds,Rec TD,Game Date
0,2020-09-14,NO,TB,28,41,557,187,0,4,False,...,370,2,1,0,0,0,0,0,0,9/9/2019
1,2020-09-14,NO,TB,28,41,557,187,0,4,False,...,0,0,0,13,97,0,7,72,0,9/9/2019
2,2020-09-14,NO,TB,28,41,557,187,0,4,False,...,0,0,0,6,43,1,2,4,0,9/9/2019
3,2020-09-14,NO,TB,28,41,557,187,0,4,False,...,0,0,0,2,8,0,1,9,1,9/9/2019
4,2020-09-14,NO,TB,28,41,557,187,0,4,False,...,0,0,0,0,0,0,10,123,0,9/9/2019


In [6]:
# Keep just the player name and the player's team code, then show the result
player_team_columns = ['Player', 'Tm']
player_and_team_data = combined_data.loc[:, player_team_columns]
print(player_and_team_data)


                 Player   Tm
0            Drew Brees   NO
1          Alvin Kamara   NO
2       Latavius Murray   NO
3           Taysom Hill   NO
4        Michael Thomas   NO
...                 ...  ...
158538      Tyler Kroft  NYJ
158539       Jeff Smith  NYJ
158540      Denzel Mims  NYJ
158541       Ty Johnson  NYJ
158542     Kenny Yeboah  NYJ

[158543 rows x 2 columns]


In [7]:
# Keep just the team and its opponent for each row, then show the result
matchup_columns = ['team', 'team_opp']
team_and_team_opp_data = combined_data.loc[:, matchup_columns]
print(team_and_team_opp_data)


       team team_opp
0        NO       TB
1        NO       TB
2        NO       TB
3        NO       TB
4        NO       TB
...     ...      ...
158538  NYJ      TEN
158539  NYJ      TEN
158540  NYJ      TEN
158541  NYJ      TEN
158542  NYJ      TEN

[158543 rows x 2 columns]


# Adding Weights

In [8]:
# Get all player names

unique_players = combined_data['Player'].unique()

# unique() hands back an array, and pandas display options such as
# 'display.max_rows' do not influence how an array is printed. Wrapping the
# names in a Series makes the option take effect, and option_context restores
# the previous setting automatically, even if printing raises.
with pd.option_context('display.max_rows', None):
    print(pd.Series(unique_players, name='Player'))


['Drew Brees' 'Alvin Kamara' 'Latavius Murray' 'Taysom Hill'
 'Michael Thomas' 'Ted Ginn Jr.' 'Keith Kirkwood' 'Zach Line'
 'Deonte Harris' 'Jared Cook' "Tre'Quan Smith" 'Josh Hill'
 'Teddy Bridgewater' 'Austin Carr' "Lil'Jordan Humphrey" 'Dan Arnold'
 'Zach Zenner' 'Dwayne Washington' 'Krishawn Hogan' 'Jason Vander Laan'
 'Justin Hardee' 'Ricky Ortiz' 'Adam Trautman' 'Bennie Fowler'
 'Michael Burton' 'Emmanuel Sanders' 'Ty Montgomery' 'Marquez Callaway'
 'Garrett Griffin' 'Juwan Johnson' 'Tommylee Lewis' 'Jameis Winston'
 'Tony Jones' 'Chris Hogan' 'Alex Armah' 'Kenny Stills' 'Devine Ozigbo'
 'Kevin White' 'Trevor Siemian' 'Mark Ingram' 'Nick Vannett'
 'Adam Prentice' 'Blake Gillikin' 'Ethan Wolf' 'Easop Winston' 'Ian Book'
 'Gus Edwards' 'Justice Hill' 'Marquise Brown' 'Mark Andrews'
 'Willie Snead' 'Hayden Hurst' 'Nick Boyle' 'Seth Roberts' 'Miles Boykin'
 'Patrick Ricard' 'Lamar Jackson' 'Robert Griffin III' 'Anthony Levine'
 'Chris Moore' 'Sam Koch' "De'Anthony Thomas" 'Trace McSo

In [9]:
# Sample mapping of player names to their corresponding weights.
# Players are listed from highest to lowest weight: the first entry gets 0.99
# and each following entry is 0.01 lower, ending at 0.60 for the 40th player.
ranked_players = [
    'DeVonta Smith', 'Demarcus Lawrence', 'Tristan Wirfs', 'Harrison Smith',
    'Trevor Lawrence', 'Jamaal Williams', 'Terry McLaurin', 'Eric Kendricks',
    'Marlon Humphrey', 'Dalvin Cook', 'Deandre Hopkins', 'Marshon Lattimore',
    'Aidan Hutchinson', 'Chris Lindstrom', 'Justin Fields', 'Laremy Tunsil',
    "Za'Darius Smith", 'Terron Armstead', 'Tua Tagovailoa', 'Christian Wilkins',
    'Mark Andrews', 'Dre Greenlaw', 'Talanoa Hufanga', 'Geno Smith',
    'Tariq Woolen', 'Trey Hendrickson', 'Garrett Wilson', 'Budda Baker',
    'Lamar Jackson', 'DeForest Buckner', 'Joey Bosa', 'Matt Milano',
    'Zack Martin', 'Amon-Ra St. Brown', 'Jared Goff', 'Darius Slay',
    'Aaron Jones', 'Grady Jarrett', 'Bobby Wagner', 'Deebo Samuel',
]

# Integer arithmetic followed by one division yields exactly the same floats as
# the two-decimal literals (e.g. 72 / 100 == 0.72).
player_weights = {
    name: (99 - position) / 100
    for position, name in enumerate(ranked_players)
}

# Access the performance weight of a player by name.
# For example, to get the weight of DeVonta Smith (a plain dict lookup, so a
# name that is not a key would raise KeyError):
devonta_smith_weight = player_weights['DeVonta Smith']
print(f"DeVonta Smith's weight: {devonta_smith_weight}")


# Add player weights to combined_data.
# Players whose name is not a key of player_weights get NaN here; a later cell
# recomputes this column and fills the gaps with a default weight.
combined_data['player_weight'] = combined_data['Player'].map(player_weights)


DeVonta Smith's weight: 0.99


In [10]:
# Spot-check a second entry of the player_weights dictionary.
budda_baker_weight = player_weights['Budda Baker']
print(f"Budda Baker's weight: {budda_baker_weight}")


Budda Baker's weight: 0.72


In [11]:
# Weight given to every player that has no entry in player_weights
default_weight = 0.5

# Look up each player's weight, then replace the missing ones with the default
looked_up_weights = combined_data['Player'].map(player_weights)
combined_data['player_weight'] = looked_up_weights.fillna(default_weight)

print(combined_data.loc[:, ['Player', 'player_weight']])


                 Player  player_weight
0            Drew Brees            0.5
1          Alvin Kamara            0.5
2       Latavius Murray            0.5
3           Taysom Hill            0.5
4        Michael Thomas            0.5
...                 ...            ...
158538      Tyler Kroft            0.5
158539       Jeff Smith            0.5
158540      Denzel Mims            0.5
158541       Ty Johnson            0.5
158542     Kenny Yeboah            0.5

[158543 rows x 2 columns]


In [None]:
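# NOTE: leftover lookups from an NBA variant of this notebook, kept commented out.
# The names below are NBA players and are not expected to appear in this NFL
# data; if re-enabled, `.values[0]` would fail on an empty selection.
#lebron_weight = combined_data.loc[combined_data['Player'] == 'LeBron James', 'player_weight'].values[0]
#steph_weight = combined_data.loc[combined_data['Player'] == 'Stephen Curry', 'player_weight'].values[0]

#print(lebron_weight)
#print(steph_weight)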


In [12]:
# Per-column multipliers applied to the team statistics.
# Each column is listed exactly once: a repeated key in a dict literal silently
# keeps only its last value, so the values below are the ones that actually
# take effect (0.75 for both 'yards_gained' and 'turnovers_opp').
# NOTE(review): this cell previously also carried 0.667 for 'yards_gained' and
# 0.917 for 'turnovers_opp', which were always overridden - confirm that 0.75
# is the intended weight for both.
team_weights = {
  'points_scored': 0.833,
  'points_scored_opp': 1.0,
  'yards_gained': 0.75,
  'yards_gained_opp': 0.417,
  'turnovers': 1.0,
  'turnovers_opp': 0.75,
}


# Scale each listed column in place by its weight.
# This overwrites the raw values, so running the cell a second time without
# rebuilding combined_data applies the weights twice.
for col, weight in team_weights.items():
  combined_data[col] = combined_data[col] * weight

print(combined_data.head())


         date team team_opp  points_scored  points_scored_opp  yards_gained  \
0  2020-09-14   NO       TB         23.324               41.0        417.75   
1  2020-09-14   NO       TB         23.324               41.0        417.75   
2  2020-09-14   NO       TB         23.324               41.0        417.75   
3  2020-09-14   NO       TB         23.324               41.0        417.75   
4  2020-09-14   NO       TB         23.324               41.0        417.75   

   yards_gained_opp  turnovers  turnovers_opp    won  ... Pass TD Pass Int  \
0            77.979        0.0            3.0  False  ...       2        1   
1            77.979        0.0            3.0  False  ...       0        0   
2            77.979        0.0            3.0  False  ...       0        0   
3            77.979        0.0            3.0  False  ...       0        0   
4            77.979        0.0            3.0  False  ...       0        0   

  Rush Att  Rush Yds  Rush TD  Receptions  Rec Yds  Rec 

In [13]:
# Inspect the target column before conversion (the output shows dtype bool).
print(combined_data['won'])


0         False
1         False
2         False
3         False
4         False
          ...  
158538    False
158539    False
158540    False
158541    False
158542    False
Name: won, Length: 158543, dtype: bool


In [None]:
# Convert the 'won' column to integers in place so it can be used as the
# binary training target.
combined_data['won'] = combined_data['won'].astype(int)


In [None]:
# Confirm the conversion (the output shows dtype int64).
print(combined_data['won'])


0         0
1         0
2         0
3         0
4         0
         ..
158538    0
158539    0
158540    0
158541    0
158542    0
Name: won, Length: 158543, dtype: int64


In [None]:
# Repeat of the previous check; prints the same 'won' column again.
print(combined_data['won'])


0         0
1         0
2         0
3         0
4         0
         ..
158538    0
158539    0
158540    0
158541    0
158542    0
Name: won, Length: 158543, dtype: int64


# Feedforward Neural Network (Keras Functional API)

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import regularizers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling are repeatable across
# "Restart & Run All", in line with the fixed random_state of the split below.
tf.keras.utils.set_random_seed(42)

# Extract features and target variable.
# 'turnovers_opp' and 'yards_gained' are listed twice, so X has 9 columns. The
# list is kept as-is because get_team_data builds its prediction rows from the
# same 9 columns and the fitted scaler must see the same width.
# NOTE(review): points_scored and points_scored_opp are both features while
# 'won' is the target - presumably the result follows directly from the score;
# confirm whether this leakage is intended.
X = combined_data[['points_scored', 'points_scored_opp', 'yards_gained', 'yards_gained_opp', 'turnovers', 'turnovers_opp', 'turnovers_opp', 'yards_gained', 'player_weight']]
y = combined_data['won']  # Use the 'won' column as the target variable

# Split the data into training and testing sets (80/20 split).
# NOTE(review): combined_data holds one row per (game, player) pairing, so rows
# of the same game can end up on both sides of a random row split - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Data preprocessing: fit the scaler on the training rows only, then reuse it
# for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # Scale the features
X_test_scaled = scaler.transform(X_test)

# The second model input receives the unscaled player weight. Pass it as an
# explicit (n_samples, 1) NumPy array to match Input(shape=(1,)) instead of
# relying on Keras to convert and reshape a pandas Series.
player_weight_train = X_train['player_weight'].to_numpy().reshape(-1, 1)
player_weight_test = X_test['player_weight'].to_numpy().reshape(-1, 1)

# Define the model architecture using the Functional API
num_features = X_train_scaled.shape[1]

input_layer = tf.keras.layers.Input(shape=(num_features,))
hidden1 = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.5))(input_layer)
dropout1 = tf.keras.layers.Dropout(0.50)(hidden1)
hidden2 = tf.keras.layers.Dense(64, activation='relu')(dropout1)
dropout2 = tf.keras.layers.Dropout(0.5)(hidden2)
hidden3 = tf.keras.layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.9))(dropout2)
# Side input: the raw player weight is concatenated with the last hidden layer
# so it reaches the output unit directly.
player_weight_input = tf.keras.layers.Input(shape=(1,))
merged = tf.keras.layers.Concatenate()([hidden3, player_weight_input])

#adjustment_layer = tf.keras.layers.Dense(39, activation='relu')(merged)  # adjusting for 39 features if needed

#output_layer = tf.keras.layers.Dense(1, activation='sigmoid')(adjustment_layer)

# Single sigmoid unit: the output is read as the probability that 'won' is 1.
output_layer = tf.keras.layers.Dense(1, activation='sigmoid')(merged)

model = tf.keras.models.Model(inputs=[input_layer, player_weight_input], outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model (a single epoch with very small batches of 4 rows)
model.fit([X_train_scaled, player_weight_train], y_train,
          epochs=1, batch_size=4,
          validation_data=([X_test_scaled, player_weight_test], y_test))

# Prediction
# For new predictions, you would preprocess the new data (let's call it new_data) in the same way as X_train
# Example:
# new_data_scaled = scaler.transform(new_data_features)
# predictions = model.predict([new_data_scaled, new_data_player_weight])


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 9)]                  0         []                            
                                                                                                  
 dense (Dense)               (None, 128)                  1280      ['input_1[0][0]']             
                                                                                                  
 dropout (Dropout)           (None, 128)                  0         ['dense[0][0]']               
                                                                                                  
 dense_1 (Dense)             (None, 64)                   8256      ['dropout[0][0]']             
                                                                                              

<keras.src.callbacks.History at 0x797244d4ea40>

In [None]:
import numpy as np


In [None]:
# Function to retrieve team data
def get_team_data(team_name, combined_data):
    """Return a team's average feature row and its average player weight.

    Only rows in which ``team_name`` is the subject team (``team`` column) are
    averaged. Rows where it appears as ``team_opp`` describe the game from the
    other side: there ``points_scored`` is the opponent's score and
    ``player_weight`` belongs to the opponent's players, so including them
    would blend both teams' numbers into one average.

    Parameters
    ----------
    team_name : str
        Team abbreviation as stored in the ``team`` / ``Tm`` columns.
    combined_data : pandas.DataFrame
        Frame holding ``team``, ``Tm``, ``player_weight`` and the statistic
        columns listed in ``numeric_columns`` below.

    Returns
    -------
    tuple
        ``(team_data_mean, average_player_weight)``: a 9-entry Series of column
        means, in the order of ``numeric_columns``, and the mean
        ``player_weight`` over rows whose ``Tm`` equals ``team_name``.

    Raises
    ------
    ValueError
        If no row has ``team_name`` in the ``team`` column.
    """
    # Keep the team's own perspective only (see docstring).
    team_data_filtered = combined_data[combined_data['team'] == team_name]

    if team_data_filtered.empty:
        raise ValueError(f"No data found for team: {team_name}")

    # 'turnovers_opp' and 'yards_gained' appear twice on purpose: the scaler
    # and model used for prediction were fitted on this exact 9-column layout.
    numeric_columns = ['points_scored', 'points_scored_opp', 'yards_gained', 'yards_gained_opp',
                       'turnovers', 'turnovers_opp', 'turnovers_opp', 'yards_gained', 'player_weight']

    team_data_mean = team_data_filtered[numeric_columns].mean()
    # Average weight of the players listed for this team (NaN if there are none).
    average_player_weight = combined_data.loc[combined_data['Tm'] == team_name, 'player_weight'].mean()
    return team_data_mean, average_player_weight

# Function to predict game outcome
def predict_game_outcome(team1_name, team2_name, combined_data, scaler, model):
    """Predict which of two teams wins by comparing their model scores.

    Each team's average feature row and average player weight are obtained from
    ``get_team_data``, the feature row is scaled with ``scaler`` and both inputs
    are fed to ``model``. The team with the higher predicted value is reported
    as the winner. Both predictions take part in the decision; looking only at
    whether the first team's score exceeds 0.5 would ignore the second team.

    Parameters
    ----------
    team1_name, team2_name : str
        Team abbreviations to compare.
    combined_data : pandas.DataFrame
        Data passed through to ``get_team_data``.
    scaler : object
        Fitted transformer exposing ``transform``; applied to the 1 x 9 feature
        row of each team.
    model : object
        Fitted model exposing ``predict`` that accepts
        ``[scaled_features, player_weight]``.

    Returns
    -------
    str
        ``"<team> is predicted to win"``. On an exact tie the second team is
        named.

    Raises
    ------
    ValueError
        Propagated from ``get_team_data`` when a team has no data.
    """
    team1_data, team1_player_weight = get_team_data(team1_name, combined_data)
    team2_data, team2_player_weight = get_team_data(team2_name, combined_data)

    # Prepare features for scaling: one row per team, shape (1, n_features)
    features_team1 = np.array([team1_data])
    features_team2 = np.array([team2_data])

    # Scale features
    features_team1_scaled = scaler.transform(features_team1)
    features_team2_scaled = scaler.transform(features_team2)

    # Reshape player weight for input: shape (1, 1) to match the second input
    team1_weight = np.array([[team1_player_weight]])
    team2_weight = np.array([[team2_player_weight]])

    # Predict using the model
    prediction_team1 = model.predict([features_team1_scaled, team1_weight])
    prediction_team2 = model.predict([features_team2_scaled, team2_weight])

    # Reduce each prediction array to a plain float before comparing.
    win_probability_team1 = float(np.ravel(prediction_team1)[0])
    win_probability_team2 = float(np.ravel(prediction_team2)[0])

    # Interpret the prediction: the higher score wins
    if win_probability_team1 > win_probability_team2:
        winner = team1_name
    else:
        winner = team2_name

    return f"{winner} is predicted to win"

# Example usage
# Team codes in the data are upper-case abbreviations (e.g. 'SF', 'WAS'), so
# typed input is stripped of surrounding whitespace and upper-cased before the
# lookup; otherwise ' sf' or 'was' would fail with "No data found for team".
team1 = input("Enter team one: ").strip().upper()
team2 = input("Enter team two: ").strip().upper()
# testing with 2 teams nfl
prediction = predict_game_outcome(team1, team2, combined_data, scaler, model)
print(prediction)


Enter team one: SF
Enter team two: WAS
SF is predicted to win


