In [103]:
import os
import csv
import numpy as np
import pandas as pd
import sklearn as skl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import tensorflow as tf
import keras_tuner as kt


In [104]:
nfl_data = pd.read_csv(r'C:\Users\wmk71\UNC-VIRT-DATA-PT-03-2024-U-LOLC\Project_4\archive\nfl_team_stats_2002-2023.csv')

In [105]:
nfl_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5929 entries, 0 to 5928
Data columns (total 53 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   season                 5929 non-null   int64 
 1   week                   5929 non-null   object
 2   date                   5929 non-null   object
 3   away                   5929 non-null   object
 4   home                   5929 non-null   object
 5   score_away             5929 non-null   int64 
 6   score_home             5929 non-null   int64 
 7   first_downs_away       5929 non-null   int64 
 8   first_downs_home       5929 non-null   int64 
 9   third_down_comp_away   5929 non-null   int64 
 10  third_down_att_away    5929 non-null   int64 
 11  third_down_comp_home   5929 non-null   int64 
 12  third_down_att_home    5929 non-null   int64 
 13  fourth_down_comp_away  5929 non-null   int64 
 14  fourth_down_att_away   5929 non-null   int64 
 15  fourth_down_comp_home

In [106]:
nfl_data.head()

Unnamed: 0,season,week,date,away,home,score_away,score_home,first_downs_away,first_downs_home,third_down_comp_away,...,redzone_comp_home,redzone_att_home,fumbles_away,fumbles_home,interceptions_away,interceptions_home,def_st_td_away,def_st_td_home,possession_away,possession_home
0,2002,1,2002-09-05,49ers,Giants,16,13,13,21,4,...,0,6,0,0,1,3,0,0,27:32,32:28
1,2002,1,2002-09-08,Jets,Bills,37,31,18,26,2,...,0,8,1,1,0,2,2,0,21:06,39:08
2,2002,1,2002-09-08,Vikings,Bears,23,27,19,20,5,...,0,6,1,1,2,1,0,0,31:31,28:29
3,2002,1,2002-09-08,Chargers,Bengals,34,6,27,13,6,...,0,5,0,0,0,1,0,0,37:48,22:12
4,2002,1,2002-09-08,Chiefs,Browns,40,39,24,24,5,...,0,10,0,1,1,0,0,0,30:16,29:44


In [107]:
# Add a new column for the result
nfl_data['result'] = ''

# Iterate through the rows to determine the outcome
for index, row in nfl_data.iterrows():
    if row['score_away'] > row['score_home']:
        nfl_data.at[index, 'result'] = 0 # Away Win
    elif row['score_away'] < row['score_home']:
        nfl_data.at[index, 'result'] = 1 # Away Loss
    else:
        nfl_data.at[index, 'result'] = 2 # Tie

# Print out the updated DataFrame with results
print(nfl_data[['score_away', 'score_home', 'result']].head())

   score_away  score_home result
0          16          13      0
1          37          31      0
2          23          27      1
3          34           6      0
4          40          39      0


In [108]:
team_name_to_id = {
    'Cardinals': 1,
    'Falcons': 2,
    'Ravens': 3,
    'Bills': 4,
    'Panthers': 5,
    'Bears': 6,
    'Bengals': 7,
    'Browns': 8,
    'Cowboys': 9,
    'Broncos': 10,
    'Lions': 11,
    'Packers': 12,
    'Texans': 13,
    'Colts': 14,
    'Jaguars': 15,
    'Chiefs': 16,
    'Raiders': 17,
    'Chargers': 18,
    'Rams': 19,
    'Dolphins': 20,
    'Vikings': 21,
    'Patriots': 22,
    'Saints': 23,
    'Giants': 24,
    'Jets': 25,
    'Eagles': 26,
    'Steelers': 27,
    '49ers': 28,
    'Seahawks': 29,
    'Buccaneers': 30,
    'Titans': 31,
    'Commanders': 32,
    # Add all other team names and their corresponding IDs
}
# Replace team names with numeric IDs using a for loop
for index, row in nfl_data.iterrows():
    home_team = row['home']
    away_team = row['away']
    
    if home_team in team_name_to_id:
        nfl_data.at[index, 'home'] = team_name_to_id[home_team]
    
    if away_team in team_name_to_id:
        nfl_data.at[index, 'away'] = team_name_to_id[away_team]

# Display the DataFrame with team names replaced by IDs
print(nfl_data.head)

<bound method NDFrame.head of       season        week        date away home  score_away  score_home  \
0       2002           1  2002-09-05   28   24          16          13   
1       2002           1  2002-09-08   25    4          37          31   
2       2002           1  2002-09-08   21    6          23          27   
3       2002           1  2002-09-08   18    7          34           6   
4       2002           1  2002-09-08   16    8          40          39   
...      ...         ...         ...  ...  ...         ...         ...   
5924    2023    Division  2024-01-21   30   11          23          31   
5925    2023    Division  2024-01-21   16    4          27          24   
5926    2023  Conference  2024-01-28   16    3          17          10   
5927    2023  Conference  2024-01-28   11   28          31          34   
5928    2023   Superbowl  2024-02-11   28   16          22          25   

      first_downs_away  first_downs_home  third_down_comp_away  ...  \
0         

In [109]:
# Drop rows with any missing values
nfl_data_clean = nfl_data.drop(columns=['date', 'week', 'possession_away', 'possession_home'])

nfl_data_clean = nfl_data_clean.dropna()

nfl_data_clean

Unnamed: 0,season,away,home,score_away,score_home,first_downs_away,first_downs_home,third_down_comp_away,third_down_att_away,third_down_comp_home,...,redzone_att_away,redzone_comp_home,redzone_att_home,fumbles_away,fumbles_home,interceptions_away,interceptions_home,def_st_td_away,def_st_td_home,result
0,2002,28,24,16,13,13,21,4,12,9,...,8,0,6,0,0,1,3,0,0,0
1,2002,25,4,37,31,18,26,2,8,7,...,9,0,8,1,1,0,2,2,0,0
2,2002,21,6,23,27,19,20,5,13,7,...,7,0,6,1,1,2,1,0,0,1
3,2002,18,7,34,6,27,13,6,10,4,...,7,0,5,0,0,0,1,0,0,0
4,2002,16,8,40,39,24,24,5,11,4,...,10,0,10,0,1,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5924,2023,30,11,23,31,23,26,4,12,6,...,3,3,4,0,0,2,0,0,0,1
5925,2023,16,4,27,24,21,27,1,5,7,...,4,3,4,1,0,0,0,0,0,0
5926,2023,16,3,17,10,22,16,8,18,3,...,3,0,1,0,2,0,1,0,0,0
5927,2023,11,28,31,34,28,23,6,12,6,...,4,4,5,1,0,0,1,0,0,1


In [110]:
# Separate features and target variable
X = nfl_data_clean.drop(['result'], axis=1).values  # Replace 'target_column' with your target column name
y = nfl_data_clean['result'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [111]:
print(type(X_train))
print(type(y_train))
print(X_train.dtype)
print(y_train.dtype)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
object
object


In [112]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
y_train = y_train.astype('float32')  # or int32, depending on your problem
y_test = y_test.astype('float32')

In [113]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.

number_input_features = X_train.shape[1]
hidden_nodes_layer1 =  9
hidden_nodes_layer2 = 5
hidden_nodes_layer3 = 1
hidden_nodes_layer4 = 3
hidden_nodes_layer5 = 9
hidden_nodes_layer6 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="sigmoid")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Fourth hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))

# Fifth hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="relu"))

# Sixth hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


# Check the structure of the model
nn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [114]:
# Compile the model
nn.compile(loss='mean_squared_error', optimizer="adam", metrics=["accuracy"])

In [115]:
nn.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.5573 - loss: 0.2548 - val_accuracy: 0.5787 - val_loss: 0.2544
Epoch 2/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5614 - loss: 0.2523 - val_accuracy: 0.5787 - val_loss: 0.2529
Epoch 3/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5435 - loss: 0.2538 - val_accuracy: 0.5787 - val_loss: 0.2519
Epoch 4/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5505 - loss: 0.2563 - val_accuracy: 0.5787 - val_loss: 0.2513
Epoch 5/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5518 - loss: 0.2514 - val_accuracy: 0.5787 - val_loss: 0.2508
Epoch 6/50
[1m112/112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5605 - loss: 0.2521 - val_accuracy: 0.5787 - val_loss: 0.2506
Epoch 7/50
[1m112/112[0m 

<keras.src.callbacks.history.History at 0x1c9c852e950>

In [116]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

47/47 - 0s - 991us/step - accuracy: 0.5705 - loss: 0.2463
Loss: 0.24625177681446075, Accuracy: 0.5704652667045593


In [117]:

feature_columns = [
    'season','away', 'home', 'score_away', 'score_home', 'first_downs_away', 'first_downs_home',
    'third_down_comp_away', 'third_down_att_away', 'third_down_comp_home', 'third_down_att_home',
    'fourth_down_comp_away', 'fourth_down_att_away', 'fourth_down_comp_home', 'fourth_down_att_home',
    'plays_away', 'plays_home', 'drives_away', 'drives_home', 'yards_away', 'yards_home',
    'pass_comp_away', 'pass_att_away', 'pass_yards_away', 'pass_comp_home', 'pass_att_home', 'pass_yards_home',
    'sacks_num_away', 'sacks_yards_away', 'sacks_num_home', 'sacks_yards_home',
    'rush_att_away', 'rush_yards_away', 'rush_att_home', 'rush_yards_home',
    'pen_num_away', 'pen_yards_away', 'pen_num_home', 'pen_yards_home',
    'redzone_comp_away', 'redzone_att_away', 'redzone_comp_home', 'redzone_att_home',
    'fumbles_away', 'fumbles_home', 'interceptions_away', 'interceptions_home',
    'def_st_td_away', 'def_st_td_home'
]

# Create new_matchup DataFrame
new_matchup = pd.DataFrame({
    'home': [6],
    'away': [3], 
    # Include other features with placeholder values
    'season': [0],
    'score_away': [0],
    'score_home': [0],
    'first_downs_away': [0],
    'first_downs_home': [0],
    'third_down_comp_away': [0],
    'third_down_att_away': [0],
    'third_down_comp_home': [0],
    'third_down_att_home': [0],
    'fourth_down_comp_away': [0],
    'fourth_down_att_away': [0],
    'fourth_down_comp_home': [0],
    'fourth_down_att_home': [0],
    'plays_away': [0],
    'plays_home': [0],
    'drives_away': [0],
    'drives_home': [0],
    'yards_away': [0],
    'yards_home': [0],
    'pass_comp_away': [0],
    'pass_att_away': [0],
    'pass_yards_away': [0],
    'pass_comp_home': [0],
    'pass_att_home': [0],
    'pass_yards_home': [0],
    'sacks_num_away': [0],
    'sacks_yards_away': [0],
    'sacks_num_home': [0],
    'sacks_yards_home': [0],
    'rush_att_away': [0],
    'rush_yards_away': [0],
    'rush_att_home': [0],
    'rush_yards_home': [0],
    'pen_num_away': [0],
    'pen_yards_away': [0],
    'pen_num_home': [0],
    'pen_yards_home': [0],
    'redzone_comp_away': [0],
    'redzone_att_away': [0],
    'redzone_comp_home': [0],
    'redzone_att_home': [0],
    'fumbles_away': [0],
    'fumbles_home': [0],
    'interceptions_away': [0],
    'interceptions_home': [0],
    'def_st_td_away': [0],
    'def_st_td_home': [0]
})

new_matchup = new_matchup.reindex(columns=feature_columns, fill_value=0)


print(new_matchup.shape)

new_matchup_scaled = scaler.transform(new_matchup)

predictions = nn.predict(new_matchup_scaled)

predicted_classes = (predictions > 0.5).astype(int)

def display_results(predictions, class_labels):
    probability = predictions[0][0]
    predicted_class = class_labels[0][0]
    
    print(f"Predicted probability of home team winning: {probability:.4f}")
    print(f"Predicted class: {'Home team wins' if predicted_class == 1 else 'Away team wins'}")

display_results(predictions, predicted_classes)


(1, 49)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Predicted probability of home team winning: 0.5640
Predicted class: Home team wins


