In [1]:
# Importing key libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NFL Win Count Predictor
- ### This network predicts the number of wins an NFL team had based on their offensive statistics across the course of a season.
- ### This project was built to determine how relevant overall statistics are to a team's success or lack thereof.

In [2]:
# Creating the neural network class
class NeuralNetwork(nn.Module):
  def __init__(self, in_features, h1=8, h2=8, out_features=17):
    super().__init__()
    self.fc1 = nn.Linear(in_features, h1)
    self.fc2 = nn.Linear(h1, h2)
    self.out = nn.Linear(h2, out_features)

  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.out(x)
    return x

In [3]:
# Picking a random seed
torch.manual_seed(23)

# Creating model instance
model = NeuralNetwork(in_features=9)

In [4]:
nfl_df = pd.read_csv('team_stats_2003_2023.csv',
                     usecols=['wins', 'points_diff', 'mov', 'yds_per_play_offense',
                              'first_down', 'pass_td', 'pass_int', 'rush_td',
                              'score_pct', 'turnover_pct'])
nfl_df = nfl_df.dropna() # Without this, all tensors at the end read 'nan'
nfl_df.head()

Unnamed: 0,wins,points_diff,mov,yds_per_play_offense,first_down,pass_td,pass_int,rush_td,score_pct,turnover_pct
0,14,110,6.9,4.8,294,23,13,9,27.9,11.3
1,10,50,3.1,4.8,266,17,19,14,28.1,17.2
2,6,-36,-2.3,4.4,268,11,17,13,21.9,17.6
3,6,-16,-1.0,5.3,274,20,14,8,32.4,11.8
4,10,110,6.9,4.9,259,16,19,18,31.8,16.6


In [5]:
# Splitting the data
X = nfl_df.drop(columns=['wins'])
y = nfl_df['wins']

# Converting X and y to numpy arrays
X = X.values
y = y.values

# Using train_test_split to create train and test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=23)

In [6]:
# Normalizing the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)
y_train_scaled = scaler.fit_transform(pd.DataFrame(y_train))
y_test_scaled = scaler.fit_transform(pd.DataFrame(y_test))

# Converting X features to tensors
X_train_scaled = torch.FloatTensor(X_train_scaled)
X_test_scaled = torch.FloatTensor(X_test_scaled)

# Converting y features to tensors
y_train_scaled = torch.FloatTensor(y_train_scaled)
y_test_scaled = torch.FloatTensor(y_test_scaled)

In [7]:
# Setting model criterion, optimizer, and learning rate
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training the model
epochs = 1000
losses = []
for i in range(epochs):
  # Moving forward to get a prediction
  y_pred = model.forward(X_train_scaled)

  # Measuring the loss
  loss = criterion(y_pred, y_train_scaled)

  # Tracking losses over time
  losses.append(loss.detach().numpy())

  if i % 100 == 0:
    print(f'Epoch {i} with loss {loss}')

  # Backpropogation
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0 with loss 1.0291725397109985
Epoch 100 with loss 0.13813938200473785
Epoch 200 with loss 0.1289549469947815
Epoch 300 with loss 0.11817751824855804
Epoch 400 with loss 0.11119937151670456
Epoch 500 with loss 0.10182685405015945
Epoch 600 with loss 0.094197116792202
Epoch 700 with loss 0.09170962870121002
Epoch 800 with loss 0.08698403090238571
Epoch 900 with loss 0.0849895253777504


In [8]:
# Evaluating the model with the test data set
correct = 0
with torch.no_grad():
  for i, data in enumerate(X_test_scaled):
    y_eval = model.forward(data)

    # Printing the prediction and actual value
    print(f'{i+1}.) {y_eval.argmax().item()} win(s) predicted, {y_test[i]} actual win(s)')

    # Counting the number of correct predictions
    if y_eval.argmax().item() == y_test[i]:
      correct += 1
print(f'The network got {correct} correct!')

1.) 13 win(s) predicted, 8 actual win(s)
2.) 12 win(s) predicted, 10 actual win(s)
3.) 13 win(s) predicted, 4 actual win(s)
4.) 13 win(s) predicted, 6 actual win(s)
5.) 5 win(s) predicted, 5 actual win(s)
6.) 13 win(s) predicted, 10 actual win(s)
7.) 7 win(s) predicted, 11 actual win(s)
8.) 13 win(s) predicted, 7 actual win(s)
9.) 5 win(s) predicted, 8 actual win(s)
10.) 5 win(s) predicted, 10 actual win(s)
11.) 5 win(s) predicted, 5 actual win(s)
12.) 13 win(s) predicted, 5 actual win(s)
13.) 13 win(s) predicted, 8 actual win(s)
14.) 13 win(s) predicted, 4 actual win(s)
15.) 5 win(s) predicted, 4 actual win(s)
16.) 6 win(s) predicted, 3 actual win(s)
17.) 6 win(s) predicted, 5 actual win(s)
18.) 10 win(s) predicted, 9 actual win(s)
19.) 7 win(s) predicted, 7 actual win(s)
20.) 13 win(s) predicted, 8 actual win(s)
21.) 5 win(s) predicted, 12 actual win(s)
22.) 5 win(s) predicted, 4 actual win(s)
23.) 13 win(s) predicted, 5 actual win(s)
24.) 5 win(s) predicted, 8 actual win(s)
25.) 13 

The overall struggles of the neural network at predicting team records indicates that factors other than pure statistics are predictive of team success. Nevertheless, which type of statistics are more predictive of a team's success: rushing or passing?

# Comparing Win Count Predictors
- ### The process above was replicated by creating separate dataframes for passing and rushing statistics.
- ### Afterwards, it is determined whether improved team statistics correlate with team wins.

In [9]:
rushing_df = pd.read_csv('team_stats_2003_2023.csv',
                         usecols=['wins', 'rush_att', 'rush_yds', 'rush_td',
                                  'rush_yds_per_att', 'rush_fd'])
passing_df = pd.read_csv('team_stats_2003_2023.csv',
                         usecols=['wins', 'pass_att', 'pass_yds', 'pass_td',
                                  'pass_net_yds_per_att', 'pass_fd'])

rushing_df = rushing_df.dropna()
passing_df = passing_df.dropna()

torch.manual_seed(23)
model = NeuralNetwork(in_features=5)

In [10]:
rush_X = rushing_df.drop(columns=['wins'])
rush_y = rushing_df['wins']
pass_X = passing_df.drop(columns=['wins'])
pass_y = passing_df['wins']

rush_X = rush_X.values
rush_y = rush_y.values
pass_X = pass_X.values
pass_y = pass_y.values

rush_X_train, rush_X_test, rush_y_train, rush_y_test = train_test_split(rush_X, rush_y, test_size=0.2, random_state=23)
pass_X_train, pass_X_test, pass_y_train, pass_y_test = train_test_split(pass_X, pass_y, test_size=0.2, random_state=23)

In [11]:
scaler = StandardScaler()

pass_X_train_scaled = scaler.fit_transform(pass_X_train)
pass_X_test_scaled = scaler.fit_transform(pass_X_test)
pass_y_train_scaled = scaler.fit_transform(pd.DataFrame(pass_y_train))
pass_y_test_scaled = scaler.fit_transform(pd.DataFrame(pass_y_test))

pass_X_train_scaled = torch.FloatTensor(pass_X_train_scaled)
pass_X_test_scaled = torch.FloatTensor(pass_X_test_scaled)
pass_y_train_scaled = torch.FloatTensor(pass_y_train_scaled)
pass_y_test_scaled = torch.FloatTensor(pass_y_test_scaled)

rush_X_train_scaled = scaler.fit_transform(rush_X_train)
rush_X_test_scaled = scaler.fit_transform(rush_X_test)
rush_y_train_scaled = scaler.fit_transform(pd.DataFrame(rush_y_train))
rush_y_test_scaled = scaler.fit_transform(pd.DataFrame(rush_y_test))

rush_X_train_scaled = torch.FloatTensor(rush_X_train_scaled)
rush_X_test_scaled = torch.FloatTensor(rush_X_test_scaled)
rush_y_train_scaled = torch.FloatTensor(rush_y_train_scaled)
rush_y_test_scaled = torch.FloatTensor(rush_y_test_scaled)

In [12]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

epochs = 1000
pass_losses = []
for i in range(epochs):
  pass_y_pred = model.forward(pass_X_train_scaled)
  pass_loss = criterion(pass_y_pred, pass_y_train_scaled)
  pass_losses.append(pass_loss.detach().numpy())

  if i % 100 == 0:
    print(f'Epoch {i} with (pass) loss {pass_loss}')

  optimizer.zero_grad()
  pass_loss.backward()
  optimizer.step()

print('')
rush_losses = []
for i in range(epochs):
  rush_y_pred = model.forward(rush_X_train_scaled)
  rush_loss = criterion(rush_y_pred, rush_y_train_scaled)
  rush_losses.append(rush_loss.detach().numpy())

  if i % 100 == 0:
    print(f'Epoch {i} with (rush) loss {rush_loss}')

  optimizer.zero_grad()
  rush_loss.backward()
  optimizer.step()

Epoch 0 with (pass) loss 1.0466370582580566


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 100 with (pass) loss 0.5100817084312439
Epoch 200 with (pass) loss 0.4592422842979431
Epoch 300 with (pass) loss 0.440909206867218
Epoch 400 with (pass) loss 0.43107691407203674
Epoch 500 with (pass) loss 0.4256610870361328
Epoch 600 with (pass) loss 0.4146038889884949
Epoch 700 with (pass) loss 0.4087155759334564
Epoch 800 with (pass) loss 0.4060881435871124
Epoch 900 with (pass) loss 0.40438756346702576

Epoch 0 with (rush) loss 1.6485686302185059
Epoch 100 with (rush) loss 0.6213229298591614
Epoch 200 with (rush) loss 0.5967588424682617
Epoch 300 with (rush) loss 0.5795039534568787
Epoch 400 with (rush) loss 0.567008376121521
Epoch 500 with (rush) loss 0.5546119213104248
Epoch 600 with (rush) loss 0.5492381453514099
Epoch 700 with (rush) loss 0.5466907620429993
Epoch 800 with (rush) loss 0.5449942946434021
Epoch 900 with (rush) loss 0.5425501465797424


In [13]:
# Evaluating the passing model with the test data set
correct = 0
print('PASSING MODEL:')
with torch.no_grad():
  for i, data in enumerate(pass_X_test_scaled):
    pass_y_eval = model.forward(data)

    # Printing the prediction and actual value
    print(f'{i+1}.) {pass_y_eval.argmax().item()} win(s) predicted, {pass_y_test[i]} actual win(s)')

    # Counting the number of correct predictions
    if pass_y_eval.argmax().item() == pass_y_test[i]:
      correct += 1
print(f'The passing network got {correct} correct!')

PASSING MODEL:
1.) 0 win(s) predicted, 0 actual win(s)
2.) 14 win(s) predicted, 12 actual win(s)
3.) 8 win(s) predicted, 4 actual win(s)
4.) 8 win(s) predicted, 7 actual win(s)
5.) 14 win(s) predicted, 8 actual win(s)
6.) 2 win(s) predicted, 4 actual win(s)
7.) 3 win(s) predicted, 10 actual win(s)
8.) 8 win(s) predicted, 7 actual win(s)
9.) 14 win(s) predicted, 13 actual win(s)
10.) 14 win(s) predicted, 10 actual win(s)
11.) 14 win(s) predicted, 5 actual win(s)
12.) 14 win(s) predicted, 7 actual win(s)
13.) 14 win(s) predicted, 14 actual win(s)
14.) 0 win(s) predicted, 5 actual win(s)
15.) 7 win(s) predicted, 8 actual win(s)
16.) 1 win(s) predicted, 9 actual win(s)
17.) 14 win(s) predicted, 10 actual win(s)
18.) 14 win(s) predicted, 4 actual win(s)
19.) 14 win(s) predicted, 10 actual win(s)
20.) 14 win(s) predicted, 13 actual win(s)
21.) 14 win(s) predicted, 13 actual win(s)
22.) 0 win(s) predicted, 1 actual win(s)
23.) 8 win(s) predicted, 12 actual win(s)
24.) 2 win(s) predicted, 4 ac

In [14]:
# Evaluating the rushing model with the test data set
correct = 0
print('RUSHING MODEL:')
with torch.no_grad():
  for i, data in enumerate(rush_X_test_scaled):
    rush_y_eval = model.forward(data)

    # Printing the prediction and actual value
    print(f'{i+1}.) {rush_y_eval.argmax().item()} win(s) predicted, {rush_y_test[i]} actual win(s)')

    # Counting the number of correct predictions
    if rush_y_eval.argmax().item() == rush_y_test[i]:
      correct += 1
print(f'The rushing network got {correct} correct!')

RUSHING MODEL:
1.) 8 win(s) predicted, 0 actual win(s)
2.) 14 win(s) predicted, 12 actual win(s)
3.) 8 win(s) predicted, 4 actual win(s)
4.) 8 win(s) predicted, 7 actual win(s)
5.) 3 win(s) predicted, 8 actual win(s)
6.) 14 win(s) predicted, 4 actual win(s)
7.) 8 win(s) predicted, 10 actual win(s)
8.) 6 win(s) predicted, 7 actual win(s)
9.) 2 win(s) predicted, 13 actual win(s)
10.) 14 win(s) predicted, 10 actual win(s)
11.) 2 win(s) predicted, 5 actual win(s)
12.) 6 win(s) predicted, 7 actual win(s)
13.) 6 win(s) predicted, 14 actual win(s)
14.) 8 win(s) predicted, 5 actual win(s)
15.) 6 win(s) predicted, 8 actual win(s)
16.) 14 win(s) predicted, 9 actual win(s)
17.) 3 win(s) predicted, 10 actual win(s)
18.) 3 win(s) predicted, 4 actual win(s)
19.) 14 win(s) predicted, 10 actual win(s)
20.) 3 win(s) predicted, 13 actual win(s)
21.) 14 win(s) predicted, 13 actual win(s)
22.) 14 win(s) predicted, 1 actual win(s)
23.) 14 win(s) predicted, 12 actual win(s)
24.) 8 win(s) predicted, 4 actual

Despite the expected struggles of each network, the rushing statistics appear to be slightly more predictive of an NFL team's win total. Does this mean that teams with more rushing attempts tend to win more games?

In [15]:
rush_att = rushing_df['rush_att'].values
wins = rushing_df['wins'].values
combined_tensor = torch.tensor(np.array([rush_att, wins]))
print(torch.corrcoef(combined_tensor))

tensor([[1.0000, 0.4307],
        [0.4307, 1.0000]])


Since the Pearson correlation coefficient is between 0 and 1 when comparing the rush attempts and wins data, there exists a positive correlation between the two variables. Therefore, the data shows that as a team's rush attempts increase, the team's wins increase.<br><br>
It is important to note that this does not imply causation. However, these findings make sense, given the tendency of NFL teams to call more rush plays when they are actively winning a football game.