# **Massey Ranking of NFL Teams for the 2022 Season with Current Game Data**

**Pranav Narala and Kerry Jin**

**Data source:**

https://masseyratings.com/scores.php?s=384378&sub=384378&all=1&mode=3&sch=on&format=1

https://masseyratings.com/scores.php?s=384378&sub=384378&all=1&mode=3&sch=on&format=2


**Other Sources Used:**

https://masseyratings.com/theory/predict.htm

https://www3.nd.edu/~apilking/Math10170/Information/Lectures%202015/Topic%209%20Massey's%20Method.pdf

http://users.dimi.uniud.it/~massimo.franceschet/teaching/datascience/network/massey.html

https://www.dcs.bbk.ac.uk/~ale/dsta/2020-21/dsta-3/lm-ch2-massey.pdf

https://www.youtube.com/watch?v=55OdkUN5vbA

https://www.youtube.com/watch?v=w9BJHh7vGCA

https://operations.nfl.com/gameday/nfl-schedule/creating-the-nfl-schedule/#:~:text=Six%20games%20against%20divisional%20opponents,and%20two%20on%20the%20road.

https://www.google.com/search?q=spearman%27s+rank+correlation+coefficient&oq=spearm&aqs=chrome.1.69i57j69i59l3j46i433i512j0i67i131i433j0i67j0i433i512j0i131i433i512j0i10i433.1610j0j7&sourceid=chrome&ie=UTF-8#wptab=si:AC1wQDDneak2MGu90lY3o217UabRapWzjDthgzyFzlAOtapK_cZryAbbwKnZFOpLqVknFzgrQFLyUPiXpIjdvq04Yi8OO8Joa50DQabGdGm_2HdLtsIX4U2q4xfMFzQOIDVgrDiqC1O7ww5CI25YvcKMMNXIVBszm_LvrAQXsEV3DIlmBlWCVwU%3D





# **Getting Rankings**

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [None]:
# Read the CSV that pairs each team name with its assigned numeric ID.
team_ids_path = '/content/NFLTeamToID.csv'
team_IDs = pd.read_csv(team_ids_path)

# Preview the first rows of the lookup table.
team_IDs.head()

Unnamed: 0,ID,Team
0,1,Arizona
1,2,Atlanta
2,3,Baltimore
3,4,Buffalo
4,5,Carolina


In [None]:
# Read the per-game table (date, the two team IDs, home/away flags and the
# points each side scored).
games_path = '/content/games.csv'
games = pd.read_csv(games_path)

# Preview the first rows of the game log.
games.head()

Unnamed: 0.1,Unnamed: 0,Game Date,Team 1 ID,Home Team,Team 1 Points Scored,Team 2 ID,Home Team.1,Team 2 Points Scored
0,738771,20220908,4,-1,31,18,1,10
1,738774,20220911,23,-1,27,2,1,26
2,738774,20220911,8,-1,26,5,1,24
3,738774,20220911,6,1,19,28,-1,10
4,738774,20220911,27,-1,23,7,1,20


In [None]:
# Cleaning the dataframe.
# 'Game Date' is stored as a YYYYMMDD integer; rows dated on or after
# 20221129 are games that haven't been played yet, so they are removed.
games_filtered = games[games['Game Date'] >= 20221129].index

games = (
    games
    .drop(index=games_filtered)
    # Dropping columns the rating computation does not use.
    .drop(columns=['Unnamed: 0', 'Game Date', 'Home Team', 'Home Team.1'])
    # Later cells look games up by the labels 0..len(games)-1, so the index
    # must be contiguous again after rows have been removed.
    .reset_index(drop=True)
)
games

Unnamed: 0,Team 1 ID,Team 1 Points Scored,Team 2 ID,Team 2 Points Scored
0,4,31,18,10
1,23,27,2,26
2,8,26,5,24
3,6,19,28,10
4,27,23,7,20
...,...,...,...,...
175,19,40,29,34
176,16,26,18,10
177,28,13,23,0
178,26,40,12,33


In [None]:
# Building the Massey game matrix: one row per game with +1 in the column of
# Team 1 and -1 in the column of Team 2, plus one extra final row of ones.
n_games = len(games)
games_matrix = np.zeros((n_games + 1, len(team_IDs)))

# Team IDs are 1-based, matrix columns are 0-based.
# .to_numpy() gives positional access, so this does not depend on the
# DataFrame's index labels being exactly 0..n_games-1.
game_rows = np.arange(n_games)
team_1_cols = games['Team 1 ID'].to_numpy() - 1
team_2_cols = games['Team 2 ID'].to_numpy() - 1
games_matrix[game_rows, team_1_cols] = 1
games_matrix[game_rows, team_2_cols] = -1

# Final row: all ones (paired with a 0 target, it ties the ratings' sum to 0).
games_matrix[n_games, :] = 1

games_matrix

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0., -1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.]])

In [None]:
#Creating a vector with the point differential (Team 1 minus Team 2) for each game.
# It has one extra trailing entry that stays 0, matching the extra all-ones
# row appended to games_matrix.
point_differential_matrix = np.zeros(len(games) + 1)

# Positional (array-based) subtraction: independent of the DataFrame's index labels.
point_differential_matrix[:len(games)] = (
    games['Team 1 Points Scored'].to_numpy()
    - games['Team 2 Points Scored'].to_numpy()
)

point_differential_matrix

array([21.,  1.,  2.,  9.,  3.,  3.,  0., 13., 15.,  6., 23.,  5., 16.,
        1., 16.,  1.,  3.,  4.,  1.,  9., 24., 10.,  3.,  3.,  4., 20.,
        3.,  7.,  6., 17., 34., 17., 12.,  8.,  3.,  3.,  2.,  4., 11.,
       15.,  2., 16., 28.,  8.,  4.,  2.,  1.,  7., 12.,  3.,  3.,  3.,
       15.,  3., 10.,  7.,  8.,  8.,  4., 10.,  3.,  9., 10., 15.,  3.,
        5., 35.,  2.,  7.,  7., 29.,  7., 23.,  6.,  4., 22.,  3., 12.,
        2.,  1.,  5., 14., 23., 17.,  7.,  8.,  4.,  4.,  2., 14., 10.,
        4.,  9.,  3.,  8.,  3., 18., 18., 18.,  6.,  9.,  2.,  7., 18.,
       14., 21.,  6., 19.,  5.,  4.,  3., 20.,  4.,  8., 24.,  5., 22.,
        7.,  1., 17., 14., 10., 19., 12.,  3.,  3., 21.,  6.,  7., 23.,
        3.,  3., 10.,  3.,  3., 14., 10.,  5.,  3.,  1., 10., 22.,  8.,
       10.,  7.,  5.,  3., 10.,  6., 11., 10.,  3., 10.,  8., 13.,  1.,
        7.,  7., 13.,  6.,  3., 37.,  7., 28.,  3.,  8.,  7., 13.,  6.,
        1., 15., 21.,  4.,  6.,  1.,  6., 16., 13.,  7.,  7.,  0

In [None]:
# Least-squares fit of the game matrix against the point differentials.
# No intercept is fitted, so the fitted coefficients are the team ratings.
massey_model = LinearRegression(fit_intercept=False)
rankings = massey_model.fit(X=games_matrix, y=point_differential_matrix)

In [None]:
# One fitted coefficient per column of games_matrix, i.e. per team, ordered by
# team ID (position 0 corresponds to team ID 1).
rankings.coef_

array([-5.8421289 , -3.33962889,  6.24620091, 11.10124263, -4.1298094 ,
       -3.44979609,  5.44590992, -0.19723941,  8.49327154, -5.59111745,
       -0.51868527, -1.32420838, -8.9985769 , -5.03954072, -0.23497365,
        6.12423307, -4.04476514, -5.95495963, -4.18070149,  3.99271189,
        2.22172409,  4.16989769, -3.54498807, -0.42254972,  5.03855438,
        6.56467512, -3.13940117,  3.49296366, -1.84947968, -0.51054628,
       -0.37526154, -0.20302713])

In [None]:
# Pair every fitted coefficient with its team name (coefficient position k
# belongs to the team in row k of team_IDs).
n_coef = len(rankings.coef_)
teams_list = [team_IDs.loc[k, 'Team'] for k in range(n_coef)]
rankings_list = [rankings.coef_[k] for k in range(n_coef)]

# Sort from highest to lowest rating. reset_index() keeps each team's original
# row position in an 'index' column.
rankings_df = (
    pd.DataFrame({'Team': teams_list, 'Ranking': rankings_list})
    .sort_values(by='Ranking', ascending=False)
    .reset_index()
)
rankings_df.head(len(rankings_df))

Unnamed: 0,index,Team,Ranking
0,3,Buffalo,11.101243
1,8,Dallas,8.493272
2,25,Philadelphia,6.564675
3,2,Baltimore,6.246201
4,15,Kansas_City,6.124233
5,6,Cincinnati,5.44591
6,24,NY_Jets,5.038554
7,21,New_England,4.169898
8,19,Miami,3.992712
9,27,San_Francisco,3.492964


# **Offensive and Defensive Ratings**

In [None]:
# Splitting the overall ratings r into offensive and defensive parts by
# solving the linear system (T + P) d = T r - f for the defensive ratings d.
n_teams = len(team_IDs)

# Team IDs are 1-based; convert them to 0-based positions once.
team_1 = games['Team 1 ID'].to_numpy() - 1
team_2 = games['Team 2 ID'].to_numpy() - 1

# f[i]: total points scored by team i over all games.
# np.add.at accumulates correctly when the same team appears many times.
f = np.zeros(n_teams)
np.add.at(f, team_1, games['Team 1 Points Scored'].to_numpy())
np.add.at(f, team_2, games['Team 2 Points Scored'].to_numpy())

# T: diagonal matrix, T[i, i] = number of games team i has played.
T = np.zeros((n_teams, n_teams))
np.add.at(T, (team_1, team_1), 1)
np.add.at(T, (team_2, team_2), 1)

# P: symmetric matrix, P[i, j] = number of games between teams i and j.
P = np.zeros((n_teams, n_teams))
np.add.at(P, (team_1, team_2), 1)
np.add.at(P, (team_2, team_1), 1)

# The overall ratings are read from rankings.coef_ directly rather than from
# rankings_list, because rankings_list is reassigned by later cells and would
# hold the wrong values if this cell were re-run afterwards.
overall_ratings = rankings.coef_

# fit_intercept=False: the system has no constant term. Letting the regression
# fit one would absorb part of the right-hand side, and the returned
# coefficients would no longer solve (T + P) d = T r - f.
defensive_rankings = LinearRegression(fit_intercept=False).fit(
    T + P, T @ overall_ratings - f
)
defensive_rankings.coef_

array([-6.18812082, -4.79881446,  2.01797272,  5.18863995, -2.76817604,
       -4.99419372,  1.56464979, -3.20209872,  5.03167574,  3.17092033,
       -4.21461362, -2.71595736, -2.44541144, -1.2814733 ,  0.04453417,
       -1.40962196, -3.02746502, -0.19539776, -5.00646155,  0.02229555,
        0.76502633,  4.82218002, -3.29678672,  1.81987108,  5.25550813,
        1.88467068, -0.28323099,  5.50712591, -4.65288215,  4.85974158,
        2.79859592,  1.28373446])

In [None]:
# Pair every defensive coefficient with its team name (coefficient position k
# belongs to the team in row k of team_IDs).
n_coef = len(rankings.coef_)
teams_list = [team_IDs.loc[k, 'Team'] for k in range(n_coef)]
rankings_list = [defensive_rankings.coef_[k] for k in range(n_coef)]

# Sort from highest to lowest value. reset_index() keeps each team's original
# row position in an 'index' column.
defensive_rankings_df = (
    pd.DataFrame({'Team': teams_list, 'Defensive Ranking': rankings_list})
    .sort_values(by='Defensive Ranking', ascending=False)
    .reset_index()
)
defensive_rankings_df.head(len(defensive_rankings_df))

Unnamed: 0,index,Team,Defensive Ranking
0,27,San_Francisco,5.507126
1,24,NY_Jets,5.255508
2,3,Buffalo,5.18864
3,8,Dallas,5.031676
4,29,Tampa_Bay,4.859742
5,21,New_England,4.82218
6,9,Denver,3.17092
7,30,Tennessee,2.798596
8,2,Baltimore,2.017973
9,25,Philadelphia,1.884671


In [None]:
# Offensive part = overall rating minus defensive part, element-wise per team.
offensive_rankings = np.subtract(rankings.coef_, defensive_rankings.coef_)
offensive_rankings

array([ 0.34599192,  1.45918557,  4.22822818,  5.91260268, -1.36163336,
        1.54439763,  3.88126013,  3.00485932,  3.4615958 , -8.76203779,
        3.69592835,  1.39174898, -6.55316547, -3.75806742, -0.27950782,
        7.53385503, -1.01730012, -5.75956186,  0.82576006,  3.97041634,
        1.45669777, -0.65228233, -0.24820135, -2.2424208 , -0.21695374,
        4.68000444, -2.85617018, -2.01416226,  2.80340247, -5.37028786,
       -3.17385746, -1.48676159])

In [None]:
# Pair every offensive value with its team name (position k belongs to the
# team in row k of team_IDs).
n_coef = len(rankings.coef_)
teams_list = [team_IDs.loc[k, 'Team'] for k in range(n_coef)]
rankings_list = [offensive_rankings[k] for k in range(n_coef)]

# Sort from highest to lowest value. reset_index() keeps each team's original
# row position in an 'index' column.
offensive_rankings_df = (
    pd.DataFrame({'Team': teams_list, 'Offensive Ranking': rankings_list})
    .sort_values(by='Offensive Ranking', ascending=False)
    .reset_index()
)
offensive_rankings_df.head(len(offensive_rankings_df))

Unnamed: 0,index,Team,Offensive Ranking
0,15,Kansas_City,7.533855
1,3,Buffalo,5.912603
2,25,Philadelphia,4.680004
3,2,Baltimore,4.228228
4,19,Miami,3.970416
5,6,Cincinnati,3.88126
6,10,Detroit,3.695928
7,8,Dallas,3.461596
8,7,Cleveland,3.004859
9,28,Seattle,2.803402
