## First, we focus on Super Bowl wins.

In [1]:
# Optional one-time setup: uncomment the lines below to install the modelling
# libraries imported later in this notebook (statsmodels and scikit-learn).
# pip install statsmodels
# pip install scikit-learn

In [2]:
import pandas as pd

# Load the positional-spending dataset so its structure and the columns
# relevant for modeling can be inspected.
file_path = 'NFL_Positional_Spending_with_Championships_and_Wins_MJT.csv'
nfl_data = pd.read_csv(file_path)

# DataFrame.info() prints its report and returns None, so it is called as a
# statement instead of being packed into a tuple with head() (which would
# display a stray None and fall back to the plain-text tuple repr).
nfl_data.info()

# Leave head() as the cell's final expression so the first rows are rendered
# with the notebook's rich DataFrame display.
nfl_data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 448 entries, 0 to 447
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Team           448 non-null    object 
 1   OFF            448 non-null    float64
 2   QB             448 non-null    float64
 3   RB             448 non-null    float64
 4   WR             448 non-null    float64
 5   TE             448 non-null    float64
 6   OL             448 non-null    float64
 7   DEF            448 non-null    float64
 8   DL             448 non-null    float64
 9   LB             448 non-null    float64
 10  SEC            448 non-null    float64
 11  SPT            448 non-null    float64
 12  K              448 non-null    float64
 13  P              448 non-null    float64
 14  LS             448 non-null    float64
 15  Total          448 non-null    float64
 16  Year           448 non-null    int64  
 17  Wins           448 non-null    int64  
 18  SuperBowl_

(  Team       OFF        QB        RB        WR        TE        OL       DEF  \
 0  DET  0.525250  0.075417  0.060167  0.205417  0.047000  0.196500  0.392417   
 1  PHI  0.464750  0.157667  0.015083  0.102833  0.029500  0.154667  0.516250   
 2  CAR  0.535833  0.045583  0.091083  0.112000  0.053750  0.251667  0.363917   
 3  ATL  0.543833  0.135833  0.077750  0.112333  0.080500  0.134167  0.398167   
 4  NYG  0.478167  0.150750  0.099333  0.062833  0.012667  0.160917  0.394583   
 
          DL        LB  ...       SPT         K         P        LS     Total  \
 0  0.227583  0.072833  ...  0.036833  0.023750  0.005750  0.007333  1.022250   
 1  0.242250  0.077167  ...  0.014833  0.004000  0.003167  0.007667  0.996750   
 2  0.132333  0.135750  ...  0.043000  0.016667  0.016333  0.010000  0.989333   
 3  0.083833  0.186667  ...  0.026000  0.015083  0.003333  0.007583  0.972333   
 4  0.197250  0.095917  ...  0.027250  0.012500  0.007083  0.007667  0.959667   
 
    Year  Wins  SuperBow

In [None]:
import statsmodels.api as sm
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# --- Part 1: binomial GLM of Super Bowl wins on positional spending ---

# Define the response variable and predictors
y = nfl_data['SuperBowl_Win']
X = nfl_data[['QB', 'RB', 'WR', 'TE', 'LB', 'OL', 'DL', 'SEC', 'K', 'P', 'LS']]

# Add a constant to the model (intercept)
X = sm.add_constant(X)

# Fit the logistic regression model (Binomial family GLM)
glm_model = sm.GLM(y, X, family=sm.families.Binomial()).fit()

# Display the summary of the model
glm_model_summary = glm_model.summary()
print(glm_model_summary)

#########################################################################

# --- Part 2: penalized logistic regression on standardized predictors ---

# Rebuild the predictor frame WITHOUT the intercept column before scaling.
# Reusing the `const`-augmented X from the GLM above would feed a
# zero-variance column to the scaler and the classifier and add a spurious
# 'const' row to the coefficient table, making this summary inconsistent with
# the conference-championship and regular-season summaries.
y = nfl_data['SuperBowl_Win']
X = nfl_data[['QB', 'RB', 'WR', 'TE', 'LB', 'OL', 'DL', 'SEC', 'K', 'P', 'LS']]

# Standardize the predictor variables (positional spending columns)
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Apply Ridge regularization using logistic regression
# (no penalty is passed explicitly; this relies on scikit-learn's default L2 penalty)
ridge_model = LogisticRegression(solver='liblinear', max_iter=1000)
ridge_model.fit(X_standardized, y)

# Retrieve coefficients as a flat array, one entry per column of X
ridge_coefficients = ridge_model.coef_.flatten()

# Creating a DataFrame for easier interpretation, largest coefficient first
ridge_summary1 = pd.DataFrame({
    'Position': X.columns,
    'Standardized Coefficient (Super Bowl Wins)': ridge_coefficients
}).sort_values(by='Standardized Coefficient (Super Bowl Wins)', ascending=False)

# Display the summary
print("Super Bowl Wins - Ridge Regression Coefficients Summary")
print(ridge_summary1)

                 Generalized Linear Model Regression Results                  
Dep. Variable:          SuperBowl_Win   No. Observations:                  448
Model:                            GLM   Df Residuals:                      436
Model Family:                Binomial   Df Model:                           11
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -50.533
Date:                Sat, 16 Nov 2024   Deviance:                       101.07
Time:                        17:20:38   Pearson chi2:                     345.
No. Iterations:                     8   Pseudo R-squ. (CS):            0.03635
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -10.8905      2.516     -4.329      0.0

1. Significant predictors: QB and TE spending (p-values < 0.05) suggest that investment in these positions has a notable relationship with the likelihood of a Super Bowl win. <br>
2. Positional spending on RB, WR, OL, DL, LB, SEC, and special teams (K, P, LS) does not show a statistically significant effect on Super Bowl wins in this model.

## Next, conference championship wins

In [4]:
# --- Conference championship wins: binomial GLM, then penalized logistic fit ---
position_cols = ['QB', 'RB', 'WR', 'TE', 'LB', 'OL', 'DL', 'SEC', 'K', 'P', 'LS']
cc_coef_label = 'Standardized Coefficient (Conference Championship Wins)'

# Response (CC_Win) and positional-spending predictors, with an explicit
# intercept column added for the GLM.
y = nfl_data['CC_Win']
X = sm.add_constant(nfl_data[position_cols])

# Binomial-family GLM, i.e. a logistic regression fit through statsmodels
glm_model = sm.GLM(y, X, family=sm.families.Binomial()).fit()
glm_model_summary = glm_model.summary()
print(glm_model_summary)

#########################################################################

# Go back to the predictors without the intercept column, then put every
# position on a common scale before the penalized fit.
X = nfl_data[position_cols]
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Regularized logistic regression (relies on the estimator's default penalty)
ridge_model = LogisticRegression(solver='liblinear', max_iter=1000)
ridge_model.fit(X_standardized, y)

# Flatten the coefficient array to one value per predictor column
ridge_coefficients = ridge_model.coef_.flatten()

# Pair each position with its coefficient, largest coefficient first
ridge_summary2 = pd.DataFrame({
    'Position': X.columns,
    cc_coef_label: ridge_coefficients,
})
ridge_summary2 = ridge_summary2.sort_values(by=cc_coef_label, ascending=False)

# Report the ranked coefficients
print("Conference Championship Wins - Ridge Regression Coefficients Summary")
print(ridge_summary2)

                 Generalized Linear Model Regression Results                  
Dep. Variable:                 CC_Win   No. Observations:                  448
Model:                            GLM   Df Residuals:                      436
Model Family:                Binomial   Df Model:                           11
Link Function:                  Logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -84.805
Date:                Sat, 16 Nov 2024   Deviance:                       169.61
Time:                        17:21:09   Pearson chi2:                     361.
No. Iterations:                     7   Pseudo R-squ. (CS):            0.06243
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -9.5924      1.729     -5.548      0.0

## Finally, Regular Season wins

In [5]:
from sklearn.linear_model import Ridge

# --- Regular season wins: Poisson GLM, then ridge regression ---
position_cols = ['QB', 'RB', 'WR', 'TE', 'LB', 'OL', 'DL', 'SEC', 'K', 'P', 'LS']
wins_coef_label = 'Standardized Coefficient (Regular Season Wins)'

# Response (Wins) and positional-spending predictors, with an explicit
# intercept column added for the GLM.
y = nfl_data['Wins']
X = sm.add_constant(nfl_data[position_cols])

# Wins is a count, so the GLM uses the Poisson family
glm_model = sm.GLM(y, X, family=sm.families.Poisson()).fit()
glm_model_summary = glm_model.summary()

print("Regular Season Wins - GLM Summary")
print(glm_model_summary)

#########################################################################

# Go back to the predictors without the intercept column, then put every
# position on a common scale before the penalized fit.
X = nfl_data[position_cols]
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Ridge (L2-penalized) linear regression with penalty strength alpha=1.0
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_standardized, y)

# One coefficient per predictor column
ridge_coefficients = ridge_model.coef_

# Pair each position with its coefficient, largest coefficient first
ridge_summary3 = pd.DataFrame({
    'Position': X.columns,
    wins_coef_label: ridge_coefficients,
})
ridge_summary3 = ridge_summary3.sort_values(by=wins_coef_label, ascending=False)

# Report the ranked coefficients
print("Regular Season Wins - Ridge Regression Coefficients Summary")
print(ridge_summary3)

Regular Season Wins - GLM Summary
                 Generalized Linear Model Regression Results                  
Dep. Variable:                   Wins   No. Observations:                  448
Model:                            GLM   Df Residuals:                      436
Model Family:                 Poisson   Df Model:                           11
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -1245.0
Date:                Sat, 09 Nov 2024   Deviance:                       900.04
Time:                        14:41:51   Pearson chi2:                     681.
No. Iterations:                     4   Pseudo R-squ. (CS):             0.3203
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.7

In [13]:
# Each ridge summary was sorted by its own coefficient column, so its index is
# a permutation of the original row labels. pd.concat(axis=1) aligns rows on
# index labels, which would discard every table's ranking and pair rows by
# original label instead. Resetting each index to 0..n-1 keeps the three
# tables side by side in their own sorted order.
ranked_summaries = [
    summary.reset_index(drop=True)
    for summary in (ridge_summary1, ridge_summary2, ridge_summary3)
]

# Single blank column used as a visual gap between the tables
spacer = pd.DataFrame({'': ['']})

# Concatenate the DataFrames horizontally (side by side)
combined_summary = pd.concat(
    [ranked_summaries[0], spacer, ranked_summaries[1], spacer, ranked_summaries[2]],
    axis=1,
)

# Save the combined DataFrame to a single Excel sheet
file_path = 'Ace_NFL_Position_Weights.xlsx'
combined_summary.to_excel(file_path, index=False, sheet_name='Summary')

print(f"Table saved as {file_path}")



Table saved as Ace_NFL_Position_Weights.xlsx
