Kaggle Dataset
https://www.kaggle.com/datasets/tobycrabtree/nfl-scores-and-betting-data?select=spreadspoke_scores.csv

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# Load dataset
df = pd.read_csv('spreadspoke_scores.csv')

# Create binary classification label: 1 if home team won, else 0.
# The strict `>` means a tied game is labelled 0 as well.
df['home_win'] = (df['score_home'] > df['score_away']).astype(int)

# A comparison involving NaN evaluates to False, so a game with a missing
# score would silently receive home_win == 0 above. Flag the rows that have
# both scores so that only those games reach the modelling frame.
has_final_score = df[['score_home', 'score_away']].notna().all(axis=1)

# Keep only relevant columns (and only games with a recorded result)
columns_to_keep = [
    'team_home', 'team_away', 'team_favorite_id',
    'spread_favorite', 'over_under_line', 'home_win'
]
model_df = df.loc[has_final_score, columns_to_keep].copy()

# Convert spread and over/under values to numeric; anything unparseable
# becomes NaN and is removed by the dropna below.
model_df['spread_favorite'] = pd.to_numeric(model_df['spread_favorite'], errors='coerce')
model_df['over_under_line'] = pd.to_numeric(model_df['over_under_line'], errors='coerce')

# Drop rows with missing betting line data
model_df = model_df.dropna(subset=['spread_favorite', 'over_under_line'])

# One-hot encode categorical team fields
model_df_encoded = pd.get_dummies(model_df, columns=['team_home', 'team_away', 'team_favorite_id'])

# Separate features and target
X = model_df_encoded.drop(columns=['home_win'])
y = model_df_encoded['home_win']

# Rescale every feature column to [0, 1]. The scaler is applied to the
# one-hot indicator columns too, not just the two betting-line columns.
# NOTE(review): the scaler is fit on all rows here; if a train/test split
# is performed later, consider fitting on the training rows only.
scaler = MinMaxScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# Reattach target column. X_scaled carries a fresh 0..n-1 index, so the
# target is attached by position (.values) rather than by index label.
final_df = X_scaled.copy()
final_df['home_win'] = y.values

#Print the number of rows and columns in the final DataFrame
print(f"Final DataFrame shape: {final_df.shape}")

# Display preview
final_df.head()

Unnamed: 0,spread_favorite,over_under_line,team_home_Arizona Cardinals,team_home_Atlanta Falcons,team_home_Baltimore Colts,team_home_Baltimore Ravens,team_home_Buffalo Bills,team_home_Carolina Panthers,team_home_Chicago Bears,team_home_Cincinnati Bengals,...,team_favorite_id_NYJ,team_favorite_id_PHI,team_favorite_id_PICK,team_favorite_id_PIT,team_favorite_id_SEA,team_favorite_id_SF,team_favorite_id_TB,team_favorite_id_TEN,team_favorite_id_WAS,home_win
0,0.490566,0.422535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0.320755,0.338028,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.54717,0.309859,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.90566,0.225352,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.773585,0.169014,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
