In [None]:
!pip install nfl_data_py fastparquet appdirs --no-deps

Collecting nfl_data_py
  Downloading nfl_data_py-0.3.3-py3-none-any.whl.metadata (12 kB)
Collecting fastparquet
  Downloading fastparquet-2025.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting appdirs
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Downloading nfl_data_py-0.3.3-py3-none-any.whl (13 kB)
Downloading fastparquet-2025.12.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: appdirs, nfl_data_py, fastparquet
Successfully installed appdirs-1.4.4 fastparquet-2025.12.0 nfl_data_py-0.3.3


In [None]:
# --- BLOCK 1: IMPORTS ---
import pandas as pd
import nfl_data_py as nfl
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# --- BLOCK 2: GET THE DATA ---
print("Downloading NFL data (this may take 1-2 minutes)...")
# We grab the 2024 (past) and 2025 (current) seasons
years = [2024, 2025]
schedule = nfl.import_schedules(years)

# Filter: We only want Regular Season games ('REG').
# .copy() makes `schedule` an independent frame, so the column assignments
# below write to it directly instead of to a slice of the downloaded data.
schedule = schedule[schedule['game_type'] == 'REG'].copy()

# The running averages and the un-shuffled train/test split both rely on the
# rows being in playing order, so enforce it rather than assume it.
schedule = schedule.sort_values(['season', 'week'])

# --- BLOCK 3: CREATE THE TARGET (WHAT WE PREDICT) ---
# 'result' is HomeScore - AwayScore.
# home_win is 1.0 if Home won, 0.0 otherwise (a tie counts as 0.0), and NaN
# for games that have not been played yet. Leaving unplayed games as NaN
# (instead of 0) keeps them from being mistaken for away wins; any later
# dropna on this column removes them.
game_played = schedule['result'].notna()
schedule['home_win'] = (schedule['result'] > 0).astype(float).where(game_played)

# --- BLOCK 4: FEATURE ENGINEERING (THE STATS) ---

print("Processing stats...")

# Average used for a team that has no recorded score yet
default_avg_points = 20.0

schedule['home_avg_points'] = 0.0
schedule['away_avg_points'] = 0.0

# team -> list of points scored in every game already played
team_scores = {}

# Iterate through every game in chronological order
for index, row in schedule.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    home_history = team_scores.setdefault(home_team, [])
    away_history = team_scores.setdefault(away_team, [])

    # The feature for this game only uses scores from EARLIER games,
    # so the model never sees the outcome it is trying to predict.
    home_avg = sum(home_history) / len(home_history) if home_history else default_avg_points
    away_avg = sum(away_history) / len(away_history) if away_history else default_avg_points

    schedule.at[index, 'home_avg_points'] = home_avg
    schedule.at[index, 'away_avg_points'] = away_avg

    # Only record real scores. Unplayed games have NaN scores, and a single
    # NaN in the history would turn every later average for the team into NaN.
    if pd.notna(row['home_score']):
        home_history.append(row['home_score'])
    if pd.notna(row['away_score']):
        away_history.append(row['away_score'])

# --- BLOCK 5: TRAIN THE MODEL ---
print("Training the AI model...")

# Step A: Select Features (X) and Target (y)
features = ['home_avg_points', 'away_avg_points']
target = 'home_win'

# Only games with a final result can teach (or grade) the model.
# Filter on 'result' explicitly: a game without a result has no true winner,
# so it must not appear in the training or test data.
finished_games = schedule[schedule['result'].notna()]

# Remove any remaining rows with missing features or target
model_data = finished_games.dropna(subset=features + [target])

X = model_data[features]
# Cast so the classifier always sees plain integer class labels (0 / 1)
y = model_data[target].astype(int)

# Step B: Split into Training (Study) and Testing (Exam)
# shuffle=False keeps row order: the first 80% of games train the model and
# the last 20% test it.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step C: Create and Train the Logistic Regression Model
model = LogisticRegression()
model.fit(X_train, y_train) # The magic line where it learns!

# --- BLOCK 6: EVALUATE ---
# Score the held-out games: predict each one, then compare with what happened.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

divider = "------------------------------------------------"
print(divider)
print(f"Model Accuracy: {accuracy:.2%}")
print(divider)

# Show what the model learned: one weight per feature, in the same order
# as the feature columns it was trained on (home first, away second).
home_weight, away_weight = model.coef_[0][0], model.coef_[0][1]
print("Model Logic:")
print(f"Weight for Home Avg Points: {home_weight:.4f} (Positive means scoring helps Home win)")
print(f"Weight for Away Avg Points: {away_weight:.4f} (Negative means opponent scoring hurts Home)")

Downloading NFL data (this may take 1-2 minutes)...
Processing stats...
Training the AI model...
------------------------------------------------
Model Accuracy: 47.71%
------------------------------------------------
Model Logic:
Weight for Home Avg Points: 0.0938 (Positive means scoring helps Home win)
Weight for Away Avg Points: -0.0919 (Negative means opponent scoring hurts Home)


In [None]:
# --- BLOCK 7: PREDICT THE FUTURE (WEEK 16, 2025) ---
import pandas as pd

# 1. Get the data for the upcoming week (Week 16 of 2025)
current_week = 16
season_2025 = schedule[(schedule['season'] == 2025) & (schedule['week'] == current_week)]

# 2. Prepare the data (Input)
# The model cannot score rows with missing features, so drop them up front
# instead of letting model.predict raise.
feature_cols = ['home_avg_points', 'away_avg_points']
season_2025 = season_2025.dropna(subset=feature_cols)
future_X = season_2025[feature_cols]

# 3. Display the header
print(f"--- AI PREDICTIONS FOR WEEK {current_week} (2025) ---")
print(f"{'Home Team':<15} vs {'Away Team':<15} | {'Winner':<10} | {'Confidence'}")
print("-" * 65)

if future_X.empty:
    # model.predict raises on an empty input, so report the situation instead
    print(f"No games with usable stats found for week {current_week}.")
else:
    # 4. Ask the AI to predict
    future_preds = model.predict(future_X)          # 1 = Home wins, 0 = Away wins
    future_probs = model.predict_proba(future_X)    # column 1 = probability of Home win

    # 5. Display the results nicely, one line per game
    for home, away, winner_code, game_probs in zip(
        season_2025['home_team'], season_2025['away_team'], future_preds, future_probs
    ):
        confidence = game_probs[1] # Probability of Home Win

        # Translate to English
        if winner_code == 1:
            predicted_winner = home
            win_prob = confidence
        else:
            predicted_winner = away
            win_prob = 1 - confidence # If Home has 40% chance, Away has 60%

        print(f"{home:<15} vs {away:<15} | {predicted_winner:<10} | {win_prob:.1%}")

--- AI PREDICTIONS FOR WEEK 16 (2025) ---
Home Team       vs Away Team       | Winner     | Confidence
-----------------------------------------------------------------
SEA             vs LA              | SEA        | 53.8%
WAS             vs PHI             | WAS        | 53.8%
CHI             vs GB              | GB         | 55.7%
CAR             vs TB              | TB         | 62.3%
CLE             vs BUF             | BUF        | 76.6%
DAL             vs LAC             | DAL        | 57.2%
MIA             vs CIN             | CIN        | 56.6%
NO              vs NYJ             | NO         | 50.3%
NYG             vs MIN             | MIN        | 56.9%
TEN             vs KC              | KC         | 59.1%
ARI             vs ATL             | ARI        | 56.5%
DEN             vs JAX             | DEN        | 59.4%
DET             vs PIT             | DET        | 73.2%
HOU             vs LV              | HOU        | 67.5%
BAL             vs NE              | BAL       

In [None]:
# --- BLOCK 9: PREDICT THE ENTIRE REMAINDER OF THE SEASON ---
import pandas as pd
import numpy as np

print("Generating predictions for the rest of the season...")

# 1. IDENTIFY THE "CURRENT" STATS
# Filter for games that actually happened (score is not empty)
played_games = schedule[schedule['home_score'].notna()]

# Stack each game's home and away score into one long (team, points) table so
# every game a team played counts exactly once. Averaging the home mean and
# the away mean separately would give the wrong number whenever a team has
# played a different number of home and away games.
all_scores = pd.concat(
    [
        played_games[['home_team', 'home_score']].rename(
            columns={'home_team': 'team', 'home_score': 'points'}
        ),
        played_games[['away_team', 'away_score']].rename(
            columns={'away_team': 'team', 'away_score': 'points'}
        ),
    ],
    ignore_index=True,
)
team_means = all_scores.groupby('team')['points'].mean()

# Cover every team that appears as home OR away, so the .map() calls below
# never produce NaN for a team that simply has not hosted a game.
all_teams = pd.unique(schedule[['home_team', 'away_team']].to_numpy().ravel())
current_averages = {
    team: team_means.get(team, 20)  # Default to 20 if the team has no played games
    for team in all_teams
}

# 2. SELECT THE FUTURE GAMES
remaining_weeks = [16, 17, 18]
future_schedule = schedule[
    (schedule['season'] == 2025) &
    (schedule['week'].isin(remaining_weeks))
].copy()

# 3. APPLY THE STATS TO THE FUTURE SCHEDULE
future_schedule['home_avg_points'] = future_schedule['home_team'].map(current_averages)
future_schedule['away_avg_points'] = future_schedule['away_team'].map(current_averages)

# 4. PREDICT (once, for all remaining games)
X_future = future_schedule[['home_avg_points', 'away_avg_points']]
if len(X_future) > 0:
    future_probs = model.predict_proba(X_future)
else:
    # predict_proba raises on empty input; use an empty (0 games x 2 classes) result
    future_probs = np.empty((0, 2))

# Column 1 is the probability of the home team winning
future_schedule['home_win_prob'] = future_probs[:, 1]

# 5. DISPLAY RESULTS GROUPED BY WEEK
print("\n" + "="*60)
print("      PREDICTED WINNERS: REST OF 2025 SEASON")
print("="*60)

for week in remaining_weeks:
    print(f"\n--- WEEK {week} ---")
    # Column widths match the data rows below so the table lines up
    print(f"{'Matchup':<35} | {'Predicted Winner':<16} | {'Confidence'}")
    print("-" * 65)

    week_games = future_schedule[future_schedule['week'] == week]

    if week_games.empty:
        print("No games scheduled (or data missing for this week).")
        continue

    for home, away, win_prob_home in zip(
        week_games['home_team'], week_games['away_team'], week_games['home_win_prob']
    ):
        # Logic to pick winner: whichever side has more than a 50% chance
        if win_prob_home > 0.5:
            winner = home
            conf = win_prob_home
        else:
            winner = away
            conf = 1.0 - win_prob_home

        # Pad the whole matchup text (not just the away team) so rows stay
        # aligned regardless of how long each team abbreviation is.
        matchup = f"{home} vs {away}"
        print(f"{matchup:<35} | {winner:<16} | {conf:.1%}")

Generating predictions for the rest of the season...

      PREDICTED WINNERS: REST OF 2025 SEASON

--- WEEK 16 ---
Matchup                             | Predicted Winner | Confidence
-----------------------------------------------------------------
SEA vs LA                           | SEA             | 52.5%
WAS vs PHI                          | WAS             | 54.2%
CHI vs GB                           | GB              | 52.6%
CAR vs TB                           | TB              | 61.3%
CLE vs BUF                          | BUF             | 75.3%
DAL vs LAC                          | DAL             | 57.6%
MIA vs CIN                          | CIN             | 58.9%
NO vs NYJ                          | NO              | 54.4%
NYG vs MIN                          | MIN             | 54.2%
TEN vs KC                           | KC              | 56.3%
ARI vs ATL                          | ARI             | 55.0%
DEN vs JAX                          | DEN             | 56.4%
DET vs 