# College Football Predictions

Testing the college football model

In [77]:
from sklearn.ensemble import RandomForestRegressor
import pickle
import pandas as pd

year = 2025
week = 5

In [78]:
model: RandomForestRegressor = None

with open("college_football_model.pkl", "rb") as file:
        model = pickle.load(file)

In [79]:
games = pd.read_csv(f'cfbd_{year}_{week}_games.csv')

In [80]:
matchup_cols = ['season', 'week', 'homeTeam', 'awayTeam']

# Select only the feature columns used for training
feature_cols = [
    'season', 'week', 'homeId', 'awayId', 'home_year_x', 'home_elo', 'home_fpi', 'home_rating',
    'home_offense', 'home_defense', 'home_year_y', 'home_epa', 'home_epaAllowed', 'home_successRate',
    'home_successRateAllowed', 'home_explosiveness', 'home_explosivenessAllowed', 'home_year', 'home_rank', 'home_points',
    'away_year_x', 'away_elo', 'away_fpi', 'away_rating', 'away_offense', 'away_defense', 'away_year_y', 'away_epa',
    'away_epaAllowed', 'away_successRate', 'away_successRateAllowed', 'away_explosiveness', 'away_explosivenessAllowed',
    'away_year', 'away_rank', 'away_points'
]

X_pred = games[feature_cols]

# Predict scores
preds = model.predict(X_pred)

# Create DataFrame with predictions and matchup info
predictions_df = games[matchup_cols].copy()
predictions_df['pred_homePoints'] = preds[:, 0]
predictions_df['pred_awayPoints'] = preds[:, 1]
predictions_df['pred_pointDiff'] = round(predictions_df['pred_homePoints'] - predictions_df['pred_awayPoints'], 2)

# Save predictions
predictions_df.to_csv(f'season{year}_week{week}_predictions.csv', index=False)

# Show predictions
predictions_df.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff
0,2025,5,East Carolina,Army,30.55,29.65,0.9
1,2025,5,Virginia,Florida State,30.54,28.94,1.6
2,2025,5,Arizona State,TCU,30.86,26.93,3.93
3,2025,5,Oregon State,Houston,19.85,29.25,-9.4
4,2025,5,Illinois,USC,24.73,32.31,-7.58


In [81]:
# I have predictions and the betting lines now. Next step is to compare them and see if there are any edges.
predictions_df = pd.read_csv(f'season{year}_week{week}_predictions.csv')
lines_df = pd.read_csv(f'cfbd_lines_{year}_week{week}.csv')

# Merge the dataframes on the relevant columns
merged_df = pd.merge(predictions_df, lines_df, left_on=['season', 'week', 'awayTeam', 'homeTeam'], right_on=['season', 'week', 'awayTeam', 'homeTeam'], suffixes=('_pred', '_line'))

# Use the new 'spread' column from the updated lines csv
merged_df['line_edge'] = merged_df['pred_pointDiff'] - merged_df['spread']

# Show potential edges
potential_edges = merged_df[abs(merged_df['line_edge']) > 1]

potential_edges.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff,startDate,homeTeamId,awayTeamId,homeScore,awayScore,spread,formatted_spread,overUnder,homeMoneyline,awayMoneyline,line_edge
0,2025,5,East Carolina,Army,30.55,29.65,0.9,2025-09-25 23:30:00+00:00,151,349,28.0,6.0,-3.5,East Carolina -3.5,52.5,-175.0,145.0,4.4
1,2025,5,Virginia,Florida State,30.54,28.94,1.6,2025-09-26 23:00:00+00:00,258,52,,,7.0,Florida State -7,60.5,225.0,-278.0,-5.4
2,2025,5,Arizona State,TCU,30.86,26.93,3.93,2025-09-27 01:00:00+00:00,9,2628,,,-3.0,Arizona State -3,54.5,-162.0,136.0,6.93
3,2025,5,Oregon State,Houston,19.85,29.25,-9.4,2025-09-27 02:30:00+00:00,204,248,,,13.5,Houston -13.5,47.5,390.0,-520.0,-22.9
4,2025,5,Illinois,USC,24.73,32.31,-7.58,2025-09-27 16:00:00+00:00,356,30,,,7.0,USC -7,60.5,205.0,-250.0,-14.58


In [82]:
potential_edges = potential_edges.drop(columns=['homeTeamId', 'awayTeamId'])

potential_edges.to_csv(f'potential_edges_{year}_week{week}.csv', index=False)