# College Football Predictions

Testing the college football model

In [18]:
from sklearn.ensemble import RandomForestRegressor
import pickle
import pandas as pd

year = 2025
week = 'bowl'

In [19]:
model: RandomForestRegressor = None

with open("college_football_model.pkl", "rb") as file:
        model = pickle.load(file)

In [20]:
games = pd.read_csv(f'cfbd_{year}_{week}_games.csv')

games.head()

Unnamed: 0,season,week,homeId,homeTeam,awayTeam,awayId,homePoints,awayPoints,home_year_x,home_elo,...,away_year_y,away_epa,away_epaAllowed,away_successRate,away_successRateAllowed,away_explosiveness,away_explosivenessAllowed,away_year,away_rank,away_points
0,2025,1,201,Oklahoma,Alabama,333,,,2025,1823,...,2025,0.250189,0.070375,0.464401,0.359527,0.920588,0.988784,2025,3,298.4
1,2025,1,245,Texas A&M,Miami,2390,,,2025,1878,...,2025,0.231915,0.041512,0.488316,0.344528,0.861281,0.981342,2025,13,257.66
2,2025,1,145,Ole Miss,Tulane,2655,,,2025,1960,...,2025,0.185502,0.127234,0.432732,0.435237,0.925262,0.93748,2025,74,175.32
3,2025,1,2483,Oregon,James Madison,256,,,2025,2087,...,2025,0.181989,0.032333,0.428873,0.307656,0.999248,0.996728,2025,108,138.48
4,2025,1,328,Utah State,Washington State,265,,,2025,1462,...,2025,0.147235,0.107252,0.435415,0.415122,0.909368,0.917993,2025,71,178.96


In [21]:
matchup_cols = ['season', 'week', 'homeTeam', 'awayTeam']

# Select only the feature columns used for training
feature_cols = [
    'home_elo', 'home_fpi', 'home_rating',
    'home_defense', 'home_epa', 'home_successRate',
    'home_explosiveness',
    'home_rank',
    'away_elo', 'away_fpi', 'away_rating',
    'away_defense', 'away_epa', 'away_successRate',
    'away_explosiveness',
    'away_rank'
]

X_pred = games[feature_cols]

# Predict scores
preds = model.predict(X_pred)

# Create DataFrame with predictions and matchup info
predictions_df = games[matchup_cols].copy()
predictions_df['pred_homePoints'] = preds[:, 0]
predictions_df['pred_awayPoints'] = preds[:, 1]
predictions_df['pred_pointDiff'] = abs(round(predictions_df['pred_homePoints'] - predictions_df['pred_awayPoints'], 2))

# Save predictions
predictions_df.to_csv(f'season{year}_week{week}_predictions.csv', index=False)

# Show predictions
predictions_df.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff
0,2025,1,Oklahoma,Alabama,20.343042,26.484776,6.14
1,2025,1,Texas A&M,Miami,26.185424,24.849628,1.34
2,2025,1,Ole Miss,Tulane,36.866712,15.273706,21.59
3,2025,1,Oregon,James Madison,34.366951,16.183689,18.18
4,2025,1,Utah State,Washington State,23.327105,27.3807,4.05


In [22]:
# I have predictions and the betting lines now. Next step is to compare them and see if there are any edges.
predictions_df = pd.read_csv(f'season{year}_week{week}_predictions.csv')
lines_df = pd.read_csv(f'cfbd_lines_{year}_week{week}.csv')

# Merge the dataframes on the relevant columns
merged_df = pd.merge(predictions_df, lines_df, left_on=['season', 'week', 'awayTeam', 'homeTeam'], right_on=['season', 'week', 'awayTeam', 'homeTeam'], suffixes=('_pred', '_line'))

# Use the new 'spread' column from the updated lines csv
merged_df['line_edge'] = merged_df['pred_pointDiff'] - abs(merged_df['spread'])

# Show potential edges
potential_edges = merged_df[abs(merged_df['line_edge']) > 1]

potential_edges.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff,startDate,homeTeamId,homeScore,awayTeamId,awayScore,spread,formatted_spread,overUnder,homeMoneyline,awayMoneyline,line_edge
0,2025,1,Oklahoma,Alabama,20.343042,26.484776,6.14,2025-12-20 01:00:00+00:00,201,,333,,-1.5,Oklahoma -1.5,41.5,-130,110,4.64
1,2025,1,Texas A&M,Miami,26.185424,24.849628,1.34,2025-12-20 17:00:00+00:00,245,,2390,,-3.5,Texas A&M -3.5,48.5,-155,130,-2.16
2,2025,1,Ole Miss,Tulane,36.866712,15.273706,21.59,2025-12-20 20:30:00+00:00,145,,2655,,-17.5,Ole Miss -17.5,57.5,-950,625,4.09
3,2025,1,Oregon,James Madison,34.366951,16.183689,18.18,2025-12-21 00:30:00+00:00,2483,,256,,-20.5,Oregon -20.5,46.5,-1800,1000,-2.32
4,2025,1,Utah State,Washington State,23.327105,27.3807,4.05,2025-12-22 19:00:00+00:00,328,,265,,-1.5,Utah State -1.5,49.5,-118,-102,2.55


In [23]:
potential_edges = potential_edges.drop(columns=['homeTeamId', 'awayTeamId'])

potential_edges.to_csv(f'potential_edges_{year}_week{week}.csv', index=False)