# College Football Predictions

Testing the college football model

In [21]:
from sklearn.ensemble import RandomForestRegressor
import pickle
import pandas as pd

year = 2025
week = 6

In [22]:
model: RandomForestRegressor = None

with open("college_football_model.pkl", "rb") as file:
        model = pickle.load(file)

In [23]:
games = pd.read_csv(f'cfbd_{year}_{week}_games.csv')

In [24]:
matchup_cols = ['season', 'week', 'homeTeam', 'awayTeam']

# Select only the feature columns used for training
feature_cols = [
    'season', 'week', 'homeId', 'awayId', 'home_year_x', 'home_elo', 'home_fpi', 'home_rating',
    'home_offense', 'home_defense', 'home_year_y', 'home_epa', 'home_epaAllowed', 'home_successRate',
    'home_successRateAllowed', 'home_explosiveness', 'home_explosivenessAllowed', 'home_year', 'home_rank', 'home_points',
    'away_year_x', 'away_elo', 'away_fpi', 'away_rating', 'away_offense', 'away_defense', 'away_year_y', 'away_epa',
    'away_epaAllowed', 'away_successRate', 'away_successRateAllowed', 'away_explosiveness', 'away_explosivenessAllowed',
    'away_year', 'away_rank', 'away_points'
]

X_pred = games[feature_cols]

# Predict scores
preds = model.predict(X_pred)

# Create DataFrame with predictions and matchup info
predictions_df = games[matchup_cols].copy()
predictions_df['pred_homePoints'] = preds[:, 0]
predictions_df['pred_awayPoints'] = preds[:, 1]
predictions_df['pred_pointDiff'] = abs(round(predictions_df['pred_homePoints'] - predictions_df['pred_awayPoints'], 2))

# Save predictions
predictions_df.to_csv(f'season{year}_week{week}_predictions.csv', index=False)

# Show predictions
predictions_df.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff
0,2025,6,New Mexico State,Sam Houston,20.49,18.78,1.71
1,2025,6,South Florida,Charlotte,34.86,17.39,17.47
2,2025,6,Delaware,Western Kentucky,30.18,22.6,7.58
3,2025,6,San José State,New Mexico,23.93,24.14,0.21
4,2025,6,BYU,West Virginia,31.65,13.75,17.9


In [25]:
# I have predictions and the betting lines now. Next step is to compare them and see if there are any edges.
predictions_df = pd.read_csv(f'season{year}_week{week}_predictions.csv')
lines_df = pd.read_csv(f'cfbd_lines_{year}_week{week}.csv')

# Merge the dataframes on the relevant columns
merged_df = pd.merge(predictions_df, lines_df, left_on=['season', 'week', 'awayTeam', 'homeTeam'], right_on=['season', 'week', 'awayTeam', 'homeTeam'], suffixes=('_pred', '_line'))

# Use the new 'spread' column from the updated lines csv
merged_df['line_edge'] = merged_df['pred_pointDiff'] - abs(merged_df['spread'])

# Show potential edges
potential_edges = merged_df[abs(merged_df['line_edge']) > 1]

potential_edges.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff,startDate,homeTeamId,awayTeamId,homeScore,awayScore,spread,formatted_spread,overUnder,homeMoneyline,awayMoneyline,line_edge
1,2025,6,South Florida,Charlotte,34.86,17.39,17.47,2025-10-03 23:00:00+00:00,58,2429,,,-27.5,South Florida -27.5,55.5,-5000.0,1800.0,-10.03
2,2025,6,Delaware,Western Kentucky,30.18,22.6,7.58,2025-10-03 23:00:00+00:00,48,98,,,-2.5,Delaware -2.5,62.5,-142.0,120.0,5.08
3,2025,6,San José State,New Mexico,23.93,24.14,0.21,2025-10-04 02:00:00+00:00,23,167,,,-2.5,San José State -2.5,57.5,-130.0,110.0,-2.29
4,2025,6,BYU,West Virginia,31.65,13.75,17.9,2025-10-04 02:30:00+00:00,252,277,,,-19.5,BYU -19.5,47.5,-1200.0,750.0,-1.6
6,2025,6,Michigan,Wisconsin,36.35,15.35,21.0,2025-10-04 16:00:00+00:00,130,275,,,-16.5,Michigan -16.5,41.5,-800.0,550.0,4.5


In [26]:
potential_edges = potential_edges.drop(columns=['homeTeamId', 'awayTeamId'])

potential_edges.to_csv(f'potential_edges_{year}_week{week}.csv', index=False)