# College Football Predictions

Testing the college football model

In [59]:
from sklearn.ensemble import RandomForestRegressor
import pickle
import pandas as pd

year = 2025
week = 4

In [60]:
model: RandomForestRegressor = None

with open("college_football_model.pkl", "rb") as file:
        model = pickle.load(file)

In [61]:
games = pd.read_csv(f'cfbd_{year}_{week}_games.csv')

In [62]:
matchup_cols = ['season', 'week', 'homeTeam', 'awayTeam']

# Select only the feature columns used for training
feature_cols = [
    'season', 'week', 'homeId', 'awayId', 'home_year_x', 'home_elo', 'home_fpi', 'home_rating',
    'home_offense', 'home_defense', 'home_year_y', 'home_epa', 'home_epaAllowed', 'home_successRate',
    'home_successRateAllowed', 'home_explosiveness', 'home_explosivenessAllowed', 'home_year', 'home_rank', 'home_points',
    'away_year_x', 'away_elo', 'away_fpi', 'away_rating', 'away_offense', 'away_defense', 'away_year_y', 'away_epa',
    'away_epaAllowed', 'away_successRate', 'away_successRateAllowed', 'away_explosiveness', 'away_explosivenessAllowed',
    'away_year', 'away_rank', 'away_points'
]

X_pred = games[feature_cols]

# Predict scores
preds = model.predict(X_pred)

# Create DataFrame with predictions and matchup info
predictions_df = games[matchup_cols].copy()
predictions_df['pred_homePoints'] = preds[:, 0]
predictions_df['pred_awayPoints'] = preds[:, 1]
predictions_df['pred_pointDiff'] = round(predictions_df['pred_homePoints'] - predictions_df['pred_awayPoints'], 2)

# Save predictions
predictions_df.to_csv(f'season{year}_week{week}_predictions.csv', index=False)

# Show predictions
predictions_df.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff
0,2025,4,Charlotte,Rice,15.05,24.7,-9.65
1,2025,4,Oklahoma State,Tulsa,21.98,20.23,1.75
2,2025,4,Rutgers,Iowa,27.14,33.95,-6.81
3,2025,4,Miami (OH),UNLV,33.49,37.81,-4.32
4,2025,4,Louisville,Bowling Green,35.53,14.11,21.42


In [63]:
# I have predictions and the betting lines now. Next step is to compare them and see if there are any edges.
predictions_df = pd.read_csv(f'season{year}_week{week}_predictions.csv')
lines_df = pd.read_csv(f'cfbd_lines_{year}_week{week}.csv')

# Merge the dataframes on the relevant columns
merged_df = pd.merge(predictions_df, lines_df, left_on=['season', 'week', 'awayTeam', 'homeTeam'], right_on=['season', 'week', 'awayTeam', 'homeTeam'], suffixes=('_pred', '_line'))

# Use the new 'spread' column from the updated lines csv
merged_df['line_edge'] = merged_df['pred_pointDiff'] - merged_df['spread']

# Show potential edges
potential_edges = merged_df[abs(merged_df['line_edge']) > 1]

potential_edges.head()

Unnamed: 0,season,week,homeTeam,awayTeam,pred_homePoints,pred_awayPoints,pred_pointDiff,startDate,homeTeamId,homeScore,awayTeamId,awayScore,spread,formatted_spread,overUnder,homeMoneyline,awayMoneyline,line_edge
0,2025,4,Charlotte,Rice,15.05,24.7,-9.65,2025-09-18 23:30:00+00:00,2429,17,242,28,1.5,Rice -1.5,42.5,102.0,-122.0,-11.15
1,2025,4,Oklahoma State,Tulsa,21.98,20.23,1.75,2025-09-19 23:30:00+00:00,197,12,202,19,-10.0,Oklahoma State -10,54.5,-375.0,295.0,11.75
2,2025,4,Rutgers,Iowa,27.14,33.95,-6.81,2025-09-20 00:00:00+00:00,164,28,2294,38,2.5,Iowa -2.5,46.5,110.0,-130.0,-9.31
3,2025,4,Miami (OH),UNLV,33.49,37.81,-4.32,2025-09-20 16:00:00+00:00,193,38,2439,41,2.5,UNLV -2.5,49.5,110.0,-130.0,-6.82
4,2025,4,Louisville,Bowling Green,35.53,14.11,21.42,2025-09-20 16:00:00+00:00,97,40,189,17,-26.5,Louisville -26.5,50.5,-4000.0,1600.0,47.92


In [None]:
potential_edges = potential_edges.drop(columns=['homeTeamId', 'awayTeamId'])

potential_edges.to_csv(f'potential_edges_{year}_week{week}.csv', index=False)