## College Football Matchup Collector

This will create CSV files containing matchup data and all the team data necessary to create a model.

In [1]:
import cfbd
import pandas as pd
import json

CFBD_API_KEY = ""

# Read the API key from secrets.json so the key itself is never typed into
# the notebook. The file must hold a JSON object with a "CFBD_API_KEY" entry;
# a missing file or key raises here, before any request is attempted.
with open('secrets.json', encoding='utf-8') as f:
    secrets = json.load(f)
    CFBD_API_KEY = secrets["CFBD_API_KEY"]

# Configure API key authorization
configuration = cfbd.Configuration(
    access_token=CFBD_API_KEY,  host="https://api.collegefootballdata.com")

# Confirm that a key was loaded WITHOUT echoing it: cell output is stored in
# the notebook file, so printing the raw key would leak the secret to anyone
# who can read (or version-control) the notebook.
print("Using CFBD API key:", "********" if CFBD_API_KEY else "<missing>")


Using CFBD API key: ********


In [2]:
# Run parameters: edit these to choose which slate of games is collected.

year: int = 2025  # season passed to every fetch helper
week: int = 6  # week passed to the Elo and games requests
provider: str = 'DraftKings'  # not referenced by the cells shown in this notebook

In [3]:
# Team ratings (Elo, FPI, SP) combined into one row per team
def fetch_team_ratings(year, week, api_client):
    """Return a per-team table of Elo, FPI and SP ratings.

    Elo is requested for ``year`` and ``week``; FPI and SP are requested for
    ``year`` only. The three result sets are merged on ``team`` (pandas'
    default inner join), and each ``offense`` / ``defense`` cell is reduced
    to its ``'rating'`` entry, or ``None`` when the cell is not a dict
    containing that key.

    Args:
        year: Season passed to every ratings request.
        week: Week passed to the Elo request.
        api_client: Client object handed to ``cfbd.RatingsApi``.

    Returns:
        A DataFrame with the columns team, year, elo, fpi, rating, offense
        and defense. If anything in the fetch/merge step raises, the error
        is printed and ``None`` is returned instead.
    """

    def _rating_of(cell):
        # Keep only the 'rating' entry of a dict-valued cell.
        if isinstance(cell, dict) and 'rating' in cell:
            return cell['rating']
        return None

    def _as_frame(records):
        # One DataFrame row per API record.
        return pd.DataFrame([record.to_dict() for record in records])

    ratings_client = cfbd.RatingsApi(api_client)

    try:
        elo_frame = _as_frame(ratings_client.get_elo(year=year, week=week))
        fpi_frame = _as_frame(ratings_client.get_fpi(year=year))
        sp_frame = _as_frame(ratings_client.get_sp(year=year))

        combined = elo_frame.merge(fpi_frame, on="team")
        combined = combined.merge(sp_frame, on="team")

        for side in ('offense', 'defense'):
            combined[side] = combined[side].apply(_rating_of)

        wanted = ['team', 'year', 'elo', 'fpi', 'rating', 'offense', 'defense']
        return combined[wanted]

    except Exception as err:
        print("Error fetching ratings:", err)
        return None

In [4]:
# Adjusted season metrics per team: EPA, success rate, explosiveness
def fetch_team_advanced_metrics(year, api_client):
    """Return a per-team table of adjusted season metrics.

    The ``epa``, ``epaAllowed``, ``successRate`` and ``successRateAllowed``
    cells are each reduced to their ``'total'`` entry, or ``None`` when the
    cell is not a dict containing that key. The explosiveness columns are
    passed through untouched.

    Args:
        year: Season passed to the adjusted-metrics request.
        api_client: Client object handed to ``cfbd.AdjustedMetricsApi``.

    Returns:
        A DataFrame with the columns team, year, epa, epaAllowed,
        successRate, successRateAllowed, explosiveness and
        explosivenessAllowed. If anything in the fetch/transform step
        raises, the error is printed and ``None`` is returned instead.
    """

    def _total_of(cell):
        # Keep only the 'total' entry of a dict-valued cell.
        if isinstance(cell, dict) and 'total' in cell:
            return cell['total']
        return None

    metrics_client = cfbd.AdjustedMetricsApi(api_client)

    try:
        season_stats = metrics_client.get_adjusted_team_season_stats(year=year)
        stats_frame = pd.DataFrame([row.to_dict() for row in season_stats])

        for column in ('epa', 'epaAllowed', 'successRate', 'successRateAllowed'):
            stats_frame[column] = stats_frame[column].apply(_total_of)

        wanted = [
            'team', 'year',
            'epa', 'epaAllowed',
            'successRate', 'successRateAllowed',
            'explosiveness', 'explosivenessAllowed',
        ]
        return stats_frame[wanted]

    except Exception as err:
        print("Error fetching advanced metrics:", err)
        return None

In [5]:
# Team recruiting rankings: rank and points per team for one year
def fetch_recruiting_rankings(year, api_client):
    """Return the team recruiting rankings for ``year``.

    Args:
        year: Year passed to the recruiting-rankings request.
        api_client: Client object handed to ``cfbd.RecruitingApi``.

    Returns:
        A DataFrame with the columns team, year, rank and points. If the
        request or the column selection raises, the error is printed and
        ``None`` is returned instead.
    """
    recruiting_client = cfbd.RecruitingApi(api_client)

    try:
        team_rankings = recruiting_client.get_team_recruiting_rankings(year=year)
        records = [entry.to_dict() for entry in team_rankings]
        frame = pd.DataFrame(records)

        wanted = ['team', 'year', 'rank', 'points']
        return frame[wanted]

    except Exception as err:
        print("Error fetching recruiting rankings:", err)
        return None

In [6]:
# Games for one week, restricted to matchups where both teams are FBS
def fetch_games(year, week, api_client):
    """Return the games of ``year`` / ``week`` played between two FBS teams.

    The request itself is made with the 'fbs' classification; the result is
    then narrowed to rows whose ``homeClassification`` and
    ``awayClassification`` both equal that classification.

    Args:
        year: Season passed to the games request.
        week: Week passed to the games request.
        api_client: Client object handed to ``cfbd.GamesApi``.

    Returns:
        A DataFrame with the columns season, week, homeId, homeTeam,
        awayTeam, awayId, homePoints and awayPoints. If the request or the
        filtering raises, the error is printed and ``None`` is returned
        instead.
    """
    games_client = cfbd.GamesApi(api_client)
    fbs = cfbd.DivisionClassification('fbs')

    try:
        schedule = games_client.get_games(year=year, week=week, classification=fbs)
        frame = pd.DataFrame([game.to_dict() for game in schedule])

        # Keep a game only when the home AND the away side are both FBS.
        both_fbs = (frame['homeClassification'] == fbs) & (frame['awayClassification'] == fbs)
        frame = frame[both_fbs]

        wanted = [
            'season', 'week',
            'homeId', 'homeTeam',
            'awayTeam', 'awayId',
            'homePoints', 'awayPoints',
        ]
        return frame[wanted]

    except Exception as err:
        print("Error fetching games:", err)
        return None

In [7]:
merged_df = pd.DataFrame()

# Fetch every data set through a single API client, used as a context manager.
with cfbd.ApiClient(configuration) as api_client:
    rating_df = fetch_team_ratings(year, week, api_client)
    advanced_metrics_df = fetch_team_advanced_metrics(year, api_client)
    recruiting_rankings_df = fetch_recruiting_rankings(year, api_client)
    games = fetch_games(year, week, api_client)

# The fetch helpers print the error and return None when a request fails.
# Stop here with a message naming what is missing, rather than failing later
# with an opaque "'NoneType' object has no attribute ..." error.
fetched = {
    "team ratings": rating_df,
    "advanced metrics": advanced_metrics_df,
    "recruiting rankings": recruiting_rankings_df,
    "games": games,
}
missing = [label for label, frame in fetched.items() if frame is None]
if missing:
    raise RuntimeError("Could not fetch: " + ", ".join(missing))

# Combine the three team-level tables into one row per team.
# NOTE: each table carries its own `year` column and the join key is `team`
# only, so the result contains year_x / year_y / year. Kept as-is so the
# columns of the exported CSV do not change.
merged_df = rating_df.merge(advanced_metrics_df, on="team").merge(recruiting_rankings_df, on="team")


In [8]:
# Attach the team-level data to every game twice: once for the home team and
# once for the away team.
# Assumes merged_df contains team data and games contains game data.

games_with_team_data = games.copy()

# Home side: prefix every team column with "home_" and key the table on homeTeam
home_team_cols = [name for name in merged_df.columns if name != 'team']
home_renames = {name: f"home_{name}" for name in home_team_cols}
home_renames["team"] = "homeTeam"
home_merged = merged_df.rename(columns=home_renames)
games_with_team_data = games_with_team_data.merge(home_merged, how="left", on="homeTeam")

# Away side: same treatment with the "away_" prefix, keyed on awayTeam
away_team_cols = [name for name in merged_df.columns if name != 'team']
away_renames = {name: f"away_{name}" for name in away_team_cols}
away_renames["team"] = "awayTeam"
away_merged = merged_df.rename(columns=away_renames)
games_with_team_data = games_with_team_data.merge(away_merged, how="left", on="awayTeam")

# Left joins keep every game, even when a team has no row in merged_df.
# Each game row now carries the home/away ratings, advanced metrics and recruiting data.
print(games_with_team_data.head())

   season  week  homeId          homeTeam          awayTeam  awayId  \
0    2025     6     166  New Mexico State       Sam Houston    2534   
1    2025     6      58     South Florida         Charlotte    2429   
2    2025     6      48          Delaware  Western Kentucky      98   
3    2025     6      23    San José State        New Mexico     167   
4    2025     6     252               BYU     West Virginia     277   

  homePoints awayPoints  home_year_x  home_elo  ...  away_year_y  away_epa  \
0       None       None         2025      1147  ...         2025  0.102115   
1       None       None         2025      1477  ...         2025  0.087163   
2       None       None         2025      1501  ...         2025  0.142712   
3       None       None         2025      1423  ...         2025  0.164913   
4       None       None         2025      1702  ...         2025  0.117322   

   away_epaAllowed  away_successRate  away_successRateAllowed  \
0         0.229693          0.363679   

In [9]:
# Write the enriched matchups to cfbd_<year>_<week>_games.csv, without the index column.
games_with_team_data.to_csv(path_or_buf=f'cfbd_{year}_{week}_games.csv', index=False)