In [None]:
import os
import re

import cfbd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import requests.exceptions
from tqdm import tqdm as tqdm

In [None]:
#Establish your API key here. Make sure to go to collegefootballdata.com in order to sign up for the API key (it's free!)
#The key is taken from the CFBD_API_KEY environment variable when that is set, so the secret never has to be saved inside the notebook.
#If the variable is not set, the placeholder below is used instead; replace the placeholder with your key if you prefer to paste it in directly.
#The API documents its Authorization header in the form "Bearer <key>".
api_key = os.environ.get("CFBD_API_KEY", "INSERT API KEY HERE")

In [None]:
#Create the week and season lists that drive the API calls below.
#range() excludes its end value, so these cover weeks 1-14 and seasons 2019-2023; change the bounds if you would like different weeks or years.
week_list = [*range(1, 15)]
year_list = [*range(2019, 2024)]

In [None]:
#Start with an empty DF (no rows, no columns); the next step fills it with the Elo data.
elo_df = pd.DataFrame(data=None)

In [None]:
#Use CFB Database to call in Elo data and modify DF. Elo is not necessary, but it is the best indicator of performance in my experience. If you choose to use another indicator of performance, make sure to keep things consistent.
#One request is made per (year, week) pair, and each result gets a 'week' column inserted at position 1.
#elo_df is rebuilt from scratch every time this cell runs, so re-running it does not append duplicate rows.
url = "https://api.collegefootballdata.com/ratings/elo"
#The API expects "Authorization: Bearer <key>"; add the prefix only when the key was supplied without it.
auth_value = api_key if api_key.lower().startswith("bearer ") else "Bearer " + api_key
headers = {"Authorization": auth_value}

weekly_frames = []
for j in year_list:
    for i in week_list:
        params = {"year":j,'week':i}
        #The timeout keeps a stalled connection from hanging the notebook indefinitely.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        #Stop right away on auth/rate-limit/server errors (raises requests.exceptions.HTTPError) instead of storing the error payload as if it were data.
        response.raise_for_status()
        team_data = response.json()
        if not team_data:
            #Nothing returned for this week; an empty frame has no columns, so insert() at position 1 would fail.
            continue
        week_df = pd.json_normalize(team_data)
        week_df.insert(1,'week',i)
        weekly_frames.append(week_df)

#Concatenate once at the end rather than re-copying the growing frame on every request; ignore_index gives the result a clean 0..n-1 index.
elo_df = pd.concat(weekly_frames, ignore_index=True) if weekly_frames else pd.DataFrame()

In [None]:
#Create unique key for each team, season, and week. This is important, as you will need it to complete your analysis.
#The key is the plain concatenation team + year + week with no separator; the media keys are built the same way, so the two formats must stay in sync.
#Remove a key left over from an earlier run first: DataFrame.insert raises ValueError when the column already exists, which made this cell fail when re-run.
elo_df = elo_df.drop(columns=['uniqueKey'], errors='ignore')
elo_df.insert(3, 'uniqueKey', elo_df['team']+elo_df['year'].astype(str)+elo_df['week'].astype(str))

In [None]:
#Start with an empty media DF (no rows, no columns); it will hold the network each game was played on.
media_df = pd.DataFrame(data=None)

In [None]:
#Use CFB Database to call in media data and modify DF. Note the 'mediaType':'tv' which is necessary to avoid double counting games that were on multiple forms of media (ex: tv and streaming)
#One request is made per season. media_df is rebuilt from scratch every time this cell runs, so re-running it does not append duplicate games.
url = "https://api.collegefootballdata.com/games/media"
#The API expects "Authorization: Bearer <key>"; add the prefix only when the key was supplied without it.
auth_value = api_key if api_key.lower().startswith("bearer ") else "Bearer " + api_key
headers = {"Authorization": auth_value}

yearly_frames = []
for j in year_list:
    params = {"year":j, 'seasonType':'regular', 'mediaType':'tv', 'classification':'fbs'}
    #The timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(url, headers=headers, params=params, timeout=30)
    #Stop right away on auth/rate-limit/server errors (raises requests.exceptions.HTTPError) instead of storing the error payload as if it were data.
    response.raise_for_status()
    year_data = response.json()
    if not year_data:
        #Nothing returned for this season; skip it so no column-less frame is concatenated.
        continue
    year_df = pd.json_normalize(year_data)
    yearly_frames.append(year_df)

#Concatenate once at the end rather than re-copying the growing frame on every request; ignore_index gives the result a clean 0..n-1 index.
media_df = pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()

In [None]:
#Create unique keys for each team, season, and week, both home and away. These separate keys are important, as they are how you will include both home and away Elos and fans.
#Each key is team + season + week with no separator, the same format as the Elo 'uniqueKey', so the two can be matched in the merge.
#Remove keys left over from an earlier run first: DataFrame.insert raises ValueError when the column already exists, which made this cell fail when re-run.
#Dropping both before inserting also keeps the insert positions (7 and 10) pointing at the same places as on a first run.
media_df = media_df.drop(columns=['homeUniqueKey', 'awayUniqueKey'], errors='ignore')
#The season+week suffix is shared by both keys, so build it once.
season_week = media_df['season'].astype(str)+media_df['week'].astype(str)
media_df.insert(7,'homeUniqueKey',media_df['homeTeam']+season_week)
media_df.insert(10,'awayUniqueKey',media_df['awayTeam']+season_week)

In [None]:
#Combine media and elo DFs for later use.
#Two left merges attach the Elo rating to every game: first through the home key (renamed to 'homeElo'), then through the away key (renamed to 'awayElo').
#Both merges bring in a 'uniqueKey' column, which pandas suffixes to 'uniqueKey_x'/'uniqueKey_y' on the second merge; both helper columns are dropped at the end.
media_elo_df = (
    media_df
    .merge(elo_df[['elo', 'uniqueKey']], how='left', left_on='homeUniqueKey', right_on='uniqueKey')
    .rename(columns={'elo': 'homeElo'})
    .merge(elo_df[['elo', 'uniqueKey']], how='left', left_on='awayUniqueKey', right_on='uniqueKey')
    .rename(columns={'elo': 'awayElo'})
    .drop(columns=['uniqueKey_x', 'uniqueKey_y'])
)

In [None]:
#Flagship networks, one per line so the list is easy to edit.
#NOTE(review): presumably these are compared against the outlet names in the media data; string matching is case-sensitive, so confirm the spelling (e.g. 'Fox') matches what the API returns.
flagship_networks = [
    'ESPN',
    'ABC',
    'CBS',
    'NBC',
    'Fox',
]

In [None]:
#Power Five (P5) conferences, one per line so the list is easy to edit.
P5_Conferences = [
    'ACC',
    'SEC',
    'Big Ten',
    'Big 12',
    'Pac-12',
]

In [None]:
#Load the Reddit flair data from r/CFB (pulled separately; the file is provided on the GitHub). Replace the placeholder with the path to that CSV.
#The merge step below reads the 'Long Team' and 'Flairs' columns from this file.
fans_db = pd.read_csv(filepath_or_buffer="INSERT REDDIT DATA FILE HERE")

In [None]:
#Merge the two DFs to perform analysis.
#Two left merges attach the flair counts to every game: first matching 'Long Team' to the home team ('homeFlairs'), then to the away team ('awayFlairs').
#The 'Long Team' helper column is dropped after each merge so it does not collide on the second pass.
media_elo_fans_df = (
    media_elo_df
    .merge(fans_db[['Long Team', 'Flairs']], how='left', left_on='homeTeam', right_on='Long Team')
    .rename(columns={'Flairs': 'homeFlairs'})
    .drop(columns=['Long Team'])
    .merge(fans_db[['Long Team', 'Flairs']], how='left', left_on='awayTeam', right_on='Long Team')
    .rename(columns={'Flairs': 'awayFlairs'})
    .drop(columns=['Long Team'])
)

In [None]:
#From here you can run your own probit model. Included in the repo is some example code to show one model you could run, but you should play with the model to work with variables you might think are important.