## Data Collection through CFBD API

In [1]:
import pandas as pd
import numpy as np
from time import sleep
import cfbd

In [2]:
# configure the api key and set up the api instance
configuration = cfbd.Configuration()
configuration.api_key['Authorization'] = 'x9iZ68qCB07RD29TpFRkgqOEGRPR3XAy/25dzX9u9Z/h+PweYHyvF1I/FLjLooLM'
configuration.api_key_prefix['Authorization'] = 'Bearer'

api_config = cfbd.ApiClient(configuration)
api_instance = cfbd.GamesApi(cfbd.ApiClient(configuration))

In [3]:
# define a function to process the stats dictionary for a given team into the format we want for our dictionary
def process_stats(stats):
    stat_dict = {}

    # loop over all statistics provided for a given team for a given game
    for stat in stats:

        # input the initial dictionary of the form {'category': 'statcategory', 'stat': 'value'} and add to new dictionary {'statcategory': 'value'}
        stat_dict[stat.category] = stat.stat
        
    return stat_dict

In [4]:
# input the team data for a given game and process then return the teams stats as a dictionary with proper home or away prefixes
def team_game_dict(team_data, prefix):
    game_dict = {}

    # add in the "outer-level" game meta-data for the given team
    game_dict[prefix + '_school'] = team_data.school
    game_dict[prefix + '_school_id'] = team_data.school_id
    game_dict[prefix + '_points'] = team_data.points

    # extract the game statistics for the given team
    game_stats = team_data.stats
    stat_dict = process_stats(game_stats)
    stat_dict = {prefix + '_' + key: val for key, val in stat_dict.items()}

    game_dict.update(stat_dict)

    return game_dict


In [5]:
# function to input an entire game data list from an api call and return the dictionary of all relevant game data
def game_processing(game):

    # determine which team was home or away and then use the appropriate calls to the team_game_dict function
    if game.teams[0].home_away == "away":
        home_dict = team_game_dict(game.teams[0], 'home')
        away_dict = team_game_dict(game.teams[1], 'away')
    else:
        home_dict = team_game_dict(game.teams[1], 'home')
        away_dict = team_game_dict(game.teams[0], 'away')

    # merge the dictionaries for the home and away team game data and add in the game id
    home_dict.update(away_dict)
    home_dict['id'] = game.id
    
    return home_dict
    
    

In [None]:
all_games_list = []

In [47]:
games_list = []
year = 2023

# loop over all the weeks in a season, up to 15 possible
for week in range(1, 16):
    
    # make the api call to gather all the game stats for a given week
    games_week = api_instance.get_team_game_stats(year=year, week=week, classification='fbs')

    if len(games_week) == 0:
        continue
    
    # loop over every game and extract the game data as a dictionary, appending it to the list
    for game in games_week:
        game_dict = game_processing(game)
        game_dict['year'] = year
        game_dict['week'] = week
        games_list.append(game_dict)
        
    print(f"year {year} week {week} finished")

year 2023 week 1 finished
year 2023 week 2 finished
year 2023 week 3 finished
year 2023 week 4 finished
year 2023 week 5 finished
year 2023 week 6 finished
year 2023 week 7 finished
year 2023 week 8 finished
year 2023 week 9 finished
year 2023 week 10 finished
year 2023 week 11 finished
year 2023 week 12 finished
year 2023 week 13 finished
year 2023 week 14 finished
year 2023 week 15 finished


In [None]:
all_games_list = all_games_list + games_list

In [53]:
games_df = pd.DataFrame.from_records(all_games_list)
games_df.head()

Unnamed: 0,home_school,home_school_id,home_points,home_fumblesRecovered,home_rushingTDs,home_puntReturnYards,home_puntReturnTDs,home_puntReturns,home_passingTDs,home_kickReturnYards,...,home_sacks,home_qbHurries,home_passesDeflected,away_totalFumbles,away_tacklesForLoss,away_defensiveTDs,away_tackles,away_sacks,away_qbHurries,away_passesDeflected
0,Ohio State,194,34,1,1,24,0,3,2,53,...,,,,,,,,,,
1,Troy,2653,10,1,1,11,0,2,0,54,...,,,,,,,,,,
2,Boston College,103,30,0,2,0,0,1,1,56,...,,,,,,,,,,
3,Penn State,213,26,1,1,9,0,2,1,129,...,,,,,,,,,,
4,UT San Antonio,2636,27,2,3,-4,0,5,0,45,...,,,,,,,,,,


In [55]:
games_df.drop_duplicates(inplace=True)
games_df.to_csv('all_games_data.csv')