## Data Cleaning
Processing data into numeric formats for model inputs and cleaning away critical missing/NaN values.

In [2]:
import pandas as pd
import numpy as np

In [3]:
games_df = pd.read_csv('all_games_data.csv')
print(games_df.head())
print(games_df.keys())

   Unnamed: 0     home_school  home_school_id  home_points  \
0           0      Ohio State             194           34   
1           1            Troy            2653           10   
2           2  Boston College             103           30   
3           3      Penn State             213           26   
4           4  UT San Antonio            2636           27   

   home_fumblesRecovered  home_rushingTDs  home_puntReturnYards  \
0                    1.0              1.0                  24.0   
1                    1.0              1.0                  11.0   
2                    0.0              2.0                   0.0   
3                    1.0              1.0                   9.0   
4                    2.0              3.0                  -4.0   

   home_puntReturnTDs  home_puntReturns  home_passingTDs  ...  home_sacks  \
0                 0.0               3.0              2.0  ...         NaN   
1                 0.0               2.0              0.0  ...         

In [4]:
empty_lens = {}
for key in games_df.keys():
    empty_lens[key] = games_df[key].isna().sum()
print(empty_lens)

remove_missing = [key for key in empty_lens.keys() if empty_lens[key] <= 10]
print(remove_missing)

{'Unnamed: 0': 0, 'home_school': 0, 'home_school_id': 0, 'home_points': 0, 'home_fumblesRecovered': 1, 'home_rushingTDs': 2, 'home_puntReturnYards': 2786, 'home_puntReturnTDs': 2786, 'home_puntReturns': 2786, 'home_passingTDs': 3, 'home_kickReturnYards': 1038, 'home_kickReturnTDs': 1038, 'home_kickReturns': 1038, 'home_kickingPoints': 211, 'home_firstDowns': 1, 'home_thirdDownEff': 1, 'home_fourthDownEff': 1, 'home_totalYards': 1, 'home_netPassingYards': 1, 'home_completionAttempts': 1, 'home_yardsPerPass': 1, 'home_rushingYards': 1, 'home_rushingAttempts': 1, 'home_yardsPerRushAttempt': 1, 'home_totalPenaltiesYards': 1, 'home_turnovers': 1, 'home_fumblesLost': 1, 'home_interceptions': 1, 'home_possessionTime': 9, 'away_school': 0, 'away_school_id': 0, 'away_points': 0, 'away_fumblesRecovered': 1, 'away_rushingTDs': 1, 'away_passingTDs': 2, 'away_kickReturnYards': 1027, 'away_kickReturnTDs': 1027, 'away_kickReturns': 1027, 'away_kickingPoints': 61, 'away_interceptionYards': 3340, 'away

In [5]:
clean_games = games_df
clean_games.dropna(subset=remove_missing, inplace=True)
clean_games.drop(columns=["Unnamed: 0"], inplace=True)

In [9]:
print(clean_games.keys())

Index(['Unnamed: 0', 'home_school', 'home_school_id', 'home_points',
       'home_fumblesRecovered', 'home_rushingTDs', 'home_puntReturnYards',
       'home_puntReturnTDs', 'home_puntReturns', 'home_passingTDs',
       'home_kickReturnYards', 'home_kickReturnTDs', 'home_kickReturns',
       'home_kickingPoints', 'home_firstDowns', 'home_thirdDownEff',
       'home_fourthDownEff', 'home_totalYards', 'home_netPassingYards',
       'home_completionAttempts', 'home_yardsPerPass', 'home_rushingYards',
       'home_rushingAttempts', 'home_yardsPerRushAttempt',
       'home_totalPenaltiesYards', 'home_turnovers', 'home_fumblesLost',
       'home_interceptions', 'home_possessionTime', 'away_school',
       'away_school_id', 'away_points', 'away_fumblesRecovered',
       'away_rushingTDs', 'away_passingTDs', 'away_kickReturnYards',
       'away_kickReturnTDs', 'away_kickReturns', 'away_kickingPoints',
       'away_interceptionYards', 'away_interceptionTDs',
       'away_passesIntercepted', 'awa

In [20]:
for key in clean_games.keys():
    print(key, clean_games.loc[0][key])

home_school Ohio State
home_school_id 194
home_points 34
home_fumblesRecovered 1.0
home_rushingTDs 1.0
home_puntReturnYards 24.0
home_puntReturnTDs 0.0
home_puntReturns 3.0
home_passingTDs 2.0
home_kickReturnYards 53.0
home_kickReturnTDs 0.0
home_kickReturns 2.0
home_kickingPoints 10.0
home_firstDowns 19.0
home_thirdDownEff 2-8
home_fourthDownEff 1-2
home_totalYards 420.0
home_netPassingYards 226.0
home_completionAttempts 12-15
home_yardsPerPass 15.1
home_rushingYards 194.0
home_rushingAttempts 40.0
home_yardsPerRushAttempt 4.8
home_totalPenaltiesYards 2-20
home_turnovers 1.0
home_fumblesLost 0.0
home_interceptions 1.0
home_possessionTime 27:34
away_school Navy
away_school_id 2426
away_points 17
away_fumblesRecovered 0.0
away_rushingTDs 2.0
away_passingTDs 0.0
away_kickReturnYards 64.0
away_kickReturnTDs 0.0
away_kickReturns 4.0
away_kickingPoints 5.0
away_interceptionYards 0.0
away_interceptionTDs 0.0
away_passesIntercepted 1.0
away_firstDowns 22.0
away_thirdDownEff 4-12
away_fourthDo

In [33]:
def get_percentage(x):
    x = str(x)
    x = x.split("-")
    #print(x)
    a, b = int(x[0]), int(x[-1])
    if b == 0:
        return 1
    else:
        return float(a/b)

In [34]:
def get_max(x):
    x = str(x)
    x = x.split("-")
    #print(x)
    return int(x[-1])

def get_min(x):
    x = str(x)
    x = x.split("-")
    #print(x)
    return int(x[0])

In [23]:
prefixes = ["home_", "away_"]
categories = ["completionAttempts", "totalPenaltiesYards", "thirdDownEff", "fourthDownEff"]

In [35]:
for prefix in prefixes:
    clean_games[prefix + "passAttempts"] = clean_games[prefix + "completionAttempts"].apply(get_max)
    clean_games[prefix + "passAccuracy"] = clean_games[prefix + "completionAttempts"].apply(get_percentage)
    clean_games[prefix + "penaltyYards"] = clean_games[prefix + "totalPenaltiesYards"].apply(get_max)
    clean_games[prefix + "penalties"] = clean_games[prefix + "totalPenaltiesYards"].apply(get_min)
    clean_games[prefix + "thirdDowns"] = clean_games[prefix + "thirdDownEff"].apply(get_max)
    clean_games[prefix + "thirdDownPercentage"] = clean_games[prefix + "thirdDownEff"].apply(get_percentage)
    clean_games[prefix + "fourthDowns"] = clean_games[prefix + "fourthDownEff"].apply(get_max)
    clean_games[prefix + "fourthDownPercentage"] = clean_games[prefix + "fourthDownEff"].apply(get_percentage)

In [36]:
clean_games.keys()

Index(['home_school', 'home_school_id', 'home_points', 'home_fumblesRecovered',
       'home_rushingTDs', 'home_puntReturnYards', 'home_puntReturnTDs',
       'home_puntReturns', 'home_passingTDs', 'home_kickReturnYards',
       'home_kickReturnTDs', 'home_kickReturns', 'home_kickingPoints',
       'home_firstDowns', 'home_thirdDownEff', 'home_fourthDownEff',
       'home_totalYards', 'home_netPassingYards', 'home_completionAttempts',
       'home_yardsPerPass', 'home_rushingYards', 'home_rushingAttempts',
       'home_yardsPerRushAttempt', 'home_totalPenaltiesYards',
       'home_turnovers', 'home_fumblesLost', 'home_interceptions',
       'home_possessionTime', 'away_school', 'away_school_id', 'away_points',
       'away_fumblesRecovered', 'away_rushingTDs', 'away_passingTDs',
       'away_kickReturnYards', 'away_kickReturnTDs', 'away_kickReturns',
       'away_kickingPoints', 'away_interceptionYards', 'away_interceptionTDs',
       'away_passesIntercepted', 'away_firstDowns', 'away_

In [45]:
clean_games.to_csv("clean_games_data.csv")

### Preliminary visualization of correlations
Here we will see correlations between various statistics, for both home and away teams, with a focus on the correlation between each individual statistic and the eventual score, and each individual statistic and the win/loss.

In [50]:
clean_games = pd.read_csv("clean_games_data.csv")

numeric_columns = clean_games.select_dtypes(exclude=['object']).columns
numeric_data = clean_games[numeric_columns]

In [64]:
numeric_data.dropna(axis=1, inplace=True)
numeric_data.drop(columns=["Unnamed: 0", "week", "year", "home_school_id", "away_school_id"], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  numeric_data.dropna(axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  numeric_data.drop(columns=["Unnamed: 0", "week", "year", "home_school_id", "away_school_id"], inplace=True)


In [65]:
home_keys = [key for key in numeric_data.keys() if "home" in key]
away_keys = [key for key in numeric_data.keys() if "away" in key]

In [66]:
home_games = numeric_data[home_keys]
away_games = numeric_data[away_keys]

In [70]:
home_corr = home_games.corr()
home_corr.style.background_gradient(cmap="coolwarm")

Unnamed: 0,home_points,home_fumblesRecovered,home_rushingTDs,home_passingTDs,home_firstDowns,home_totalYards,home_netPassingYards,home_yardsPerPass,home_rushingYards,home_rushingAttempts,home_yardsPerRushAttempt,home_turnovers,home_fumblesLost,home_interceptions,home_passAttempts,home_passAccuracy,home_penaltyYards,home_penalties,home_thirdDowns,home_thirdDownPercentage,home_fourthDowns,home_fourthDownPercentage
home_points,1.0,0.110134,0.698629,0.638042,0.680159,0.806728,0.502575,0.594224,0.567698,0.348364,0.545159,-0.230712,-0.079644,-0.236187,0.058286,0.451046,0.056887,0.052854,-0.124533,0.534238,-0.089214,0.262258
home_fumblesRecovered,0.110134,1.0,0.072012,0.014948,-0.045953,-0.013969,-0.035001,-0.011065,0.019719,0.062156,-0.013463,0.00384,0.018476,-0.010288,-0.027111,-0.013333,0.015786,0.031684,0.047157,-0.052275,-0.016875,-0.017172
home_rushingTDs,0.698629,0.072012,1.0,-0.001752,0.463787,0.55404,0.089414,0.352617,0.677146,0.477594,0.590443,-0.177066,-0.042497,-0.19671,-0.203784,0.236764,0.021837,-0.021266,-0.149701,0.422795,-0.065935,0.188863
home_passingTDs,0.638042,0.014948,-0.001752,1.0,0.489287,0.575886,0.640813,0.489871,0.088436,-0.03593,0.15747,-0.10038,-0.041415,-0.097155,0.30372,0.399294,0.044368,0.078242,-0.07342,0.349602,-0.005221,0.143174
home_firstDowns,0.680159,-0.045953,0.463787,0.489287,1.0,0.857611,0.626431,0.420124,0.499997,0.406909,0.427301,-0.047825,-0.00505,-0.05846,0.401566,0.458151,0.043721,0.061295,0.111178,0.601084,0.12638,0.138362
home_totalYards,0.806728,-0.013969,0.55404,0.575886,0.857611,1.0,0.707777,0.626468,0.608443,0.354536,0.602578,-0.085325,-0.007698,-0.105387,0.291444,0.471827,0.074,0.104506,0.012159,0.57339,0.05545,0.138261
home_netPassingYards,0.502575,-0.035001,0.089414,0.640813,0.626431,0.707777,1.0,0.567965,-0.129982,-0.21193,-0.02725,0.017798,0.001065,0.022431,0.694855,0.502998,0.059957,0.136025,0.073194,0.349673,0.092817,0.036566
home_yardsPerPass,0.594224,-0.011065,0.352617,0.489871,0.420124,0.626468,0.567965,1.0,0.241223,0.174649,0.219332,-0.158876,-0.008797,-0.200823,-0.095777,0.565552,0.028528,0.030398,-0.28999,0.383715,-0.110691,0.153894
home_rushingYards,0.567698,0.019719,0.677146,0.088436,0.499997,0.608443,-0.129982,0.241223,1.0,0.735729,0.876328,-0.139736,-0.01199,-0.173104,-0.371522,0.097183,0.036513,-0.006113,-0.065119,0.411933,-0.026437,0.152994
home_rushingAttempts,0.348364,0.062156,0.477594,-0.03593,0.406909,0.354536,-0.21193,0.174649,0.735729,1.0,0.374633,-0.091955,0.049653,-0.16162,-0.42467,0.021312,0.004093,-0.033438,0.286006,0.333539,0.100373,0.116714


In [69]:
away_corr = away_games.corr()
away_corr.style.background_gradient(cmap="coolwarm")

Unnamed: 0,away_points,away_fumblesRecovered,away_rushingTDs,away_passingTDs,away_firstDowns,away_totalYards,away_netPassingYards,away_yardsPerPass,away_rushingYards,away_rushingAttempts,away_yardsPerRushAttempt,away_turnovers,away_fumblesLost,away_interceptions,away_passAttempts,away_passAccuracy,away_penaltyYards,away_penalties,away_thirdDowns,away_thirdDownPercentage,away_fourthDowns,away_fourthDownPercentage
away_points,1.0,0.12312,0.693037,0.612975,0.636461,0.788354,0.429341,0.55597,0.557805,0.295289,0.566223,-0.272424,-0.106136,-0.277845,-0.049389,0.434256,0.085719,0.053909,-0.285991,0.517542,-0.170759,0.26468
away_fumblesRecovered,0.12312,1.0,0.06902,0.040905,-0.045573,-0.014873,-0.040545,-0.003912,0.023212,0.064644,-0.000991,-0.000869,0.021764,-0.020951,-0.046372,-0.025692,0.035303,0.040408,0.040529,-0.039485,-0.024843,-0.004275
away_rushingTDs,0.693037,0.06902,1.0,-0.04677,0.444889,0.548448,0.009533,0.322414,0.688919,0.456885,0.610641,-0.214839,-0.069416,-0.232082,-0.292175,0.224307,0.020013,-0.010087,-0.262184,0.422339,-0.105859,0.222445
away_passingTDs,0.612975,0.040905,-0.04677,1.0,0.446773,0.544954,0.620804,0.458413,0.048446,-0.095237,0.150007,-0.115214,-0.056218,-0.107221,0.257849,0.390641,0.089184,0.0729,-0.169372,0.319597,-0.068026,0.104043
away_firstDowns,0.636461,-0.045573,0.444889,0.446773,1.0,0.827786,0.555405,0.33826,0.476886,0.398552,0.390474,-0.079522,-0.013384,-0.09708,0.329363,0.410404,0.111292,0.093735,0.007076,0.556649,0.043712,0.157319
away_totalYards,0.788354,-0.014873,0.548448,0.544954,0.827786,1.0,0.643443,0.574707,0.60475,0.324955,0.609309,-0.130891,-0.016799,-0.164539,0.188841,0.442903,0.159011,0.137303,-0.129474,0.531313,-0.035849,0.154267
away_netPassingYards,0.429341,-0.040545,0.009533,0.620804,0.555405,0.643443,1.0,0.508942,-0.220524,-0.291006,-0.084691,0.003793,0.000947,0.00435,0.672353,0.474967,0.153183,0.153678,0.004056,0.271583,0.027952,0.020905
away_yardsPerPass,0.55597,-0.003912,0.322414,0.458413,0.33826,0.574707,0.508942,1.0,0.202783,0.122484,0.20764,-0.181207,-0.029109,-0.222478,-0.188166,0.559765,0.059859,0.03908,-0.379941,0.358141,-0.160361,0.141145
away_rushingYards,0.557805,0.023212,0.688919,0.048446,0.476886,0.60475,-0.220524,0.202783,1.0,0.716801,0.864496,-0.170666,-0.022342,-0.214143,-0.4589,0.070189,0.043229,0.015046,-0.169226,0.394402,-0.074733,0.17482
away_rushingAttempts,0.295289,0.064644,0.456885,-0.095237,0.398552,0.324955,-0.291006,0.122484,0.716801,1.0,0.320644,-0.107401,0.034632,-0.178966,-0.463019,-0.012403,0.006944,-0.005466,0.241994,0.307964,0.07624,0.118287


In [73]:
differential =  np.array(home_corr) - np.array(away_corr)
df = pd.DataFrame(differential)

In [74]:
df.style.background_gradient()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
0,0.0,-0.012986,0.005592,0.025067,0.043699,0.018374,0.073233,0.038254,0.009892,0.053075,-0.021064,0.041711,0.026492,0.041658,0.107675,0.01679,-0.028832,-0.001055,0.161458,0.016695,0.081544,-0.002423
1,-0.012986,0.0,0.002992,-0.025957,-0.00038,0.000904,0.005544,-0.007153,-0.003493,-0.002488,-0.012472,0.004709,-0.003288,0.010663,0.019261,0.012359,-0.019517,-0.008724,0.006628,-0.012789,0.007968,-0.012897
2,0.005592,0.002992,0.0,0.045018,0.018898,0.005592,0.079881,0.030204,-0.011773,0.02071,-0.020198,0.037773,0.026919,0.035372,0.088391,0.012458,0.001824,-0.011179,0.112483,0.000457,0.039924,-0.033582
3,0.025067,-0.025957,0.045018,0.0,0.042513,0.030932,0.02001,0.031457,0.039991,0.059307,0.007464,0.014834,0.014803,0.010066,0.045872,0.008653,-0.044815,0.005342,0.095953,0.030004,0.062805,0.039132
4,0.043699,-0.00038,0.018898,0.042513,0.0,0.029825,0.071026,0.081864,0.023111,0.008357,0.036827,0.031697,0.008334,0.03862,0.072203,0.047747,-0.067571,-0.03244,0.104102,0.044435,0.082669,-0.018957
5,0.018374,0.000904,0.005592,0.030932,0.029825,0.0,0.064334,0.051761,0.003692,0.029581,-0.006731,0.045566,0.009101,0.059152,0.102604,0.028923,-0.08501,-0.032796,0.141632,0.042077,0.091299,-0.016006
6,0.073233,0.005544,0.079881,0.02001,0.071026,0.064334,0.0,0.059024,0.090543,0.079076,0.057441,0.014006,0.000118,0.018082,0.022502,0.028031,-0.093226,-0.017653,0.069138,0.07809,0.064865,0.015662
7,0.038254,-0.007153,0.030204,0.031457,0.081864,0.051761,0.059024,0.0,0.03844,0.052165,0.011692,0.022331,0.020312,0.021655,0.092389,0.005787,-0.031331,-0.008681,0.089951,0.025574,0.04967,0.012749
8,0.009892,-0.003493,-0.011773,0.039991,0.023111,0.003692,0.090543,0.03844,0.0,0.018928,0.011832,0.03093,0.010352,0.041039,0.087378,0.026994,-0.006715,-0.02116,0.104107,0.017531,0.048296,-0.021826
9,0.053075,-0.002488,0.02071,0.059307,0.008357,0.029581,0.079076,0.052165,0.018928,0.0,0.053989,0.015446,0.015021,0.017346,0.038349,0.033714,-0.002851,-0.027973,0.044011,0.025575,0.024133,-0.001573


### Reformatting the clean_games_data csv to have one team per row 

In [5]:
games_df = pd.read_csv("clean_games_data.csv")
games_df.drop(columns=["Unnamed: 0"], inplace=True)

In [10]:
home_keys = [key for key in games_df.keys() if "away" not in key]
away_keys = [key for key in games_df.keys() if "home" not in key]
print(home_keys)

['home_school', 'home_school_id', 'home_points', 'home_fumblesRecovered', 'home_rushingTDs', 'home_puntReturnYards', 'home_puntReturnTDs', 'home_puntReturns', 'home_passingTDs', 'home_kickReturnYards', 'home_kickReturnTDs', 'home_kickReturns', 'home_kickingPoints', 'home_firstDowns', 'home_thirdDownEff', 'home_fourthDownEff', 'home_totalYards', 'home_netPassingYards', 'home_completionAttempts', 'home_yardsPerPass', 'home_rushingYards', 'home_rushingAttempts', 'home_yardsPerRushAttempt', 'home_totalPenaltiesYards', 'home_turnovers', 'home_fumblesLost', 'home_interceptions', 'home_possessionTime', 'id', 'year', 'week', 'home_interceptionYards', 'home_interceptionTDs', 'home_passesIntercepted', 'home_totalFumbles', 'home_tacklesForLoss', 'home_defensiveTDs', 'home_tackles', 'home_sacks', 'home_qbHurries', 'home_passesDeflected', 'home_passAttempts', 'home_passAccuracy', 'home_penaltyYards', 'home_penalties', 'home_thirdDowns', 'home_thirdDownPercentage', 'home_fourthDowns', 'home_fourthDo

In [14]:
keys = []
for key in home_keys:
    if "home" in key:
        temp = key.split("home_")
        keys.append(temp[1])
    else:
        keys.append(key)
print(keys)

['school', 'school_id', 'points', 'fumblesRecovered', 'rushingTDs', 'puntReturnYards', 'puntReturnTDs', 'puntReturns', 'passingTDs', 'kickReturnYards', 'kickReturnTDs', 'kickReturns', 'kickingPoints', 'firstDowns', 'thirdDownEff', 'fourthDownEff', 'totalYards', 'netPassingYards', 'completionAttempts', 'yardsPerPass', 'rushingYards', 'rushingAttempts', 'yardsPerRushAttempt', 'totalPenaltiesYards', 'turnovers', 'fumblesLost', 'interceptions', 'possessionTime', 'id', 'year', 'week', 'interceptionYards', 'interceptionTDs', 'passesIntercepted', 'totalFumbles', 'tacklesForLoss', 'defensiveTDs', 'tackles', 'sacks', 'qbHurries', 'passesDeflected', 'passAttempts', 'passAccuracy', 'penaltyYards', 'penalties', 'thirdDowns', 'thirdDownPercentage', 'fourthDowns', 'fourthDownPercentage']


In [17]:
home_keys_swap = {key: key.split("home_")[-1] for key in home_keys}
away_keys_swap = {key: key.split("away_")[-1] for key in away_keys}

In [21]:
home_df = games_df[home_keys]
home_df.rename(columns=home_keys_swap, inplace=True)
home_df.insert(1, "home", 1)
home_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_df.rename(columns=home_keys_swap, inplace=True)


Unnamed: 0,school,home,school_id,points,fumblesRecovered,rushingTDs,puntReturnYards,puntReturnTDs,puntReturns,passingTDs,...,qbHurries,passesDeflected,passAttempts,passAccuracy,penaltyYards,penalties,thirdDowns,thirdDownPercentage,fourthDowns,fourthDownPercentage
0,Ohio State,1,194,34,1.0,1.0,24.0,0.0,3.0,2.0,...,,,15,0.8,20,2,8,0.25,2,0.5
1,Troy,1,2653,10,1.0,1.0,11.0,0.0,2.0,0.0,...,,,31,0.677419,55,5,16,0.3125,3,0.333333
2,Boston College,1,103,30,0.0,2.0,0.0,0.0,1.0,1.0,...,,,25,0.68,55,7,17,0.529412,3,0.666667
3,Penn State,1,213,26,1.0,1.0,9.0,0.0,2.0,1.0,...,,,47,0.680851,90,9,18,0.555556,2,0.5
4,UT San Antonio,1,2636,27,2.0,3.0,-4.0,0.0,5.0,0.0,...,,,24,0.625,115,14,17,0.294118,1,1.0


In [22]:
away_df = games_df[away_keys]
away_df.rename(columns=away_keys_swap, inplace=True)
away_df.insert(1, "home", 0)
away_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  away_df.rename(columns=away_keys_swap, inplace=True)


Unnamed: 0,school,home,school_id,points,fumblesRecovered,rushingTDs,passingTDs,kickReturnYards,kickReturnTDs,kickReturns,...,qbHurries,passesDeflected,passAttempts,passAccuracy,penaltyYards,penalties,thirdDowns,thirdDownPercentage,fourthDowns,fourthDownPercentage
0,Navy,0,2426,17,0.0,2.0,0.0,64.0,0.0,4.0,...,,,4,0.5,20,3,12,0.333333,1,1.0
1,UAB,0,5,48,1.0,4.0,2.0,64.0,0.0,2.0,...,,,20,0.65,35,3,14,0.642857,0,1.0
2,UMass,0,113,7,0.0,0.0,1.0,95.0,0.0,4.0,...,,,22,0.409091,23,4,11,0.272727,1,0.0
3,UCF,0,2116,24,1.0,2.0,1.0,142.0,0.0,5.0,...,,,22,0.545455,47,8,13,0.384615,2,0.5
4,Houston,0,248,7,1.0,1.0,0.0,89.0,0.0,6.0,...,,,50,0.5,47,9,16,0.25,4,0.5


In [26]:
new_games_df = pd.concat([home_df, away_df])
new_games_df = new_games_df.sort_values("id")
new_games_df.head(10)

Unnamed: 0,school,home,school_id,points,fumblesRecovered,rushingTDs,puntReturnYards,puntReturnTDs,puntReturns,passingTDs,...,qbHurries,passesDeflected,passAttempts,passAccuracy,penaltyYards,penalties,thirdDowns,thirdDownPercentage,fourthDowns,fourthDownPercentage
9,Vanderbilt,0,238,7,2.0,0.0,26.0,0.0,5.0,0.0,...,,,34,0.470588,66,7,14,0.214286,1,0.0
9,Temple,1,218,37,4.0,1.0,6.0,0.0,2.0,2.0,...,,,36,0.638889,23,3,17,0.117647,4,0.5
6,BYU,1,252,35,1.0,2.0,24.0,0.0,2.0,3.0,...,,,36,0.777778,150,15,13,0.384615,1,1.0
6,Connecticut,0,41,10,2.0,1.0,-1.0,0.0,1.0,0.0,...,,,48,0.520833,90,8,17,0.470588,4,0.25
3,Penn State,1,213,26,1.0,1.0,9.0,0.0,2.0,1.0,...,,,47,0.680851,90,9,18,0.555556,2,0.5
3,UCF,0,2116,24,1.0,2.0,,,,1.0,...,,,22,0.545455,47,8,13,0.384615,2,0.5
10,East Carolina,0,151,52,1.0,3.0,28.0,0.0,4.0,4.0,...,,,46,0.717391,72,8,8,0.25,1,1.0
10,North Carolina Central,1,2428,7,1.0,0.0,22.0,0.0,2.0,0.0,...,,,22,0.590909,91,10,14,0.285714,0,1.0
4,Houston,0,248,7,1.0,1.0,0.0,0.0,1.0,0.0,...,,,50,0.5,47,9,16,0.25,4,0.5
4,UT San Antonio,1,2636,27,2.0,3.0,-4.0,0.0,5.0,0.0,...,,,24,0.625,115,14,17,0.294118,1,1.0
