# Improving The Data

#### Extrapolating Historical Data from the Original Data Set

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
##read the game-level CSV (path is relative to the notebook's working directory)
data = pd.read_csv('GAME_MERGED_REORGANIZED.csv')
##preview the first rows to check the columns that the cells below rely on
data.head()

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_shots,away_hits,away_pim,away_powerPlayOpportunities,away_powerPlayGoals,away_faceOffWinPercentage,away_giveaways,away_takeaways,type,venue_link
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,37,20,15,1,0,57.6,13,3,R,/api/v1/venues/null
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,31,28,6,3,1,47.4,11,4,R,/api/v1/venues/null
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,25,25,6,3,1,47.6,15,7,R,/api/v1/venues/null
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,34,17,4,2,0,55.9,2,5,R,/api/v1/venues/null
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,38,25,2,3,1,55.2,12,2,R,/api/v1/venues/null


Here is a small subset of the data to test the code on. Every team plays many games in this subset; judging by the `season` column of the rows displayed below, it starts in the 2010-2011 season and ends in the 2011-2012 season.

In [4]:
##first 2635 rows of the data (positions 0-2634)
##NOTE(review): the earlier note here said "2011-2012 season has 2636 games", but the
##frames displayed further down start in season 20102011 and end in 20112012 -- confirm the cut-off
##take an explicit copy: later cells add columns to `test`, and it should not share state with `data`
test = data[0:2635].copy()
##number of games each team plays as the HOME team within the subset
print(test['home_team_id'].value_counts())

23    99
6     99
3     95
26    94
4     94
28    93
27    93
1     93
18    93
15    93
14    90
5     89
17    89
16    88
19    87
13    86
24    85
7     85
9     85
8     85
10    82
12    82
29    82
2     82
20    82
22    82
30    82
21    82
25    82
11    41
52    41
Name: home_team_id, dtype: int64


#### Season Totals

Some historical data that we might want is each team's goals scored so far and goals against so far, for both the home team and the away team. First add columns of zeros to write the new values into.

In [5]:
##append the two running-total columns at the right-hand edge, initialised to 0
##(the position is the current column count rather than a hard-coded 37/38, so the
##cell keeps working if the CSV gains or loses a column)
test.insert(test.shape[1], 'home_goals_so_far', np.zeros(test.shape[0]))
test.insert(test.shape[1], 'away_goals_so_far', np.zeros(test.shape[0]))

This code loops through each game, then scans backwards to find that team's most recent earlier game and adds the goals scored there to the running total stored with it.

In [6]:
##For every game i, fill in the goals each side had scored BEFORE that game.
##Relies on `test` being labelled 0..n-1, which holds for a slice taken from row 0.
for i in range(test.shape[0]):
    ##the home side is handled first, then the away side, with the same search
    for side in ('home', 'away'):
        teamid = test[side + '_team_id'][i]
        ##a team with no earlier game in the data keeps a tally of 0
        goalsSoFar = 0
        ##walk back from the previous row down to row 0 (inclusive)
        j = i - 1
        while j >= 0:
            if teamid == test['home_team_id'][j]:
                ##the team was at home in game j: its tally before j + goals scored in j
                goalsSoFar = test['home_goals_so_far'][j] + test['home_goals'][j]
                break
            if teamid == test['away_team_id'][j]:
                ##the team was away in game j: same idea with the away columns
                goalsSoFar = test['away_goals_so_far'][j] + test['away_goals'][j]
                break
            j -= 1
        test.at[i, side + '_goals_so_far'] = goalsSoFar

test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_pim,away_powerPlayOpportunities,away_powerPlayGoals,away_faceOffWinPercentage,away_giveaways,away_takeaways,type,venue_link,home_goals_so_far,away_goals_so_far
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,15,1,0,57.6,13,3,R,/api/v1/venues/null,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,6,3,1,47.4,11,4,R,/api/v1/venues/null,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,6,3,1,47.6,15,7,R,/api/v1/venues/null,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,4,2,0,55.9,2,5,R,/api/v1/venues/null,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,2,3,1,55.2,12,2,R,/api/v1/venues/null,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,6,1,0,61.5,4,3,P,/api/v1/venues/null,454.0,476.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,19,5,1,67.2,12,8,P,/api/v1/venues/null,478.0,455.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,46,1,0,35.2,5,13,P,/api/v1/venues/null,482.0,455.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,12,5,1,43.4,8,5,P,/api/v1/venues/null,458.0,483.0


In [7]:
##test.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\test.csv')

In [8]:
##goals scored by team 5 in each of its home games
test.loc[test['home_team_id'] == 5, 'home_goals']

1       2
16      2
36      3
44      3
69      5
       ..
2519    5
2543    4
2549    3
2558    5
2581    3
Name: home_goals, Length: 89, dtype: int64

In [9]:
##goals scored by team 5 in each of its away games
test.loc[test['away_team_id'] == 5, 'away_goals']

30       3
55       5
88       4
111      0
124      3
        ..
2476     5
2510     5
2566     4
2575    10
2588     1
Name: away_goals, Length: 88, dtype: int64

In [10]:
##running tally stored on team 5's home games, to compare with the two listings above
test.loc[test['home_team_id'] == 5, 'home_goals_so_far']

1         0.0
16        2.0
36        7.0
44       10.0
69       18.0
        ...  
2519    525.0
2543    530.0
2549    534.0
2558    537.0
2581    556.0
Name: home_goals_so_far, Length: 89, dtype: float64

Between rows 1 and 44, team 5 scores 2, 2, 3 and 3 goals as the home team (rows 1, 16, 36 and 44) and 3 goals as the away team (row 30). By row 44 its running tally of previously scored goals should therefore be 2 + 2 + 3 + 3 = 10 (rows 1, 16, 30 and 36), which excludes the 3 goals it scores in row 44 itself.

Now to make a function with similar results, so that this process can be repeated for all important stats

In [11]:
##fresh, independent copy of the subset so the helper below adds its columns to `test` only
test = data[0:2635].copy()
def addHistoricalColumnsTotals(df,homeColumnRead, awayColumnRead, homeColumnWrite, awayColumnWrite):
    """Append each team's running total of a stat, as it stood before every game.

    Rows are processed in their existing order, which is treated as
    chronological. For every row, the total the home team accumulated over all
    earlier rows is written to homeColumnWrite and the away team's total to
    awayColumnWrite. A team's total grows by homeColumnRead for games it
    played at home and by awayColumnRead for games it played away, so a row
    never includes its own game.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'home_team_id', 'away_team_id' and both read columns.
        Modified in place: the two write columns are appended on the right.
        Any index is accepted; rows are addressed by position.
    homeColumnRead, awayColumnRead : str
        Per-game stat credited to the home / away team of each row.
    homeColumnWrite, awayColumnWrite : str
        Names of the new float columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        From DataFrame.insert, if a write column already exists in df.
    """
    homeIds = df['home_team_id'].to_numpy()
    awayIds = df['away_team_id'].to_numpy()
    homeStats = df[homeColumnRead].to_numpy()
    awayStats = df[awayColumnRead].to_numpy()

    numRows = df.shape[0]
    homeTotals = np.zeros(numRows)
    awayTotals = np.zeros(numRows)

    ##team id -> total accumulated over the rows visited so far
    runningTotal = {}
    for row in range(numRows):
        homeId = homeIds[row]
        awayId = awayIds[row]
        ##a team that has not appeared yet starts from 0
        homeTotals[row] = runningTotal.get(homeId, 0.0)
        awayTotals[row] = runningTotal.get(awayId, 0.0)
        ##only now add this game's stats, so they count from the team's NEXT game on;
        ##home is stored last so it takes precedence should a row list one id on both sides
        runningTotal[awayId] = awayTotals[row] + awayStats[row]
        runningTotal[homeId] = homeTotals[row] + homeStats[row]

    df.insert(df.shape[1], homeColumnWrite, homeTotals)
    df.insert(df.shape[1], awayColumnWrite, awayTotals)
        

##build the goal tallies on the fresh subset with the helper, then show the frame
addHistoricalColumnsTotals(
    df=test,
    homeColumnRead='home_goals',
    awayColumnRead='away_goals',
    homeColumnWrite='home_goals_so_far',
    awayColumnWrite='away_goals_so_far',
)
test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_pim,away_powerPlayOpportunities,away_powerPlayGoals,away_faceOffWinPercentage,away_giveaways,away_takeaways,type,venue_link,home_goals_so_far,away_goals_so_far
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,15,1,0,57.6,13,3,R,/api/v1/venues/null,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,6,3,1,47.4,11,4,R,/api/v1/venues/null,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,6,3,1,47.6,15,7,R,/api/v1/venues/null,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,4,2,0,55.9,2,5,R,/api/v1/venues/null,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,2,3,1,55.2,12,2,R,/api/v1/venues/null,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,6,1,0,61.5,4,3,P,/api/v1/venues/null,454.0,476.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,19,5,1,67.2,12,8,P,/api/v1/venues/null,478.0,455.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,46,1,0,35.2,5,13,P,/api/v1/venues/null,482.0,455.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,12,5,1,43.4,8,5,P,/api/v1/venues/null,458.0,483.0


In [12]:
##goals scored by team 5 in each of its home games
test.loc[test['home_team_id'] == 5, 'home_goals']

1       2
16      2
36      3
44      3
69      5
       ..
2519    5
2543    4
2549    3
2558    5
2581    3
Name: home_goals, Length: 89, dtype: int64

In [13]:
##goals scored by team 5 in each of its away games
test.loc[test['away_team_id'] == 5, 'away_goals']

30       3
55       5
88       4
111      0
124      3
        ..
2476     5
2510     5
2566     4
2575    10
2588     1
Name: away_goals, Length: 88, dtype: int64

In [14]:
##running tally produced by the helper for team 5's home games
test.loc[test['home_team_id'] == 5, 'home_goals_so_far']

1         0.0
16        2.0
36        7.0
44       10.0
69       18.0
        ...  
2519    525.0
2543    530.0
2549    534.0
2558    537.0
2581    556.0
Name: home_goals_so_far, Length: 89, dtype: float64

The function `addHistoricalColumnsTotals` produces results identical to the hand-written loop above.

In [15]:
##independent copy of the subset, so the columns added here never touch `data`
season2011_2012 = data[0:2635].copy()

##(per-game stat to read, name used in the new '<side>_<name>_so_far' columns),
##listed in the order in which the column pairs are appended
totalStats = [
    ('goals', 'goals'),
    ('won', 'wins'),
    ('shots', 'shots'),
    ('hits', 'hits'),
    ('pim', 'pim'),
    ('powerPlayOpportunities', 'powerPlayOpportunities'),
    ('powerPlayGoals', 'powerPlayGoals'),
    ('giveaways', 'giveaways'),
    ('takeaways', 'takeaways'),
]
for readStat, writeStat in totalStats:
    addHistoricalColumnsTotals(
        season2011_2012,
        'home_' + readStat,
        'away_' + readStat,
        'home_' + writeStat + '_so_far',
        'away_' + writeStat + '_so_far',
    )
season2011_2012

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,home_pim_so_far,away_pim_so_far,home_powerPlayOpportunities_so_far,away_powerPlayOpportunities_so_far,home_powerPlayGoals_so_far,away_powerPlayGoals_so_far,home_giveaways_so_far,away_giveaways_so_far,home_takeaways_so_far,away_takeaways_so_far
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,1788.0,1808.0,613.0,565.0,116.0,135.0,1616.0,1816.0,1360.0,1392.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,1814.0,1794.0,566.0,614.0,135.0,117.0,1820.0,1620.0,1395.0,1361.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,1833.0,1813.0,571.0,619.0,135.0,118.0,1829.0,1632.0,1399.0,1369.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,1859.0,1859.0,620.0,577.0,118.0,135.0,1637.0,1837.0,1382.0,1409.0


In [16]:
##season2011_2012.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\season2011_2012.csv')

#### Previous Game

Besides each team's cumulative totals, another useful piece of information might be its performance in its previous game. `addHistoricalColumnsPrev` adds a pair of columns (home and away) containing each team's value of a stat in the last game it played.

In [17]:
##fresh, independent copy of the subset so the helper below adds its columns to `test` only
test = data[0:2635].copy()
def addHistoricalColumnsPrev(df,homeColumnRead, awayColumnRead, homeColumnWrite, awayColumnWrite):
    """Append each team's value of a stat from the previous game it played.

    Rows are processed in their existing order, which is treated as
    chronological. For every row, homeColumnWrite receives the stat the home
    team recorded in its most recent earlier row (homeColumnRead if it was at
    home there, awayColumnRead if it was away) and awayColumnWrite receives
    the same for the away team. A team with no earlier row gets 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'home_team_id', 'away_team_id' and both read columns.
        Modified in place: the two write columns are appended on the right.
        Any index is accepted; rows are addressed by position.
    homeColumnRead, awayColumnRead : str
        Per-game stat belonging to the home / away team of each row.
    homeColumnWrite, awayColumnWrite : str
        Names of the new float columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        From DataFrame.insert, if a write column already exists in df.
    """
    homeIds = df['home_team_id'].to_numpy()
    awayIds = df['away_team_id'].to_numpy()
    homeStats = df[homeColumnRead].to_numpy()
    awayStats = df[awayColumnRead].to_numpy()

    numRows = df.shape[0]
    homePrev = np.zeros(numRows)
    awayPrev = np.zeros(numRows)

    ##team id -> stat from the latest row visited in which that team played
    lastValue = {}
    for row in range(numRows):
        homeId = homeIds[row]
        awayId = awayIds[row]
        ##a team that has not appeared yet has no previous game: use 0
        homePrev[row] = lastValue.get(homeId, 0.0)
        awayPrev[row] = lastValue.get(awayId, 0.0)
        ##remember this game's stats for each team's NEXT appearance;
        ##home is stored last so it takes precedence should a row list one id on both sides
        lastValue[awayId] = awayStats[row]
        lastValue[homeId] = homeStats[row]

    df.insert(df.shape[1], homeColumnWrite, homePrev)
    df.insert(df.shape[1], awayColumnWrite, awayPrev)
        

##try the helper on goals: each side's goals in the last game it played
addHistoricalColumnsPrev(
    df=test,
    homeColumnRead='home_goals',
    awayColumnRead='away_goals',
    homeColumnWrite='home_goals_last_game',
    awayColumnWrite='away_goals_last_game',
)
test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_pim,away_powerPlayOpportunities,away_powerPlayGoals,away_faceOffWinPercentage,away_giveaways,away_takeaways,type,venue_link,home_goals_last_game,away_goals_last_game
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,15,1,0,57.6,13,3,R,/api/v1/venues/null,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,6,3,1,47.4,11,4,R,/api/v1/venues/null,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,6,3,1,47.6,15,7,R,/api/v1/venues/null,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,4,2,0,55.9,2,5,R,/api/v1/venues/null,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,2,3,1,55.2,12,2,R,/api/v1/venues/null,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,6,1,0,61.5,4,3,P,/api/v1/venues/null,1.0,2.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,19,5,1,67.2,12,8,P,/api/v1/venues/null,2.0,1.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,46,1,0,35.2,5,13,P,/api/v1/venues/null,4.0,0.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,12,5,1,43.4,8,5,P,/api/v1/venues/null,3.0,1.0


In [18]:
##test.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\test.csv')

In [19]:
##(per-game stat, suffix of the new columns), in the order the column pairs are appended;
##note that the win flag uses '_previous_game' while every other stat uses '_last_game'
lastGameStats = [
    ('goals', 'last_game'),
    ('won', 'previous_game'),
    ('shots', 'last_game'),
    ('hits', 'last_game'),
    ('pim', 'last_game'),
    ('powerPlayOpportunities', 'last_game'),
    ('powerPlayGoals', 'last_game'),
    ('giveaways', 'last_game'),
    ('takeaways', 'last_game'),
    ('faceOffWinPercentage', 'last_game'),
]
for statName, columnSuffix in lastGameStats:
    addHistoricalColumnsPrev(
        season2011_2012,
        'home_' + statName,
        'away_' + statName,
        'home_' + statName + '_' + columnSuffix,
        'away_' + statName + '_' + columnSuffix,
    )
season2011_2012

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,home_powerPlayOpportunities_last_game,away_powerPlayOpportunities_last_game,home_powerPlayGoals_last_game,away_powerPlayGoals_last_game,home_giveaways_last_game,away_giveaways_last_game,home_takeaways_last_game,away_takeaways_last_game,home_faceOffWinPercentage_last_game,away_faceOffWinPercentage_last_game
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,7.0,6.0,1.0,2.0,3.0,6.0,4.0,4.0,57.1,42.9
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,1.0,1.0,0.0,1.0,4.0,4.0,3.0,1.0,61.5,38.5
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,5.0,5.0,0.0,1.0,9.0,12.0,4.0,8.0,32.8,67.2
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,1.0,6.0,0.0,0.0,5.0,8.0,13.0,10.0,35.2,64.8


In [20]:
##season2011_2012.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\season2011_2012.csv')

#### Averages

Average stats for each team might also be useful information

In [21]:
##fresh, independent copy of the subset so the helpers below add their columns to `test` only
test = data[0:2635].copy()
def numGamesPlayed(df, homeColumnWrite, awayColumnWrite):
    """Append the number of games each team had already played before every game.

    Rows are processed in their existing order, which is treated as
    chronological. For every row, homeColumnWrite receives the number of
    earlier rows in which the home team appeared (on either side) and
    awayColumnWrite receives the same count for the away team.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'home_team_id' and 'away_team_id'. Modified in place:
        the two write columns are appended on the right. Any index is
        accepted; rows are addressed by position.
    homeColumnWrite, awayColumnWrite : str
        Names of the new float columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        From DataFrame.insert, if a write column already exists in df.
    """
    homeIds = df['home_team_id'].to_numpy()
    awayIds = df['away_team_id'].to_numpy()

    numRows = df.shape[0]
    homeGames = np.zeros(numRows)
    awayGames = np.zeros(numRows)

    ##team id -> number of rows visited so far in which that team played
    gamesPlayed = {}
    for row in range(numRows):
        homeId = homeIds[row]
        awayId = awayIds[row]
        ##a team that has not appeared yet has played 0 games
        homeGames[row] = gamesPlayed.get(homeId, 0.0)
        awayGames[row] = gamesPlayed.get(awayId, 0.0)
        ##the current game only counts from each team's NEXT appearance on
        gamesPlayed[awayId] = awayGames[row] + 1
        gamesPlayed[homeId] = homeGames[row] + 1

    df.insert(df.shape[1], homeColumnWrite, homeGames)
    df.insert(df.shape[1], awayColumnWrite, awayGames)

            
##goal tallies and games-played counts are the two ingredients of the averages
addHistoricalColumnsTotals(
    df=test,
    homeColumnRead='home_goals',
    awayColumnRead='away_goals',
    homeColumnWrite='home_goals_so_far',
    awayColumnWrite='away_goals_so_far',
)
numGamesPlayed(df=test, homeColumnWrite='home_games', awayColumnWrite='away_games')
test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_powerPlayGoals,away_faceOffWinPercentage,away_giveaways,away_takeaways,type,venue_link,home_goals_so_far,away_goals_so_far,home_games,away_games
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,0,57.6,13,3,R,/api/v1/venues/null,0.0,0.0,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,1,47.4,11,4,R,/api/v1/venues/null,0.0,0.0,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,1,47.6,15,7,R,/api/v1/venues/null,0.0,0.0,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,0,55.9,2,5,R,/api/v1/venues/null,0.0,0.0,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,1,55.2,12,2,R,/api/v1/venues/null,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,0,61.5,4,3,P,/api/v1/venues/null,454.0,476.0,183.0,185.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,1,67.2,12,8,P,/api/v1/venues/null,478.0,455.0,186.0,184.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,0,35.2,5,13,P,/api/v1/venues/null,482.0,455.0,187.0,185.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,1,43.4,8,5,P,/api/v1/venues/null,458.0,483.0,186.0,188.0


In [22]:
##test.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\test.csv')

In [23]:
def calcAvg(df,totalColumnHome, totalColumnAway, numColumnHome, numColumnAway, homeColumnWrite, awayColumnWrite):
    """Append per-game averages computed as total / number of games.

    For every row, homeColumnWrite = totalColumnHome / numColumnHome and
    awayColumnWrite = totalColumnAway / numColumnAway. Where the game count
    is 0 the average is set to 0 instead of dividing.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four total / count columns. Modified in place: the
        two write columns are appended on the right. Any index is accepted.
    totalColumnHome, totalColumnAway : str
        Columns holding the running totals for the home / away team.
    numColumnHome, numColumnAway : str
        Columns holding the matching game counts.
    homeColumnWrite, awayColumnWrite : str
        Names of the new float columns.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        From DataFrame.insert, if a write column already exists in df.
    """
    def _ratioOrZero(totalColumn, numColumn):
        ##element-wise total / count, leaving 0 wherever the count is 0
        totals = df[totalColumn].to_numpy(dtype=float)
        counts = df[numColumn].to_numpy(dtype=float)
        averages = np.zeros(df.shape[0])
        ##`where` skips the division for zero counts, so those slots keep the initial 0
        np.divide(totals, counts, out=averages, where=counts != 0)
        return averages

    homeAverages = _ratioOrZero(totalColumnHome, numColumnHome)
    awayAverages = _ratioOrZero(totalColumnAway, numColumnAway)
    df.insert(df.shape[1], homeColumnWrite, homeAverages)
    df.insert(df.shape[1], awayColumnWrite, awayAverages)
##average goals per game = goals so far / games played so far
calcAvg(
    df=test,
    totalColumnHome='home_goals_so_far',
    totalColumnAway='away_goals_so_far',
    numColumnHome='home_games',
    numColumnAway='away_games',
    homeColumnWrite='home_avg_goals',
    awayColumnWrite='away_avg_goals',
)
test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_giveaways,away_takeaways,type,venue_link,home_goals_so_far,away_goals_so_far,home_games,away_games,home_avg_goals,away_avg_goals
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,13,3,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.000000,0.000000
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,11,4,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.000000,0.000000
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,15,7,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.000000,0.000000
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,2,5,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.000000,0.000000
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,12,2,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,4,3,P,/api/v1/venues/null,454.0,476.0,183.0,185.0,2.480874,2.572973
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,12,8,P,/api/v1/venues/null,478.0,455.0,186.0,184.0,2.569892,2.472826
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,5,13,P,/api/v1/venues/null,482.0,455.0,187.0,185.0,2.577540,2.459459
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,8,5,P,/api/v1/venues/null,458.0,483.0,186.0,188.0,2.462366,2.569149


In [24]:
##fresh, independent copy of the subset so the helpers add their columns to `test` only
test = data[0:2635].copy()
##the read columns are deliberately swapped: what the OPPONENT scored is what a team conceded
addHistoricalColumnsTotals(test,'away_goals', 'home_goals', 'home_goals_against_so_far', 'away_goals_against_so_far')
##likewise a loss is tallied from the opponent's win flag
##NOTE(review): assumes exactly one of home_won / away_won is set for every game -- confirm
addHistoricalColumnsTotals(test,'away_won', 'home_won', 'home_losses_so_far', 'away_losses_so_far')
##goals conceded in each side's previous game
addHistoricalColumnsPrev(test,'away_goals', 'home_goals', 'home_goals_against_last_game', 'away_goals_against_last_game')
test

Unnamed: 0,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,venue_time_zone_offset,...,away_giveaways,away_takeaways,type,venue_link,home_goals_against_so_far,away_goals_against_so_far,home_losses_so_far,away_losses_so_far,home_goals_against_last_game,away_goals_against_last_game
0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,-5,...,13,3,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.0,0.0
1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,-4,...,11,4,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.0,0.0
2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,-4,...,15,7,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.0,0.0
3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,-4,...,2,5,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.0,0.0
4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,-4,...,12,2,R,/api/v1/venues/null,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2630,2011030412,OT,20112012,6/3/2012,20120603T000000+0000,away win OT,left,Prudential Center,America/New_York,-4,...,4,3,P,/api/v1/venues/null,462.0,420.0,94.0,86.0,2.0,1.0
2631,2011030413,SO,20112012,6/5/2012,20120605T000000+0000,home win REG,right,Staples Center,America/Los_Angeles,-7,...,12,8,P,/api/v1/venues/null,421.0,464.0,86.0,95.0,1.0,2.0
2632,2011030414,REG,20112012,6/7/2012,20120607T000000+0000,away win REG,right,Staples Center,America/Los_Angeles,-7,...,5,13,P,/api/v1/venues/null,421.0,468.0,86.0,96.0,0.0,4.0
2633,2011030415,REG,20112012,6/10/2012,20120610T000000+0000,home win REG,left,Prudential Center,America/New_York,-4,...,8,5,P,/api/v1/venues/null,469.0,424.0,97.0,86.0,1.0,3.0


In [25]:
##test.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\test.csv')

In [31]:
def addItAllUp(df):
    """Run the full feature-building pipeline on one season's frame.

    Calls, in order, the notebook helpers ``addHistoricalColumnsTotals``,
    ``addHistoricalColumnsPrev``, ``numGamesPlayed`` and ``calcAvg`` (all
    defined in other cells) for each home/away statistic pair, then appends
    ``home_goal_differential`` and ``away_goal_differential`` as the last two
    columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the per-game ``home_*`` / ``away_*`` columns named
        below. It is modified in place by the two ``insert`` calls at the end;
        the helpers are presumably in-place as well (their return values are
        not used here).

    Returns
    -------
    None
    """
    def sides(stem):
        ##build the ('home_<stem>', 'away_<stem>') column-name pair
        return 'home_' + stem, 'away_' + stem

    ##stats whose source and derived columns follow the plain naming pattern
    common_stats = ['shots', 'hits', 'pim', 'powerPlayOpportunities',
                    'powerPlayGoals', 'giveaways', 'takeaways']

    ##add the historical total columns
    total_specs = [
        ('home_goals', 'away_goals', 'home_goals_so_far', 'away_goals_so_far'),
        ('away_goals', 'home_goals', 'home_goals_against_so_far', 'away_goals_against_so_far'),
        ('home_won', 'away_won', 'home_wins_so_far', 'away_wins_so_far'),
        ('away_won', 'home_won', 'home_losses_so_far', 'away_losses_so_far'),
    ]
    total_specs += [sides(stat) + sides(stat + '_so_far') for stat in common_stats]
    for spec in total_specs:
        addHistoricalColumnsTotals(df, *spec)

    ##add the previous game columns (face-off percentage only appears here)
    prev_specs = [
        ('home_goals', 'away_goals', 'home_goals_last_game', 'away_goals_last_game'),
        ('away_goals', 'home_goals', 'home_goals_against_last_game', 'away_goals_against_last_game'),
        ('home_won', 'away_won', 'home_won_previous_game', 'away_won_previous_game'),
        ('away_won', 'home_won', 'home_lost_previous_game', 'away_lost_previous_game'),
    ]
    prev_specs += [sides(stat) + sides(stat + '_last_game')
                   for stat in common_stats + ['faceOffWinPercentage']]
    for spec in prev_specs:
        addHistoricalColumnsPrev(df, *spec)

    ##add num games played columns
    numGamesPlayed(df, 'home_games', 'away_games')

    ##add the average columns: (running-total stem, output stem)
    avg_specs = [('goals_so_far', 'avg_goals'), ('wins_so_far', 'w_l_ratio')]
    avg_specs += [(stat + '_so_far', 'avg_' + stat) for stat in common_stats]
    for total_stem, avg_stem in avg_specs:
        calcAvg(df, *sides(total_stem), 'home_games', 'away_games', *sides(avg_stem))

    ##goal differential = goals scored so far - goals conceded so far, appended last
    for side in ('home', 'away'):
        scored = df[side + '_goals_so_far']
        conceded = df[side + '_goals_against_so_far']
        df.insert(len(df.columns), side + '_goal_differential', scored - conceded)
    


In [32]:
##Split the full game table into one frame per season using positional row
##offsets. Slice ends are EXCLUSIVE in pandas, so each slice must end exactly
##where the next one starts; the previous bounds (e.g. 0:1319 then 1320:...)
##silently dropped the boundary row between every pair of seasons.
##reset_index() (without drop) keeps the original row position in an 'index'
##column and gives every season frame its own 0-based index.
season2010_2011 = data.iloc[0:1320].reset_index()
season2011_2012 = data.iloc[1320:2636].reset_index()
season2012_2013 = data.iloc[2636:3442].reset_index()
season2013_2014 = data.iloc[3442:4765].reset_index()
season2014_2015 = data.iloc[4765:6084].reset_index()
season2015_2016 = data.iloc[6084:7405].reset_index()
season2016_2017 = data.iloc[7405:8722].reset_index()
season2017_2018 = data.iloc[8722:10077].reset_index()
season2018_2019 = data.iloc[10077:].reset_index()

season_frames = (season2010_2011, season2011_2012, season2012_2013,
                 season2013_2014, season2014_2015, season2015_2016,
                 season2016_2017, season2017_2018, season2018_2019)

##sanity check: the season frames together must cover every game exactly once
assert sum(len(season_df) for season_df in season_frames) == len(data), \
    "season slices do not cover every row of data"

##add the historical columns to each season separately so that running
##totals restart at the beginning of every season
for season_df in season_frames:
    addItAllUp(season_df)

In [34]:
##gather the per-season frames in chronological order
seasons = [
    season2010_2011,
    season2011_2012,
    season2012_2013,
    season2013_2014,
    season2014_2015,
    season2015_2016,
    season2016_2017,
    season2017_2018,
    season2018_2019,
]
##stack them row-wise; each season keeps its own index labels
results = pd.concat(objs=seasons)

In [35]:
##display the combined frame (the rendered preview is truncated to the first and last rows)
results

Unnamed: 0,index,game_id,settled_in,season,date_time,date_time_GMT,outcome,home_rink_side_start,venue,venue_time_zone_id,...,home_avg_powerPlayOpportunities,away_avg_powerPlayOpportunities,home_avg_powerPlayGoals,away_avg_powerPlayGoals,home_avg_giveaways,away_avg_giveaways,home_avg_takeaways,away_avg_takeaways,home_goal_differential,away_goal_differential
0,0,2010020003,REG,20102011,10/7/2010,20101007T160000+0000,away win REG,right,Hartwall Areena,America/Chicago,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
1,1,2010020002,REG,20102011,10/7/2010,20101007T230000+0000,away win REG,left,CONSOL Energy Center,America/New_York,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
2,2,2010020001,REG,20102011,10/7/2010,20101007T230000+0000,home win REG,right,Air Canada Centre,America/Toronto,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,3,2010020010,REG,20102011,10/8/2010,20101008T233000+0000,away win REG,left,Scotiabank Place,America/New_York,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
4,4,2010020006,REG,20102011,10/8/2010,20101008T190000+0000,away win REG,left,Ericsson Globe,America/New_York,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1352,11429,2018030413,REG,20182019,6/2/2019,20190602T000000+0000,away win REG,left,Enterprise Center,America/Chicago,...,2.592233,2.990099,0.485437,0.544554,8.922330,8.485149,7.737864,7.613861,32.0,69.0
1353,11430,2018030414,REG,20182019,6/4/2019,20190604T000000+0000,home win REG,left,Enterprise Center,America/Chicago,...,2.615385,3.000000,0.490385,0.578431,8.903846,8.441176,7.769231,7.647059,27.0,74.0
1354,11431,2018030415,REG,20182019,6/7/2019,20190607T000000+0000,away win REG,left,TD Garden,America/New_York,...,2.990291,2.619048,0.572816,0.485714,8.417476,8.904762,7.660194,7.838095,72.0,29.0
1355,11432,2018030416,REG,20182019,6/10/2019,20190610T000000+0000,away win REG,left,Enterprise Center,America/Chicago,...,2.603774,2.990385,0.481132,0.567308,8.886792,8.375000,7.839623,7.692308,30.0,71.0


In [36]:
##results.to_csv(r'C:\Users\lafla\Desktop\Cs-254\Improved DataSet\complete_dataset_with_historical_data.csv')