In [3]:
"""
Extract Games

Use nhl API https://github.com/dword4/nhlapi to extract games & results

Save results in Google Big Query
"""

credential_keys = '/Users/antoinetl/Documents/code/Google Keys/My First Project-4938b2ab0dc6.json'

In [4]:
import pandas as pd
import requests  
from pandas.io.json import json_normalize
import datetime

pd.set_option('display.max_columns', None)

# Connexion GBQ
import pandas_gbq
from google.oauth2 import service_account


credentials = service_account.Credentials.from_service_account_file(
    credential_keys,
    )
pandas_gbq.context.credentials = credentials
# https://pandas-gbq.readthedocs.io/en/latest/intro.html

In [5]:
def extract_gamestats(gameID):
    
    '''
    Takes the gameID as an input and returns a dataframe that contains
    the teamSkaterStats info for the home/away teams for that specific game.
    '''
    
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(gameID))
    data = r.json()
    df = pd.json_normalize(data = data['teams'])
    df = df.filter(regex='teamSkaterStats')
    df['gameID'] = gameID
    
    return df

In [6]:
# select date for extraction (july will be a complete season)
start_date = datetime.date(2008, 7, 1)
end_date = datetime.date(2018, 7, 1)

delta = datetime.timedelta(days=1)

data_list = []

while start_date <= end_date:
    
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/schedule?date=' + start_date.strftime("%Y-%m-%d"))
    data = r.json()
    
    df = pd.json_normalize(data = data['dates'], record_path='games', meta=['date'])
    
    # On ajoute des statistiques au niveau des matchs
    # Le if sert a skipper les journees sans matchs
    if df.empty==False :
        pd_list = []
        for games in df['gamePk']:
            tmp = extract_gamestats(gameID=games)
            pd_list.append(tmp)

        pd_tmp = pd.concat(pd_list, sort='False', ignore_index=True)
        df = df.merge(pd_tmp, left_on='gamePk', right_on='gameID', how='left')

        data_list.append(df)
        
        start_date += delta
    else:
        start_date += delta
    
    if (start_date.day == 1):
        print(start_date)

2008-08-01
2008-09-01
2008-10-01
2008-11-01
2008-12-01
2009-01-01
2009-02-01
2009-03-01
2009-04-01
2009-05-01
2009-06-01
2009-07-01
2009-08-01
2009-09-01
2009-10-01
2009-11-01
2009-12-01
2010-01-01
2010-02-01
2010-03-01
2010-04-01
2010-05-01
2010-06-01
2010-07-01
2010-08-01
2010-09-01
2010-10-01
2010-11-01
2010-12-01
2011-01-01
2011-02-01
2011-03-01
2011-04-01
2011-05-01
2011-06-01
2011-07-01
2011-08-01
2011-09-01
2011-10-01
2011-11-01
2011-12-01
2012-01-01
2012-02-01
2012-03-01
2012-04-01
2012-05-01
2012-06-01
2012-07-01
2012-08-01
2012-09-01
2012-10-01
2012-11-01
2012-12-01
2013-01-01
2013-02-01
2013-03-01
2013-04-01
2013-05-01
2013-06-01
2013-07-01
2013-08-01
2013-09-01
2013-10-01
2013-11-01
2013-12-01
2014-01-01
2014-02-01
2014-03-01
2014-04-01
2014-05-01
2014-06-01
2014-07-01
2014-08-01
2014-09-01
2014-10-01
2014-11-01
2014-12-01
2015-01-01
2015-02-01
2015-03-01
2015-04-01
2015-05-01
2015-06-01
2015-07-01
2015-08-01
2015-09-01
2015-10-01
2015-11-01
2015-12-01
2016-01-01
2016-02-01

KeyError: 'dates'

In [8]:
# Pour stack la version
df = pd.concat(data_list, sort='False', ignore_index=True)

In [10]:
# replace columns in dataframe because GCP does not support '.' in column indices
df.columns = df.columns.str.replace(r".", "_")

In [11]:
# TODO: Set project_id to your Google Cloud Platform project ID.
project_id = "rational-world-288611"

# TODO: Set table_id to the full destination table ID (including the dataset ID).
table_id = 'My_dataset.schedule_and_results'

pandas_gbq.to_gbq(df, table_id, project_id=project_id, if_exists='replace')

1it [00:07,  7.92s/it]


Unnamed: 0,away.teamStats.teamSkaterStats.blocked,away.teamStats.teamSkaterStats.giveaways,away.teamStats.teamSkaterStats.goals,away.teamStats.teamSkaterStats.hits,away.teamStats.teamSkaterStats.pim,away.teamStats.teamSkaterStats.powerPlayGoals,away.teamStats.teamSkaterStats.powerPlayOpportunities,away.teamStats.teamSkaterStats.shots,away.teamStats.teamSkaterStats.takeaways,gameID,gamePk,home.teamStats.teamSkaterStats.blocked,home.teamStats.teamSkaterStats.giveaways,home.teamStats.teamSkaterStats.goals,home.teamStats.teamSkaterStats.hits,home.teamStats.teamSkaterStats.pim,home.teamStats.teamSkaterStats.powerPlayGoals,home.teamStats.teamSkaterStats.powerPlayOpportunities,home.teamStats.teamSkaterStats.shots,home.teamStats.teamSkaterStats.takeaways,teams.away.leagueRecord.losses,teams.away.leagueRecord.ot,teams.away.leagueRecord.wins,teams.away.score,teams.away.team.id,teams.home.leagueRecord.losses,teams.home.leagueRecord.ot,teams.home.leagueRecord.wins,teams.home.score,teams.home.team.id,venue.id
count,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10639.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10796.0,10639.0,10796.0,10796.0,10051.0,10796.0,10796.0,10796.0,10796.0,10051.0,10796.0,10796.0,10796.0,6051.0
mean,13.861801,6.486847,2.517877,21.719248,11.741849,0.562616,3.207021,28.64038,5.546684,2011490000.0,2011490000.0,13.252408,9.148944,2.791775,23.68442,11.092627,0.659596,3.498055,30.483504,7.455539,13.444794,4.447518,17.669044,2.636995,16.493423,13.410152,4.443836,17.694146,2.913116,29.61106,5055.502727
std,5.720702,4.316638,1.604924,9.373166,9.109389,0.750771,1.715318,7.339369,3.413911,2353985.0,2353985.0,5.627442,5.131657,1.694224,9.56981,8.870962,0.81111,1.813514,7.653873,4.209494,10.35649,3.573661,13.283702,1.590627,10.648602,10.361273,3.582865,13.262589,1.661054,273.046205,141.431594
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2008010000.0,2008010000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0
25%,10.0,3.0,1.0,16.0,6.0,0.0,2.0,24.0,3.0,2009021000.0,2009021000.0,10.0,6.0,2.0,18.0,6.0,0.0,2.0,26.0,5.0,4.0,1.0,6.0,1.0,8.0,4.0,1.0,6.0,2.0,8.0,5043.0
50%,14.0,6.0,2.0,21.0,10.0,0.0,3.0,28.0,5.0,2011021000.0,2011021000.0,13.0,9.0,3.0,23.0,9.0,0.0,3.0,30.0,7.0,12.0,4.0,16.0,3.0,16.0,12.0,4.0,16.0,3.0,16.0,5064.0
75%,17.0,9.0,4.0,27.0,15.0,1.0,4.0,33.0,8.0,2014020000.0,2014020000.0,17.0,12.0,4.0,29.0,14.0,1.0,5.0,35.0,10.0,21.0,7.0,28.0,4.0,24.0,21.0,7.0,28.0,4.0,24.0,5081.0
max,46.0,31.0,17.0,69.0,163.0,5.0,11.0,59.0,27.0,2015041000.0,2015041000.0,40.0,36.0,12.0,71.0,183.0,6.0,12.0,63.0,31.0,51.0,18.0,56.0,17.0,98.0,51.0,18.0,56.0,12.0,7256.0,5260.0
