### Extract Games

#### *Use nhl API https://github.com/dword4/nhlapi to extract games & results*
#### *Save results in Google Big Query*

In [28]:
import getpass
import pandas as pd
import requests  
from pandas.io.json import json_normalize
import datetime
import pandas_gbq
from google.oauth2 import service_account

pd.set_option('display.max_columns', None)

In [29]:
def get_keys():
    '''
    This function will return the path to Google Keys for each user.
    '''
    if getpass.getuser()=='antoinetl':
        credential_keys = '/Users/antoinetl/Documents/code/Google Keys/My First Project-4938b2ab0dc6.json'
    elif getpass.getuser()=='philippejacques':
        credential_keys = '/Users/philippejacques/Desktop/Projet/HockeyPrediction/Google Keys/rational-world-288611-dbe647d5aaf9.json'
    return credential_keys

In [30]:
# Connexion GBQ
#credential_keys = get_keys()
credential_keys = '/Users/antoinetl/Documents/nhl_prediction/nhl_prediction/account_keys/hockey-prediction-qc-9c75aa8a78f9.json'

# Connexion GBQ
credentials = service_account.Credentials.from_service_account_file(
    credential_keys,
    )
pandas_gbq.context.credentials = credentials
# https://pandas-gbq.readthedocs.io/en/latest/intro.html

In [31]:
def extract_gamestats(gameID):
    
    '''
    Takes the gameID as an input and returns a dataframe that contains
    the teamSkaterStats info for the home/away teams for that specific game.
    '''
    
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/game/{}/boxscore'.format(gameID))
    data = r.json()
    df = pd.json_normalize(data = data['teams'])
    df = df.filter(regex='teamSkaterStats')
    df['gameID'] = gameID
    
    return df

In [37]:
# select date for extraction (july will be a complete season)
start_date = datetime.date(2017, 7, 1)
end_date = datetime.date(2018, 7, 1)

delta = datetime.timedelta(days=1)

data_list = []

while start_date <= end_date:
    
    r = requests.get(url='https://statsapi.web.nhl.com/api/v1/schedule?date=' + start_date.strftime("%Y-%m-%d"))
    data = r.json()
    print(start_date)
    df = pd.json_normalize(data = data['dates'], record_path='games', meta=['date'])
    
    # On ajoute des statistiques au niveau des matchs
    # Le if sert a skipper les journees sans matchs
    if df.empty==False :
        pd_list = []
        for games in df['gamePk']:
            tmp = extract_gamestats(gameID=games)
            pd_list.append(tmp)

        pd_tmp = pd.concat(pd_list, sort='False', ignore_index=True)
        df = df.merge(pd_tmp, left_on='gamePk', right_on='gameID', how='left')

        data_list.append(df)
        
        start_date += delta
    else:
        start_date += delta
    
    if (start_date.day == 1):
        print(start_date)

2017-07-01
2017-07-02
2017-07-03
2017-07-04
2017-07-05
2017-07-06
2017-07-07
2017-07-08
2017-07-09
2017-07-10
2017-07-11
2017-07-12
2017-07-13
2017-07-14
2017-07-15
2017-07-16
2017-07-17
2017-07-18
2017-07-19
2017-07-20
2017-07-21
2017-07-22
2017-07-23
2017-07-24
2017-07-25
2017-07-26
2017-07-27
2017-07-28
2017-07-29
2017-07-30
2017-07-31
2017-08-01
2017-08-01
2017-08-02
2017-08-03
2017-08-04
2017-08-05
2017-08-06
2017-08-07
2017-08-08
2017-08-09
2017-08-10
2017-08-11
2017-08-12
2017-08-13
2017-08-14
2017-08-15
2017-08-16
2017-08-17
2017-08-18
2017-08-19
2017-08-20
2017-08-21
2017-08-22
2017-08-23
2017-08-24
2017-08-25
2017-08-26
2017-08-27
2017-08-28
2017-08-29
2017-08-30
2017-08-31
2017-09-01
2017-09-01
2017-09-02
2017-09-03
2017-09-04
2017-09-05
2017-09-06
2017-09-07
2017-09-08
2017-09-09
2017-09-10
2017-09-11
2017-09-12
2017-09-13
2017-09-14
2017-09-15
2017-09-16
2017-09-17
2017-09-18
2017-09-19
2017-09-20
2017-09-21
2017-09-22
2017-09-23
2017-09-24
2017-09-25
2017-09-26
2017-09-27

In [38]:
# Pour stack la version
df = pd.concat(data_list, sort='False', ignore_index=True)

In [39]:
# replace columns in dataframe because GCP does not support '.' in column indices
df.columns = df.columns.str.replace(r".", "_")

In [40]:
df.head()

Unnamed: 0,away_teamStats_teamSkaterStats_blocked,away_teamStats_teamSkaterStats_faceOffWinPercentage,away_teamStats_teamSkaterStats_giveaways,away_teamStats_teamSkaterStats_goals,away_teamStats_teamSkaterStats_hits,away_teamStats_teamSkaterStats_pim,away_teamStats_teamSkaterStats_powerPlayGoals,away_teamStats_teamSkaterStats_powerPlayOpportunities,away_teamStats_teamSkaterStats_powerPlayPercentage,away_teamStats_teamSkaterStats_shots,away_teamStats_teamSkaterStats_takeaways,content_link,date,gameDate,gameID,gamePk,gameType,home_teamStats_teamSkaterStats_blocked,home_teamStats_teamSkaterStats_faceOffWinPercentage,home_teamStats_teamSkaterStats_giveaways,home_teamStats_teamSkaterStats_goals,home_teamStats_teamSkaterStats_hits,home_teamStats_teamSkaterStats_pim,home_teamStats_teamSkaterStats_powerPlayGoals,home_teamStats_teamSkaterStats_powerPlayOpportunities,home_teamStats_teamSkaterStats_powerPlayPercentage,home_teamStats_teamSkaterStats_shots,home_teamStats_teamSkaterStats_takeaways,link,season,status_abstractGameState,status_codedGameState,status_detailedState,status_startTimeTBD,status_statusCode,teams_away_leagueRecord_losses,teams_away_leagueRecord_ot,teams_away_leagueRecord_type,teams_away_leagueRecord_wins,teams_away_score,teams_away_team_id,teams_away_team_link,teams_away_team_name,teams_home_leagueRecord_losses,teams_home_leagueRecord_ot,teams_home_leagueRecord_type,teams_home_leagueRecord_wins,teams_home_score,teams_home_team_id,teams_home_team_link,teams_home_team_name,venue_id,venue_link,venue_name
0,10,52.5,6,4,29,19,1.0,7.0,14.3,20,3,/api/v1/game/2017010001/content,2017-09-16,2017-09-16T21:00:00Z,2017010001,2017010001,PR,10,47.5,7,3,25,19,1.0,7.0,14.3,32,4,/api/v1/game/2017010001/feed/live,20172018,Final,6,Final,False,6,0,0.0,league,1,4,23,/api/v1/teams/23,Vancouver Canucks,0,1.0,league,0,3,26,/api/v1/teams/26,Los Angeles Kings,5081.0,/api/v1/venues/5081,STAPLES Center
1,0,0.0,0,2,0,0,0.0,0.0,0.0,0,0,/api/v1/game/2017010003/content,2017-09-17,2017-09-17T17:00:00Z,2017010003,2017010003,PR,0,0.0,0,3,0,0,0.0,0.0,0.0,0,0,/api/v1/game/2017010003/feed/live,20172018,Final,6,Final,False,6,0,1.0,league,0,2,4,/api/v1/teams/4,Philadelphia Flyers,0,0.0,league,1,3,2,/api/v1/teams/2,New York Islanders,,/api/v1/venues/null,NYCB Live/Nassau Coliseum
2,5,47.8,7,9,22,23,3.0,9.0,33.3,38,3,/api/v1/game/2017010002/content,2017-09-17,2017-09-17T21:00:00Z,2017010002,2017010002,PR,7,52.2,13,4,20,24,2.0,8.0,25.0,33,5,/api/v1/game/2017010002/feed/live,20172018,Final,6,Final,False,6,0,0.0,league,1,9,54,/api/v1/teams/54,Vegas Golden Knights,1,0.0,league,1,4,23,/api/v1/teams/23,Vancouver Canucks,5073.0,/api/v1/venues/5073,Rogers Arena
3,16,51.9,7,3,11,14,0.0,3.0,0.0,28,3,/api/v1/game/2017010005/content,2017-09-18,2017-09-18T23:00:00Z,2017010005,2017010005,PR,12,48.1,12,2,14,6,0.0,7.0,0.0,35,3,/api/v1/game/2017010005/feed/live,20172018,Final,6,Final,False,6,0,0.0,league,1,3,12,/api/v1/teams/12,Carolina Hurricanes,0,1.0,league,0,2,7,/api/v1/teams/7,Buffalo Sabres,5039.0,/api/v1/venues/5039,KeyBank Center
4,0,0.0,0,2,0,0,0.0,0.0,0.0,0,0,/api/v1/game/2017010009/content,2017-09-18,2017-09-18T23:00:00Z,2017010009,2017010009,PR,0,0.0,0,3,0,0,0.0,0.0,0.0,0,0,/api/v1/game/2017010009/feed/live,20172018,Final,6,Final,False,6,1,0.0,league,0,2,8,/api/v1/teams/8,Montréal Canadiens,0,0.0,league,1,3,6,/api/v1/teams/6,Boston Bruins,,/api/v1/venues/null,Centre Videotron


In [41]:
def implicit():
    from google.cloud import storage

    # If you don't specify credentials when constructing the client, the
    # client library will look for credentials in the environment.
    storage_client = storage.Client()

    # Make an authenticated API request
    buckets = list(storage_client.list_buckets())
    print(buckets)

In [42]:
# TODO: Set project_id to your Google Cloud Platform project ID.
project_id = "hockey-prediction-qc"

# TODO: Set table_id to the full destination table ID (including the dataset ID).
table_id = 'hockey_prediction_qc.historical_games_detailed'

pandas_gbq.to_gbq(df, table_id, project_id=project_id, if_exists='replace')

1it [00:04,  4.48s/it]
