In [1]:
# Move the working directory up one level; presumably this is so the relative
# 'data/...' paths used by later cells resolve from the project root.
# NOTE(review): a bare `cd` only works through IPython automagic, not as plain Python.
cd ../

c:\Users\Nicole\Desktop\MDS\capstone\canucks_mds_capstone


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import pandas as pd
import requests

In [3]:
# TEAM STANDINGS CURRENT
# Fetch the live league standings and keep the headline numbers for each team.
api_endpoint = 'https://api-web.nhle.com/v1/standings/now'

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON/KeyError further down.
response = requests.get(api_endpoint, timeout=10)
response.raise_for_status()
data = response.json()

# Extract team standings data: one flat record per team.
team_standings = [
    {
        'team_name': team_data['teamName']['default'],
        'wins': team_data['wins'],
        'losses': team_data['losses'],
        'ot_losses': team_data['otLosses'],
        'points': team_data['points'],
        'goal_differential': team_data['goalDifferential'],
        'goals_for': team_data['goalFor'],
        'goals_against': team_data['goalAgainst'],
    }
    for team_data in data['standings']
]
team_standings_df = pd.DataFrame(team_standings)
team_standings_df

Unnamed: 0,team_name,wins,losses,ot_losses,points,goal_differential,goals_for,goals_against
0,New York Rangers,55,23,4,114,53,282,229
1,Dallas Stars,52,21,9,113,64,298,234
2,Carolina Hurricanes,52,23,7,111,63,279,216
3,Winnipeg Jets,52,24,6,110,60,259,199
4,Florida Panthers,52,24,6,110,68,268,200
5,Vancouver Canucks,50,23,9,109,56,279,223
6,Boston Bruins,47,20,15,109,43,267,224
7,Colorado Avalanche,50,25,7,107,50,304,254
8,Edmonton Oilers,49,27,6,104,57,294,237
9,Toronto Maple Leafs,46,26,10,102,40,303,263


In [4]:
# AT A PAST DATE
# Same standings endpoint, but pinned to a single historical date.

# get data
api_endpoint = 'https://api-web.nhle.com/v1/standings/2022-10-13'

# Bound the wait and fail loudly on an HTTP error rather than trying to parse
# an error body as standings data.
response = requests.get(api_endpoint, timeout=10)
response.raise_for_status()
data = response.json()

# convert relevant data to pd df
team_standings = [
    {
        'team_name': team_data['teamName']['default'],
        'points': team_data['points'],
    }
    for team_data in data['standings']
]

team_standings_df = pd.DataFrame(team_standings)

# Rank every team by points (1 = most points; tied teams share the best rank).
# The column is named 'van_rank' because the Canucks' row is the one of interest.
team_standings_df['van_rank'] = team_standings_df['points'].rank(method='min', ascending=False).astype(int)



In [5]:
def vancouver_ranking(dates, session=None, timeout=10):
    """Fetch the Canucks' league rank (by points) from the NHL standings API.

    One request is made per date. Dates that cannot be fetched, that return no
    standings, or whose standings contain no 'Vancouver Canucks' entry are
    skipped, reported on stdout and listed in the final summary.

    Parameters
    ----------
    dates : iterable of str
        Dates formatted 'YYYY-MM-DD', substituted into the standings URL.
    session : object with a ``get(url, timeout=...)`` method, optional
        HTTP client to use, e.g. a ``requests.Session`` for connection reuse.
        Defaults to the ``requests`` module itself.
    timeout : float, optional
        Seconds to wait for each response before giving up on that date.

    Returns
    -------
    pandas.DataFrame
        Columns ``rank_date`` (the date string as passed in) and ``van_rank``
        (int, 1 = most points; ties share the best rank). One row per
        successful date, in input order.
    """
    # Built once rather than per request: hints for the common failure codes.
    status_messages = {
        400: "Bad Request - The server could not understand the request due to invalid syntax.",
        401: "Unauthorized - Authentication is required and has failed.",
        403: "Forbidden - The client does not have access rights to the content.",
        404: "Not Found - The server cannot find the requested resource.",
        429: "Rate Limit Exceeded - Too many requests in the time duration.",
        500: "Internal Server Error - The server encountered an unexpected condition.",
        503: "Service Unavailable - The server is not ready to handle the request.",
    }

    http = requests if session is None else session
    dates = list(dates)  # allows any iterable and makes len() safe for the summary
    rank_records = []
    failed_dates = []

    for date in dates:
        api_endpoint = f'https://api-web.nhle.com/v1/standings/{date}'

        try:
            response = http.get(api_endpoint, timeout=timeout)
        except requests.RequestException as err:
            # Network-level failure (timeout, DNS, reset): skip this date
            # instead of losing every result collected so far.
            print(f'Request was unsuccessful - unable to fetch data for: {date}')
            print(f'Error: {err}')
            failed_dates.append(date)
            continue

        # Check if request is successful
        if response.status_code != 200:
            print(f'Request was unsuccessful - unable to fetch data for: {date}')
            print(f'Request error code: {response.status_code}')
            if response.status_code in status_messages:
                print(f'Error: {status_messages[response.status_code]}')
            # Every failed request counts as a failure, including status codes
            # that have no entry in status_messages.
            failed_dates.append(date)
            continue

        data = response.json()

        # Check if data exists (missing key and empty list are both "no data")
        standings = data.get('standings')
        if not standings:
            print(f'No data available for: {date}')
            failed_dates.append(date)
            continue

        # convert required data to dataframe
        team_standings_df = pd.DataFrame({
            'team_name': [team_data['teamName']['default'] for team_data in standings],
            'points': [team_data['points'] for team_data in standings],
        })

        # determine Canucks ranking: rank all teams by points, then pick Vancouver
        league_rank = team_standings_df['points'].rank(method='min', ascending=False).astype(int)
        canucks_rank = league_rank[team_standings_df['team_name'] == 'Vancouver Canucks']

        if canucks_rank.empty:
            # Without this check the date would be reported as a success
            # while contributing no row to the result.
            print(f'No Vancouver Canucks entry in standings for: {date}')
            failed_dates.append(date)
            continue

        # keep date and Canucks ranking
        rank_records.append({'rank_date': date, 'van_rank': int(canucks_rank.iloc[0])})

        print(f'Data Success for: {date}')

    # Summary
    if failed_dates:
        print(f'Request Complete. \nUnable to fetch data for {len(failed_dates)} out of {len(dates)} dates.\nFailed dates: \n{failed_dates}')
    else:
        print(f'Request Complete.')

    # Build the frame once from the collected records instead of concatenating
    # inside the loop; `columns` keeps the schema stable when nothing succeeded.
    return pd.DataFrame(rank_records, columns=['rank_date', 'van_rank'])
        

In [6]:
# Build the list of dates to query: the dataset's start date plus every Monday.
df = pd.read_parquet('data/output/processed.parquet').sort_values(by='calculate_date')

# one sample per week: keep only the distinct calculate_date values on a Monday
calc_dates = df['calculate_date'].unique()

# seed the list with the start date of processed.parquet, then append Mondays
monday_dates = ['2022-01-20']
for calc_day in calc_dates:
    if calc_day.weekday() == 0:
        monday_dates.append(calc_day.strftime('%Y-%m-%d'))

In [7]:
# obtain ranking df
# Calls the standings API once per date in monday_dates; the function prints a
# status line per date and returns one (rank_date, van_rank) row per success.
van_rank_df = vancouver_ranking(monday_dates)

Data Success for: 2022-01-20
Data Success for: 2022-01-24
Data Success for: 2022-01-31
Data Success for: 2022-02-07
Data Success for: 2022-02-14
Data Success for: 2022-02-21
Data Success for: 2022-02-28
Data Success for: 2022-03-07
Data Success for: 2022-03-14
Data Success for: 2022-03-21
Data Success for: 2022-03-28
Data Success for: 2022-04-04
Data Success for: 2022-04-11
Data Success for: 2022-04-18
Data Success for: 2022-04-25
No data available for: 2022-09-26
No data available for: 2022-10-03
Data Success for: 2022-10-10
Data Success for: 2022-10-17
Data Success for: 2022-10-24
Data Success for: 2022-10-31
Data Success for: 2022-11-07
Data Success for: 2022-11-14
Data Success for: 2022-11-21
Data Success for: 2022-11-28
Data Success for: 2022-12-05
Data Success for: 2022-12-12
Data Success for: 2022-12-19
Data Success for: 2022-12-26
Data Success for: 2023-01-02
Data Success for: 2023-01-09
Data Success for: 2023-01-16
Data Success for: 2023-01-23
Data Success for: 2023-01-30
Data

In [27]:
# function to add in ranking 
def add_vancouver_ranking(df, rank_path='data/vancouver_ranking.csv'):
    """Return a copy of ``df`` sorted by date with a ``van_rank`` column added.

    Rankings are read from a CSV with ``rank_date`` and ``van_rank`` columns.
    Each row receives the rank recorded on its ``calculate_date``; rows whose
    date has no exact match carry forward the most recent earlier rank.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``calculate_date`` column. Not modified.
    rank_path : str or path-like, optional
        Location of the ranking CSV. Defaults to the project's saved file.

    Returns
    -------
    pandas.DataFrame
        ``df`` sorted by ``calculate_date`` (index reset by the merge) plus an
        integer ``van_rank`` column.

    Raises
    ------
    ValueError
        If some rows are dated before the first available ranking, so there
        is no earlier rank to carry forward.
    """
    # Drop a van_rank left by a previous call so re-running is safe; otherwise
    # the merge would emit van_rank_x / van_rank_y and the lookups below fail.
    df = df.drop(columns='van_rank', errors='ignore').sort_values(by='calculate_date')

    # Read only the two needed columns so extras (e.g. a saved index column)
    # never leak into the result.
    df_rank = pd.read_csv(rank_path, usecols=['rank_date', 'van_rank'])
    df_rank['rank_date'] = pd.to_datetime(df_rank['rank_date'])
    # A repeated rank_date would duplicate every matching data row in the merge.
    df_rank = df_rank.drop_duplicates(subset='rank_date', keep='last')

    # merge dataframes, fill NAs and clean
    df = pd.merge(df, df_rank, left_on='calculate_date', right_on='rank_date', how='left')
    # Rows are in date order, so forward-fill carries the latest known rank.
    df['van_rank'] = df['van_rank'].ffill()

    unranked = df['van_rank'].isna()
    if unranked.any():
        # Fail with an actionable message instead of the opaque integer-cast
        # error that astype(int) raises on NaN.
        earliest = df.loc[unranked, 'calculate_date'].min()
        raise ValueError(
            f'No ranking available on or before calculate_date {earliest}; '
            f'{int(unranked.sum())} row(s) cannot be assigned a van_rank.'
        )

    df['van_rank'] = df['van_rank'].astype(int)
    return df.drop(columns='rank_date')

In [22]:
# Load the saved ranking table from CSV (the same file add_vancouver_ranking reads).
df_rank = pd.read_csv('data/vancouver_ranking.csv')

In [16]:
# Load the processed dataset from parquet; this rebinds `df` to a fresh, unsorted copy.
df = pd.read_parquet('data/output/processed.parquet')

In [28]:
# Attach the Canucks' rank to every row; the result is sorted by calculate_date.
df_new = add_vancouver_ranking(df)

In [39]:
# Preview the first rows to confirm the van_rank column was added.
df_new.head()

Unnamed: 0,identifier,event_id,event_name,venue,calculate_date,price_level_name,cap,revenue_to_date,s/t-rate,opens,...,dl_iscurrent,dl_isdeleted,dl_recordstartdateutc,dl_recordenddateutc,dl_load_id,event_date,price_code,opponent,target_host_sold-today,van_rank
0,AllData_11005B3CED352DAF_Rogers Arena_Canucks_...,11005B3CED352DAF,Canucks vs. Ducks,Rogers Arena,2022-01-20,18: 18/H,85,1010.0,1.0,3,...,True,False,2024-04-11T09:59:14.544526-07:00,,0,2022-02-19,H,Ducks,0.0,21
1,AllData_11005B3CED352DAF_Rogers Arena_Canucks_...,11005B3CED352DAF,Canucks vs. Ducks,Rogers Arena,2022-01-20,4: 4/3,64,249.0,1.0,0,...,True,False,2024-04-11T09:59:14.544526-07:00,,0,2022-02-19,3,Ducks,0.0,21
2,AllData_11005B3CED352DAF_Rogers Arena_Canucks_...,11005B3CED352DAF,Canucks vs. Ducks,Rogers Arena,2022-01-20,28: 28/R,757,2151.0,1.0,10,...,True,False,2024-04-11T09:59:14.544526-07:00,,0,2022-02-19,R,Ducks,7.0,21
3,AllData_11005B3CED352DAF_Rogers Arena_Canucks_...,11005B3CED352DAF,Canucks vs. Ducks,Rogers Arena,2022-01-20,13: 13/C,275,2376.0,1.0,7,...,True,False,2024-04-11T09:59:14.544526-07:00,,0,2022-02-19,C,Ducks,2.0,21
4,AllData_11005B3CED352DAF_Rogers Arena_Canucks_...,11005B3CED352DAF,Canucks vs. Ducks,Rogers Arena,2022-01-20,27: 27/Q,876,8429.0,1.0,78,...,True,False,2024-04-11T09:59:14.544526-07:00,,0,2022-02-19,Q,Ducks,44.0,21
