In [None]:
# Standard library
import json  # decode JSON payloads
import os  # read configuration from environment variables
import time  # sleep between retries when the API rate-limits a request
from urllib.parse import quote  # escape credentials embedded in the database URL

# Third-party
import pandas as pd  # data manipulation and analysis
import requests  # make HTTP requests, i.e. fetch a URL similar to a browser request
from dotenv import load_dotenv, dotenv_values  # load settings from a local .env file
from sqlalchemy import create_engine, text  # SQL database access

In [None]:
# Load variables from a local .env file into the process environment; the
# os.getenv(...) calls in later cells (API key, database settings) read them.
load_dotenv()

# Extract Team Data

In [None]:
# Define the URL for the API endpoint.
team_url = 'https://api.sportradar.com/nhl/production/v7/en/league/teams.json'
# Retrieve the API key from the environment variables
api_key = os.getenv('API_KEY')
if not api_key:
    # Fail here with a clear message instead of sending an unauthenticated request.
    raise RuntimeError("API_KEY is not set; add it to the environment or the .env file")
# Create a dictionary of parameters to be sent with the API request.
team_params = {'api_key': api_key}
# Send a GET request to the API endpoint. Without a timeout, requests waits
# indefinitely if the server never answers.
team_api_request = requests.get(team_url, params=team_params, timeout=30)
# Stop on 4xx/5xx responses so later cells do not try to parse an error payload.
team_api_request.raise_for_status()

In [None]:
team_api_request

In [None]:
# Show the endpoint that was requested, without its query string: the full URL
# carries the api_key parameter and would be saved in the cell output.
team_api_request.url.split('?', 1)[0]

In [None]:
# converts the JSON response text into a Python dictionary.
json.loads(team_api_request.text)

In [None]:
# Check data type
type(json.loads(team_api_request.text))

In [None]:
team_api_response = json.loads(team_api_request.text)

In [None]:
# only get teams information
team_api_response['teams']

In [None]:
teams = team_api_response['teams']

In [None]:
type(teams)

In [None]:
# Columns of the teams table mapped to the key each value is read from in a
# team record ('area' is read from the record's 'market' key).
team_columns = {
    'id': 'id',
    'name': 'name',
    'area': 'market',
    'alias': 'alias',
}
# Set a dictionary to store data
teams_data = {column: [] for column in team_columns}
# get information from the loop
for team in teams:
    for column, source_key in team_columns.items():
        # 'market' is optional and recorded as None when absent; every other
        # key is required, so a missing one raises KeyError.
        value = team.get(source_key) if column == 'area' else team[source_key]
        teams_data[column].append(value)
        # print(None) writes the text 'None', so no special case is needed.
        print(value)

    print('-'*75)

In [None]:
teams_data

In [None]:
df =pd.DataFrame(teams_data)

In [None]:
df.head()

In [None]:
df.to_csv('nhl_teams.csv', index=False)

# Extract Player Data

In [None]:
# Team profile endpoint; the UUID in the path is the same team id that the
# LA Kings statistics request further down uses.
player_url = 'https://api.sportradar.com/nhl/production/v7/en/teams/44151f7a-0f24-11e2-8525-18a905767e44/profile.json'

# The API key travels in team_params; never paste a key into the notebook,
# because it ends up in version control together with the code.
# The timeout keeps the cell from hanging if the server never answers.
player_api_request = requests.get(player_url, params=team_params, timeout=30)
# Stop on 4xx/5xx responses so later cells do not try to parse an error payload.
player_api_request.raise_for_status()

In [None]:
player_api_request

In [None]:
json.loads(player_api_request.text)

In [None]:
# Replace the Response object with its decoded JSON body. The name is reused,
# so guard the conversion: running this cell a second time would otherwise
# fail because the already-decoded dict has no .text attribute.
if hasattr(player_api_request, 'text'):
    player_api_request = json.loads(player_api_request.text)

In [None]:
type(player_api_request)

In [None]:
player_api_request['players']

In [None]:
players = player_api_request['players']

In [None]:
# Output columns paired with the label printed next to each value. The column
# name is also the key read from every player record.
player_fields = [
    ('id', 'Id:'),
    ('status', 'status:'),
    ('full_name', 'full name:'),
    ('first_name', 'first name:'),
    ('last_name', 'last name:'),
    ('height', 'height:'),
    ('weight', 'weight:'),
    ('handedness', 'handedness:'),
    ('position', 'position:'),
    ('primary_position', 'primary position:'),
    ('jersey_number', 'jersey number:'),
    ('experience', 'experience:'),
    ('birth_place', 'birth place:'),
    ('birthdate', 'birthdate:'),
]
# Create a dictionary store data
players_data = {field: [] for field, _ in player_fields}
# Create a loop to get information and store in dictionary
for player in players:
    for field, label in player_fields:
        if field == 'id':
            # The identifier is required: a record without one raises KeyError.
            value = player[field]
        else:
            # Record None for an attribute the record does not carry instead of
            # aborting the whole extraction on the first incomplete player.
            # NOTE(review): assumes descriptive attributes may be absent for
            # some players - confirm against the API schema.
            value = player.get(field)
        if field == 'birthdate' and value is not None:
            # Parse once and keep only the calendar date.
            value = pd.to_datetime(value).date()
        players_data[field].append(value)
        print(label, value)

    print('-'*75)


In [None]:
players_data

In [None]:
players_df =pd.DataFrame(players_data)

In [None]:
players_df.head()

In [None]:
players_df.to_csv('nhl_players.csv', index=False)

# Extract Team Stats Data

In [None]:
# Upper bound on 429 retries per team, so a persistent rate limit cannot stall
# the notebook forever.
max_rate_limit_retries = 5
# Wait (seconds) used when the Retry-After header is absent or is not a whole
# number of seconds.
default_wait_seconds = 60

# Initialize an empty dictionary to store statistics data for each team.
team_stats_data  = {}
# Iterate over each team ID obtained from 'teams_data' dictionary.
for team_id in teams_data['id']:
    team_stats_url = f'https://api.sportradar.com/nhl/production/v7/en/seasons/2023/REG/teams/{team_id}/statistics.json'

    # One initial attempt plus up to max_rate_limit_retries retries after a 429.
    for attempt in range(max_rate_limit_retries + 1):
        try:
            # The timeout keeps one unresponsive request from blocking the loop.
            team_stats_api_request = requests.get(team_stats_url, params=team_params, timeout=30)
        except requests.RequestException as error:
            print(f"Failed to fetch data for team ID {team_id}: {error}")
            break  # Skip this team on connection errors / timeouts

        if team_stats_api_request.status_code == 429:
            if attempt == max_rate_limit_retries:
                print(f"Failed to fetch data for team ID {team_id}: {team_stats_api_request.status_code}")
                break  # Give up on this team once the retry budget is spent
            # Retry-After may hold a number of seconds or an HTTP date; int()
            # only understands the former, so fall back to the default wait.
            try:
                wait_time = int(team_stats_api_request.headers.get('Retry-After', default_wait_seconds))
            except ValueError:
                wait_time = default_wait_seconds
            wait_time = max(wait_time, 0)  # time.sleep rejects negative values
            print(f"Rate limit hit, retrying after {wait_time} seconds...")
            time.sleep(wait_time)  # Wait for the time specified before retrying
            continue  # Retry the request

        if team_stats_api_request.status_code == 200:
            team_stats_data[team_id] = json.loads(team_stats_api_request.text)
        else:
            print(f"Failed to fetch data for team ID {team_id}: {team_stats_api_request.status_code}")
        break  # Success or a non-retryable status: move on to the next team

In [None]:
# Map each team's name to the 'own_record' section of its statistics payload;
# a team without that section gets a placeholder string instead.
team_name_and_records = {}

for team_payload in team_stats_data.values():
    team_name_and_records[team_payload['name']] = team_payload.get('own_record', "No own record found")

# Echo every collected entry so the result can be checked by eye.
for label, own_record in team_name_and_records.items():
    print(f"Team Name: {label}")
    print("Own Record:", own_record)
    print("-" * 50)  # Print a separator line for clarity


In [None]:
# Make sure have all the team
number_of_teams = len(team_stats_data)
print(f"Total number of teams: {number_of_teams}")

In [None]:
team_name_and_records

In [None]:
simplified_data_list = []

# Counters copied unchanged from the 'total' section of a team's statistics;
# each one falls back to 0 when the key is absent.
total_counters = [
    'games_played', 'goals', 'assists', 'penalties', 'penalty_minutes',
    'shots', 'hits', 'blocked_shots', 'powerplays', 'points',
]

# Flatten every team's record into one dict per team.
for team, team_data in team_name_and_records.items():
    # Entries without a 'statistics' section are reported and skipped.
    if 'statistics' not in team_data:
        print(f"Missing 'statistics' for team: {team}")
        continue

    stats = team_data['statistics']
    goaltending_stats = team_data['goaltending']

    team_info = {'team_name': team}

    overall = stats['total']
    for counter in total_counters:
        team_info[counter] = overall.get(counter, 0)

    powerplay = stats['powerplay']
    team_info['powerplay_shots'] = powerplay.get('shots', 0)
    team_info['powerplay_goals'] = powerplay.get('goals', 0)
    team_info['powerplay_percentage'] = powerplay.get('percentage', 0.0)

    in_goal = goaltending_stats['total']
    team_info['shots_against'] = in_goal.get('shots_against', 0.0)
    team_info['goals_against'] = in_goal.get('goals_against', 0)
    team_info['saves'] = in_goal.get('saves', 0)
    team_info['saves_percentage'] = in_goal.get('saves_pct', 0.0)

    simplified_data_list.append(team_info)

In [None]:
simplified_data_list

In [None]:
type(simplified_data_list)

In [None]:
# dictionary to store data (key order here is the column order of the DataFrame
# built from it)
team_stats = {
    'name':[],
    'games_played':[],
    'goals': [],
    'assists':[],
    'points': [],
    'penalties':[],
    'penalty_minutes':[],
    'shots':[],
    'hits':[],
    'blocked_shots':[],
    'powerplays':[],
    'powerplay_shots':[],
    'powerplay_goals':[],
    'powerplay_percentage':[],
    'shots_against':[],
    'goals_against':[],
    'saves':[],
    'saves_percentage':[]
}
# Order in which the values of one team are copied and echoed.
echo_order = [
    'name', 'games_played', 'goals', 'assists', 'penalties', 'penalty_minutes',
    'shots', 'hits', 'blocked_shots', 'powerplays', 'points',
    'powerplay_shots', 'powerplay_goals', 'powerplay_percentage',
    'shots_against', 'goals_against', 'saves', 'saves_percentage',
]
# Columns whose value lives under a different key in the simplified records.
source_key_for = {'name': 'team_name'}

# loop for get the data
for stat in simplified_data_list:
    for column in echo_order:
        value = stat[source_key_for.get(column, column)]
        team_stats[column].append(value)
        # Echo exactly the value that was stored. The hand-written version
        # printed powerplay_percentage a second time in place of goals_against.
        print(value)
    

In [None]:
team_stats

In [None]:
df =pd.DataFrame(team_stats)

In [None]:
df.head()

In [None]:
df.to_csv('nhl_teams_stats.csv', index=False)

# LA Kings Player Stats

In [None]:
kings_player_stats_url = 'https://api.sportradar.com/nhl/production/v7/en/seasons/2023/REG/teams/44151f7a-0f24-11e2-8525-18a905767e44/statistics.json'
# Take the API key from the environment (the same API_KEY variable the first
# request cell reads) rather than embedding it in the notebook. A key that was
# ever committed in plain text should be treated as compromised and rotated.
team_params = {'api_key': os.getenv('API_KEY')}

# The timeout keeps the cell from hanging if the server never answers.
kings_player_stats_api_request = requests.get(kings_player_stats_url, params=team_params, timeout=30)
# Stop on 4xx/5xx responses so later cells do not try to parse an error payload.
kings_player_stats_api_request.raise_for_status()

In [None]:
kings_player_stats_api_request

In [None]:
json.loads(kings_player_stats_api_request.text)

In [None]:
kings_player_stats_api_response = (json.loads(kings_player_stats_api_request.text))

In [None]:
kings_player_stats_api_response['players']

In [None]:
players_stats = kings_player_stats_api_response['players']

### For Goalies

In [None]:
# Goaltending totals collected for every goalie. Each entry is both the output
# column and the key read from the player's goaltending 'total' section.
goalie_total_keys = [
    'games_played',
    'shots_against',
    'goals_against',
    'saves',
    'shutouts',
    'saves_pct',
    'avg_goals_against',
]

goalies_stats_data = {column: [] for column in ['full_name', *goalie_total_keys]}

for stats in players_stats:
    # Only players whose position is 'G' are recorded here.
    if stats['position'] == 'G':
        goalie_name = stats['full_name']
        goalies_stats_data['full_name'].append(goalie_name)
        print('full_name:',goalie_name)

        goalie_totals = stats['goaltending']['total']
        for key in goalie_total_keys:
            value = goalie_totals[key]
            goalies_stats_data[key].append(value)
            print(f'{key}:', value)
    

In [None]:
goalies_stats_data

In [None]:
df =pd.DataFrame(goalies_stats_data)

In [None]:
df.head()

In [None]:
df.to_csv('goalies_stats_data.csv', index=False)

### For Skaters (all non-goalie players)

In [None]:
# Where each output column comes from:
# (output column, section of the player's 'statistics', key inside that section)
skater_stat_sources = [
    ('games_played', 'total', 'games_played'),
    ('goals', 'total', 'goals'),
    ('assists', 'total', 'assists'),
    ('points', 'total', 'points'),
    ('penalties', 'total', 'penalties'),
    ('penalty_minutes', 'total', 'penalty_minutes'),
    ('shots', 'total', 'shots'),
    ('blocked_att', 'total', 'blocked_att'),
    ('missed_shots', 'total', 'missed_shots'),
    ('hits', 'total', 'hits'),
    ('blocked_shots', 'total', 'blocked_shots'),
    ('powerplay_shots', 'powerplay', 'shots'),
    # Spelled 'powperplay_goals' before. The corrected name ends up as the
    # column header in players_stats_data.csv and in the table loaded from it,
    # so queries written against the misspelled column need the new name.
    ('powerplay_goals', 'powerplay', 'goals'),
    ('powerplay_assists', 'powerplay', 'assists'),
]

players_stats_data = {'full_name': []}
players_stats_data.update((column, []) for column, _, _ in skater_stat_sources)

for stats in players_stats:
    # Players whose position is 'G' are handled by the goalie cell.
    if stats['position'] == 'G':
        continue

    full_name = stats['full_name']
    players_stats_data['full_name'].append(full_name)
    print('full_name:',full_name)

    for column, section, key in skater_stat_sources:
        value = stats['statistics'][section][key]
        players_stats_data[column].append(value)
        # Labels are the column names, except this one which has always been
        # printed with a space.
        label = 'penalty minutes' if column == 'penalty_minutes' else column
        print(f'{label}:', value)
    
    
    

In [None]:
players_stats_data

In [None]:
df =pd.DataFrame(players_stats_data)

In [None]:
df.head()

In [None]:
df.to_csv('players_stats_data.csv', index=False)

# Connect Database and Load the Data

In [None]:
# connect to database
raw_host = os.getenv('Database_host')
raw_username = os.getenv('Database_username')
raw_password = os.getenv('Database_password')
raw_schema = os.getenv('Database_schema')

raw_db_config = {
    "host": raw_host,
    "username": raw_username,
    "password": raw_password,
    "schema": raw_schema
}

# os.getenv returns None for an unset variable, which the f-string below would
# silently turn into the text "None"; stop with a clear message instead.
missing_settings = [setting for setting, value in raw_db_config.items() if value is None]
if missing_settings:
    raise RuntimeError(f"Missing database settings (Database_<name> environment variables): {missing_settings}")

#driver://username:password@host/database
# Percent-encode the credentials: characters such as '@', ':' or '/' inside a
# password would otherwise be read as URL delimiters.
# NOTE(review): assumes the values in .env are stored raw, not already
# percent-encoded - confirm, otherwise they would be encoded twice.
raw_engine = create_engine(
    f"mysql+mysqlconnector://{quote(raw_db_config['username'], safe='')}:{quote(raw_db_config['password'], safe='')}"
    f"@{raw_db_config['host']}/{raw_db_config['schema']}"
)



## Teams

In [None]:
nhl_teams_source_csv = 'nhl_teams_cleaned.csv'

In [None]:
nhl_teams_source_csv_df = pd.read_csv(nhl_teams_source_csv)

In [None]:
nhl_teams_source_csv_df.head()

In [None]:
# Destination table for the cleaned teams CSV read above.
raw_table = "nhl_teams"

# if_exists="replace" drops an existing table of this name and recreates it
# from the DataFrame; index=False keeps the DataFrame index out of the table.
nhl_teams_source_csv_df.to_sql(raw_table, raw_engine, index=False, if_exists="replace")

## Players

In [None]:
nhl_players_source_csv = 'nhl_players_cleaned.csv'

In [None]:
nhl_players_source_csv_df = pd.read_csv(nhl_players_source_csv)

In [None]:
nhl_players_source_csv_df.head()

In [None]:
# Destination table for the cleaned players CSV read above.
raw_table = "players"

# if_exists="replace" drops an existing table of this name and recreates it
# from the DataFrame; index=False keeps the DataFrame index out of the table.
nhl_players_source_csv_df.to_sql(raw_table, raw_engine, index=False, if_exists="replace")

## Team Stats

In [None]:
nhl_teams_stats_source_csv = 'nhl_teams_stats_cleaned.csv'

In [None]:
nhl_teams_stats_source_csv_df = pd.read_csv(nhl_teams_stats_source_csv)

In [None]:
nhl_teams_stats_source_csv_df.head()

In [None]:
# Destination table for the cleaned team statistics CSV read above.
raw_table = "nhl_team_stats"

# if_exists="replace" drops an existing table of this name and recreates it
# from the DataFrame; index=False keeps the DataFrame index out of the table.
nhl_teams_stats_source_csv_df.to_sql(raw_table, raw_engine, index=False, if_exists="replace")

## Player Stats (skaters)

In [None]:
player_stats_source_csv ='players_stats_data.csv'

In [None]:
player_stats_source_csv_df = pd.read_csv(player_stats_source_csv)

In [None]:
player_stats_source_csv_df.head()

In [None]:
# Destination table for the skater statistics CSV read above.
raw_table = "PlayerStats"

# if_exists="replace" drops an existing table of this name and recreates it
# from the DataFrame; index=False keeps the DataFrame index out of the table.
player_stats_source_csv_df.to_sql(raw_table, raw_engine, index=False, if_exists="replace")

## Goalie Stats

In [None]:
goalies_stats_source_csv ='goalies_stats_data.csv'

In [None]:
goalies_stats_source_csv_df = pd.read_csv(goalies_stats_source_csv)

In [None]:
goalies_stats_source_csv_df.head()

In [None]:
# Destination table for the goalie statistics CSV read above.
raw_table = "GoalieStats"

# if_exists="replace" drops an existing table of this name and recreates it
# from the DataFrame; index=False keeps the DataFrame index out of the table.
goalies_stats_source_csv_df.to_sql(raw_table, raw_engine, index=False, if_exists="replace")