### Production Features Pipeline

In [None]:
# Scraping backend to use: "SCRAPINGANT" or "SELENIUM".
# ScrapingAnt needs a paid subscription, but it comes with a proxy server.

# WEBSCRAPER = "SCRAPINGANT"
WEBSCRAPER = "SELENIUM"

In [None]:
import os

import pandas as pd
import numpy as np

import hopsworks

from datetime import datetime, timedelta
from pytz import timezone

import json

import time

from pathlib import Path  # for Windows/Linux compatibility

# When the notebook is launched from the "notebooks" folder, step up to the
# project root so that `src.*` modules import cleanly and sibling folders are
# reachable through relative paths.
launch_dir = os.path.basename(os.getcwd())
print(launch_dir)
if launch_dir == "notebooks":
    INDIRECT = False
    os.chdir("..")
# Show the directory the rest of the notebook will run from.
print(os.path.basename(os.getcwd()))

from src.utils.webscraping import (
    get_new_games,
    activate_web_driver,
    get_todays_matchups,
)

from src.data.cleaning import (
    process_games,
    add_TARGET,
)

from src.data.build_features import (
    process_features,
)

from src.utils.hopsworks_utils import (
    save_feature_names,
    convert_feature_names,
)

from src.utils.constants import (
    FEATURE_GROUP_VERSION,
)

# Data folder, relative to the current working directory.
DATAPATH = Path("data")

**Load API keys**

In [None]:
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment.
load_dotenv()

# The Hopsworks key is always required.
try:
    HOPSWORKS_API_KEY = os.environ["HOPSWORKS_API_KEY"]
except KeyError as err:
    # Catch only the missing-variable case and keep the original cause chained;
    # a bare `except:` would also intercept KeyboardInterrupt / SystemExit.
    raise Exception("Set environment variable HOPSWORKS_API_KEY") from err

# Set up the selected scraper. Both branches define `SCRAPINGANT_API_KEY` and
# `driver`, because the scraping helpers are called with both arguments.
if WEBSCRAPER == "SCRAPINGANT":
    try:
        SCRAPINGANT_API_KEY = os.environ["SCRAPINGANT_API_KEY"]
    except KeyError as err:
        raise Exception("Set environment variable SCRAPINGANT_API_KEY") from err
    driver = None

elif WEBSCRAPER == "SELENIUM":
    driver = activate_web_driver()
    SCRAPINGANT_API_KEY = ""

else:
    # Fail here instead of hitting a NameError on `driver` /
    # `SCRAPINGANT_API_KEY` in a later cell.
    raise ValueError(
        f"Unknown WEBSCRAPER {WEBSCRAPER!r}; expected 'SCRAPINGANT' or 'SELENIUM'"
    )

**Scrape New Completed Games and Format Them**

In [None]:
# Fetch the new completed games using the configured scraper.
df_new = get_new_games(SCRAPINGANT_API_KEY, driver)

if df_new.empty:
    print("No new games to process")
else:
    # Latest SEASON value among the new games;
    # this will be used when constructing rows for prediction
    SEASON = df_new["SEASON"].max()

# NOTE(review): SEASON is only assigned when new games were found. A later cell
# reads it while building rows for today's games, so a run with no new games
# but with games scheduled today would hit a NameError - confirm how SEASON
# should be derived in that case.

# Kept as the cell's final top-level expression so the notebook renders the
# preview; an expression nested inside the if/else is not displayed.
df_new.head()

**Retrieve today's games**

In [None]:
# Look up today's games on the NBA schedule: the matchups plus their game ids.
matchups, game_ids = get_todays_matchups(SCRAPINGANT_API_KEY, driver)

# `matchups` is None when nothing is scheduled for today.
if matchups is not None:
    print(matchups)
    print(game_ids)
else:
    print("No games today")

**Close Webdriver**

In [None]:
# Shut the Selenium session down once scraping is finished.
# quit() ends the whole WebDriver session (all windows plus the driver
# process); close() only closes the current window and can leave the driver
# process running.
if WEBSCRAPER == "SELENIUM" and driver is not None:
    driver.quit()

**Check if anything is going on in the season**

In [None]:
# Nothing is happening when there are neither new completed games nor games
# scheduled for today; UPDATE_WF records whether there is anything to update.
nothing_to_do = df_new.empty and matchups is None
UPDATE_WF = not nothing_to_do

if nothing_to_do:
    print("No new games to process")
    exit()

**Create Rows for Today's Games with Empty Stats**

In [None]:
# Reformat today's matchups into rows shaped like the new-games dataframe, so
# they can go through the same processing as completed games.

if matchups is None:
    print("No games going on. Nothing to do.")
    exit()

else:

    # Computed once so every row carries the same date, even if the cell runs
    # across midnight.
    # NOTE(review): pytz "EST" is a fixed UTC-5 zone with no daylight saving;
    # confirm whether "US/Eastern" is what is intended here.
    game_date = datetime.now(timezone("EST")).strftime("%Y-%m-%d")

    # One record per matchup. Each matchup is indexed as (visitor, home), and
    # game_ids is positionally aligned with matchups.
    # NOTE(review): SEASON is only assigned when new games were scraped; with
    # an empty df_new and games scheduled today this raises NameError - confirm
    # the intended fallback.
    todays_rows = [
        {
            "HOME_TEAM_ID": matchup[1],
            "VISITOR_TEAM_ID": matchup[0],
            "GAME_DATE_EST": game_date,
            "GAME_ID": int(game_ids[i]),
            "SEASON": SEASON,
        }
        for i, matchup in enumerate(matchups)
    ]

    # Start from an empty copy of df_new so df_today has the same columns, then
    # append all rows with a single concat instead of one concat per row.
    df_today = df_new.drop(df_new.index)
    if todays_rows:
        df_today = pd.concat(
            [df_today, pd.DataFrame(todays_rows)], ignore_index=True
        )

    # blank rows will be filled with 0 to prevent issues with feature engineering
    df_today = df_today.fillna(0)

    # Nested inside the else branch, so the notebook does not render this.
    df_today

**Access Feature Store**

In [None]:
# Sanity-check that the key is present without echoing any part of it:
# notebook outputs are saved with the file and show up in logs.
print("HOPSWORKS_API_KEY loaded:", bool(HOPSWORKS_API_KEY))