### Production Features Pipeline

In [1]:
# Scraping backend used by this notebook: "SCRAPINGANT" or "SELENIUM".
# SCRAPINGANT requires a subscription but includes a proxy server.
# To switch backends, change the value below to "SCRAPINGANT".

WEBSCRAPER = "SELENIUM"

In [2]:
import os

import pandas as pd
import numpy as np

import hopsworks

from datetime import datetime, timedelta
from pytz import timezone

import json

import time

from pathlib import Path  # for Windows/Linux compatibility

# Change the working directory to the project root when running from the
# notebooks folder, so that the `src` modules can be imported and sibling
# folders can be reached with relative paths.
# Only step up when `src` is not already visible from the current directory:
# this keeps the cell idempotent, so re-running it (or starting the kernel in
# the project root) does not climb above the root.
if not Path("src").is_dir():
    os.chdir("..")


from src.utils.webscraping import (
    get_new_games,
    activate_web_driver,
    get_todays_matchups,
)

from src.data.cleaning import (
    process_games,
    add_TARGET,
)

from src.data.build_features import (
    process_features,
)

from src.utils.hopsworks_utils import (
    save_feature_names,
    convert_feature_names,
)

from src.utils.constants import (
    FEATURE_GROUP_VERSION,
)

# Relative path to the data folder, resolved against the current working directory.
DATAPATH = Path("data")

**Load API keys**

In [3]:
from dotenv import load_dotenv

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# The Hopsworks key is read unconditionally, whichever scraper is selected.
try:
    HOPSWORKS_API_KEY = os.environ["HOPSWORKS_API_KEY"]
except KeyError as err:
    # Catch only the missing-variable case; a bare except would also hide
    # unrelated failures such as KeyboardInterrupt.
    raise Exception("Set environment variable HOPSWORKS_API_KEY") from err

# If ScrapingAnt is chosen, read its API key and leave the driver unset;
# otherwise start the Selenium web driver and use an empty key.
# Both `driver` and `SCRAPINGANT_API_KEY` are always defined after this cell,
# because the scraping call takes both.
if WEBSCRAPER == "SCRAPINGANT":
    try:
        SCRAPINGANT_API_KEY = os.environ["SCRAPINGANT_API_KEY"]
    except KeyError as err:
        raise Exception("Set environment variable SCRAPINGANT_API_KEY") from err
    driver = None

elif WEBSCRAPER == "SELENIUM":
    driver = activate_web_driver()
    SCRAPINGANT_API_KEY = ""

else:
    # Fail here with a clear message rather than with a NameError on `driver`
    # or `SCRAPINGANT_API_KEY` in a later cell.
    raise ValueError(
        f"Unknown WEBSCRAPER {WEBSCRAPER!r}; expected 'SCRAPINGANT' or 'SELENIUM'"
    )

**Scrape New Completed Games and Format Them**

In [4]:
# Fetch the new games using whichever scraper was configured: the ScrapingAnt
# key is an empty string and/or the driver is None for the backend not in use.
df_new = get_new_games(SCRAPINGANT_API_KEY, driver)

if df_new.empty:
    print("No new games to process")
else:
    # Latest SEASON among the newly scraped games;
    # this will be used when constructing rows for prediction.
    # NOTE: SEASON is only defined when new games were found.
    SEASON = df_new["SEASON"].max()

# Preview as the cell's final expression so the notebook actually renders it;
# a bare expression nested inside the if/else is evaluated but never displayed.
df_new.head()

Current month is 03
Scraping https://www.nba.com/stats/teams/boxscores?SeasonType=Regular+Season&DateFrom=03/14/25&DateTo=03/21/25


FIRST  <html class="userconsent-cntry-us userconsent-state- userconsent-reg-us" data-build="19565" data-theme="" data-version="4.64.1" lang="en"><head><script src="https://bam.nr-data.net/1/NRJS-93744526e47188ec9f0?a=927622108&amp;sa=1&amp;v=1177.96a4d39&amp;t=Unnamed%20Transaction&amp;rst=3720&amp;ck=1&amp;ref=https://www.nba.com/stats/teams/boxscores&amp;be=401&amp;fe=3695&amp;dc=1027&amp;af=err,xhr,stn,ins,spa&amp;perf=%7B%22timing%22:%7B%22of%22:1742608266025,%22n%22:0,%22f%22:1,%22dn%22:2,%22dne%22:73,%22c%22:73,%22s%22:75,%22ce%22:81,%22rq%22:81,%22rp%22:363,%22rpe%22:365,%22dl%22:368,%22di%22:745,%22ds%22:1027,%22de%22:1027,%22dc%22:3689,%22l%22:3695,%22le%22:3702%7D,%22navigation%22:%7B%7D%7D&amp;fp=472&amp;fcp=572&amp;jsonp=NREUM.setToken" type="text/javascript"></script><script src="https://js-agent.newrelic.com/nr-spa-1177.min.js"></script><script async="" src="https://static.criteo.net/js/ld/publishertag.prebid.144.js" type="text/javascript"></script><meta charset="utf-8"/>

No new games to process
