# Imports

In [1]:
import os
from urllib.parse import quote

import pandas as pd
import requests
from dotenv import load_dotenv
from sqlalchemy import create_engine

from packages.utils import extract_nba_stats

# Constants

In [2]:
# Location of the env file holding the database credentials. The default is
# the original machine-specific path; set DOTENV_PATH to use another file.
dotenv_path = os.getenv(
    "DOTENV_PATH",
    "/Users/maukanmir/Documents/Machine-Learning/Web-Scraping-Code/Player-Salaries/dot.env",
)
load_dotenv(dotenv_path)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
TABLE_NAME = "player_stats"

# Percent-encode the credentials so characters such as "@", ":" or "/" in a
# password cannot be mistaken for URL delimiters.
_db_user = quote(DB_USER or "", safe="")
_db_password = quote(DB_PASSWORD or "", safe="")

# DB_PORT used to be read but never placed in the URL. Append it unless it is
# unset or DB_HOST already carries a port of its own.
_db_host = DB_HOST or ""
if DB_PORT and ":" not in _db_host:
    _db_host = f"{_db_host}:{DB_PORT}"

engine = create_engine(
    f"postgresql+psycopg2://{_db_user}:{_db_password}@{_db_host}/{DB_NAME}"
)

In [3]:
# Seasons to request, 1970 through 2024 inclusive, held as strings.
years = list(map(str, range(1970, 2025)))
# Result pages requested for each season: 1, 2 and 3.
pages = range(1, 4)

In [4]:
# Download every (season, page) stats table and combine them into one frame.
# A failure on any single page is reported and skipped so that the rest of
# the scrape can continue.
frames = []
for year in years:
    # Converted once per season instead of rebinding the loop variable.
    season = int(year)
    for page in pages:
        base_url = f"https://basketball.realgm.com/nba/stats/{year}/Averages/Qualified/points/All/desc/{page}/Regular_Season"
        try:
            # The request lives inside the try so a network error on one page
            # does not abort the whole loop; the timeout keeps a stalled
            # connection from hanging the cell indefinitely.
            response = requests.get(base_url, timeout=30)
            # Do not hand an HTTP error page to the parser.
            response.raise_for_status()
            df = extract_nba_stats(response.text, season)
            if df is not None:
                frames.append(df)
        except Exception as error:
            print(f" The error is {error} (year={year}, page={page})")

# Concatenate once rather than growing the frame on every iteration; the
# index is renumbered so rows from different pages do not share labels.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [13]:
# Drop the ranking column. Reassigning with errors="ignore" keeps this cell
# re-runnable: a second run no longer fails because "Rank" is already gone.
all_data = all_data.drop(columns="Rank", errors="ignore")

# Every column except these is converted to a float.
cols_to_change = [
    col for col in all_data.columns
    if col not in ["Team", "Name", "GP", "season", "id"]
]

for col in cols_to_change:
    # pd.to_numeric parses decimals and negatives. The previous
    # str(x).isnumeric() check is False for any string containing "." or "-",
    # so values such as "27.4" were being overwritten with 0. Values that
    # genuinely cannot be parsed still become 0.
    all_data[col] = pd.to_numeric(all_data[col], errors="coerce").fillna(0.0)

# GP is stored as an integer; an unparseable value raises, as int(x) did before.
all_data["GP"] = pd.to_numeric(all_data["GP"]).astype("int64")

In [14]:
# Write the frame to the database table named by TABLE_NAME, replacing any
# existing table of that name. A failure is reported rather than raised.
try:
    all_data.to_sql(name=TABLE_NAME, con=engine, if_exists="replace", index=False)
except Exception as db_error:
    print(f"Database operation failed. Error: {db_error}")
else:
    print("Data successfully written to the database.")

Data successfully written to the database.
