# Imports

In [1]:
import pandas as pd

from dotenv import load_dotenv
import os
import requests
from sqlalchemy import create_engine

from packages.utils import extract_nba_stats

# Constants

In [2]:
dotenv_path = ("/Users/maukanmir/Documents/Machine-Learning/Web-Scraping-Code/Player-Salaries/dot.env")
load_dotenv(dotenv_path)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
TABLE_NAME = "player_stats"

engine = create_engine(f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}/{DB_NAME}')

In [3]:
years = [str(year) for year in range(1970, 2025)]
pages = range(1, 4)

In [16]:
all_data= pd.DataFrame()
for year in years:
  for page in pages:
    base_url = f"https://basketball.realgm.com/nba/stats/{year}/Averages/Qualified/points/All/desc/{page}/Regular_Season"
    response = requests.get(base_url)
    try:
      year = int(year)
      df = extract_nba_stats(response.text, year)
      all_data = pd.concat([all_data, df])
    except Exception as error:
      print(f" The error is {error}")

In [17]:
columns = ["3PM", "3PA", "3P%", "ORB", "DRB", "SPG", "BPG", "TOV"]

for col in columns:
  df[col] = df[col].apply(lambda x: 0 if x == "-" else x)

Unnamed: 0,Rank,Name,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,season
0,1,Jerry West,LAL,74,42.0,31.2,11.2,22.6,.497,-,...,10.6,.824,-,-,4.6,7.5,-,-,-,1969-1970
1,2,Kareem Abdul-Jabbar,MIL,82,43.1,28.8,11.4,22.1,.518,-,...,9.1,.653,-,-,14.5,4.1,-,-,-,1969-1970
2,3,Elvin Hayes,SDR,82,44.7,27.5,11.1,24.6,.452,-,...,7.6,.688,-,-,16.9,2.0,-,-,-,1969-1970
3,4,Billy Cunningham,PHI,81,39.4,26.1,9.9,21.1,.469,-,...,8.6,.729,-,-,13.6,4.3,-,-,-,1969-1970
4,5,Lou Hudson,ATL,80,38.6,25.4,10.4,19.6,.531,-,...,5.6,.824,-,-,4.7,3.5,-,-,-,1969-1970
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,261,Tristan Thompson,CLE,49,11.2,3.3,1.5,2.4,.608,0.0,...,1.1,.288,1.5,2.1,3.6,1.0,0.2,0.3,0.6,2023-2024
61,262,Zeke Nnaji,DEN,58,9.9,3.2,1.2,2.6,.463,0.1,...,1.1,.677,1.1,1.1,2.2,0.6,0.3,0.7,0.5,2023-2024
62,263,J.T. Thor,CHA,63,12.4,3.2,1.3,2.9,.437,0.4,...,0.3,.550,0.7,1.6,2.3,0.5,0.2,0.4,0.2,2023-2024
63,264,Dalen Terry,CHI,59,11.5,3.1,1.2,2.7,.439,0.3,...,0.7,.581,0.5,1.4,1.9,1.4,0.5,0.3,0.5,2023-2024


In [18]:
all_data.drop("Rank", inplace=True, axis=1)
cols_to_change = [ col for col in all_data.columns if col not in ["Team", "Name", "GP", "season", "id"]]

for col in cols_to_change:
  all_data[col] = all_data[col].apply(lambda x: float(x) if str(x).isnumeric() else 0)
all_data["GP"] = all_data["GP"].apply(lambda x: int(x))

In [14]:
try:
    all_data.to_sql(TABLE_NAME, engine, if_exists='replace', index=False)
    print("Data successfully written to the database.")
except Exception as e:
    print(f"Database operation failed. Error: {e}")

Data successfully written to the database.
