# Imports

In [15]:
import os
import pandas as pd


from dotenv import load_dotenv
import requests
from sqlalchemy import create_engine

from packages.utils import extract_player_salaries, extract_team_info

In [18]:
# Configuration cell: load database settings from a dotenv file and build the
# SQLAlchemy engine that the "save" cells further down write through.
# NOTE(review): the dotenv path is absolute and machine-specific; anyone else
# running this notebook has to point it at their own file.
dotenv_path = ("/Users/maukanmir/Documents/Machine-Learning/Web-Scraping-Code/Player-Salaries/dot.env")
load_dotenv(dotenv_path)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
TABLE_NAME_PlAYER_SALARIES = "player_salaries"
TABLE_NAME_TEAM_SALARIES = "team_salaries"

# Fail fast when the essential settings are absent. Without this check the
# f-string below would embed the literal text "None" in the URL and the
# problem would only surface later, when the first write is attempted.
missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("DB_USER", DB_USER),
        ("DB_HOST", DB_HOST),
        ("DB_NAME", DB_NAME),
    )
    if not setting_value
]
if missing_settings:
    raise RuntimeError(
        f"Missing database settings {missing_settings}; check that {dotenv_path} exists and defines them."
    )

# DB_PORT was previously read but never used, so a non-default port was
# silently ignored. Append it unless DB_HOST already carries a port of its own.
if DB_PORT and ":" not in DB_HOST:
    db_host_and_port = f"{DB_HOST}:{DB_PORT}"
else:
    db_host_and_port = DB_HOST

engine = create_engine(f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{db_host_and_port}/{DB_NAME}')

# Grabbing Player Salaries

In [20]:
# Scrape one Spotrac cash-ranking page per season and stack the parsed frames
# into a single DataFrame named `all_data`.
years = [str(year) for year in range(2011, 2025)]

# Collect the per-season frames in a list and concatenate once at the end;
# concatenating inside the loop re-copies every earlier row on each pass.
yearly_frames = []
for year in years:
    url = f"https://www.spotrac.com/nba/rankings/player/_/year/{year}/sort/cash_total"
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(url, timeout=30)
    # Stop on HTTP errors instead of handing an error page to the parser.
    response.raise_for_status()
    # The URL needs the season as text; the extractor is given it as an int.
    yearly_frames.append(extract_player_salaries(response.text, int(year)))

all_data = pd.concat(yearly_frames)

# Save to Postgres

In [21]:
# Write the scraped frame to the player-salaries table, replacing any existing
# table of that name; the DataFrame index is not stored.
# NOTE(review): a failure is printed but not re-raised, so later cells still run.
try:
    all_data.to_sql(
        name=TABLE_NAME_PlAYER_SALARIES,
        con=engine,
        if_exists="replace",
        index=False,
    )
except Exception as exc:
    print(f"Database operation failed. Error: {exc}")
else:
    print("Data successfully written to the database.")

Data successfully written to the database.


# NBA Team Salaries

In [5]:
# Season labels in "start-end" form, from "1990-1991" through "2023-2024".
years = [f"{start}-{start + 1}" for start in range(1990, 2024)]

In [7]:
# Scrape one HoopsHype salary page per season label in `years` and stack the
# parsed frames into a single DataFrame named `all_data`.
# Frames are gathered in a list and concatenated once at the end; concatenating
# inside the loop re-copies every earlier row on each pass.
season_frames = []
for year in years:
    base_url = f"https://hoopshype.com/salaries/{year}/"
    # A timeout keeps one stalled request from hanging the whole notebook.
    response = requests.get(base_url, timeout=30)
    # Stop on HTTP errors instead of handing an error page to the parser.
    response.raise_for_status()
    season_frames.append(extract_team_info(response.text, year))

all_data = pd.concat(season_frames)

In [8]:
# Lookup from the team name found in the scraped "Team" column to a
# three-letter team code.
# Utah was previously mapped to the two-letter "UT", the only entry that was not
# a three-letter code; it is now "UTA" to match the rest of the table.
nba_team_abbreivated = {
    "Atlanta": "ATL",
    "Cleveland": "CLE",
    "New York": "NYK",
    "Charlotte": "CHA",
    "Detroit": "DET",
    "Dallas": "DAL",
    "Philadelphia": "PHI",
    "Milwaukee": "MIL",
    "Phoenix": "PHX",
    "Brooklyn": "BKN",
    "Boston": "BOS",
    "Portland": "POR",
    "Golden State": "GSW",
    "San Antonio": "SAS",
    "Indiana": "IND",
    "Utah": "UTA",
    "Oklahoma City": "OKC",
    "Houston": "HOU",
    "Denver": "DEN",
    "LA Clippers": "LAC",
    "Chicago": "CHI",
    "Washington": "WAS",
    "Sacramento": "SAC",
    "Miami": "MIA",
    "Minnesota": "MIN",
    "Orlando": "ORL",
    "New Orleans": "NOP",
    "Memphis": "MEM",
    "Toronto": "TOR",
    "LA Lakers": "LAL",
}

In [9]:
# Replace each team name in the "Team" column with its abbreviation.
# The column is overwritten in place, so a strict dict lookup would raise
# KeyError if this cell were run a second time (the values would already be
# abbreviations). Already-abbreviated values are therefore passed through,
# while names that are neither a known team nor a known code still raise.
known_codes = set(nba_team_abbreivated.values())
unmapped_teams = [
    team
    for team in all_data["Team"].unique()
    if team not in nba_team_abbreivated and team not in known_codes
]
if unmapped_teams:
    raise KeyError(f"No abbreviation defined for team(s): {unmapped_teams}")

all_data["Team"] = all_data["Team"].map(lambda team: nba_team_abbreivated.get(team, team))

# Save Dataset to DB

In [10]:
# Write the team-salary frame to the team-salaries table, replacing any existing
# table of that name; the DataFrame index is not stored.
# NOTE(review): a failure is printed but not re-raised, so later cells still run.
try:
    all_data.to_sql(
        name=TABLE_NAME_TEAM_SALARIES,
        con=engine,
        if_exists="replace",
        index=False,
    )
except Exception as exc:
    print(f"Database operation failed. Error: {exc}")
else:
    print("Data successfully written to the database.")

Data successfully written to the database.
