# Total Stats Code Extraction

In [1]:
import os
import sys
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Make the project root importable BEFORE importing from `packages`.
# With the import placed first, a fresh kernel can only resolve `packages`
# if it already happens to be on sys.path.
# NOTE(review): this absolute path is machine-specific; presumably `packages`
# lives one level above the notebook directory — confirm, and consider
# deriving the location from the working directory instead.
current_dir = os.path.dirname(os.path.abspath('/Users/maukanmir/Documents/Machine-Learning/Web-Scraping-Code/Ball-Dont-Lie-API/Scraping-Notebook/advanced_stats.ipynb'))
project_root = os.path.join(current_dir, '..')
# Guard so that re-running this cell does not keep prepending duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from packages.utils import get_total_stats

In [2]:
# Load the database settings from the project's dot.env file and build the
# SQLAlchemy engine used to persist the scraped table.
# NOTE(review): the dot.env location is an absolute, machine-specific path.
dotenv_path = ("/Users/maukanmir/Documents/Machine-Learning/Web-Scraping-Code/Ball-Dont-Lie-API/dot.env")
load_dotenv(dotenv_path)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
TABLE_NAME = "Total_Stats"

# DB_PORT used to be read but never applied, so any non-default port was
# silently ignored. Append it only when it is set, which keeps the URL
# unchanged for environments that do not define DB_PORT.
db_location = f"{DB_HOST}:{DB_PORT}" if DB_PORT else DB_HOST

# NOTE(review): credentials are interpolated verbatim; a password containing
# URL-reserved characters (e.g. "@" or "/") would need escaping.
engine = create_engine(f'postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{db_location}/{DB_NAME}')

In [3]:
# Scrape one league summary page per season and combine the per-season
# frames returned by get_total_stats into `all_data`.

# Pause between consecutive page requests so the site is not hit with 45
# back-to-back calls.
# NOTE(review): the site is believed to throttle at roughly 20 requests per
# minute — confirm against its current bot policy and tune this value.
REQUEST_DELAY_SECONDS = 4
# Without a timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

years = [str(year) for year in range(1980, 2025)]
season_frames = []

for position, year in enumerate(years):
  if position:
    time.sleep(REQUEST_DELAY_SECONDS)
  base_url = f"https://www.basketball-reference.com/leagues/NBA_{year}.html"
  try:
    # The request lives inside the try so that one failed season is reported
    # and skipped instead of aborting the whole run.
    response = requests.get(base_url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail loudly on HTTP errors rather than parsing an error page as data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    df = get_total_stats(soup, int(year))
    season_frames.append(df)
  except Exception as error:
    print(f"Season {year}: the error is {error}")

# Concatenate once at the end: growing a DataFrame inside the loop re-copies
# all earlier rows on every iteration. pd.concat raises on an empty list, so
# fall back to an empty frame when every season failed.
all_data = pd.concat(season_frames) if season_frames else pd.DataFrame()

In [None]:
# Write the combined table to the database, replacing any existing copy.
# if_exists='replace' drops the current table first, so skip the write when
# nothing was scraped; otherwise a failed scrape would wipe out good data.
if all_data.empty:
    print("No data was scraped; leaving the existing database table untouched.")
else:
    try:
        all_data.to_sql(TABLE_NAME, engine, if_exists='replace', index=False)
        print("Data successfully written to the database.")
    except Exception as e:
        # Best-effort: report the failure and let the notebook continue to the CSV export.
        print(f"Database operation failed. Error: {e}")

In [None]:
# Save a CSV copy of the combined table. to_csv does not create missing
# parent directories, so make sure the output folder exists first.
os.makedirs("data", exist_ok=True)
all_data.to_csv("data/total_stats.csv", index=False)

In [13]:
# Parse the "per_game-team" table from `soup` into a DataFrame of cell texts.
# `soup` is whatever page was parsed last by the scraping loop.
table = soup.find("table", {"id": "per_game-team"})
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('No table with id "per_game-team" was found in the parsed page.')

# Extract the headers
headers = [th.text.strip() for th in table.find("thead").find_all("th")]

# Extract the rows from the tbody
rows = table.find("tbody").find_all("tr")

# Extract the text of every <td> cell, one list per row. Rows that contain
# no <td> cells are skipped so they do not become all-empty DataFrame rows.
data = [
    [cell.text.strip() for cell in cells]
    for cells in (row.find_all("td") for row in rows)
    if cells
]

# Only <td> cells are collected above, so the first header (Rk, per the
# original note) has no matching value in each row and is dropped here.
df = pd.DataFrame(data, columns=headers[1:])

# Display the DataFrame
print(df)

                       Team   G     MP    FG   FGA   FG%    3P   3PA   3P%  \
0           Indiana Pacers*  82  240.3  47.0  92.7  .507  13.2  35.3  .374   
1           Boston Celtics*  82  241.8  43.9  90.2  .487  16.5  42.5  .388   
2    Oklahoma City Thunder*  82  241.5  44.5  89.3  .499  13.3  34.2  .389   
3          Milwaukee Bucks*  82  241.5  43.1  88.5  .487  14.2  38.1  .373   
4             Atlanta Hawks  82  242.1  43.0  92.5  .465  13.7  37.7  .364   
5       Los Angeles Lakers*  82  242.1  43.7  87.5  .499  11.8  31.4  .377   
6         Dallas Mavericks*  82  240.3  43.1  89.7  .481  14.6  39.5  .369   
7     Golden State Warriors  82  241.8  43.7  91.6  .477  14.8  38.9  .380   
8          Sacramento Kings  82  242.1  43.3  90.9  .477  14.4  39.3  .366   
9             Phoenix Suns*  82  241.2  42.5  86.1  .493  12.4  32.6  .382   
10                Utah Jazz  82  241.5  42.0  89.9  .467  12.9  36.5  .354   
11    Los Angeles Clippers*  82  240.3  42.4  86.7  .489  12.6  

In [14]:
# Bare expression: renders `df` as this cell's rich (table) output.
df

Unnamed: 0,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Indiana Pacers*,82,240.3,47.0,92.7,0.507,13.2,35.3,0.374,33.8,...,0.782,10.1,31.4,41.5,30.8,7.7,5.9,12.9,21.4,123.3
1,Boston Celtics*,82,241.8,43.9,90.2,0.487,16.5,42.5,0.388,27.4,...,0.807,10.7,35.6,46.3,26.9,6.8,6.6,11.9,16.2,120.6
2,Oklahoma City Thunder*,82,241.5,44.5,89.3,0.499,13.3,34.2,0.389,31.3,...,0.825,8.8,33.2,42.0,27.1,8.5,6.6,12.7,18.8,120.1
3,Milwaukee Bucks*,82,241.5,43.1,88.5,0.487,14.2,38.1,0.373,29.0,...,0.774,9.4,34.8,44.2,26.5,6.8,5.0,12.9,19.2,119.0
4,Atlanta Hawks,82,242.1,43.0,92.5,0.465,13.7,37.7,0.364,29.3,...,0.797,12.5,32.2,44.7,26.6,7.5,4.5,13.5,18.6,118.3
5,Los Angeles Lakers*,82,242.1,43.7,87.5,0.499,11.8,31.4,0.377,31.8,...,0.782,8.2,34.9,43.1,28.5,7.4,5.5,14.0,15.6,118.0
6,Dallas Mavericks*,82,240.3,43.1,89.7,0.481,14.6,39.5,0.369,28.5,...,0.758,9.7,33.2,42.9,25.7,6.9,5.0,12.5,18.3,117.9
7,Golden State Warriors,82,241.8,43.7,91.6,0.477,14.8,38.9,0.38,28.9,...,0.78,12.1,34.6,46.7,29.3,7.0,4.6,14.3,19.5,117.8
8,Sacramento Kings,82,242.1,43.3,90.9,0.477,14.4,39.3,0.366,29.0,...,0.745,10.8,33.2,44.0,28.3,7.6,4.2,13.1,19.9,116.6
9,Phoenix Suns*,82,241.2,42.5,86.1,0.493,12.4,32.6,0.382,30.0,...,0.808,10.1,33.9,44.1,27.0,7.4,6.0,14.9,18.0,116.2
