# NBA Data Analysis

**Name:** Brayden Uglione

**Date:** 11/13/24

**Exercise:** Project #2, Part 2: Web Scraped Data Analysis and Visualizations

**Purpose:** Scrape data from Basketball Reference for various NBA seasons, analyze the data, and answer questions regarding team performance metrics.

**Citations:**

https://realpython.com/beautiful-soup-web-scraper-python/

https://www.youtube.com/watch?v=A1s1aGHoODs

https://www.youtube.com/watch?v=bargNl2WeN4

### Import Libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

### Function to Scrape NBA Statistics

In [None]:
def _team_rows(soup, table_id):
    """Return the stripped ``<td>`` texts of every team row in one stats table.

    Args:
        soup: Parsed page (a ``BeautifulSoup`` object).
        table_id: Value of the ``id`` attribute of the ``<table>`` to read.

    Returns:
        A list with one entry per team row; each entry is the list of that
        row's ``<td>`` cell texts with surrounding whitespace removed.

    Raises:
        ValueError: If the page contains no table with the given id.
    """
    table = soup.find('table', {'id': table_id})
    if table is None:
        raise ValueError(f"Table with id '{table_id}' was not found in the page")

    rows = []
    # The first <tr> is skipped as a header row; any further row that has no
    # <td> cells at all (e.g. a second header line) is dropped by the check below.
    for row in table.find_all('tr')[1:]:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if not cells or cells[0] == "League Average":
            continue
        rows.append(cells)
    return rows


def scrape_nba_stats(url, output_path='nba_team_stats.csv', timeout=30):
    """Scrape team statistics from a league season page into one DataFrame.

    Four tables are read from the page by their ``id`` attribute
    (``per_game-team``, ``totals-team``, ``totals-opponent`` and
    ``advanced-team``), a handful of columns is taken from each, and the
    results are outer-merged on the team name.

    Args:
        url: Address of the page to download.
        output_path: Where to write the merged table as CSV. Pass ``None`` to
            skip writing the file. Defaults to ``'nba_team_stats.csv'``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Team``, ``3PA``, ``TRB``,
        ``PTS``, ``AST``, ``TOV``, ``PTS_Opponent``, ``ORTg``, ``W`` and ``L``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If an expected table is missing from the page, or a cell
            that should hold a number cannot be converted.
    """
    # Without a timeout a stalled connection would block forever, and without
    # the status check an error page would only fail later as a missing table.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Cell positions below index the <td> cells of a row, so position 0 is the
    # team name.
    per_game_stats = [
        {'Team': cells[0], '3PA': float(cells[7]), 'TRB': float(cells[17])}
        for cells in _team_rows(soup, 'per_game-team')
    ]

    # The totals are addressed from the end of the row.
    total_stats_team = [
        {
            'Team': cells[0],
            'PTS': int(cells[-1]),
            'AST': int(cells[-6]),
            'TOV': int(cells[-3]),
        }
        for cells in _team_rows(soup, 'totals-team')
    ]

    total_stats_opponent = [
        {'Team': cells[0], 'PTS_Opponent': int(cells[-1])}
        for cells in _team_rows(soup, 'totals-opponent')
    ]

    advanced_stats = [
        {
            'Team': cells[0],
            'ORTg': float(cells[9]),
            'W': int(cells[2]),
            'L': int(cells[3]),
        }
        for cells in _team_rows(soup, 'advanced-team')
    ]

    # Naming the columns explicitly keeps the 'Team' key present even when a
    # table produced no rows, so the merges below cannot fail on a missing key.
    df_per_game = pd.DataFrame(per_game_stats, columns=['Team', '3PA', 'TRB'])
    df_total_team = pd.DataFrame(
        total_stats_team, columns=['Team', 'PTS', 'AST', 'TOV']
    )
    df_total_opponent = pd.DataFrame(
        total_stats_opponent, columns=['Team', 'PTS_Opponent']
    )
    df_advanced = pd.DataFrame(advanced_stats, columns=['Team', 'ORTg', 'W', 'L'])

    # Outer merges keep a team even if it is absent from one of the tables.
    merged_df = (
        df_per_game
        .merge(df_total_team, on='Team', how='outer')
        .merge(df_total_opponent, on='Team', how='outer')
        .merge(df_advanced, on='Team', how='outer')
    )

    if output_path is not None:
        merged_df.to_csv(output_path, index=False)

    return merged_df

In [None]:
# League page to scrape; the address points at the NBA_2024 page.
url = "https://www.basketball-reference.com/leagues/NBA_2024.html"
# Download and parse the page; scrape_nba_stats also writes the merged table
# to nba_team_stats.csv as a side effect.
nba_data_df = scrape_nba_stats(url)
# Show the merged per-team table.
print(nba_data_df)

## Data Analysis and Visualization