In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import psycopg2
from dotenv import load_dotenv
import os

# Web Scrape Stats

In [None]:
def get_player_stats(first,last):
    """
    Returns stats and height of player obtained from sportsreference.com.
    """
    url = f'https://www.sports-reference.com/cbb/players/{first.lower()}-{last.lower()}-1.html'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract basic player info (height, weight, birth date)
    player_info = soup.find_all('p')
    height = None
    for info in player_info:
        if 'cm' in info.text and 'kg' in info.text:  # Height and weight are usually mentioned with these units
            height = info.text.strip()

    # Extracting tables for per game stats and advanced stats
    tables = soup.find_all('table')
    stats = {}
    for table in tables:
        if table.get('id') in ['players_per_game', 'players_advanced']:
            df = pd.read_html(str(table))[0]
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.droplevel(0)  # Dropping multi-level column headers only if they exist
            stats[table.get('id')] = df
    
    return height, stats

def get_player_information(first, last):
    """
    Obtains players date of birth from basketballreference.com.
    """
    url = f'https://www.basketball-reference.com/players/{last[0].lower()}/{last[:5].lower() + first[:2].lower()}01.html'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    
    element_with_date_birth = soup.find('span', attrs={'data-birth': True})

    # Extract the 'data-birth' attribute
    date_birth = element_with_date_birth['data-birth'] if element_with_date_birth else None

    return date_birth

def get_draft_class(year):
    """
    Returns a list of player names from the given NBA draft year.
    """
    url = f"https://www.basketball-reference.com/draft/NBA_{year}.html"
    response = requests.get(url)
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the table containing the player names
    # Assuming the player names are in 'td' tags with 'data-stat' attribute set to 'player'
    player_name_cells = soup.find_all('td', {'data-stat': 'player'})

    # Extract and print the player names
    player_names = [cell.get_text() for cell in player_name_cells]
    return player_names

In [None]:
# Replace the URL with the page you want to scrape
height, stats = get_player_stats("zion", "williamson")
birth_date = get_player_information("Zion", "Williamson")

players_per_game = stats.get('players_per_game')
players_advanced = stats.get('players_advanced')

players_per_game.drop(columns=["Season", "School", "Conf", "Class", "GS", "2P", "2PA", "2P%", "ORB", "DRB", "Unnamed: 28"], inplace=True)
players_advanced.drop(columns=["Season", "School", "Conf", "Class", "G", "GS", "MP", "Unnamed: 21", "Unnamed: 26", "BPM", "WS", "WS/40"], inplace=True)

print("Height and Weight:", height)
print("Birth Date:", birth_date)
print("\nPer Game Stats:\n", players_per_game.iloc[-2])
print("\nAdvanced Stats:\n", players_advanced.iloc[-2])

In [None]:
stats.get('players_per_game').iloc[-2]

In [None]:
get_draft_class(2013)

# SQL functions

In [17]:
def sql_connection():
    """
    Creates a connection to the postgresql server
    """
    
    load_dotenv()

    # Accessing variables from .env file
    db_name = os.getenv('DB_NAME')
    db_user = os.getenv('DB_USER')
    db_pass = os.getenv('DB_PASS')
    db_host = os.getenv('DB_HOST')
    db_port = os.getenv('DB_PORT')
    
    try:
        conn = psycopg2.connect(
            dbname=db_name,
            user=db_user,
            password=db_pass,
            host=db_host,
            port=db_port
        )
        print("Connected to the database.")
        return (conn, conn.cursor())
    except Exception as e:
        print("Unable to connect to the database.")
        print(e)
        return None

def insert_player(conn, cursor, player_tuple):
    """
    Inserts a row into the Player table
    
    cursor : SQL connection cursor
    player_tuple : <tuple> contains player information
    
    Returns the created ID of the player
    """
    insert_statement = """
    INSERT INTO "Players" ("PlayerName", "DOB", "Rating", "DraftYear") VALUES (%s, %s, %s, %s) RETURNING "PlayerID";
    """
    try:
        cursor.execute(insert_statement, player_tuple)
        generated_id = cursor.fetchone()[0]  # Fetch the generated ID
        conn.commit()
        print("Added Player Successfully")
        return generated_id
    except Exception as e:
        print("Unable to insert to the Players Table.")
        print(e)
        return None
    
def insert_stats(conn, cursor, stat_tuple):
    """
    Inserts a row into the PlayerStats table
    
    conn : SQL connection
    cursor : SQL connection cursor
    stat_dict : <tuple> contains stat to be insert into the table
    """
    insert_statement = """
    INSERT INTO "PlayerStats" ("PlayerID", "Games", "MPG", "FG", "FGA", "FGP", "3P", "3PA", "3PP", "FT", "FTA", "FTP", "RPG", "APG", "SPG", "BPG", "TOV", "PF", "PPG", "SOS", "PER", "TS", "eFG", "3PAr", "FTr", "PProd", "ORBP", "DRBP", "TRBP", "ASTP", "STLP", "BLKP", "TOVP", "USG", "OWS", "DWS", "OBPM", "DBPM") VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING "StatID";
    """
    
    print(insert_statement, stat_tuple)
    try:
        cursor.execute(insert_statement, stat_tuple)
        generated_id = cursor.fetchone()[0]  # Fetch the generated ID
        conn.commit()
        print("Added Stat Successfully")
        return generated_id
    except Exception as e:
        print("Unable to insert to the Stats Table.")
        print(e)
        print("Data tuple:", stat_tuple)
        return None

In [13]:
conn, cursor = sql_connection()

Connected to the database.


In [14]:
pid = insert_player(cursor, ("Lebron James", "1988-02-04", 5, 2003)) 

Added Player Successfully


In [15]:
pid

4

In [18]:
stat_tuple = (4, 10, 22.4, 7.2, 10.4, .672, 1.4, 3.3, .314, 3.2, 4.7, .702, 5.2, 7.5, 2.1, 1.3, 2.5, 3.3, 20.1, 8.22, 22.3, .654, .703, .133, .421, 431, 14.5, 13.5, 13.9, 22.4, 10.9, 8.2, 13.4, 23.2, 12.1, 6.4, 13.1, 5.4)
insert_stats(conn, cursor, stat_tuple)


    INSERT INTO "PlayerStats" ("PlayerID", "Games", "MPG", "FG", "FGA", "FGP", "3P", "3PA", "3PP", "FT", "FTA", "FTP", "RPG", "APG", "SPG", "BPG", "TOV", "PF", "PPG", "SOS", "PER", "TS", "eFG", "3PAr", "FTr", "PProd", "ORBP", "DRBP", "TRBP", "ASTP", "STLP", "BLKP", "TOVP", "USG", "OWS", "DWS", "OBPM", "DBPM") VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING "StatID";
     (4, 10, 22.4, 7.2, 10.4, 0.672, 1.4, 3.3, 0.314, 3.2, 4.7, 0.702, 5.2, 7.5, 2.1, 1.3, 2.5, 3.3, 20.1, 8.22, 22.3, 0.654, 0.703, 0.133, 0.421, 431, 14.5, 13.5, 13.9, 22.4, 10.9, 8.2, 13.4, 23.2, 12.1, 6.4, 13.1, 5.4)
hi
Added Stat Successfully


2

In [None]:
def find_names(fullname):
    """
    Takes in a full name and returns a tuple of the first and last name in lower case
    """
    space = fullname.index(" ")
    first = fullname[:space].lower()
    last = fullname[space+1:].lower()
    return (first, last)

def fill_draft_class(conn, cursor, year):
    """
    Fills the database with players from the given draft year
    """
    inserted = []
    player_names = get_draft_class(year)
    for player in player_names:
        first, last = find_names(player)
        height, stats = get_player_stats(first, last)
        birth_date = get_player_information(first, last)
        
        if height is not None and stats is not None and birth_date is not None:
            player_tuple = (player, birth_date, 0, year)
            playerID = insert_player(conn, cursor, player_tuple)
            
            if playerID is not None:
                players_per_game = stats.get('players_per_game')
                players_advanced = stats.get('players_advanced')

                players_per_game.drop(columns=["Season", "School", "Conf", "Class", "GS", "2P", "2PA", "2P%", "ORB", "DRB", "Unnamed: 28"], inplace=True)
                players_advanced.drop(columns=["Season", "School", "Conf", "Class", "G", "GS", "MP", "Unnamed: 21", "Unnamed: 26", "BPM", "WS", "WS/40"], inplace=True)
                
                stat_tuple = tuple([playerID] + players_per_game.iloc[-2].tolist() + players_advanced.iloc[-2].tolist())
                insert_stats(conn, cursor, stat_tuple)
                
                inserted.append(player)
                
    return inserted

def fill_drafts(conn, cursor, start, end):
    """
    Fills the database with information on players from draft classes start-end
    """
    players = []
    for draft_year in range(start, end+1):
        draft_players = fill_draft_class(conn, cursor, draft_year)
        players += draft_players
    
    return players