In [1]:
import requests
from bs4 import BeautifulSoup
import time
import random
import pandas as pd
import numpy as np


    


def get_pages():
    """Scrape RealGM's 2023 regular-season per-game averages tables.

    Requests result pages 1-4 of the "Qualified / points / desc" listing,
    takes the column names from the header of the first page and collects one
    record per body row of each page's ``data-tablesaw-mode="swipe"`` table.
    The first column of the HTML table is dropped from both the header and
    every row.

    Returns:
        pandas.DataFrame: one row per scraped table row with a fresh
        ``0..n-1`` index. Every value is the raw cell text (a string);
        numeric conversion is left to the caller.

    Raises:
        requests.HTTPError: if a page answers with an HTTP error status.
        requests.Timeout: if a page does not answer within the timeout.
        ValueError: if the first page does not contain the stats table.
    """
    urls = [f"https://basketball.realgm.com/nba/stats/2023/Averages/Qualified/points/All/desc/{i}/Regular_Season" for i in range(1,5)]
    table_attrs = {"data-tablesaw-mode": "swipe"}

    stats = None              # column names, read once from the first page
    player_stats_list = []    # one list of cell strings per table row

    for url in urls:
        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status stops an error page from being parsed as data.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Pause after every request so consecutive requests stay throttled.
        time.sleep(random.uniform(1,3))
        soup = BeautifulSoup(response.content, 'html.parser')

        table = soup.find('table', table_attrs)
        if table is None:
            if stats is None:
                raise ValueError(f"No stats table found at {url}")
            # A later page without a table: stop paging and keep what we have.
            break

        if stats is None:
            stats = [stat.text for stat in table.find('thead').find_all('th')][1:]

        for row in table.find('tbody').find_all('tr'):
            # Read cell by cell so an empty cell stays an empty string in its
            # own column instead of shifting the remaining values left.
            cells = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
            player_stats_list.append(cells[1:])

    # Build the frame once; this also avoids repeated index labels per page.
    return pd.DataFrame(player_stats_list, columns=stats)

        


# Scrape once; `data_df` keeps the result exactly as returned by get_pages().
data_df = get_pages()

# `data` is the working copy, so the conversions below never touch `data_df`.
data = data_df.copy()

# Every column from the third one onward is parsed to float, value by value.
numeric_columns = data.columns[2:]
for column in numeric_columns:
    data[column] = list(map(float, data[column]))

data

def get_positions(stats=None):
    """Add a 'Position' column to a stats frame from RealGM's player listing.

    Downloads https://basketball.realgm.com/nba/players, builds a
    ``{player name: position}`` lookup from the body rows of the first
    ``data-tablesaw-mode="swipe"`` table, and stores the looked-up position for
    each entry of the frame's 'Player' column in a 'Position' column. Players
    that do not appear in the listing get ``None``.

    Args:
        stats: DataFrame with a 'Player' column, modified in place. Defaults
            to the module-level ``data`` frame, which is what a call without
            arguments has always operated on.

    Returns:
        None. The result is the new/overwritten 'Position' column on ``stats``.

    Raises:
        requests.HTTPError: if the listing answers with an HTTP error status.
        requests.Timeout: if the listing does not answer within the timeout.
        ValueError: if the page does not contain the expected table.
    """
    if stats is None:
        # Resolved at call time so the zero-argument call keeps working.
        stats = data

    url = "https://basketball.realgm.com/nba/players"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Pause after the request so back-to-back scraping calls stay throttled.
    time.sleep(random.uniform(1,3))
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find("table", {"data-tablesaw-mode": "swipe"})
    table_body = table.find("tbody") if table is not None else None
    if table_body is None:
        raise ValueError(f"No player table found at {url}")

    player_positions = {}
    for row in table_body.find_all("tr"):
        name_cell = row.find("td", {"data-th": "Player"})
        position_cell = row.find("td", {"data-th": "Pos"})
        if name_cell is None or position_cell is None:
            # Skip rows that lack either cell instead of crashing on `.text`.
            continue
        player_positions[name_cell.text] = position_cell.text

    # dict.get returns None for players missing from the listing.
    stats['Position'] = [player_positions.get(player) for player in stats['Player']]

# Side effect only: writes a 'Position' column onto the module-level `data` frame.
get_positions()

In [2]:
# Work on a copy so `data` keeps the position labels as scraped.
df = data.copy()

# Collapse the detailed position labels into three buckets. No bucket name
# appears among the raw labels, so the order of the assignments is irrelevant.
position_buckets = (
    ('F', ['SF', 'PF', 'GF']),
    ('G', ['SG', 'PG']),
    ('C', ['FC']),
)
for bucket, raw_labels in position_buckets:
    df.loc[df['Position'].isin(raw_labels), 'Position'] = bucket

df

Unnamed: 0,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,...,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF,Position
0,Luka Doncic,DAL,12.0,36.9,34.3,11.7,23.5,0.496,2.3,8.2,...,0.748,1.0,7.7,8.7,8.1,2.0,0.7,3.3,3.3,F
1,Joel Embiid,PHI,10.0,35.2,32.3,10.8,20.1,0.537,0.9,3.8,...,0.831,1.6,8.5,10.1,4.1,0.6,1.8,4.2,3.4,C
2,Jayson Tatum,BOS,14.0,37.5,31.9,10.3,20.9,0.491,3.5,9.5,...,0.866,0.8,6.6,7.4,3.9,0.9,1.4,2.4,2.2,F
3,Donovan Mitchell,CLE,11.0,39.1,31.6,11.2,21.8,0.513,4.2,9.6,...,0.875,1.0,3.7,4.7,6.1,1.4,0.6,3.4,2.9,G
4,Shai Gilgeous-Alexander,OKC,13.0,35.9,31.5,11.6,21.5,0.539,1.1,2.8,...,0.940,0.8,3.5,4.4,5.8,2.1,1.5,3.4,2.5,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,Josh Okogie,PHX,12.0,5.3,1.0,0.3,1.2,0.286,0.0,0.6,...,0.667,0.3,0.9,1.2,0.1,0.3,0.2,0.4,0.7,G
3,"Paul Reed, Jr.",PHI,11.0,7.2,0.5,0.3,1.2,0.231,0.0,0.2,...,0.000,0.5,1.2,1.6,0.1,1.0,0.5,0.3,1.8,F
4,KZ Okpala,SAC,6.0,6.4,0.5,0.2,0.8,0.200,0.2,0.7,...,0.000,0.3,0.7,1.0,0.3,0.0,0.0,0.0,0.7,G
5,Jaxson Hayes,NOP,5.0,7.4,0.4,0.2,1.0,0.200,0.0,0.4,...,0.000,0.2,0.8,1.0,0.6,0.2,0.2,0.2,1.0,F


In [3]:
from sklearn.decomposition import PCA
import plotly.express as px
import plotly.graph_objects as go


# Columns fed to PCA in graph_team: every column of `df` except the first two
# and the last one (selected by position, so it depends on the column order).
features = df.columns[2:-1]

def graph_team(team=None):
    """Show a 3-D scatter plot of players projected onto three PCA components.

    Reads the module-level ``df`` and ``features``; neither is modified. The
    PCA is fitted on the ``features`` columns of the selected rows only, and
    the points are coloured by the 'Position' column.

    Args:
        team: value to match against the 'Team' column of ``df``. ``None``
            (the default) plots every row of ``df``.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        ValueError: if fewer than three rows are selected, since three
            components cannot be fitted on fewer than three samples.
    """
    n_components = 3

    if team is None:
        team_data = df.copy()
    else:
        # Copy the filtered rows: columns are added below, and writing them
        # onto a slice of `df` triggers pandas' SettingWithCopyWarning.
        team_data = df.loc[df['Team'] == team].copy()

    if len(team_data) < n_components:
        raise ValueError(
            f"Need at least {n_components} players to fit the PCA, "
            f"got {len(team_data)} for team={team!r}"
        )

    X = np.asarray(team_data[features])
    components = PCA(n_components=n_components).fit_transform(X)

    # Store each component under the column names '0', '1', '2' used below.
    for i in range(components.shape[1]):
        team_data[str(i)] = components[:, i]

    fig = px.scatter_3d(team_data, x='0', y='1', z='2',
                        hover_name='Player',
                        color='Position',
                        # Hide all three raw component values in the tooltip
                        # (component '2' used to be left visible).
                        hover_data={'0': False, '1': False, '2': False, 'Team': False},
                        title=team if team is not None else "All teams")

    fig.update_traces(marker=dict(size=5))

    fig.show()
    

# Draw the league-wide plot once. The previous loop over team codes never
# passed `team` to graph_team() and stopped after its first iteration, so this
# is the one call it actually made; use graph_team(<team code>) for one team.
if not df.empty:
    graph_team()


# TODO: cluster before PCA, then graph
# TODO: graph each team, graph each position
# TODO: trade analyzer