In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO

In [2]:
def get_player_id(player_name):
    first_name, last_name = player_name.split(" ")
    last_name = last_name[0:5].lower()
    first_name = first_name[0:2].lower()

    formatted_name = last_name + first_name + '01'
    url = f"https://www.basketball-reference.com/players/d/{formatted_name}.html"
    return url

player_name = "Kevin Durant"
name = get_player_id(player_name)
print(name)

https://www.basketball-reference.com/players/d/duranke01.html


In [3]:
def scrape_player_stats(player_href):
    """
    Scrapes detailed player statistics from their profile page.
    Args:
        player_href (str): URL of the player's profile page.    
    Returns:
        DataFrame: A pandas DataFrame containing the player's statistics.
    """
    response = requests.get(player_href)  # Send a GET request to the player's profile page
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')  # Parse the HTML content
        table = soup.find('table', id='per_game_stats')  # Find the table with per-game statistics
        if table:
            df = pd.read_html(str(table))[0]  # Read the table into a DataFrame
            df = df[df['Season'].notna()]  # Remove rows without season data
            df['Season'] = df['Season'].str.split('-').str[0]  # Keep only the starting year of the season
            df = df[df['Age'].notnull()]  # Include rows where 'Age' is not null - this elimates the season totals
            return df
    return pd.DataFrame()  # Return an empty DataFrame if the page couldn't be loaded or the table wasn't found

In [4]:
def player_headshot(player_href):
    response = requests.get(player_href)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser') 

        media_item = soup.find('div', class_='media-item')
        if media_item:
            img_tag = media_item.find('img')
            if img_tag and 'src' in img_tag.attrs:
                img_url = img_tag['src']
                
                img_response = requests.get(img_url)
                if img_response.status_code == 200:
                    image = Image.open(BytesIO(img_response.content))
                    return image 
    return None

player_href = "https://www.basketball-reference.com/players/d/duranke01.html"

# Fetch the headshot image
headshot_image = player_headshot(player_href)
headshot_image

In [None]:
# Creating radar chart: Offense
# required stats: Points, 3pt attempts, true shooting %, screen assists + offensive rebounds, usage %, assists - turnovers
data = scrape_player_stats(name)
data.head()

Unnamed: 0,Season,Age,Team,Lg,Pos,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Awards
0,2007,19,SEA,NBA,SG,80,80,34.6,7.3,17.1,...,0.9,3.5,4.4,2.4,1.0,0.9,2.9,1.5,20.3,ROY-1
1,2008,20,OKC,NBA,SF,74,74,39.0,8.9,18.8,...,1.0,5.5,6.5,2.8,1.3,0.7,3.0,1.8,25.3,
2,2009,21,OKC,NBA,SF,82,82,39.5,9.7,20.3,...,1.3,6.3,7.6,2.8,1.4,1.0,3.3,2.1,30.1,"MVP-2,AS,NBA1"
3,2010,22,OKC,NBA,SF,78,78,38.9,9.1,19.7,...,0.7,6.1,6.8,2.7,1.1,1.0,2.8,2.0,27.7,"MVP-5,AS,NBA1"
4,2011,23,OKC,NBA,SF,66,66,38.6,9.7,19.7,...,0.6,7.4,8.0,3.5,1.3,1.2,3.8,2.0,28.0,"MVP-2,AS,NBA1"


In [17]:
df = data.iloc[[-1]]
df = df[['PTS', '3PA', 'FGA', 'FTA', 'ORB', 'AST', 'TOV']]
df['AST'] = pd.to_numeric(df['AST'], errors='coerce')
df['TOV'] = pd.to_numeric(df['TOV'], errors='coerce')
df['PTS'] = pd.to_numeric(df['PTS'], errors='coerce')
df['FGA'] = pd.to_numeric(df['FGA'], errors='coerce')
df['FTA'] = pd.to_numeric(df['FTA'], errors='coerce')
df

Unnamed: 0,PTS,3PA,FGA,FTA,ORB,AST,TOV
26,27.1,5.5,18.9,6.0,0.5,4.7,3.2


In [26]:
df['AST/TOV'] = df['AST']/df['TOV']
df['TS%'] = df['PTS']/(2*(df['FGA'] + 0.4*df['FTA']))
df.drop(columns=['FGA', 'FTA', 'AST', 'TOV'], inplace=True)
df_melted = df.melt(var_name='Category', value_name='Value')
df_melted

Unnamed: 0,Category,Value
0,PTS,27.1
1,3PA,5.5
2,ORB,0.5
3,AST/TOV,1.46875
4,TS%,0.63615


In [27]:
import plotly.express as px
fig = px.line_polar(df_melted, r='Value', theta='Category', line_close=True)
fig.update_traces(fill='toself')
fig.show()

In [None]:
#need to obtain league averages for scale comparison