In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from io import StringIO
import os

In [2]:
# Season to fetch. The 2025 game log shown further down covers games that
# start in October 2024, i.e. the season is keyed by the year it ends in.
year = 2025

# URL template for a player's game-log page. It is filled in later with
#   name    - the player slug (e.g. "edwaran"), and
#   logYear - the season year.
# The directory segment after /players/ must be the first letter of the slug,
# so it is derived from `name` itself: the `.1` precision in `{name:.1}`
# truncates the string to its first character. (Previously this was
# hard-coded to "j", which only worked for last names starting with J.)
# NOTE(review): the "01" suffix is hard-coded; players whose slug collides
# with an earlier player presumably use 02, 03, ... - confirm before relying
# on this for arbitrary players.
url_start = 'https://www.basketball-reference.com/players/{name:.1}/{name}01/gamelog/{logYear}'

In [3]:
# Output folders: raw downloaded pages go in GameLogs/, cleaned tables that
# are exported as HTML go in DataFrames/.
main_folder = "GameLogs"
DataFrame_folder = "DataFrames"

# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first. It still raises if the path exists
# but is not a directory, which is a problem worth surfacing.
for folder in (main_folder, DataFrame_folder):
    os.makedirs(folder, exist_ok=True)

In [4]:
# Ask for the player's name and build the slug used in the game-log URL:
# up to five letters of the last name followed by up to two letters of the
# first name, all lowercase (e.g. "Lebron James" -> "jamesle").
full_name = input("Enter players name (firstname lastname): ").strip()

# Only exactly two whitespace-separated parts are supported; anything else
# (a single name, or extra parts such as a suffix) is rejected.
split_name = full_name.split()
if len(split_name) != 2:
    raise ValueError("Please enter both first and last name. Make sure to include any '-' !")

first_name = split_name[0]
last_name = split_name[1]

# The slug contains letters only, so punctuation typed as part of the name
# (apostrophes, hyphens, periods) is removed BEFORE truncating. Otherwise
# "O'Neal" would yield "o'nea" instead of "oneal".
# Accented letters are kept as typed; they are not transliterated here.
first_letters = "".join(ch for ch in first_name if ch.isalpha())
last_letters = "".join(ch for ch in last_name if ch.isalpha())
if not first_letters or not last_letters:
    raise ValueError("First and last name must each contain at least one letter.")

# Slicing already handles names shorter than the limit, so no length check
# is needed.
last_name = last_letters[:5]
player_name = last_name.lower() + first_letters[:2].lower()

Enter players name (firstname lastname):  Anthony Edwards


In [5]:
# Download the player's game-log page for `year` and cache the raw HTML in
# the GameLogs folder (the file is created or overwritten).
url = url_start.format(name = player_name, logYear = year)
try:
    # Without an explicit timeout, requests can wait indefinitely on a
    # stalled connection, so the Timeout handler below would rarely fire.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
except requests.exceptions.Timeout as e:
    print("Request timed out:", e)
    # Re-raise: continuing would hit a NameError on `response` below.
    raise
except requests.exceptions.RequestException as e:
    print("An error occurred:", e)
    # Re-raise: otherwise an HTTP error page would be written to the cache
    # file and the parsing cell would fail later with a confusing message.
    raise

# Only reached when the request succeeded.
file_path = f"GameLogs/{player_name}{year}GameLog.html"
with open(file_path, "w", encoding="utf-8") as f:
    f.write(response.text)

In [6]:
# Load the cached game-log page and parse its stats table into a DataFrame.
with open(f"GameLogs/{player_name}{year}GameLog.html", encoding="utf-8") as f:
    page = f.read()

# The file is only needed for the read above; parsing happens after it closes.
soup = BeautifulSoup(page, "html.parser")
stats_table = soup.find(id="pgl_basic")
if stats_table is None:
    # Without this check, str(None) == "None" is handed to read_html, which
    # fails with an unrelated-looking "no tables found" error.
    raise ValueError(
        f"No table with id 'pgl_basic' found in GameLogs/{player_name}{year}GameLog.html; "
        "check the player name/year or whether the page layout changed."
    )

# read_html returns a list of tables; the fragment contains exactly the one
# table selected above. StringIO is used because the HTML is passed as text.
stats_df = pd.read_html(StringIO(str(stats_table)))[0]

# Work on a copy so the cleaning cells never modify the raw parsed table.
df = stats_df.copy()

In [7]:
# Drop the filler rows that are interleaved with the game rows.
# NOTE(review): these are presumably the column-header row repeated inside
# the table body, so their cells equal the column labels - confirm against
# the raw page.
# They are detected by content rather than by hard-coded index labels
# (20, 41, 62, 83, 104), so this works however many games the table has and
# cannot drop a real game that happens to sit at one of those labels.
named_cols = [col for col in df.columns if not str(col).startswith("Unnamed")]
if named_cols:
    labels = pd.Series([str(col) for col in named_cols], index=named_cols)
    # True where a cell's text equals its own column label.
    matches_label = df[named_cols].astype(str).eq(labels, axis="columns")
    # A real game row should match few or none of its labels, so "more than
    # half" is a safe threshold that also tolerates a few columns whose
    # labels were altered while parsing.
    is_repeated_header = matches_label.mean(axis=1) > 0.5
    header_rows = is_repeated_header[is_repeated_header].index
    # Original index labels of the remaining rows are kept (no reset).
    df = df.drop(index=header_rows)

##### DATAFRAME IS READY
Now let's clean up the column names and nulls.

In [8]:
# Drop the 'Rk' column so the DataFrame's own index is used instead.
if 'Rk' in df.columns:
    df = df.drop(columns=['Rk'])

# Subset containing only rows with a non-null 'G' (games actually played).
# It must be taken BEFORE the fill below, which removes those nulls.
# It is not used by the other cells visible here.
game_only_df = df.dropna(subset=['G'])

# Mark rows without a game number as "DNP". This is reassigned rather than
# done with inplace=True so that re-running the cell is safe and no other
# variable pointing at the same frame is modified as a side effect.
df = df.fillna({"G": "DNP"})

In [9]:
# Give the two unlabeled columns proper names.
df = df.rename(columns={'Unnamed: 5': 'Location', 'Unnamed: 7': 'WLSpread'})

# Location: an empty cell becomes "Home" and "@" becomes "Away".
# The replacement is limited to this column so an "@" elsewhere in the table
# is never rewritten, and it is skipped if the column is absent.
if 'Location' in df.columns:
    df = df.assign(Location=df['Location'].fillna('Home').replace({'@': 'Away'}))

# Remove the columns that are not needed downstream. errors='ignore' keeps
# the cell re-runnable: on a second run these columns are already gone, and
# a plain drop would raise KeyError.
df = df.drop(columns = ['Tm','WLSpread','GS','MP','PF','GmSc', '+/-','Age','G'], errors='ignore')

In [13]:
# Display the cleaned game log (long frames are truncated in the rendered output).
df

Unnamed: 0,Date,Location,Opp,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PTS
0,2024-10-22,Away,LAL,10,25,.400,5,13,.385,2,3,.667,0,6,6,3,0,0,4,27
1,2024-10-24,Away,SAC,10,24,.417,5,15,.333,7,7,1.000,1,6,7,4,1,0,3,32
2,2024-10-26,Home,TOR,9,21,.429,5,12,.417,1,3,.333,0,6,6,4,1,1,4,24
3,2024-10-29,Home,DAL,12,20,.600,7,13,.538,6,12,.500,0,6,6,3,0,1,5,37
4,2024-11-01,Home,DEN,11,23,.478,7,14,.500,0,0,,0,4,4,4,1,1,2,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,2025-02-28,Away,UTA,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team,Not With Team
64,2025-03-02,Away,PHO,12,22,.545,6,14,.429,14,14,1.000,0,5,5,7,2,0,1,44
65,2025-03-04,Home,PHI,5,11,.455,2,6,.333,6,7,.857,0,5,5,1,0,0,2,18
66,2025-03-05,Away,CHO,9,20,.450,6,13,.462,5,8,.625,0,3,3,6,2,1,0,29


In [32]:
# Export the cleaned game log as an HTML table (index column omitted) into
# the DataFrames folder, creating or overwriting the file.
output_path = f"DataFrames/{player_name}{year}DataFrame.html"
with open(output_path, mode="w+", encoding="utf-8") as out_file:
    html_table = df.to_html(index = False)
    out_file.write(html_table)