In [24]:
import os
import unicodedata
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [25]:
# Season whose game log is downloaded.
year = 2025
# URL template for a player's game-log page, filled in with
# url_start.format(name=<player id>, logYear=<season>).
# {name[0]} picks the first letter of the player id for the directory part, so
# the template no longer only works for ids that start with "j".
# The "01" suffix is still fixed, so only the first player registered under a
# given id stem can be looked up.
url_start = 'https://www.basketball-reference.com/players/{name[0]}/{name}01/gamelog/{logYear}'

In [28]:
# Output folders: downloaded game-log pages and exported DataFrames.
main_folder = "GameLogs"
DataFrame_folder = "DataFrames"

# exist_ok=True makes the call a no-op when the folder is already there, so no
# separate existence check is needed and re-running the cell is safe.
for folder in (main_folder, DataFrame_folder):
    os.makedirs(folder, exist_ok=True)

In [17]:
def _ascii_letters(text):
    """Return ``text`` lowercased, with accents removed and non-letters dropped."""
    # NFKD splits an accented letter into base letter + combining mark; the
    # ASCII round-trip then keeps the base letter and discards the mark.
    decomposed = unicodedata.normalize("NFKD", text)
    ascii_only = decomposed.encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in ascii_only if ch.isalpha()).lower()


# get full name from input
full_name = input("Enter players name (firstname lastname): ").strip()
# split the name into first name and last name on whitespace
split_name = full_name.split()
if len(split_name) != 2:  # anything other than exactly two words is rejected
    raise ValueError("Please enter both first and last name. Make sure to include any '-' !")

first_name = split_name[0]
# Slicing already copes with names shorter than five characters.
last_name = split_name[1][:5]

# Build the id from letters only, so punctuation and accents typed by the user
# ("O'Neal", "Kidd-Gilchrist", "Jokić") do not end up in the URL.
clean_first = _ascii_letters(split_name[0])
clean_last = _ascii_letters(split_name[1])
if not clean_first or not clean_last:
    raise ValueError("Both first and last name must contain at least one letter.")

# formats the name so the website can use it: first five letters of the last
# name + first two letters of the first name. Lebron James -> jamesle
player_name = clean_last[:5] + clean_first[:2]

Enter players name (firstname lastname):  nikola jokic


In [18]:
# Download the player's game-log page for `year` and create/overwrite the
# matching HTML file in the GameLogs folder.
url = url_start.format(name=player_name, logYear=year)
try:
    # requests applies no timeout unless one is passed, so without this the
    # Timeout handler below would rarely, if ever, trigger.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
except requests.exceptions.Timeout as e:
    print("Request timed out:", e)
    # Re-raise so the cell stops here instead of writing a missing, stale or
    # error-page `response` to disk.
    raise
except requests.exceptions.RequestException as e:
    print("An error occurred:", e)
    raise

# Only reached when the request succeeded.
file_path = f"GameLogs/{player_name}{year}GameLog.html"
with open(file_path, "w", encoding="utf-8") as f:
    f.write(response.text)

In [19]:
# Load the saved game-log page; the file is only needed for the read itself.
with open(f"GameLogs/{player_name}{year}GameLog.html", encoding="utf-8") as f:
    page = f.read()

soup = BeautifulSoup(page, "html.parser")
stats_table = soup.find(id="pgl_basic")
if stats_table is None:
    # soup.find returns None when nothing matches; fail here with a clear
    # message rather than letting pandas try to parse the string "None".
    raise ValueError(
        f"No element with id 'pgl_basic' found in GameLogs/{player_name}{year}GameLog.html; "
        "the download may have failed or the page layout may have changed."
    )

# read_html returns a list of tables; the selected element yields the first one.
stats_df = pd.read_html(StringIO(str(stats_table)))[0]
df = stats_df

In [20]:
# Drop the fluff rows sitting at index labels 20, 41, 62, 83 and 104
# (every 21st row), skipping any label the frame does not contain.
fluff_labels = range(20, 105, 21)
labels_present = [label for label in fluff_labels if label in df.index]
if labels_present:
    df = df.drop(index=labels_present)

##### DATAFRAME IS READY
Now let's clean up the column names and nulls.

In [21]:
# drop the 'Rk' column so we can use our index col instead
if 'Rk' in df.columns:
    df = df.drop(columns=['Rk'])

# Subset containing only rows with a game number. Rows already labelled "DNP"
# are excluded as well, so re-running this cell after the fill below still
# yields the same subset.
played_mask = df['G'].notna() & df['G'].ne('DNP')
game_only_df = df[played_mask].copy()

# fill null Game played values with "DNP"; returns a new frame instead of
# mutating the one earlier cells may still reference
df = df.fillna({'G': 'DNP'})

In [22]:
# Give the two unnamed columns meaningful names.
df = df.rename(columns={'Unnamed: 5': 'Location', 'Unnamed: 7': 'WLSpread'})

# In the Location column a blank cell becomes "Home" and "@" becomes "Away".
# The replacement is limited to this column so an "@" anywhere else in the
# frame is left untouched; a missing column raises KeyError instead of being
# silently skipped.
df['Location'] = df['Location'].fillna('Home').replace('@', 'Away')

In [31]:
# Preview the first rows only; displaying the whole frame bloats the notebook.
df.head(10)

Unnamed: 0,G,Date,Age,Tm,Location,Opp,WLSpread,GS,MP,FG,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,2024-10-24,29-248,DEN,Home,OKC,L (-15),1,35:12,6,...,8,12,13,2,1,3,3,16,21.7,-9
1,2,2024-10-26,29-250,DEN,Home,LAC,L (-5),1,36:46,14,...,6,9,4,2,1,5,2,41,31.2,+7
2,3,2024-10-28,29-252,DEN,Away,TOR,W (+2),1,43:41,18,...,7,10,4,1,2,3,2,40,33.5,+9
3,4,2024-10-29,29-253,DEN,Away,BRK,W (+5),1,40:31,9,...,12,18,16,0,1,1,3,29,38.1,+8
4,5,2024-11-01,29-256,DEN,Away,MIN,L (-3),1,39:42,8,...,7,9,13,3,1,3,1,26,30.1,+6
5,6,2024-11-02,29-257,DEN,Home,UTA,W (+26),1,29:42,10,...,11,16,9,1,0,5,1,27,27.1,+31
6,7,2024-11-04,29-259,DEN,Home,TOR,W (+2),1,38:12,10,...,9,14,13,1,2,7,2,28,26.1,-1
7,8,2024-11-06,29-261,DEN,Home,OKC,W (+2),1,39:29,9,...,13,20,16,2,2,5,2,23,29.4,+8
8,9,2024-11-08,29-263,DEN,Home,MIA,W (+13),1,40:00,11,...,9,11,14,2,0,5,2,30,35.4,+26
9,10,2024-11-10,29-265,DEN,Home,DAL,W (+2),1,38:01,13,...,10,18,15,3,0,4,2,37,44.8,+13


In [32]:
# Render the cleaned frame as an HTML table (index column omitted) and save it
# under DataFrames/, creating or overwriting the file.
html_table = df.to_html(index=False)
export_path = f"DataFrames/{player_name}{year}DataFrame.html"
with open(export_path, "w+", encoding="utf-8") as out_file:
    out_file.write(html_table)