In [119]:
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import pandas as pd
from io import StringIO
import os

In [120]:
# Season to scrape and the URL template for a player's game-log page.
# The template takes ONE positional argument: the player id stem
# (e.g. "murraja"). `{0[0]}` picks the stem's first character, which is the
# letter directory the player page lives under, and `{0}` inserts the full
# stem. The season is taken from `year`, so changing `year` alone is enough
# to point the notebook at another season.
year = 2025
url_start = (
    'https://www.basketball-reference.com/players/{0[0]}/{0}01/gamelog/'
    + str(year)
)

In [121]:
# Make sure the folder that stores the downloaded game logs exists.
# os.makedirs creates the folder (and any missing parents); exist_ok=True
# turns the call into a no-op when the folder is already there, so this cell
# can be re-run safely.
main_folder = "GameLogs"
os.makedirs(main_folder, exist_ok=True)

In [122]:
# Ask for the player's name and turn it into the id stem used in the URL.
full_name = input("Enter players name (firstname lastname): ").strip()
# Split on whitespace; exactly two tokens (first name, last name) are required.
split_name = full_name.split()
if len(split_name) != 2:  # anything other than exactly two tokens is rejected
    raise ValueError("Please enter both first and last name. Make sure to include any '-' !")

first_name = split_name[0]
last_name = split_name[1]

# Keep letters only before slicing, so punctuation typed as part of a name
# (O'Neal, Kidd-Gilchrist, P.J.) does not end up inside the id stem.
first_letters = "".join(ch for ch in first_name if ch.isalpha())
last_letters = "".join(ch for ch in last_name if ch.isalpha())
if not first_letters or not last_letters:
    raise ValueError("First and last name must each contain at least one letter.")

# Id stem = first 5 letters of the last name + first 2 letters of the first
# name, lower-cased. Lebron James -> jamesle
player_name = last_letters[:5].lower() + first_letters[:2].lower()

Enter players name (firstname lastname):  Jamal Murray


In [123]:
# Download the player's game-log page and cache the HTML inside main_folder,
# overwriting any earlier copy for the same player.
url = url_start.format(player_name)
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Stop on a 4xx/5xx answer (unknown player id, rate limiting, ...) instead of
# caching an error page that the parsing cell would then fail on.
response.raise_for_status()
# One cached file per player id stem.
file_path = f"{main_folder}/{player_name}GameLog.html"
with open(file_path, "w", encoding="utf-8") as f:
    f.write(response.text)

In [124]:
# Load the cached HTML and pull the game-log table into a DataFrame.
with open(f"GameLogs/{player_name}GameLog.html", encoding="utf-8") as f:
    page = f.read()

soup = BeautifulSoup(page, "html.parser")
stats_table = soup.find(id="pgl_basic")
if stats_table is None:
    # Without this check str(None) == "None" would be handed to read_html,
    # which then fails with an error that hides the real cause.
    raise ValueError(
        f"No element with id 'pgl_basic' found in GameLogs/{player_name}GameLog.html; "
        "the cached page may be an error page or use a different layout."
    )
# read_html returns a list of tables; only the one table was passed in.
stats_df = pd.read_html(StringIO(str(stats_table)))[0]
# `df` is the working name used by the cleaning cells that follow.
df = stats_df

In [125]:
# Drop the filler rows that sit inside the table body. These are presumably
# the header row repeated every 20 games (the labels previously dropped by
# hand were 20, 41, 62, 83). Such a row carries the column labels as its cell
# values, so it is detected by its first cell being equal to the first
# column's own label instead of by hard-coded positions.
first_col = df.columns[0]
is_header_row = (df[first_col].astype(str) == str(first_col)).to_numpy()
# drop() by label keeps the remaining index labels untouched, as before.
df = df.drop(index=df.index[is_header_row])

##### DATAFRAME IS READY
Now let's clean up the column names and null values.

In [126]:
# The site's running rank column is redundant with the DataFrame index,
# so remove it when it is present.
rank_col = 'Rk'
if rank_col in df.columns:
    df = df.drop(columns=[rank_col])

# Subset holding only rows with a game number, taken BEFORE the missing
# values in 'G' are replaced below.
game_only_df = df.dropna(subset=['G'])
# In the full frame, rows without a game number are labelled "DNP".
df.fillna(value={"G": "DNP"}, inplace=True)

In [127]:
# Give the two unlabeled columns meaningful names in a single rename call.
df.rename(
    columns={'Unnamed: 5': 'Location', 'Unnamed: 7': 'WLSpread'},
    inplace=True,
)
# A missing Location becomes 'Home'; every '@' value in the frame becomes 'Away'.
df.fillna(value={'Location': 'Home'}, inplace=True)
df.replace(to_replace={'@': 'Away'}, inplace=True)

In [63]:
# Start a fresh registry of cleaned game logs, keyed by player id stem.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours; re-run the notebook top to bottom to confirm it still works.
dfs = {player_name: df}

In [128]:
# Show the cleaned game log as this cell's output.
df

Unnamed: 0,G,Date,Age,Tm,Location,Opp,WLSpread,GS,MP,FG,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,2024-10-24,27-244,DEN,Home,OKC,L (-15),1,38:27,4,...,4,6,4,2,0,3,2,12,8.1,-2
1,2,2024-10-26,27-246,DEN,Home,LAC,L (-5),1,36:51,7,...,2,2,5,2,0,2,1,22,18.7,+6
2,3,2024-10-28,27-248,DEN,Away,TOR,W (+2),1,39:43,6,...,8,9,7,1,0,0,2,17,13.6,+8
3,4,2024-10-29,27-249,DEN,Away,BRK,W (+5),1,36:44,8,...,3,3,3,0,0,1,3,24,13.9,+3
4,5,2024-11-01,27-252,DEN,Away,MIN,L (-3),1,22:16,2,...,2,2,3,1,0,0,0,6,5.6,+8
5,DNP,2024-11-02,27-253,DEN,Home,UTA,W (+26),Inactive,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
6,DNP,2024-11-04,27-255,DEN,Home,TOR,W (+2),Inactive,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
7,DNP,2024-11-06,27-257,DEN,Home,OKC,W (+2),Inactive,Inactive,Inactive,...,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive,Inactive
8,6,2024-11-08,27-259,DEN,Home,MIA,W (+13),1,34:55,9,...,4,4,6,0,0,4,0,28,21.1,+28
9,7,2024-11-10,27-261,DEN,Home,DAL,W (+2),1,38:07,7,...,1,1,6,0,0,2,3,18,9.0,+15
