In [1]:
import re
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import json

In [2]:
# URL della pagina
url = 'https://www.nba.com/game/hou-vs-orl-0022300066/play-by-play?period=All'

# Configura Selenium
options = Options()
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

In [3]:
# Avvia il WebDriver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

driver.get(url)

# Attendi qualche secondo per il caricamento della pagina
time.sleep(1)

# Ottieni l'HTML della pagina
html_content = driver.page_source
soup = BeautifulSoup(html_content, 'html.parser')

In [4]:
# Find all play event articles
plays = soup.find_all('article', class_='GamePlayByPlayRow_article__asoO2')
plays

[<article class="GamePlayByPlayRow_article__asoO2" data-is-home-team="true"><div class="GamePlayByPlayRow_row__2iX_w" data-is-home-team="true"><p class="GamePlayByPlayRow_clock__o_PxT" data-is-home-team="true"><span class="GamePlayByPlayRow_clockElement__LfzHV">12:00</span></p><div class="GamePlayByPlayRow_descBlock__By8pv" data-content="4" data-id="nba:games:game-details-play-by-play:play" data-is-home-team="true" data-pos="2/486" data-premium="false" data-section="Play-By-Play" data-text="Jump Ball Carter Jr. vs. Sengun: Tip to Banchero" data-track="video" href="/video/4" title="Watch Video"><a class="StatEventLink_sel__pAwmA GamePlayByPlayRow_statEvent__Ru8Pr" data-id="nba:games:game-details-box-score:video-box-score" data-pos="" data-premium="false" data-track="video" href="/stats/events/?CFID=&amp;CFPARAMS=&amp;GameEventID=4&amp;GameID=0022300066&amp;Season=2023-24&amp;flag=1&amp;title=Jump%20Ball%20Carter%20Jr.%20vs.%20Sengun:%20Tip%20to%20Banchero"><span class="GamePlayByPlayRow

In [14]:
play_data = []
    
for play in plays:
    print(play)

    # Extract clock time
    time_element = play.find('span', class_='GamePlayByPlayRow_clockElement__LfzHV')
    time = time_element.text if time_element else None
    
    # Extract team event
    article_element = play.find('div', class_='GamePlayByPlayRow_row__2iX_w')
    is_home_team = True if article_element.get('data-is-home-team') == 'true' else False

    # Extract description text
    desc_element = play.find('span', class_='GamePlayByPlayRow_desc__XLzrU')
    description = desc_element.text if desc_element else None

    # Extract score if available
    score_element = play.find('span', class_='GamePlayByPlayRow_scoring__Ax2hd')
        
    # Initialize data dictionary with defaults
    data = {
        'time': time,
        'is_home_team': is_home_team,
        'action_type': None,
        'player': None,
        'made': None,
        'shot_type': None,
        'player_points': None,
        'assist_player': None,
        'assist_count': None,
        'score': None
    }

    if 'jump ball' in description.lower():
        data['action_type'] = 'Jump Ball'

    elif 'block' in description.lower():
        data['action_type'] = 'Block'

    elif 'steal' in description.lower():
        data['action_type'] = 'Steal'

    elif 'free throw' in description.lower():
        data['action_type'] = 'Free Throw'

    elif 'turnover' in description.lower():
        data['action_type'] = 'Turnover'

    elif 'foul' in description.lower():
        data['action_type'] = 'Foul'

    elif 'sub' in description.lower():
        data['action_type'] = 'SUB'

    elif 'timeout' in description.lower():
        data['action_type'] = 'Timeout'

    elif 'rebound' in description.lower():
        data['action_type'] = 'Rebound'
    
    elif 'violation' in description.lower():
        data['action_type'] = 'Violation'
    
    elif 'instant replay' in description.lower():
        data['action_type'] = 'Instant replay'

    elif score_element:
        data['action_type'] = 'shot'
        data['made'] = int(1)

        shot_pattern = r"([A-Za-z\s\W]+)\s?(\d+')?\s?(\d+PT[a-zA-Z]?)?\s?(Alley Oop Dunk|Driving Layup|Putback Dunk|Running Reverse Layup|Finger Roll Layup|Putback Layup|Running Pull-Up Jump Shot|Driving Floating Bank Jump Shot|Tip Dunk Shot|Cutting Finger Roll Layup Shot|Tip Layup Shot|Dunk|Driving Floating Jump Shot|Jump Shot|Driving Finger Roll Layup|Step Back Jump Shot|Running Finger Roll Layup|Turnaround Hook Shot|Running Layup|Running Jump Shot|Cutting Dunk Shot|Reverse Layup|Floating Jump Shot|Turnaround Jump Shot|Layup|Hook Shot|Pullup Jump Shot|Running Dunk|Cutting Layup Shot|Fadeaway Jumper|Running Alley Oop Dunk Shot|Turnaround Fadeaway|Driving Dunk|Driving Reverse Layup)\s?\((\d+)\s+PTS\)(\s\(([A-Za-z\s\W]+)\s(\d+)\sAST\))?"

        shot_match = re.search(shot_pattern, description)

        data['player'] = shot_match.group(1).strip()
        data['shot_type'] = shot_match.group(4).strip()
        data['player_points'] = int(shot_match.group(5)) if shot_match.group(5) else None
        data['assist_player'] = shot_match.group(7).strip() if shot_match.group(7) else None
        data['assist_count'] = int(shot_match.group(8)) if shot_match.group(8) else None
        data['score'] = score_element
        

    elif 'miss' in description.lower():
        data['action_type'] = 'Shot'
        data['made'] = int(0)


    play_data.append(data)

<article class="GamePlayByPlayRow_article__asoO2" data-is-home-team="true"><div class="GamePlayByPlayRow_row__2iX_w" data-is-home-team="true"><p class="GamePlayByPlayRow_clock__o_PxT" data-is-home-team="true"><span class="GamePlayByPlayRow_clockElement__LfzHV">12:00</span></p><div class="GamePlayByPlayRow_descBlock__By8pv" data-content="4" data-id="nba:games:game-details-play-by-play:play" data-is-home-team="true" data-pos="2/486" data-premium="false" data-section="Play-By-Play" data-text="Jump Ball Carter Jr. vs. Sengun: Tip to Banchero" data-track="video" href="/video/4" title="Watch Video"><a class="StatEventLink_sel__pAwmA GamePlayByPlayRow_statEvent__Ru8Pr" data-id="nba:games:game-details-box-score:video-box-score" data-pos="" data-premium="false" data-track="video" href="/stats/events/?CFID=&amp;CFPARAMS=&amp;GameEventID=4&amp;GameID=0022300066&amp;Season=2023-24&amp;flag=1&amp;title=Jump%20Ball%20Carter%20Jr.%20vs.%20Sengun:%20Tip%20to%20Banchero"><span class="GamePlayByPlayRow_

In [15]:
desc_element = plays[234].find('span', class_='GamePlayByPlayRow_desc__XLzrU')
description = desc_element.text if desc_element else None
description


"Carter Jr. 1' Alley Oop Dunk (6 PTS) (F. Wagner 2 AST)"

In [16]:
df = pd.DataFrame(play_data)
#pd.set_option('display.max_rows', None)
#pd

In [17]:
pd.set_option('display.max_rows', None)
df[df['made']==1]

Unnamed: 0,time,is_home_team,action_type,player,made,shot_type,player_points,assist_player,assist_count,score
8,10:40,True,shot,Suggs,1.0,Jump Shot,3.0,Carter Jr.,1.0,[0 - 3]
14,10:04,False,shot,Sengun,1.0,Running Layup,2.0,Brooks,1.0,[2 - 3]
17,09:38,False,shot,Brooks,1.0,Running Layup,2.0,,,[4 - 3]
20,09:16,False,shot,Smith Jr.,1.0,Cutting Dunk Shot,2.0,Sengun,1.0,[6 - 3]
27,08:26,True,shot,F. Wagner,1.0,Step Back Jump Shot,3.0,,,[6 - 6]
31,07:53,True,shot,F. Wagner,1.0,Jump Shot,6.0,Banchero,1.0,[8 - 9]
34,07:14,True,shot,F. Wagner,1.0,Jump Shot,9.0,Banchero,2.0,[8 - 12]
39,06:47,True,shot,Carter Jr.,1.0,Putback Dunk,2.0,,,[8 - 14]
40,06:34,False,shot,Ja. Green,1.0,Layup,4.0,Sengun,2.0,[10 - 14]
41,06:22,True,shot,Carter Jr.,1.0,Cutting Dunk Shot,4.0,F. Wagner,1.0,[10 - 16]
