In [1]:
import time
import random
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urlparse, parse_qs, urlunparse, urlencode

def human_delay(min_s=3, max_s=8):
    time.sleep(random.uniform(min_s, max_s))

def wait_and_click(element, description=""):
    try:
        element = wait.until(EC.element_to_be_clickable(element))
        driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'smooth'});", element)
        human_delay(1, 2)
        element.click()
        print(f"{description} cliccato con successo." if description else "Elemento cliccato con successo.")
        return True
    except Exception as e:
        print(f"Errore durante il click su {description}: {str(e)}")
        return False

def add_page_to_url(url, page_num):
    """Aggiunge o modifica il parametro page nell'URL"""
    parsed = urlparse(url)
    query = parse_qs(parsed.query)
    query['page'] = [str(page_num)]
    new_query = urlencode(query, doseq=True)
    return urlunparse(parsed._replace(query=new_query))

def get_champion_name(url):
    """Estrae il nome dello scacchista dall'URL"""
    parsed = urlparse(url)
    query = parse_qs(parsed.query)
    if 'p1' in query:
        return query['p1'][0].replace(' ', '_')
    # Alternativa se il nome non è nei parametri URL
    return os.path.basename(parsed.path).replace('-', '_').title()

def wait_for_download_complete(directory, timeout=30):
    """Attende che un nuovo file venga completamente scaricato"""
    initial_files = set(os.listdir(directory))
    end_time = time.time() + timeout
    
    while time.time() < end_time:
        current_files = set(os.listdir(directory))
        new_files = current_files - initial_files
        
        if new_files:
            # Prendi il file più recente
            newest_file = max([os.path.join(directory, f) for f in new_files], 
                            key=os.path.getctime)
            
            # Attendi che il file sia completamente scritto
            file_size = -1
            while time.time() < end_time:
                try:
                    current_size = os.path.getsize(newest_file)
                    if current_size == file_size and current_size > 0:
                        return newest_file
                    file_size = current_size
                    time.sleep(1)
                except (FileNotFoundError, OSError):
                    time.sleep(1)
                    continue
        
        time.sleep(1)
    
    return None

# Cartella principale per i download
main_download_dir = os.path.join(os.getcwd(), "chess_downloads")
if not os.path.exists(main_download_dir):
    os.makedirs(main_download_dir)

# Setup browser con opzioni per il download
chrome_options = webdriver.ChromeOptions()
prefs = {
    "download.default_directory": main_download_dir,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True
}
chrome_options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()
wait = WebDriverWait(driver, 20)

# Navigazione iniziale
driver.get("https://www.chess.com/games")
human_delay(5, 8)

# Cookie
try:
    cookie_btn = wait.until(EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler")))
    cookie_btn.click()
    print("Cookie accettati.")
except Exception:
    print("Cookie già accettati o non trovati.")

# Raccolta link scacchisti
champion_links = []
try:
    champions = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a.post-preview-title[href*='/games/']")))
    champion_links = [champ.get_attribute("href") for champ in champions]
    print(f"Trovati {len(champion_links)} scacchisti.")
except Exception as e:
    print(f"Errore raccolta scacchisti: {str(e)}")

# Processa ogni scacchista
for idx, champion_url in enumerate(champion_links, 1):
    champion_name = get_champion_name(champion_url)
    print(f"\n{'='*50}\nScacchista {idx}/{len(champion_links)}: {champion_name}\n{'='*50}")
    
    # Crea cartella specifica per lo scacchista
    champion_dir = os.path.join(main_download_dir, champion_name)
    if not os.path.exists(champion_dir):
        os.makedirs(champion_dir)
    
    max_pages = 301  # Numero massimo di pagine da scaricare per ogni scacchista, sono il massimo che mi serve
    page_number = 1
    
    while page_number <= max_pages:
        current_url = add_page_to_url(champion_url, page_number)
        print(f"\nPagina {page_number}/{max_pages} - URL: {current_url}")
        
        driver.get(current_url)
        human_delay(5, 10)
        
        attempts = 0
        max_attempts = 3
        success = False
        
        while attempts < max_attempts and not success:
            attempts += 1
            print(f"\nTentativo {attempts}/{max_attempts}")
            
            # Seleziona tutte le partite
            if not wait_and_click((By.ID, "master-games-check-all"), "Seleziona tutto"):
                human_delay(5, 8)
                continue
            
            # Download
            if not wait_and_click((By.CSS_SELECTOR, ".master-games-download-icon"), "Pulsante download"):
                human_delay(5, 8)
                continue
            
            # Verifica CAPTCHA
            try:
                if driver.find_elements(By.CSS_SELECTOR, "iframe[title*='challenge']"):
                    print("CAPTCHA rilevato! Risolvi manualmente...")
                    input("Premi Invio dopo aver risolto il CAPTCHA...")
            except:
                pass
            
            # Attendi il download
            downloaded_file = wait_for_download_complete(main_download_dir)
            
            if downloaded_file:
                # Sposta il file nella cartella dello scacchista
                new_filename = f"{champion_name}_{page_number}.pgn"
                new_path = os.path.join(champion_dir, new_filename)
                
                try:
                    os.rename(downloaded_file, new_path)
                    print(f"File salvato in: {new_path}")
                    success = True
                except Exception as e:
                    print(f"Errore durante lo spostamento del file: {str(e)}")
                    success = False
            else:
                print("Download non completato o file non trovato")
                success = False
            
            if success:
                print(f"Download completato per {champion_name} - pagina {page_number}")
                human_delay(5, 8)  # Pausa dopo il download
        
        if not success:
            print(f"Fallito dopo {max_attempts} tentativi per pagina {page_number}. Passo alla pagina successiva.")
        
        page_number += 1  # Passa alla pagina successiva indipendentemente dall'esito

print("\nScript completato con successo! Tutti gli scacchisti processati.")
print(f"Le cartelle con i download sono state create in: {main_download_dir}")
driver.quit()

Cookie accettati.
Trovati 22 scacchisti.

Scacchista 1/22: Garry_Kasparov

Pagina 1/301 - URL: https://www.chess.com/games/garry-kasparov?page=1

Tentativo 1/3
Seleziona tutto cliccato con successo.
Pulsante download cliccato con successo.
File salvato in: C:\Users\ferro\Desktop\es.epicode\Progetto finale\DATI\chess_downloads\Garry_Kasparov\Garry_Kasparov_1.pgn
Download completato per Garry_Kasparov - pagina 1

Pagina 2/301 - URL: https://www.chess.com/games/garry-kasparov?page=2

Tentativo 1/3
Seleziona tutto cliccato con successo.
Pulsante download cliccato con successo.
File salvato in: C:\Users\ferro\Desktop\es.epicode\Progetto finale\DATI\chess_downloads\Garry_Kasparov\Garry_Kasparov_2.pgn
Download completato per Garry_Kasparov - pagina 2

Pagina 3/301 - URL: https://www.chess.com/games/garry-kasparov?page=3

Tentativo 1/3
Seleziona tutto cliccato con successo.
Pulsante download cliccato con successo.
File salvato in: C:\Users\ferro\Desktop\es.epicode\Progetto finale\DATI\chess_do

WebDriverException: Message: unknown error: net::ERR_ADDRESS_UNREACHABLE
  (Session info: chrome=136.0.7103.114)
Stacktrace:
	(No symbol) [0x00007FFCE645B1FC]
	GetHandleVerifier [0x00007FF712ACCF45+75717]
	GetHandleVerifier [0x00007FF712ACCFA0+75808]
	(No symbol) [0x00007FF712898F9A]
	(No symbol) [0x00007FF7128959F4]
	(No symbol) [0x00007FF712886789]
	(No symbol) [0x00007FF7128884F8]
	(No symbol) [0x00007FF712886A96]
	(No symbol) [0x00007FF712886516]
	(No symbol) [0x00007FF7128861DA]
	(No symbol) [0x00007FF712883E8A]
	(No symbol) [0x00007FF71288465C]
	(No symbol) [0x00007FF71289CF3A]
	(No symbol) [0x00007FF71294013E]
	(No symbol) [0x00007FF71291737A]
	(No symbol) [0x00007FF71293F39C]
	(No symbol) [0x00007FF712917153]
	(No symbol) [0x00007FF7128E0421]
	(No symbol) [0x00007FF7128E11B3]
	GetHandleVerifier [0x00007FF712DCD71D+3223453]
	GetHandleVerifier [0x00007FF712DC7CC2+3200322]
	GetHandleVerifier [0x00007FF712DE5AF3+3322739]
	GetHandleVerifier [0x00007FF712AE6A1A+180890]
	GetHandleVerifier [0x00007FF712AEE11F+211359]
	GetHandleVerifier [0x00007FF712AD5294+109332]
	GetHandleVerifier [0x00007FF712AD5442+109762]
	GetHandleVerifier [0x00007FF712ABBA59+4825]
	ExitProcess [0x00007FFCE43C137C+960572]
	ExitProcess [0x00007FFCE43415E8+436904]
	(No symbol) [0x00007FFCE639DEB0]
