In [1]:
import sys
import os

# Add project root to Python path
current_dir = os.path.dirname(os.path.abspath('__file__' if '__file__' in globals() else 'lottomatica_scraper_test.ipynb'))
project_root = os.path.dirname(current_dir)

if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import the Lottomatica scraper
from src.scraper.lottomatica_selenium_scraper import LottomaticaSeleniumScraper
from src.storage.csv_storage import CSVBettingOddsStorage

In [2]:
"https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon"

'https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon'

In [3]:
# Configuration - Replace with actual Lottomatica/Better.it betting URL
url = "https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon"
duration_minutes = 0.5  # 30 seconds
interval_seconds = 5    # Every 5 seconds

print("Lottomatica Scraper Configuration:")
print(f"  URL: {url}")
print(f"  Duration: {duration_minutes} minutes (30 seconds)")
print(f"  Interval: {interval_seconds} seconds")
print("  Note: Update the URL with an actual match page")
print("=" * 60)

Lottomatica Scraper Configuration:
  URL: https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon
  Duration: 0.5 minutes (30 seconds)
  Interval: 5 seconds
  Note: Update the URL with an actual match page


In [5]:
# Create storage and scraper
storage = CSVBettingOddsStorage(filename_prefix="lottomatica_scraper", output_dir="data")
scraper = LottomaticaSeleniumScraper(headless=False, storage=storage)

# Run unified scraper for 30 seconds with 5-second intervals
print("Starting Lottomatica scraper...")
result = scraper.scrape(
    url=url,
    duration_minutes=duration_minutes,
    interval_seconds=interval_seconds
)

# Display results
print(f"\nResults:")
print(f"  Successful scrapes: {result['successful_scrapes']}")
print(f"  Failed scrapes: {result['failed_scrapes']}")
print(f"  Success rate: {result['success_rate']:.1f}%")
print(f"  Session duration: {result['session_duration']}")
print(f"  Data saved to: {result['storage_path']}")

# Clean up
scraper.close()

Starting Lottomatica scraper...
Starting continuous (0.5 minutes) scraping session
   URL: https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon
   Duration: 0.5 minutes
   Interval: 5 seconds
   Storage: CSVBettingOddsStorage
Session started at 2025-06-15 20:22:07
Session will end at 2025-06-15 20:22:37
   Press Ctrl+C to stop early
------------------------------------------------------------
✓ CSV storage initialized: data\lottomatica_scraper_20250615_202207.csv
Chrome WebDriver setup successful
Navigating to: https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon
Cookie banner accepted
Page loaded - team names visible
Teams: CD Municipal Tarija vs CA CICLON
1X2 Main odds extracted
Double Chance odds extracted
Error extracting Gol/NoGol odds: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//span[normalize-space(text())='Gol/NoGol']/ancestor::div[contains(@class, '

In [6]:
# Display summary of extraction results
print("Scraper Result Summary:")
print(f"Teams: {result.get('teams', 'Not found')}")
print(f"1X2 odds: Home={result.get('home_odds')}, Draw={result.get('draw_odds')}, Away={result.get('away_odds')}")
print(f"Double Chance: 1X={result.get('home_or_draw')}, X2={result.get('away_or_draw')}, 12={result.get('home_or_away')}")
print(f"Both Teams Score: Yes={result.get('both_teams_score_yes')}, No={result.get('both_teams_score_no')}")
print(f"Over/Under 2.5: Over={result.get('over_2_5')}, Under={result.get('under_2_5')}")
print(f"Over/Under 1.5: Over={result.get('over_1_5')}, Under={result.get('under_1_5')}")

# Count non-null values
non_null_count = sum(1 for v in result.values() if v is not None and v != 'Not found')
print(f"\nTotal non-null values extracted: {non_null_count}")

Scraper Result Summary:
Teams: Not found
1X2 odds: Home=None, Draw=None, Away=None
Double Chance: 1X=None, X2=None, 12=None
Both Teams Score: Yes=None, No=None
Over/Under 2.5: Over=None, Under=None
Over/Under 1.5: Over=None, Under=None

Total non-null values extracted: 7


In [7]:
# Display the full result dictionary
print("Full result dictionary:")
for key, value in result.items():
    print(f"  {key}: {value}")
    
print(f"\nResult type: {type(result)}")
print(f"Result keys: {list(result.keys())}")

Full result dictionary:
  successful_scrapes: 3
  failed_scrapes: 0
  total_scrapes: 3
  success_rate: 100.0
  data: [BettingOdds(timestamp=datetime.datetime(2025, 6, 15, 20, 22, 24, 373835), source='Lottomatica', match_id='cd-municipal-tarija-ca-ciclon', home_team='CD Municipal Tarija', away_team='CA CICLON', home_win=1.4, draw=3.95, away_win=7.25, home_or_draw=1.03, away_or_draw=2.55, home_or_away=1.18, over_1_5=None, under_1_5=None, over_2_5=None, under_2_5=None, over_3_5=None, under_3_5=None, both_teams_score_yes=None, both_teams_score_no=None), BettingOdds(timestamp=datetime.datetime(2025, 6, 15, 20, 22, 35, 580366), source='Lottomatica', match_id='cd-municipal-tarija-ca-ciclon', home_team='CD Municipal Tarija', away_team='CA CICLON', home_win=1.4, draw=3.95, away_win=7.25, home_or_draw=1.03, away_or_draw=2.55, home_or_away=1.18, over_1_5=None, under_1_5=None, over_2_5=None, under_2_5=None, over_3_5=None, under_3_5=None, both_teams_score_yes=None, both_teams_score_no=None), Bettin

In [10]:
# Create a simple diagnostic scraper to inspect page structure
print("Running diagnostic test to inspect available markets...")

try:
    # Create a custom diagnostic scraper that shows market structure
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    
    # Set up browser manually for diagnostic
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    
    driver = webdriver.Chrome(options=chrome_options)
    
    print(f"Navigating to: {url}")
    driver.get(url)
    
    # Wait for page load
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.event-name")))
    
    print("Page loaded, checking available markets...")
    
    # Check all slot headers to see what markets are available
    slot_headers = driver.find_elements(By.CSS_SELECTOR, ".slot-header")
    print(f"Found {len(slot_headers)} slot headers:")
    for i, header in enumerate(slot_headers):
        print(f"  {i+1}. '{header.text.strip()}'")
    
    # Check for quote wrappers with data-spreadid
    spread_wrappers = driver.find_elements(By.CSS_SELECTOR, "div.quote-wrapper[data-spreadid]")
    print(f"\nFound {len(spread_wrappers)} quote wrappers with data-spreadid:")
    for wrapper in spread_wrappers:
        spread_id = wrapper.get_attribute("data-spreadid")
        print(f"  data-spreadid: '{spread_id}'")
        
    # Check specifically for Under/Over containers
    under_over_containers = driver.find_elements(By.XPATH, "//div[@class='slot-header' and contains(text(), 'Under/Over')]/..")
    print(f"\nFound {len(under_over_containers)} Under/Over containers")
    
    # Check for Gol/NoGol containers  
    gol_nogol_containers = driver.find_elements(By.XPATH, "//div[@class='slot-header' and contains(text(), 'Gol/NoGol')]/..")
    print(f"Found {len(gol_nogol_containers)} Gol/NoGol containers")
    
    driver.quit()
    print("Diagnostic test completed successfully.")
    
except Exception as e:
    print(f"Error during diagnostic test: {e}")
    if 'driver' in locals():
        driver.quit()

Running diagnostic test to inspect available markets...
Navigating to: https://www.lottomatica.it/scommesse/live/calcio/bolivia-nacional-b/cd-municipal-tarija-ca-ciclon
Error during diagnostic test: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=137.0.7151.104)
Stacktrace:
	GetHandleVerifier [0x0x7ff7d1ebfe75+79173]
	GetHandleVerifier [0x0x7ff7d1ebfed0+79264]
	(No symbol) [0x0x7ff7d1c79e5a]
	(No symbol) [0x0x7ff7d1c65c25]
	(No symbol) [0x0x7ff7d1c8ac44]
	(No symbol) [0x0x7ff7d1d003c5]
	(No symbol) [0x0x7ff7d1d20922]
	(No symbol) [0x0x7ff7d1cf8743]
	(No symbol) [0x0x7ff7d1cc14c1]
	(No symbol) [0x0x7ff7d1cc2253]
	GetHandleVerifier [0x0x7ff7d218a2ad+3004797]
	GetHandleVerifier [0x0x7ff7d21846fd+2981325]
	GetHandleVerifier [0x0x7ff7d21a3350+3107360]
	GetHandleVerifier [0x0x7ff7d1eda9fe+188622]
	GetHandleVerifier [0x0x7ff7d1ee228f+219487]
	GetHandleVerifier [0x0x7ff7d1ec8dc4+115860]
	