In [9]:
import requests
from bs4 import BeautifulSoup
import time
import json
import pandas as pd
from datetime import datetime, timedelta
import schedule
import threading
from typing import Dict, List, Optional
import re
import logging
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service

# Logging be√°ll√≠t√°sa
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

In [16]:
class SofaScoreScraper:
    def __init__(self, headless: bool = True):
        self.headless = headless
        self.driver = None
        self.session = requests.Session()
        self.base_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
        self.session.headers.update(self.base_headers)
        
    def setup_selenium(self):
        """Selenium WebDriver be√°ll√≠t√°sa"""
        try:
            chrome_options = Options()
            if self.headless:
                chrome_options.add_argument("--headless")
            chrome_options.add_argument("--no-sandbox")
            chrome_options.add_argument("--disable-dev-shm-usage")
            chrome_options.add_argument("--disable-gpu")
            chrome_options.add_argument("--window-size=1920,1080")
            chrome_options.add_argument(f"--user-agent={self.base_headers['User-Agent']}")
            
            # Felh≈ë k√∂rnyezethez optimaliz√°lt be√°ll√≠t√°sok
            chrome_options.add_argument("--disable-extensions")
            chrome_options.add_argument("--disable-plugins")
            chrome_options.add_argument("--disable-images")
            
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)
            logger.info("Selenium WebDriver successfully initialized")
            return True
        except Exception as e:
            logger.error(f"Failed to setup Selenium: {e}")
            return False
    
    def extract_match_id_from_url(self, url: str) -> Optional[str]:
        """Match ID kinyer√©se az URL-b≈ël"""
        try:
            # SofaScore URL pattern: /match/team1-team2/abc#id:12345,tab:statistics
            if '#id:' in url:
                match_id = url.split('#id:')[1].split(',')[0]
                return match_id
            return None
        except Exception as e:
            logger.error(f"Error extracting match ID: {e}")
            return None
    
    def scrape_with_requests(self, url: str) -> Optional[Dict]:
        """Pr√≥b√°lkoz√°s requests-tel (gyorsabb, de nem mindig m≈±k√∂dik)"""
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'lxml')
            return self.parse_statistics(soup)
        except Exception as e:
            logger.warning(f"Requests method failed: {e}")
            return None
    
    def scrape_with_selenium(self, url: str) -> Optional[Dict]:
        """Selenium haszn√°lata dinamikus tartalom bet√∂lt√©s√©hez"""
        try:
            if not self.driver and not self.setup_selenium():
                return None
            
            self.driver.get(url)
            
            # V√°runk, hogy bet√∂lt≈ëdj√∂n az oldal
            wait = WebDriverWait(self.driver, 20)
            
            # T√∂bb lehets√©ges selector pr√≥b√°l√°sa
            selectors_to_try = [
                ".pt_sm.bdr-b_lg.ov_hidden",
                "[data-testid='statistics']",
                ".statistics",
                "[class*='statistics']",
                "[class*='stat']"
            ]
            
            stats_container = None
            for selector in selectors_to_try:
                try:
                    stats_container = wait.until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                    )
                    logger.info(f"Found stats container with selector: {selector}")
                    break
                except:
                    continue
            
            if not stats_container:
                logger.warning("No statistics container found with any selector")
                # Pr√≥b√°ljuk meg a teljes oldal parsing-ot
                time.sleep(5)  # Tov√°bbi v√°rakoz√°s
            else:
                time.sleep(3)  # Statisztik√°k bet√∂lt≈ëd√©s√©re v√°rakoz√°s
            
            # HTML tartalom lek√©r√©se √©s debug info
            page_source = self.driver.page_source
            
            # Debug: megn√©zz√ºk mi van az oldalon
            soup = BeautifulSoup(page_source, 'lxml')
            logger.info(f"Page title: {soup.title.string if soup.title else 'No title'}")
            
            # Keres√ºnk b√°rmilyen div-et ami statisztik√°ra utalhat
            potential_stats = soup.find_all('div', string=re.compile(r'xG|Expected|Goals|Shots|Possession', re.I))
            logger.info(f"Found {len(potential_stats)} potential stat elements")
            
            return self.parse_statistics(soup)
            
        except Exception as e:
            logger.error(f"Selenium scraping failed: {e}")
            return None
    
    def parse_statistics(self, soup: BeautifulSoup) -> Dict:
        """Statisztik√°k kinyer√©se a HTML-b≈ël - jav√≠tott verzi√≥ t√∂bb selector-ral"""
        stats = {
            'timestamp': datetime.now().isoformat(),
            'team_stats': {},
            'xg_data': {},
            'raw_stats': []
        }
        
        try:
            # Debug: n√©zz√ºk meg mi van az oldalon
            logger.info("Starting to parse statistics...")
            
            # T√∂bb lehets√©ges statisztika kont√©ner keres√©se
            container_selectors = [
                'div.pt_sm.bdr-b_lg.ov_hidden',
                '[data-testid*="stat"]',
                '.statistics-container',
                '[class*="statistics"]',
                '[class*="stat-row"]',
                'div[class*="flex"]'  # SofaScore gyakran flexbox-ot haszn√°l
            ]
            
            stats_containers = []
            for selector in container_selectors:
                containers = soup.select(selector)
                if containers:
                    stats_containers.extend(containers)
                    logger.info(f"Found {len(containers)} containers with selector: {selector}")
            
            # Ha nincs specifikus kont√©ner, keres√ºnk az eg√©sz oldalon
            if not stats_containers:
                logger.info("No specific containers found, searching entire page...")
                stats_containers = [soup]
            
            # Statisztik√°k keres√©se k√ºl√∂nb√∂z≈ë m√≥dszerekkel
            for container in stats_containers:
                # 1. m√≥dszer: Flexbox alap√∫ keres√©s
                self._parse_flexbox_stats(container, stats)
                
                # 2. m√≥dszer: Sz√∂veg alap√∫ keres√©s
                self._parse_text_based_stats(container, stats)
                
                # 3. m√≥dszer: Table-szer≈± strukt√∫ra
                self._parse_table_like_stats(container, stats)
            
            # xG specifikus keres√©s
            self._extract_xg_data(soup, stats)
            
            # Csapat nevek kinyer√©se
            team_names = self.extract_team_names(soup)
            if team_names:
                stats['teams'] = team_names
            
            logger.info(f"Successfully parsed {len(stats['raw_stats'])} statistics")
            
            # Debug: logoljuk az els≈ë p√°r statisztik√°t
            for i, stat in enumerate(stats['raw_stats'][:3]):
                logger.info(f"Stat {i+1}: {stat}")
            
            return stats
            
        except Exception as e:
            logger.error(f"Error parsing statistics: {e}")
            return stats
    
    def _parse_flexbox_stats(self, container, stats):
        """Flexbox alap√∫ statisztik√°k parsing"""
        try:
            # Keres√ºnk sorokra osztott statisztik√°kat
            rows = container.find_all('div', class_=re.compile(r'.*flex.*|.*row.*'))
            
            for row in rows:
                text_elements = row.find_all(string=True)
                text_content = [t.strip() for t in text_elements if t.strip()]
                
                # Ha 3 elemet tal√°lunk: √©rt√©k1, n√©v, √©rt√©k2
                if len(text_content) >= 3:
                    # Keres√ºnk sz√°mokat a sz√∂vegben
                    numbers = [t for t in text_content if re.match(r'^[\d.,]+%?$', t)]
                    stat_names = [t for t in text_content if not re.match(r'^[\d.,]+%?$', t) and len(t) > 1]
                    
                    if len(numbers) >= 2 and stat_names:
                        stat_name = stat_names[0]
                        stats['raw_stats'].append({
                            'stat_name': stat_name,
                            'home_value': numbers[0],
                            'away_value': numbers[-1]
                        })
        except Exception as e:
            logger.debug(f"Error in flexbox parsing: {e}")
    
    def _parse_text_based_stats(self, container, stats):
        """Sz√∂veg alap√∫ statisztika keres√©s"""
        try:
            # Keress√ºk az √∂sszes sz√∂veget ami statisztik√°ra utalhat
            stat_patterns = [
                r'(\d+[\.,]?\d*)\s*(xG|Expected goals?|Goals?|Shots?|Possession|Pass|Corner|Yellow|Red)',
                r'(xG|Expected goals?|Goals?|Shots?|Possession|Pass|Corner|Yellow|Red)\s*(\d+[\.,]?\d*)',
                r'(\d+[\.,]?\d*%?)\s*([A-Za-z\s]+)\s*(\d+[\.,]?\d*%?)'
            ]
            
            page_text = container.get_text()
            
            for pattern in stat_patterns:
                matches = re.finditer(pattern, page_text, re.IGNORECASE)
                for match in matches:
                    groups = match.groups()
                    if len(groups) >= 2:
                        # Pr√≥b√°ljuk meghat√°rozni melyik a n√©v √©s melyik az √©rt√©k
                        if groups[1].replace(' ', '').replace('.', '').replace(',', '').isdigit():
                            stat_name = groups[0]
                            value = groups[1]
                        else:
                            stat_name = groups[1]
                            value = groups[0]
                        
                        if stat_name and value:
                            stats['raw_stats'].append({
                                'stat_name': stat_name,
                                'home_value': value,
                                'away_value': 'N/A'
                            })
        except Exception as e:
            logger.debug(f"Error in text-based parsing: {e}")
    
    def _parse_table_like_stats(self, container, stats):
        """Table-szer≈± strukt√∫r√°k parsing"""
        try:
            # Keres√ºnk tr, td elemeket
            rows = container.find_all(['tr', 'div'])
            
            for row in rows:
                cells = row.find_all(['td', 'div', 'span'])
                if len(cells) >= 3:
                    cell_texts = [cell.get_text(strip=True) for cell in cells]
                    
                    # Sz≈±rj√ºk ki az √ºres cell√°kat
                    cell_texts = [text for text in cell_texts if text]
                    
                    if len(cell_texts) >= 3:
                        # √Åltal√°ban: √©rt√©k1, statisztika_n√©v, √©rt√©k2
                        potential_left = cell_texts[0]
                        potential_name = cell_texts[1]
                        potential_right = cell_texts[-1]
                        
                        # Ellen≈ërizz√ºk hogy van-e sz√°m√©rt√©k
                        if (re.match(r'^[\d.,]+%?$', potential_left) and 
                            re.match(r'^[\d.,]+%?$', potential_right) and
                            not re.match(r'^[\d.,]+%?$', potential_name)):
                            
                            stats['raw_stats'].append({
                                'stat_name': potential_name,
                                'home_value': potential_left,
                                'away_value': potential_right
                            })
        except Exception as e:
            logger.debug(f"Error in table-like parsing: {e}")
    
    def _extract_xg_data(self, soup, stats):
        """xG adatok specifikus kinyer√©se"""
        try:
            # xG keres√©se k√ºl√∂nb√∂z≈ë m√≥dokon
            xg_patterns = [
                r'xG[:\s]*(\d+[\.,]?\d*)',
                r'Expected\s+goals?[:\s]*(\d+[\.,]?\d*)',
                r'(\d+[\.,]?\d*)\s*xG'
            ]
            
            page_text = soup.get_text()
            
            xg_values = []
            for pattern in xg_patterns:
                matches = re.findall(pattern, page_text, re.IGNORECASE)
                xg_values.extend(matches)
            
            if len(xg_values) >= 2:
                stats['xg_data'] = {
                    'home_xg': xg_values[0],
                    'away_xg': xg_values[1],
                    'stat_name': 'xG'
                }
                logger.info(f"Found xG data: {stats['xg_data']}")
            elif len(xg_values) == 1:
                stats['xg_data'] = {
                    'home_xg': xg_values[0],
                    'away_xg': 'N/A',
                    'stat_name': 'xG'
                }
            
        except Exception as e:
            logger.debug(f"Error extracting xG data: {e}")


    def extract_team_names(self, soup: BeautifulSoup) -> Optional[Dict]:
        """Csapat nevek kinyer√©se"""
        try:
            # K√ºl√∂nb√∂z≈ë selectorok pr√≥b√°l√°sa
            selectors = [
                'h1[data-testid="match-header-team-name"]',
                '.team-name',
                '[class*="team"][class*="name"]',
                'h1, h2, h3'  # Fallback
            ]
            
            for selector in selectors:
                elements = soup.select(selector)
                if len(elements) >= 2:
                    return {
                        'home_team': elements[0].get_text(strip=True),
                        'away_team': elements[1].get_text(strip=True)
                    }
            
            return None
        except Exception as e:
            logger.error(f"Error extracting team names: {e}")
            return None
    
    def scrape_match_stats(self, url: str) -> Optional[Dict]:
        """F≈ë scraping f√ºggv√©ny - el≈ësz√∂r requests, majd Selenium"""
        logger.info(f"Starting to scrape: {url}")
        
        # El≈ësz√∂r pr√≥b√°lkoz√°s requests-tel
        stats = self.scrape_with_requests(url)
        
        # Ha nem siker√ºlt, Selenium haszn√°lata
        if not stats or len(stats.get('raw_stats', [])) == 0:
            logger.info("Trying with Selenium...")
            stats = self.scrape_with_selenium(url)
        
        return stats
    
    def cleanup(self):
        """Er≈ëforr√°sok felszabad√≠t√°sa"""
        if self.driver:
            self.driver.quit()
            self.driver = None

In [17]:
class MatchScheduler:
    def __init__(self, scraper: SofaScoreScraper):
        self.scraper = scraper
        self.results = []
        self.running = False
        self.thread = None
        
    def add_match(self, url: str, interval_minutes: int = 5):
        """M√©rk≈ëz√©s hozz√°ad√°sa az √ºtemez√©shez"""
        def job():
            try:
                stats = self.scraper.scrape_match_stats(url)
                if stats:
                    stats['url'] = url
                    self.results.append(stats)
                    logger.info(f"Successfully scraped match data. Total results: {len(self.results)}")
                else:
                    logger.warning("No stats retrieved")
            except Exception as e:
                logger.error(f"Error in scheduled job: {e}")
        
        schedule.every(interval_minutes).minutes.do(job)
        logger.info(f"Scheduled match scraping every {interval_minutes} minutes for: {url}")
        
        # Els≈ë futtat√°s azonnal
        job()
    
    def start_scheduler(self):
        """√útemez≈ë ind√≠t√°sa k√ºl√∂n sz√°lon"""
        def run_scheduler():
            self.running = True
            while self.running:
                schedule.run_pending()
                time.sleep(1)
        
        self.thread = threading.Thread(target=run_scheduler, daemon=True)
        self.thread.start()
        logger.info("Scheduler started")
    
    def stop_scheduler(self):
        """√útemez≈ë le√°ll√≠t√°sa"""
        self.running = False
        schedule.clear()
        logger.info("Scheduler stopped")
    
    def get_latest_stats(self) -> Optional[Dict]:
        """Legfrissebb statisztik√°k lek√©r√©se"""
        if self.results:
            return self.results[-1]
        return None
    
    def save_results_to_csv(self, filename: str = None):
        """Eredm√©nyek ment√©se CSV-be"""
        if not self.results:
            logger.warning("No results to save")
            return
        
        if not filename:
            filename = f"sofascore_stats_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        
        # DataFrame k√©sz√≠t√©se
        rows = []
        for result in self.results:
            base_row = {
                'timestamp': result.get('timestamp'),
                'url': result.get('url'),
                'home_team': result.get('teams', {}).get('home_team', 'Unknown'),
                'away_team': result.get('teams', {}).get('away_team', 'Unknown'),
                'home_xg': result.get('xg_data', {}).get('home_xg', 'N/A'),
                'away_xg': result.get('xg_data', {}).get('away_xg', 'N/A')
            }
            
            # Egy√©b statisztik√°k hozz√°ad√°sa
            for stat in result.get('raw_stats', []):
                base_row[f"home_{stat['stat_name']}"] = stat['home_value']
                base_row[f"away_{stat['stat_name']}"] = stat['away_value']
            
            rows.append(base_row)
        
        df = pd.DataFrame(rows)
        df.to_csv(filename, index=False)
        logger.info(f"Results saved to {filename}")
        return filename

In [18]:
# Haszn√°lati p√©lda √©s tesztel√©s

# Scraper inicializ√°l√°s
scraper = SofaScoreScraper(headless=True)  # headless=False a tesztel√©shez

# Tesztel√©s egyedi URL-lel
test_url = "https://www.sofascore.com/football/match/1-fc-koln-leicester-city/Gswdb#id:14250691,tab:statistics"

print("üèà Testing single match scraping...")
stats = scraper.scrape_match_stats(test_url)

if stats:
    print(f"‚úÖ Successfully scraped statistics!")
    print(f"üìä Found {len(stats.get('raw_stats', []))} statistics")
    
    if stats.get('xg_data'):
        print(f"‚öΩ xG found: {stats['xg_data']}")
    else:
        print("‚ùå No xG data found")
    
    xg_stats = [s for s in stats['raw_stats'] if 'xG' in s['stat_name'] or 'Expected' in s['stat_name']]
    print(f"üéØ xG related stats: {xg_stats}")

    if stats.get('teams'):
        print(f"üèüÔ∏è Teams: {stats['teams']}")
    
    # Els≈ë n√©h√°ny statisztika megjelen√≠t√©se
    print("\nüìà Sample statistics:")
    for stat in stats.get('raw_stats', [])[:5]:
        print(f"  {stat['stat_name']}: {stat['home_value']} - {stat['away_value']}")
else:
    print("‚ùå Failed to scrape statistics")

2025-07-25 18:18:42,563 - INFO - Starting to scrape: https://www.sofascore.com/football/match/1-fc-koln-leicester-city/Gswdb#id:14250691,tab:statistics
2025-07-25 18:18:42,651 - INFO - Trying with Selenium...


üèà Testing single match scraping...


2025-07-25 18:18:43,610 - INFO - Get LATEST chromedriver version for google-chrome
2025-07-25 18:18:43,700 - INFO - Get LATEST chromedriver version for google-chrome
2025-07-25 18:18:43,765 - INFO - Driver [C:\Users\Adam\.wdm\drivers\chromedriver\win64\138.0.7204.168\chromedriver-win32/chromedriver.exe] found in cache
2025-07-25 18:18:44,824 - INFO - Selenium WebDriver successfully initialized
2025-07-25 18:18:46,963 - INFO - Found stats container with selector: .pt_sm.bdr-b_lg.ov_hidden
2025-07-25 18:18:50,248 - INFO - Page title: 1. FC K√∂ln vs Leicester City live score, H2H and lineups | Sofascore
2025-07-25 18:18:50,256 - INFO - Found 6 potential stat elements
2025-07-25 18:18:50,257 - INFO - Starting to parse statistics...
2025-07-25 18:18:50,283 - INFO - Found 1 containers with selector: div.pt_sm.bdr-b_lg.ov_hidden
2025-07-25 18:18:50,329 - INFO - Found 1 containers with selector: [data-testid*="stat"]
2025-07-25 18:18:50,509 - INFO - Found 253 containers with selector: div[clas

‚úÖ Successfully scraped statistics!
üìä Found 354 statistics
üèüÔ∏è Teams: {'home_team': '1. FC K√∂lnvsLeicester Citylive score, H2H results, standings and prediction', 'away_team': 'Lineups'}

üìà Sample statistics:
  Match overview: 48% - 0
  Ball possession: 48% - 52%
  Corner kicks: 2 - 3
  Fouls: 9 - 6
  Free kicks: 6 - 9


In [None]:
# √útemezett scraping p√©lda

print("\nüïê Setting up scheduled scraping...")
scheduler = MatchScheduler(scraper)

# M√©rk≈ëz√©s hozz√°ad√°sa 2 perces intervallumal (tesztel√©shez)
scheduler.add_match(test_url, interval_minutes=1)

# √útemez≈ë ind√≠t√°sa
scheduler.start_scheduler()

print("‚è∞ Scheduler started! It will scrape every 1 minutes.")
print("üí° Run the next cell to check results after a few minutes...")

2025-07-25 18:23:48,875 - INFO - Scheduled match scraping every 1 minutes for: https://www.sofascore.com/football/match/1-fc-koln-leicester-city/Gswdb#id:14250691,tab:statistics
2025-07-25 18:23:48,876 - INFO - Starting to scrape: https://www.sofascore.com/football/match/1-fc-koln-leicester-city/Gswdb#id:14250691,tab:statistics
2025-07-25 18:23:48,964 - INFO - Trying with Selenium...
2025-07-25 18:23:48,989 - INFO - Found stats container with selector: .pt_sm.bdr-b_lg.ov_hidden



üïê Setting up scheduled scraping...


2025-07-25 18:23:52,272 - INFO - Page title: 1. FC K√∂ln vs Leicester City live score, H2H and lineups | Sofascore
2025-07-25 18:23:52,280 - INFO - Found 6 potential stat elements
2025-07-25 18:23:52,283 - INFO - Starting to parse statistics...
2025-07-25 18:23:52,320 - INFO - Found 1 containers with selector: div.pt_sm.bdr-b_lg.ov_hidden
2025-07-25 18:23:52,366 - INFO - Found 1 containers with selector: [data-testid*="stat"]
2025-07-25 18:23:52,542 - INFO - Found 253 containers with selector: div[class*="flex"]
2025-07-25 18:23:52,855 - INFO - Successfully parsed 354 statistics
2025-07-25 18:23:52,856 - INFO - Stat 1: {'stat_name': 'Match overview', 'home_value': '48%', 'away_value': '0'}
2025-07-25 18:23:52,857 - INFO - Stat 2: {'stat_name': 'Ball possession', 'home_value': '48%', 'away_value': '52%'}
2025-07-25 18:23:52,857 - INFO - Stat 3: {'stat_name': 'Corner kicks', 'home_value': '2', 'away_value': '3'}
2025-07-25 18:23:52,858 - INFO - Successfully scraped match data. Total resu

‚è∞ Scheduler started! It will scrape every 1 minutes.
üí° Run the next cell to check results after a few minutes...


2025-07-25 18:24:48,888 - INFO - Starting to scrape: https://www.sofascore.com/football/match/1-fc-koln-leicester-city/Gswdb#id:14250691,tab:statistics
2025-07-25 18:24:48,985 - INFO - Trying with Selenium...
2025-07-25 18:24:49,020 - INFO - Found stats container with selector: .pt_sm.bdr-b_lg.ov_hidden
2025-07-25 18:24:52,687 - INFO - Page title: 1. FC K√∂ln vs Leicester City live score, H2H and lineups | Sofascore
2025-07-25 18:24:52,696 - INFO - Found 6 potential stat elements
2025-07-25 18:24:52,697 - INFO - Starting to parse statistics...
2025-07-25 18:24:52,726 - INFO - Found 1 containers with selector: div.pt_sm.bdr-b_lg.ov_hidden
2025-07-25 18:24:52,778 - INFO - Found 1 containers with selector: [data-testid*="stat"]
2025-07-25 18:24:52,971 - INFO - Found 253 containers with selector: div[class*="flex"]
2025-07-25 18:24:53,296 - INFO - Successfully parsed 354 statistics
2025-07-25 18:24:53,297 - INFO - Stat 1: {'stat_name': 'Match overview', 'home_value': '49%', 'away_value': '

In [21]:
# Eredm√©nyek ellen≈ërz√©se √©s ment√©se

print("üìä Current results:")
print(f"Total scraping results: {len(scheduler.results)}")

if scheduler.results:
    latest = scheduler.get_latest_stats()
    if latest:
        print(f"\nüïê Latest timestamp: {latest.get('timestamp')}")
        if latest.get('xg_data'):
            print(f"‚öΩ Latest xG: {latest['xg_data']}")
    
    # CSV ment√©s
    csv_file = scheduler.save_results_to_csv()
    print(f"üíæ Results saved to: {csv_file}")
    
    # Egyszer≈± DataFrame megjelen√≠t√©s
    if len(scheduler.results) > 0:
        sample_data = []
        for result in scheduler.results[-3:]:  # Utols√≥ 3 eredm√©ny
            sample_data.append({
                'Time': result.get('timestamp', '')[:19],  # D√°tum/id≈ë r√∂vid√≠tve
                'Home xG': result.get('xg_data', {}).get('home_xg', 'N/A'),
                'Away xG': result.get('xg_data', {}).get('away_xg', 'N/A'),
                'Stats Count': len(result.get('raw_stats', []))
            })
        
        df_sample = pd.DataFrame(sample_data)
        print("\nüìã Sample results:")
        print(df_sample.to_string(index=False))

2025-07-25 18:25:10,616 - INFO - Results saved to sofascore_stats_20250725_182510.csv


üìä Current results:
Total scraping results: 3

üïê Latest timestamp: 2025-07-25T18:24:53.347017
üíæ Results saved to: sofascore_stats_20250725_182510.csv

üìã Sample results:
               Time Home xG Away xG  Stats Count
2025-07-25T18:23:52     N/A     N/A          354
2025-07-25T18:24:52     N/A     N/A          354
2025-07-25T18:24:53     N/A     N/A          354


In [6]:
# Felh≈ë deployment seg√©dk√≥d

"""
üåê FELH≈ê DEPLOYMENT OPCI√ìK:

1. GOOGLE COLAB:
   - Ingyenes GPU/TPU access
   - Jupyter notebook k√∂rnyezet
   - Korl√°tozott fut√°si id≈ë (12-24 √≥ra)

2. KAGGLE KERNELS:
   - Heti 30 √≥ra GPU id≈ë
   - Internet access korl√°tozott
   - Notebook k√∂rnyezet

3. GITHUB ACTIONS (Ingyenes tier):
   - Cron job alap√∫ √ºtemez√©s
   - 2000 perc/h√≥ limit
   - Eredm√©nyek GitHub-ra ment√©se

4. RAILWAY/RENDER (Ingyenes tier):
   - 24/7 fut√°s
   - Kis resource limit
   - Web app form√°ban

GitHub Actions workflow p√©lda (.github/workflows/scraper.yml):

name: SofaScore Scraper
on:
  schedule:
    - cron: '*/30 * * * *'  # 30 percenk√©nt
  workflow_dispatch:

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: 3.9
    - name: Install dependencies
      run: |
        pip install requests beautifulsoup4 selenium pandas
        sudo apt-get update
        sudo apt-get install -y chromium-browser
    - name: Run scraper
      run: python scraper.py
    - name: Commit results
      run: |
        git config --local user.email "action@github.com"
        git config --local user.name "GitHub Action"
        git add *.csv
        git commit -m "Update scraping results" || exit 0
        git push
"""

print("üöÄ Deployment options printed above!")

üöÄ Deployment options printed above!


In [22]:
# Cleanup √©s le√°ll√≠t√°s

print("üßπ Cleaning up...")
scheduler.stop_scheduler()
scraper.cleanup()
print("‚úÖ Cleanup completed!")

2025-07-25 18:27:13,484 - INFO - Scheduler stopped


üßπ Cleaning up...
‚úÖ Cleanup completed!


# ## üìù Haszn√°lati √∫tmutat√≥
# 
# ### Alapvet≈ë haszn√°lat:
# 1. Futtasd le a csomagtelep√≠t√©st √©s importokat
# 2. √Åll√≠tsd be a `test_url` v√°ltoz√≥t a k√≠v√°nt SofaScore m√©rk≈ëz√©s URL-j√©vel
# 3. Futtasd le a tesztel≈ë cell√°t
# 4. Ind√≠tsd el az √ºtemezett scrapinget
# 
# ### Testreszab√°si lehet≈ës√©gek:
# - `interval_minutes`: Scraping gyakoris√°g m√≥dos√≠t√°sa
# - `headless`: False √©rt√©kkel l√°that√≥ b√∂ng√©sz≈ë ablak
# - `save_results_to_csv()`: Automatikus ment√©s be√°ll√≠t√°sa
# 
# ### Hibaelh√°r√≠t√°s:
# - Ha nem m≈±k√∂dik a requests m√≥dszer, a Selenium automatikusan √°tveszi
# - Chrome driver automatikusan telep√ºl a webdriver-manager seg√≠ts√©g√©vel
# - R√©szletes logok a hib√°k nyomon k√∂vet√©s√©hez
# 
# ### Teljes√≠tm√©ny optimaliz√°l√°s:
# - Headless m√≥d gyorsabb fut√°shoz
# - Requests el≈ësz√∂r pr√≥b√°lkozik (gyorsabb)
# - Selenium csak sz√ºks√©g eset√©n (megb√≠zhat√≥bb)