In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import json
import os
import gzip  # Importato per la decompressione manuale gzip
import brotli  # Importato per la decompressione manuale Brotli


class VirtualSportsCollector:
    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.6',
            'Origin': 'https://www.eurobet.it',
            'Referer': 'https://www.eurobet.it/',
            'X-EB-Accept-Language': 'it_IT',
            'X-EB-MarketId': '5',
            'X-EB-PlatformId': '1',
            'Connection': 'keep-alive',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site'
        }
        self.base_url = "https://virtualservice.eurobet.it/virtual-winning-service/virtual-schedule/services/winningresult/68/17/{}"
        self.csv_filename = "virtual_matches_data.csv"
        self.excel_filename = "virtual_matches_data.xlsx"

    def create_match_id(self, row):
        """Build an identifier for a match from its date, hour and team names.

        Args:
            row: Any object exposing ``get(key, default)`` (e.g. a dict).

        Returns:
            The string ``"<date>_<hour>_<home_team>_<away_team>"``; a missing
            field contributes an empty string.
        """
        id_fields = ('date', 'hour', 'home_team', 'away_team')
        # Each value is stringified before joining, so non-string values are allowed.
        return "_".join(str(row.get(field, '')) for field in id_fields)

    def load_existing_data(self):
        """Carica i dati esistenti dal CSV, se esiste."""
        if os.path.exists(self.csv_filename):
            try:
                dtype_spec = {  # Specifica dtype per colonne potenzialmente problematiche
                    'odds_1': 'object', 'result': 'object',
                    'over_under_25': 'object', 'odds_over_under_25': 'object',
                    'goal_no_goal': 'object', 'odds_goal_no_goal': 'object',
                    'home_goals': 'Int64', 'away_goals': 'Int64'  # Usa Int64 per permettere NaN interi
                }
                df = pd.read_csv(self.csv_filename, dtype=dtype_spec)
                if 'datetime' in df.columns:
                    df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
                # Rimuovi colonne completamente vuote che potrebbero essere state create da errori precedenti
                df.dropna(axis=1, how='all', inplace=True)
                return df
            except pd.errors.EmptyDataError:
                print(f"Il file CSV {self.csv_filename} è vuoto. Verrà creato un nuovo DataFrame.")
                return pd.DataFrame()
            except Exception as e:
                print(f"Errore durante il caricamento del file CSV {self.csv_filename}: {e}")
                return pd.DataFrame()
        return pd.DataFrame()

    def get_virtual_data(self, start_date, end_date):
        """Recupera i dati virtuali per l'intervallo di date specificato."""
        all_matches = []
        current_date = start_date

        while current_date <= end_date:
            date_str = current_date.strftime("%d-%m-%Y")
            url = self.base_url.format(date_str)
            print(f"Tentativo di recupero dati per {date_str} da URL: {url}")
            data = None
            response = None

            try:
                response = requests.get(url, headers=self.headers, timeout=25)
                response.raise_for_status()

                if not response.content:
                    print(f"Risposta vuota ricevuta per {date_str} (Status: {response.status_code}). URL: {url}")
                    time.sleep(1.5)
                    current_date += timedelta(days=1)
                    continue

                try:
                    data = response.json()
                except json.JSONDecodeError:
                    print(f"Errore di decodifica JSON standard per {date_str}. Status: {response.status_code}")
                    content_encoding = response.headers.get('Content-Encoding', '').lower()
                    print(f"Header Content-Encoding: {content_encoding if content_encoding else 'Non presente'}")

                    decompressed_successfully = False
                    if content_encoding == 'br':
                        print("Tentativo di decompressione Brotli manuale.")
                        try:
                            decompressed_content = brotli.decompress(response.content)
                            data = json.loads(decompressed_content.decode('utf-8'))
                            print(f"Contenuto Brotli per {date_str} decompresso e parsato manualmente.")
                            decompressed_successfully = True
                        except Exception as e_decompress:
                            print(f"Fallimento decompressione/parsing Brotli per {date_str}: {e_decompress}")

                    elif content_encoding == 'gzip' or response.content.startswith(b'\x1f\x8b\x08'):
                        print("Tentativo di decompressione Gzip manuale.")
                        try:
                            decompressed_content = gzip.decompress(response.content)
                            data = json.loads(decompressed_content.decode('utf-8'))
                            print(f"Contenuto Gzip per {date_str} decompresso e parsato manualmente.")
                            decompressed_successfully = True
                        except Exception as e_decompress:
                            print(f"Fallimento decompressione/parsing Gzip per {date_str}: {e_decompress}")

                    if not decompressed_successfully:
                        print(f"Decodifica JSON fallita per {date_str} anche dopo tentativi manuali (se applicabili).")
                        print(f"Contenuto grezzo (primi 200 byte): {response.content[:200]}...")
                        time.sleep(1.5)
                        current_date += timedelta(days=1)
                        continue

                # Elaborazione dei dati
                if data and 'result' in data and data['result'] is not None and 'groupDate' in data['result']:
                    for group in data['result']['groupDate']:
                        if 'events' in group and group['events'] is not None:
                            for event in group['events']:
                                try:
                                    # --- Inizio Logica di Parsing Team Names Migliorata ---
                                    parsed_home_team = None
                                    parsed_away_team = None

                                    event_desc_raw = event.get('eventDescription')

                                    if isinstance(event_desc_raw, str) and event_desc_raw.strip():
                                        cleaned_desc = event_desc_raw.strip()
                                        parts = cleaned_desc.split(' - ', 1)  # Divide al massimo una volta

                                        if parts[0]:  # Nome squadra casa
                                            parsed_home_team = parts[0].strip()
                                            if not parsed_home_team