Preleviamo i dati da Goldbet Big Match (nota: il codice qui sotto interroga in realtà gli endpoint di Eurobet).

In [7]:
import requests
import pandas as pd
from datetime import datetime, timedelta
import time
import json
import os

class VirtualSportsCollector:
    """Download virtual-match results day by day and persist them locally.

    Each requested calendar day is fetched from the "winningresult" endpoint
    in ``base_url``, flattened to one row per match, merged with previously
    saved rows and written to a CSV file and an Excel file.
    """

    # Seconds to wait for the server before a request is abandoned; without a
    # timeout ``requests.get`` can block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Bet abbreviation -> (output column, source field) pairs taken from the
    # matching odd group. The first element of each source field is stored.
    # The pair order fixes the column order of the resulting DataFrame.
    ODD_GROUP_COLUMNS = {
        '1X2': (
            ('odds_1', 'odds'),
            ('result', 'resultDescription'),
        ),
        'U/O 2.5': (
            ('over_under_25', 'resultDescription'),
            ('odds_over_under_25', 'odds'),
        ),
        'Goal/No Goal': (
            ('goal_no_goal', 'resultDescription'),
            ('odds_goal_no_goal', 'odds'),
        ),
    }

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'en-US,en;q=0.6',
            'Origin': 'https://www.eurobet.it',
            'X-EB-Accept-Language': 'it_IT',
            'X-EB-MarketId': '5',
            'X-EB-PlatformId': '1',
            'Connection': 'keep-alive'
        }
        # The placeholder receives the day formatted as DD-MM-YYYY.
        self.base_url = "https://virtualservice.eurobet.it/virtual-winning-service/virtual-schedule/services/winningresult/55/22/{}"
        self.csv_filename = "virtual_matches_data.csv"
        self.excel_filename = "virtual_matches_data.xlsx"

    def create_match_id(self, row):
        """Return the de-duplication key of a match row.

        Args:
            row: Mapping (dict or DataFrame row) with the keys ``date``,
                ``hour``, ``home_team`` and ``away_team``.

        Returns:
            The four values joined with underscores.
        """
        return f"{row['date']}_{row['hour']}_{row['home_team']}_{row['away_team']}"

    def load_existing_data(self):
        """Load previously saved rows from the CSV file.

        Returns:
            The stored DataFrame, or an empty DataFrame when the file does not
            exist or contains no data at all.
        """
        if not os.path.exists(self.csv_filename):
            return pd.DataFrame()
        try:
            return pd.read_csv(self.csv_filename)
        except pd.errors.EmptyDataError:
            # A zero-byte file is equivalent to "nothing saved yet".
            return pd.DataFrame()

    @classmethod
    def _parse_event(cls, event):
        """Flatten one event of the API payload into a match row.

        Args:
            event: Event dict with ``date``, ``hour``, ``eventDescription``
                ("HOME - AWAY"), ``finalResult`` ("H-A") and optionally
                ``oddGroup``.

        Returns:
            Dict with the match columns; odds columns are present only for
            the odd groups listed in ``ODD_GROUP_COLUMNS``.

        Raises:
            KeyError, IndexError, ValueError: If the event is malformed, for
                example when the score is not numeric.
        """
        teams = event['eventDescription'].split(' - ')
        goals = event['finalResult'].split('-')
        match_data = {
            'date': event['date'],
            'hour': event['hour'],
            'home_team': teams[0],
            'away_team': teams[1],
            'score': event['finalResult'],
            'home_goals': int(goals[0]),
            'away_goals': int(goals[1]),
            'datetime': pd.to_datetime(f"{event['date']} {event['hour']}", format='%d-%m-%Y %H:%M:%S')
        }

        # An event without odds still yields a usable result row.
        for odd_group in event.get('oddGroup') or []:
            columns = cls.ODD_GROUP_COLUMNS.get(odd_group['betDescriptionAbbr'], ())
            for column, source in columns:
                match_data[column] = odd_group[source][0]

        return match_data

    def get_virtual_data(self, start_date, end_date):
        """Fetch every match between two dates, both ends included.

        Args:
            start_date: First day to request (``datetime``).
            end_date: Last day to request (``datetime``).

        Returns:
            DataFrame with one row per parsed match; empty when nothing could
            be collected or when ``start_date`` is after ``end_date``.
        """
        all_matches = []
        current_date = start_date

        while current_date <= end_date:
            date_str = current_date.strftime("%d-%m-%Y")
            url = self.base_url.format(date_str)

            try:
                response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
                if response.status_code == 200:
                    data = response.json()
                    if 'result' in data and 'groupDate' in data['result']:
                        for group in data['result']['groupDate']:
                            for event in group['events']:
                                try:
                                    all_matches.append(self._parse_event(event))
                                except (KeyError, IndexError, ValueError, TypeError, AttributeError) as e:
                                    # Skip only the broken event so the rest
                                    # of the day is still collected.
                                    print(f"Skipping malformed event on {date_str}: {e}")
                else:
                    print(f"Unexpected HTTP status {response.status_code} for {date_str}")
            except Exception as e:
                # Best effort: a failing day must not stop the other days.
                print(f"Error fetching data for {date_str}: {e}")

            # Throttle after every request, failed ones included.
            time.sleep(1)
            current_date += timedelta(days=1)

        return pd.DataFrame(all_matches)

    def merge_and_save_data(self, new_data):
        """Merge new rows with the stored ones, de-duplicate and save.

        Rows already on disk win over freshly downloaded duplicates. The
        result is sorted by ``datetime``, most recent first, and written to
        the CSV file and, when an Excel engine is installed, the Excel file.

        Args:
            new_data: DataFrame as returned by ``get_virtual_data``.

        Returns:
            The merged DataFrame that was saved. When there are no rows at
            all, the empty frame is returned and nothing is written.
        """
        existing_data = self.load_existing_data()

        if existing_data.empty:
            combined_data = new_data.reset_index(drop=True)
        else:
            # The CSV round-trip turns timestamps into strings.
            existing_data['datetime'] = pd.to_datetime(existing_data['datetime'])
            combined_data = pd.concat([existing_data, new_data], ignore_index=True)

        if combined_data.empty:
            return combined_data

        # Keep the first occurrence of every match key.
        match_ids = combined_data.apply(self.create_match_id, axis=1)
        combined_data = combined_data[~match_ids.duplicated(keep='first')]

        combined_data = combined_data.sort_values('datetime', ascending=False)

        combined_data.to_csv(self.csv_filename, index=False)
        try:
            combined_data.to_excel(self.excel_filename, index=False)
        except ImportError as e:
            # The Excel copy needs an optional engine; the CSV is already safe.
            print(f"Excel export skipped: {e}")

        return combined_data

    def collect_data(self, days_back=1):
        """Collect the last ``days_back`` days up to now and save them.

        Args:
            days_back: Number of days before today at which to start.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days_back)

        print(f"Collecting data from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")

        new_data = self.get_virtual_data(start_date, end_date)
        if not new_data.empty:
            final_data = self.merge_and_save_data(new_data)
            print("Data saved successfully")
            print(f"Total matches in database: {len(final_data)}")
            print(f"Files saved as: {self.csv_filename} and {self.excel_filename}")
        else:
            print("No new data collected")

def main(days_back=1):
    """Run one collection pass covering the last `days_back` days up to now."""
    VirtualSportsCollector().collect_data(days_back)

if __name__ == "__main__":
    main()  # Uses the default days_back=1, i.e. yesterday and today

Collecting data from 2024-11-20 to 2024-11-21
Data saved successfully
Total matches in database: 19794
Files saved as: virtual_matches_data.csv and virtual_matches_data.xlsx


predizione

In [14]:
import pandas as pd
import numpy as np
from collections import defaultdict
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import json

class VirtualMatchPredictor:
    """Estimate the probability that the next virtual match ends in a draw.

    The match history is treated as a sequence of outcome labels ('1', '2'
    and 'X', where the code treats 'X' as the draw). A first-order Markov
    chain over consecutive results supplies the conditional draw probability,
    which is then evaluated, plotted and written to a JSON report.
    """

    # Outcome labels in the fixed order used for matrix rows/columns and plots.
    STATES = ['1', '2', 'X']

    def __init__(self):
        self.markov_matrix = None
        self.state_counts = defaultdict(int)
        self.transition_counts = defaultdict(lambda: defaultdict(int))
        self.prior_draw_probability = None
        # Mapping hour -> draw rate, filled by predict_next_draw.
        self.hourly_trends = None
        self.model_metrics = {}
        self.predictions_history = []

    def preprocess_data(self, df):
        """Return a cleaned copy of the history, sorted chronologically.

        ``datetime`` is parsed, ``hour`` ("HH:MM:SS") is reduced to its integer
        hour, rows without a result are dropped and ``result`` is normalised
        to stripped strings, because a CSV round-trip can leave a mix of
        numbers and strings in that column.

        Args:
            df: DataFrame with at least ``datetime``, ``hour`` and ``result``.

        Returns:
            The processed DataFrame, oldest match first.
        """
        df = df.copy()
        df['datetime'] = pd.to_datetime(df['datetime'])
        df['hour'] = pd.to_datetime(df['hour'], format='%H:%M:%S').dt.hour
        df = df.dropna(subset=['result']).copy()
        df['result'] = df['result'].astype(str).str.strip()
        return df.sort_values('datetime')

    def build_markov_chain(self, df):
        """Build the transition matrix between consecutive results.

        Counters are rebuilt from scratch on every call, so calling this
        method repeatedly does not inflate the counts.

        Args:
            df: DataFrame whose ``result`` column is in chronological order.

        Returns:
            3x3 ``numpy`` array; entry ``[i][j]`` is the observed share of
            transitions from ``STATES[i]`` to ``STATES[j]``. Rows of states
            that never occur as a predecessor stay at zero.
        """
        results = df['result'].tolist()

        self.state_counts = defaultdict(int)
        self.transition_counts = defaultdict(lambda: defaultdict(int))

        for current_state, next_state in zip(results, results[1:]):
            self.state_counts[current_state] += 1
            self.transition_counts[current_state][next_state] += 1

        size = len(self.STATES)
        self.markov_matrix = np.zeros((size, size))

        for i, current in enumerate(self.STATES):
            followers = self.transition_counts.get(current, {})
            total = sum(followers.values())
            if total > 0:
                for j, next_state in enumerate(self.STATES):
                    self.markov_matrix[i][j] = followers.get(next_state, 0) / total

        return self.markov_matrix

    def calculate_draw_probability_bayes(self, df, last_result):
        """Return P(next result is 'X' | previous result is ``last_result``).

        By Bayes' rule this is
        ``P(prev = last | next = X) * P(next = X) / P(prev = last)``, which
        over the observed consecutive pairs simplifies to the share of draws
        among the results that followed ``last_result``. As a side effect the
        unconditional draw rate of ``df`` is stored in
        ``prior_draw_probability``.

        Args:
            df: DataFrame whose ``result`` column is in chronological order.
            last_result: Outcome label of the most recent match.

        Returns:
            The conditional draw probability as a float. Falls back to the
            prior when ``last_result`` was never followed by another match,
            and to 0.0 for an empty history.
        """
        results = df['result'].tolist()
        if not results:
            self.prior_draw_probability = 0.0
            return 0.0

        prior = results.count('X') / len(results)
        self.prior_draw_probability = prior

        followers = [nxt for prev, nxt in zip(results, results[1:]) if prev == last_result]
        if not followers:
            return prior
        return followers.count('X') / len(followers)

    def analyze_daily_trends(self, df):
        """Return the draw rate observed for every hour of the day.

        Args:
            df: DataFrame with ``hour`` and ``result`` columns.

        Returns:
            Dict mapping each hour present in ``df`` to the share of its
            matches that ended 'X'; empty for an empty DataFrame.
        """
        if df.empty:
            return {}
        draw_rates = (df['result'] == 'X').groupby(df['hour']).mean()
        return {hour: float(rate) for hour, rate in draw_rates.items()}

    def calculate_team_statistics(self, df):
        """Compute per-team totals over home and away matches.

        Args:
            df: DataFrame with ``home_team``, ``away_team``, ``home_goals``,
                ``away_goals`` and ``result``.

        Returns:
            Dict mapping each team to ``total_games``, ``goals_scored``,
            ``goals_conceded``, ``draws`` and ``draw_percentage`` (0-100).
        """
        team_stats = {}

        for team in set(df['home_team'].unique()) | set(df['away_team'].unique()):
            home_games = df[df['home_team'] == team]
            away_games = df[df['away_team'] == team]

            stats = {
                'total_games': len(home_games) + len(away_games),
                'goals_scored': home_games['home_goals'].sum() + away_games['away_goals'].sum(),
                'goals_conceded': home_games['away_goals'].sum() + away_games['home_goals'].sum(),
                'draws': len(home_games[home_games['result'] == 'X']) + len(away_games[away_games['result'] == 'X']),
                'draw_percentage': 0
            }

            if stats['total_games'] > 0:
                stats['draw_percentage'] = (stats['draws'] / stats['total_games']) * 100

            team_stats[team] = stats

        return team_stats

    def evaluate_model(self, df_test):
        """Score the draw predictor on a chronological history.

        For every match but the last, the draw probability is estimated from
        the matches seen so far and turned into a binary call: 'X' when it
        exceeds ``prior_draw_probability``, otherwise '1'. The call is
        compared with the result that actually followed. The running counters
        give the same estimate as ``calculate_draw_probability_bayes`` on each
        prefix, without re-scanning the prefix at every step.

        Args:
            df_test: DataFrame whose ``result`` column is in chronological
                order.

        Returns:
            Dict with ``accuracy``, ``precision``, ``recall``, ``f1`` (for the
            'X' label), ``confusion_matrix`` (rows/columns ordered as
            ``STATES``) and ``classification_report``.

        Raises:
            ValueError: If fewer than two results are available.
        """
        results = df_test['result'].tolist()
        if len(results) < 2:
            raise ValueError("evaluate_model needs at least two matches")

        if self.prior_draw_probability is None:
            self.prior_draw_probability = results.count('X') / len(results)
        threshold = self.prior_draw_probability

        followed_total = defaultdict(int)
        followed_by_draw = defaultdict(int)
        draws_seen = 0
        y_true = []
        y_pred = []

        for position, (current, upcoming) in enumerate(zip(results, results[1:])):
            draws_seen += current == 'X'
            seen_after_current = followed_total[current]
            if seen_after_current:
                pred = followed_by_draw[current] / seen_after_current
            else:
                # No transition out of this state yet: use the running prior.
                pred = draws_seen / (position + 1)

            # Convert the probability into a binary call (draw or not).
            y_pred.append('X' if pred > threshold else '1')
            y_true.append(upcoming)

            # Only now may the observed transition enter the counters.
            followed_total[current] += 1
            followed_by_draw[current] += upcoming == 'X'

        self.model_metrics = {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred, average='weighted', labels=['X'], zero_division=0),
            'recall': recall_score(y_true, y_pred, average='weighted', labels=['X'], zero_division=0),
            'f1': f1_score(y_true, y_pred, average='weighted', labels=['X'], zero_division=0),
            # Fixed labels keep the matrix 3x3 even when a class never occurs,
            # matching the tick labels used by plot_confusion_matrix.
            'confusion_matrix': confusion_matrix(y_true, y_pred, labels=self.STATES),
            'classification_report': classification_report(y_true, y_pred, zero_division=0)
        }

        return self.model_metrics

    def generate_prediction_report(self, next_match_data, prediction_results):
        """Build the report for the next match and save it as JSON.

        Args:
            next_match_data: Dict with ``home_team``, ``away_team``,
                ``datetime`` and ``hour`` of the match to predict.
            prediction_results: Dict containing at least ``draw_probability``.

        Returns:
            The report dict that was written to
            ``match_prediction_report_<timestamp>.json``.
        """
        report = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "match_details": {
                "home_team": next_match_data['home_team'],
                "away_team": next_match_data['away_team'],
                # str() keeps datetime/Timestamp inputs JSON-serialisable.
                "datetime": str(next_match_data['datetime'])
            },
            "prediction_details": {
                "draw_probability": round(prediction_results['draw_probability'] * 100, 2),
                "confidence_level": self._calculate_confidence_level(prediction_results['draw_probability']),
                "historical_context": {
                    "prior_draw_probability": round(self.prior_draw_probability * 100, 2),
                    "hour_trend": self._get_hour_trend(next_match_data['hour'])
                }
            },
            "model_performance": {
                "accuracy": round(self.model_metrics['accuracy'] * 100, 2),
                "precision_for_draws": round(self.model_metrics['precision'] * 100, 2),
                "recall_for_draws": round(self.model_metrics['recall'] * 100, 2),
                "f1_score": round(self.model_metrics['f1'] * 100, 2)
            },
            "recommendations": self._generate_recommendations(prediction_results['draw_probability'])
        }

        filename = f"match_prediction_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(report, f, indent=4, ensure_ascii=False)

        return report

    def _calculate_confidence_level(self, probability):
        """Map a draw probability to one of five confidence labels."""
        if probability > 0.7:
            return "MOLTO ALTO"
        if probability > 0.6:
            return "ALTO"
        if probability > 0.5:
            return "MEDIO"
        if probability > 0.4:
            return "BASSO"
        return "MOLTO BASSO"

    def _get_hour_trend(self, hour):
        """Describe how draw-prone the given hour has been.

        Returns "Dati insufficienti" when no trends were computed or the hour
        never appears in them, instead of treating a missing hour as a 0%
        draw rate.
        """
        trends = getattr(self, 'hourly_trends', None)
        if not trends or hour not in trends:
            return "Dati insufficienti"

        trend = trends[hour]
        if trend > 0.3:
            return "Ora favorevole ai pareggi"
        if trend < 0.1:
            return "Ora sfavorevole ai pareggi"
        return "Trend orario neutro"

    def _generate_recommendations(self, draw_probability):
        """Return recommendation strings for the given draw probability.

        Reads ``model_metrics['accuracy']``, so ``evaluate_model`` must have
        run first.
        """
        recommendations = []

        if draw_probability > 0.6:
            recommendations.append("Alta probabilità di pareggio - Situazione molto interessante")
        elif draw_probability > 0.5:
            recommendations.append("Probabilità media di pareggio - Valutare altri fattori")
        else:
            recommendations.append("Bassa probabilità di pareggio - Meglio evitare")

        recommendations.append(f"Confidenza del modello: {self.model_metrics['accuracy']:.2%}")

        return recommendations

    def plot_confusion_matrix(self):
        """Save the confusion matrix heatmap to ``confusion_matrix.png``."""
        plt.figure(figsize=(10, 8))
        sns.heatmap(self.model_metrics['confusion_matrix'],
                   annot=True,
                   fmt='d',
                   cmap='Blues',
                   xticklabels=self.STATES,
                   yticklabels=self.STATES)
        plt.title('Matrice di Confusione')
        plt.ylabel('Reale')
        plt.xlabel('Previsto')

        plt.savefig('confusion_matrix.png')
        plt.close()

    def plot_markov_transitions(self):
        """Save the transition matrix heatmap to ``markov_transitions.png``."""
        plt.figure(figsize=(10, 8))
        sns.heatmap(self.markov_matrix,
                   annot=True,
                   fmt='.2f',
                   cmap='YlOrRd',
                   xticklabels=self.STATES,
                   yticklabels=self.STATES)
        plt.title('Matrice di Transizione di Markov')
        plt.ylabel('Stato Attuale')
        plt.xlabel('Stato Successivo')

        plt.savefig('markov_transitions.png')
        plt.close()

    def predict_next_draw(self, df, next_match_data):
        """Run the whole pipeline for the next match.

        Preprocesses the history, builds the Markov chain, estimates the draw
        probability, computes the hourly trends, evaluates the model, saves
        both plots and writes the JSON report.

        Args:
            df: Raw match history (see ``preprocess_data``).
            next_match_data: Dict describing the match to predict (see
                ``generate_prediction_report``).

        Returns:
            Tuple ``(prediction_results, report)``.

        Raises:
            ValueError: If no match with a result is left after preprocessing.
        """
        df = self.preprocess_data(df)
        if df.empty:
            raise ValueError("No matches with a result available for prediction")

        self.build_markov_chain(df)

        last_result = df['result'].iloc[-1]
        draw_prob = self.calculate_draw_probability_bayes(df, last_result)

        self.hourly_trends = self.analyze_daily_trends(df)

        self.evaluate_model(df)

        self.plot_confusion_matrix()
        self.plot_markov_transitions()

        prediction_results = {
            'draw_probability': draw_prob,
            'hourly_trends': self.hourly_trends,
            'markov_matrix': self.markov_matrix,
            'model_metrics': self.model_metrics
        }

        report = self.generate_prediction_report(next_match_data, prediction_results)

        return prediction_results, report

def analyze_matches(df, next_match_data):
    """Run the draw prediction for the next match.

    Args:
        df: Match history, either as a DataFrame or as CSV text.
        next_match_data: Dict describing the match to predict; it is passed
            unchanged to ``VirtualMatchPredictor.predict_next_draw``.

    Returns:
        Tuple ``(prediction_results, report)`` as returned by
        ``VirtualMatchPredictor.predict_next_draw``.
    """
    # Imported here because the module never imports StringIO, so the CSV-text
    # branch used to fail with NameError.
    from io import StringIO

    predictor = VirtualMatchPredictor()

    if isinstance(df, str):
        df = pd.read_csv(StringIO(df))

    prediction_results, report = predictor.predict_next_draw(df, next_match_data)

    return prediction_results, report

# Esempio di utilizzo:
"""
next_match_data = {
    'home_team': 'JUV',
    'away_team': 'INT',
    'datetime': '2024-11-22 20:00:00',
    'hour': 20
}

results, report = analyze_matches(df, next_match_data)
"""

"\nnext_match_data = {\n    'home_team': 'JUV',\n    'away_team': 'INT',\n    'datetime': '2024-11-22 20:00:00',\n    'hour': 20\n}\n\nresults, report = analyze_matches(df, next_match_data)\n"

Dati da prevedere.

In [18]:
import requests
import pandas as pd
from datetime import datetime
import time
import json
from pathlib import Path

class VirtualOddsCollector:
    """Collect the odds of individual virtual matches into a CSV file.

    Each match code is looked up on the "virtual-detail-service" endpoint in
    ``base_url``. The odds of every bet group are flattened into columns and
    the rows are merged into ``data/virtual_odds_detail.csv``.
    """

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'en-US,en;q=0.6',
            'Origin': 'https://www.eurobet.it',
            'X-EB-Accept-Language': 'it_IT',
            'X-EB-MarketId': '5',
            'X-EB-PlatformId': '1',
            'Connection': 'keep-alive',
            'Referer': 'https://www.eurobet.it/'
        }
        self.data_dir = Path('data')
        self.data_dir.mkdir(exist_ok=True)
        self.csv_filename = self.data_dir / 'virtual_odds_detail.csv'
        self.base_url = "https://virtualservice.eurobet.it/virtual-detail-service/virtual-schedule/services/22/sport/{}"

        print(f"File will be saved to: {self.csv_filename}")

    @staticmethod
    def _odds_columns(bet_groups):
        """Flatten the bet groups of one match into odds columns.

        Only the first odd group of every bet group is read. Each odd yields
        ``<bet>_<outcome>_odds`` and ``<bet>_<outcome>_code``, where the prefix
        is lower-cased and has '/' and spaces replaced by underscores.

        Args:
            bet_groups: List of bet-group dicts from the API payload.

        Returns:
            Dict of column name -> value; empty when no group carries odds.
        """
        columns = {}
        for bet_group in bet_groups:
            odd_groups = bet_group['oddGroupList']
            if not (odd_groups and odd_groups[0]['oddList']):
                continue

            bet_type = bet_group['betDescription']
            for odd in odd_groups[0]['oddList']:
                key_base = f"{bet_type}_{odd['oddDescription']}".lower().replace('/', '_').replace(' ', '_')
                columns[f"{key_base}_odds"] = odd['oddValue']
                columns[f"{key_base}_code"] = odd['resultCode']
        return columns

    def get_match_odds(self, match_code):
        """Fetch and flatten the odds of one match.

        Args:
            match_code: Identifier inserted into ``base_url``.

        Returns:
            Dict with the match metadata and one pair of columns per odd, or
            ``None`` when the request fails or the payload is unusable.
        """
        url = self.base_url.format(match_code)
        try:
            print(f"Fetching data for match code: {match_code}")
            response = requests.get(url, headers=self.headers, timeout=10)
            response.raise_for_status()

            data = response.json()
            if 'result' not in data or 'eventdetail' not in data['result']:
                print(f"No valid data found for match {match_code}")
                return None

            event_info = data['result']['eventdetail']['eventInfo']
            bet_groups = data['result']['eventdetail']['betGroupList']

            match_data = {
                'match_code': match_code,
                # eventData is divided by 1000 before use, i.e. it is handled
                # as an epoch value in milliseconds.
                'timestamp': datetime.fromtimestamp(event_info['eventData']/1000).strftime('%Y-%m-%d %H:%M:%S'),
                'home_team': event_info['teamHomeDescription'],
                'away_team': event_info['teamAwayDescription'],
                'channel': event_info['channelDescription'],
                'event_code': event_info['eventCode'],
                'program_code': event_info['programCode'],
                'collection_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
            match_data.update(self._odds_columns(bet_groups))

            print(f"Successfully collected odds for {match_data['home_team']} vs {match_data['away_team']}")
            return match_data

        # The JSON handler must come first: the decode error raised by
        # response.json() in current requests versions is also a
        # RequestException and would otherwise be reported as a network error.
        except json.JSONDecodeError as e:
            print(f"JSON parsing error for match {match_code}: {e}")
        except requests.exceptions.RequestException as e:
            print(f"Network error for match {match_code}: {e}")
        except Exception as e:
            print(f"Unexpected error for match {match_code}: {e}")
        return None

    def collect_matches(self, match_codes):
        """Collect the odds of several matches and merge them into the CSV.

        Rows sharing ``match_code`` and ``timestamp`` with stored ones replace
        them. If the existing file cannot be read it is renamed to
        ``<name>.bak`` instead of being silently overwritten.

        Args:
            match_codes: Iterable of match codes to fetch.

        Returns:
            The merged DataFrame, most recent first; an empty DataFrame when
            no match could be collected.
        """
        all_matches = []

        print(f"\nStarting collection for match codes: {match_codes}")

        for match_code in match_codes:
            match_data = self.get_match_odds(match_code)
            if match_data:
                all_matches.append(match_data)
            time.sleep(1)

        if not all_matches:
            print("No match data collected!")
            return pd.DataFrame()

        df = pd.DataFrame(all_matches)
        # Normalise before merging so that stored and fresh timestamps compare
        # equal regardless of how they were formatted.
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        if self.csv_filename.exists():
            try:
                existing_df = pd.read_csv(self.csv_filename)
                existing_df['timestamp'] = pd.to_datetime(existing_df['timestamp'])
                print(f"Loaded {len(existing_df)} existing records")
                df = pd.concat([existing_df, df], ignore_index=True)
                df = df.drop_duplicates(subset=['match_code', 'timestamp'], keep='last')
                print(f"After merging and removing duplicates: {len(df)} records")
            except Exception as e:
                print(f"Error loading existing data: {e}")
                backup = self.csv_filename.with_name(self.csv_filename.name + '.bak')
                try:
                    self.csv_filename.replace(backup)
                    print(f"Unreadable file kept as {backup}")
                except OSError as backup_error:
                    # Never overwrite data that could not be preserved.
                    print(f"Could not back up existing file, nothing saved: {backup_error}")
                    return df.sort_values('timestamp', ascending=False)

        df = df.sort_values('timestamp', ascending=False)

        try:
            df.to_csv(self.csv_filename, index=False)
        except Exception as e:
            print(f"Error saving data: {e}")
        else:
            print(f"\nSuccessfully saved data to {self.csv_filename}")
            print(f"Total matches in database: {len(df)}")

            print("\nPreview of saved data:")
            basic_cols = ['timestamp', 'home_team', 'away_team']
            odds_cols = [col for col in df.columns if '1x2_finale' in col and 'odds' in col]
            print(df[basic_cols + odds_cols].head())

        return df

# Settings used to generate the match codes
BASE_CODE = "55_2402405092"  # Common code prefix
START_NUMBER = 384  # First numeric suffix

if __name__ == "__main__":
    # Build 5 codes whose numeric suffix grows by 2 each time
    match_codes = [
        f"{BASE_CODE}_{suffix}"
        for suffix in range(START_NUMBER, START_NUMBER + 5 * 2, 2)
    ]

    odds_collector = VirtualOddsCollector()
    odds_collector.collect_matches(match_codes)

File will be saved to: data\virtual_odds_detail.csv

Starting collection for match codes: ['55_2402405092_384', '55_2402405092_386', '55_2402405092_388', '55_2402405092_390', '55_2402405092_392']
Fetching data for match code: 55_2402405092_384
Successfully collected odds for LAZ vs JUV
Fetching data for match code: 55_2402405092_386
Successfully collected odds for ROM vs NAP
Fetching data for match code: 55_2402405092_388
Successfully collected odds for MIL vs FIO
Fetching data for match code: 55_2402405092_390
Successfully collected odds for LAZ vs INT
Fetching data for match code: 55_2402405092_392
Successfully collected odds for JUV vs NAP
Loaded 3 existing records
After merging and removing duplicates: 6 records

Successfully saved data to data\virtual_odds_detail.csv
Total matches in database: 6

Preview of saved data:
            timestamp home_team away_team  1x2_finale_1_odds  \
7 2024-11-21 22:55:00       JUV       NAP                183   
6 2024-11-21 22:50:00       LAZ     