In [10]:
# Football Competition Optimization
# Implementação de algoritmos para maximizar a competitividade através da reordenação de partidas

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations, permutations
from collections import defaultdict
import random
from datetime import datetime, timedelta
import networkx as nx
from scipy.optimize import linear_sum_assignment
import warnings
# NOTE(review): this silences every warning raised in the notebook, including
# pandas/numpy deprecation and dtype warnings. Consider narrowing the filter.
warnings.filterwarnings('ignore')

# Matplotlib / seaborn plotting configuration
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


In [11]:
# ================================
# 1. CARREGAMENTO E PREPARAÇÃO DOS DADOS
# ================================

def load_and_prepare_data(file_path):
    """Read the match CSV and derive score and league-point columns.

    The CSV columns are renamed positionally to the ten names below, ``date``
    is parsed as ``DD.MM.YYYY``, and ``result`` (``"H:A"``) is split into the
    integer columns ``home_score`` / ``away_score``. ``home_points`` and
    ``away_points`` follow the 3/1/0 rule (win/draw/loss).

    Args:
        file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

    Returns:
        The loaded DataFrame with the four derived columns appended.
    """
    matches = pd.read_csv(file_path)

    # Positional rename: the file is expected to have exactly these 10 columns.
    matches.columns = [
        'competition_id', 'round_number', 'result', 'date',
        'odds_home', 'odds_tie', 'odds_away', 'winner', 'home_team', 'away_team',
    ]

    matches['date'] = pd.to_datetime(matches['date'], format='%d.%m.%Y')

    # "2:1" -> home_score=2, away_score=1
    score_parts = matches['result'].str.split(':', expand=True).astype(int)
    matches[['home_score', 'away_score']] = score_parts

    home_won = matches['home_score'] > matches['away_score']
    away_won = matches['away_score'] > matches['home_score']
    drawn = matches['home_score'] == matches['away_score']

    # 3 points for a win, 1 for a draw, 0 for a loss
    matches['home_points'] = np.select([home_won, drawn], [3, 1], default=0)
    matches['away_points'] = np.select([away_won, drawn], [3, 1], default=0)

    return matches

In [12]:
# ================================
# 2. MÉTRICAS DE COMPETITIVIDADE
# ================================

def calculate_competitiveness_metrics(standings_by_round):
    """Compute competitiveness indicators for every round of a season.

    Args:
        standings_by_round: Mapping ``round number -> standings DataFrame``;
            each DataFrame needs a numeric ``points`` column.

    Returns:
        Dict ``round number -> dict`` with the keys ``point_spread`` (leader
        minus last), ``points_std`` (sample standard deviation),
        ``title_contenders`` (teams that could still reach the current leader
        if they gained 3 points in each remaining round), ``cv_points``
        (std / mean, 0 when the mean is not positive) and ``balance_index``
        (``1 / (1 + mean gap between consecutive places)``).
    """
    # The last round number is the same for every iteration, so compute it once.
    last_round = max(standings_by_round.keys()) if standings_by_round else None

    metrics = {}
    for round_num, standings in standings_by_round.items():
        ranked = standings.sort_values('points', ascending=False)
        points = ranked['points']

        # 1. Gap between first and last place
        leader_points = ranked.iloc[0]['points']
        point_spread = leader_points - ranked.iloc[-1]['points']

        # 2. Standard deviation of points (lower = more balanced)
        points_std = points.std()

        # 3. Teams that can still mathematically catch the leader
        reachable_points = (last_round - round_num) * 3
        still_alive = points + reachable_points >= leader_points
        title_contenders = int(still_alive.sum())

        # 4. Coefficient of variation
        mean_points = points.mean()
        if mean_points > 0:
            cv_points = points_std / mean_points
        else:
            cv_points = 0

        # 5. Balance index from the gaps between consecutive positions
        gaps = points.diff().dropna().abs()
        if len(gaps) > 0:
            balance_index = 1 / (1 + gaps.mean())
        else:
            balance_index = 0

        metrics[round_num] = {
            'point_spread': point_spread,
            'points_std': points_std,
            'title_contenders': title_contenders,
            'cv_points': cv_points,
            'balance_index': balance_index
        }

    return metrics

def simulate_season(matches, schedule_order):
    """Replay a season one match at a time in the given order.

    Every match is treated as its own "round": after applying match *k* of the
    reordered list, a snapshot of the table is stored under key ``k``
    (1-based).

    Args:
        matches: DataFrame with the columns ``home_team``, ``away_team``,
            ``home_score``, ``away_score``, ``home_points`` and ``away_points``.
        schedule_order: Sequence of *positional* row indices (used with
            ``.iloc``) giving the order in which matches are played.

    Returns:
        Dict ``round number -> standings DataFrame`` (indexed by ``team``, with
        the columns ``points``, ``matches_played``, ``wins``, ``draws``,
        ``losses``, ``goals_for`` and ``goals_against``). An empty schedule
        yields an empty dict.
    """
    ordered_matches = matches.iloc[schedule_order]

    # Nothing to replay: avoid building range(1, NaN + 1) from an empty frame.
    if len(ordered_matches) == 0:
        return {}

    home_teams = ordered_matches['home_team'].tolist()
    away_teams = ordered_matches['away_team'].tolist()

    # First-appearance order keeps the table layout reproducible between runs
    # (a plain set() would depend on string-hash randomisation).
    all_teams = list(dict.fromkeys(home_teams + away_teams))

    stat_columns = ['points', 'matches_played', 'wins', 'draws', 'losses',
                    'goals_for', 'goals_against']
    standings = pd.DataFrame(
        0, index=pd.Index(all_teams, name='team'), columns=stat_columns
    )

    fixtures = zip(
        home_teams,
        away_teams,
        ordered_matches['home_score'].tolist(),
        ordered_matches['away_score'].tolist(),
        ordered_matches['home_points'].tolist(),
        ordered_matches['away_points'].tolist(),
    )

    standings_by_round = {}

    # One pass over the fixtures; each fixture is exactly one round.
    for round_num, fixture in enumerate(fixtures, start=1):
        home_team, away_team, home_score, away_score, home_points, away_points = fixture

        standings.at[home_team, 'points'] += home_points
        standings.at[away_team, 'points'] += away_points
        standings.at[home_team, 'matches_played'] += 1
        standings.at[away_team, 'matches_played'] += 1
        standings.at[home_team, 'goals_for'] += home_score
        standings.at[home_team, 'goals_against'] += away_score
        standings.at[away_team, 'goals_for'] += away_score
        standings.at[away_team, 'goals_against'] += home_score

        if home_points == 3:
            standings.at[home_team, 'wins'] += 1
            standings.at[away_team, 'losses'] += 1
        elif away_points == 3:
            standings.at[away_team, 'wins'] += 1
            standings.at[home_team, 'losses'] += 1
        else:
            standings.at[home_team, 'draws'] += 1
            standings.at[away_team, 'draws'] += 1

        # Snapshot so later rounds do not mutate earlier tables.
        standings_by_round[round_num] = standings.copy()

    return standings_by_round

In [13]:
# ================================
# 3. ALGORITMOS DE OTIMIZAÇÃO
# ================================

class FootballScheduleOptimizer:
    """Heuristics that reorder the matches of one competition.

    Every scheduling method returns a list containing each match exactly once.
    The entries are *positional* row numbers (0 .. n_matches - 1) of the frame
    passed to the constructor, so they can be fed to ``DataFrame.iloc``.

    Attributes:
        matches: Copy of the input frame with a fresh 0..n-1 index.
        teams: Team names in order of first appearance (home column first).
        n_teams: Number of distinct teams.
        n_matches: Number of matches.
    """

    def __init__(self, matches_df):
        """Store a positionally indexed copy of ``matches_df``.

        Args:
            matches_df: DataFrame with at least ``home_team`` and ``away_team``;
                individual methods additionally read the ``odds_*`` and
                ``*_points`` columns.
        """
        # Resetting the index makes label == position, so the schedules built
        # below stay valid for .iloc even when the caller passes a filtered
        # frame that kept its original index.
        self.matches = matches_df.reset_index(drop=True)
        # dict.fromkeys de-duplicates while keeping a reproducible order
        # (a set() would depend on string-hash randomisation).
        self.teams = list(dict.fromkeys(
            self.matches['home_team'].tolist() + self.matches['away_team'].tolist()
        ))
        self.n_teams = len(self.teams)
        self.n_matches = len(self.matches)

    def rank_reversed_recursive_circle(self):
        """Order matches in four phases driven by the teams' rank gap.

        Teams are ranked by ``1 / mean(odds)`` (mean of the average home odds
        and average away odds; 1.0 when odds are missing). Each phase takes
        ``n_matches // 4`` of the still-unused matches according to a priority
        on the rank gap; leftovers are appended in index order.

        Returns:
            List of positional match indices.
        """
        print("Executando Rank-Reversed Recursive Circle Method...")

        # Team strength is inversely proportional to the average odds
        # (lower odds = stronger team).
        team_strength = {}
        for team in self.teams:
            home_odds = self.matches.loc[self.matches['home_team'] == team, 'odds_home'].mean()
            away_odds = self.matches.loc[self.matches['away_team'] == team, 'odds_away'].mean()
            mean_odds = (home_odds + away_odds) / 2
            team_strength[team] = 1.0 if pd.isna(mean_odds) else 1 / mean_odds

        ranked_teams = sorted(team_strength, key=team_strength.get, reverse=True)
        # O(1) rank lookup instead of list.index() inside the sort key.
        rank = {team: position for position, team in enumerate(ranked_teams)}
        half = len(ranked_teams) // 2

        rank_gaps = [
            abs(rank[home] - rank[away])
            for home, away in zip(self.matches['home_team'], self.matches['away_team'])
        ]

        # (priority on the rank gap, sort descending?) for each phase.
        # NOTE(review): phases 1 and 3 negate the gap AND sort descending, so
        # they select the smallest gaps, exactly like phase 0. The original
        # labels ("extreme teams", "decisive") suggest the largest gaps were
        # intended; behaviour is kept as-is until that is confirmed.
        phase_rules = [
            (lambda gap: gap, False),              # phase 0: start
            (lambda gap: -gap, True),              # phase 1: early-middle
            (lambda gap: abs(gap - half), False),  # phase 2: late-middle
            (lambda gap: -gap, True),              # phase 3: final
        ]

        matches_per_phase = self.n_matches // 4
        schedule = []
        matches_used = set()

        for priority, descending in phase_rules:
            candidates = [idx for idx in range(self.n_matches) if idx not in matches_used]
            # list.sort is stable, so ties keep index order.
            candidates.sort(key=lambda idx: priority(rank_gaps[idx]), reverse=descending)
            chosen = candidates[:matches_per_phase]
            schedule.extend(chosen)
            matches_used.update(chosen)

        # Matches left over by the integer division go last, in index order.
        schedule.extend(idx for idx in range(self.n_matches) if idx not in matches_used)

        return schedule

    def expected_turning_point_algorithm(self):
        """Interleave low-, medium- and high-impact matches.

        A match's impact is ``expected points awarded * 1 / (1 + var(odds))``,
        where the expected points come from the normalised implied
        probabilities. Matches are sorted by impact, split into thirds and
        emitted as low, medium, high, low, medium, high, ...

        Returns:
            List of positional match indices.
        """
        print("Executando Expected Turning Point Algorithm...")

        def implied_probability(odds):
            """Return 1/odds, or 0.33 when the odds are missing or not positive."""
            if pd.isna(odds) or odds <= 0:
                return 0.33
            return 1 / odds

        match_impact = []
        odds_rows = zip(
            self.matches['odds_home'], self.matches['odds_tie'], self.matches['odds_away']
        )

        for idx, (odds_home, odds_tie, odds_away) in enumerate(odds_rows):
            raw = [implied_probability(odds) for odds in (odds_home, odds_tie, odds_away)]
            total = sum(raw)
            prob_home, prob_tie, prob_away = (prob / total for prob in raw)

            # Expected points handed out to both sides.
            expected_impact = (prob_home * 3 + prob_tie * 1) + (prob_away * 3 + prob_tie * 1)

            # Similar odds (low variance) => more balanced match.
            odds_variance = np.var([odds_home, odds_tie, odds_away])
            equilibrium_factor = 1 / (1 + odds_variance) if not pd.isna(odds_variance) else 0.5

            match_impact.append((idx, expected_impact * equilibrium_factor))

        match_impact.sort(key=lambda item: item[1], reverse=True)
        ordered = [idx for idx, _ in match_impact]

        first_cut = len(ordered) // 3
        second_cut = 2 * len(ordered) // 3
        high_impact = ordered[:first_cut]
        medium_impact = ordered[first_cut:second_cut]
        low_impact = ordered[second_cut:]

        # Round-robin over the three tiers: low, medium, high, low, ...
        schedule = []
        longest = max(len(high_impact), len(medium_impact), len(low_impact))
        for position in range(longest):
            for tier in (low_impact, medium_impact, high_impact):
                if position < len(tier):
                    schedule.append(tier[position])

        return schedule

    def round_robin_randomization(self):
        """Build a random circle-method round robin and map it to real matches.

        Teams are shuffled, paired with the circle method and given a random
        home/away orientation. For each generated pairing the first unused real
        match with that exact orientation is scheduled; if none exists, the
        reverse fixture is used. Matches not covered by the single round robin
        are appended in random order. Uses the global ``random`` state.

        Returns:
            List of positional match indices.
        """
        print("Executando Round Robin Randomization...")

        teams = self.teams.copy()
        random.shuffle(teams)

        # Sentinel for the phantom team; an object() cannot collide with a
        # real team name the way the string 'BYE' could.
        bye = object()
        if len(teams) % 2 == 1:
            teams.append(bye)

        n = len(teams)
        rounds = []

        for _ in range(n - 1):
            round_matches = []
            for i in range(n // 2):
                team1 = teams[i]
                team2 = teams[n - 1 - i]
                if team1 is not bye and team2 is not bye:
                    # Random home/away assignment
                    if random.random() > 0.5:
                        round_matches.append((team1, team2))
                    else:
                        round_matches.append((team2, team1))
            rounds.append(round_matches)

            # Rotate every team except the first one.
            teams = [teams[0]] + [teams[-1]] + teams[1:-1]

        # (home, away) -> unused match indices in index order.
        fixtures = defaultdict(list)
        for idx, pairing in enumerate(zip(self.matches['home_team'], self.matches['away_team'])):
            fixtures[pairing].append(idx)

        schedule = []
        for round_matches in rounds:
            for home, away in round_matches:
                # Exact orientation first, then the reverse fixture.
                for pairing in ((home, away), (away, home)):
                    if fixtures.get(pairing):
                        schedule.append(fixtures[pairing].pop(0))
                        break

        # Everything the single round robin did not cover, in random order.
        used_matches = set(schedule)
        remaining = [idx for idx in range(self.n_matches) if idx not in used_matches]
        random.shuffle(remaining)
        schedule.extend(remaining)

        return schedule

    def tau_maximizer_schedule(self):
        """Greedy schedule that maximises the τ balance score at each step.

        At every step each unused match is applied to a copy of the current
        points vector using its *actual* result (``home_points`` /
        ``away_points``); the match giving the highest τ is scheduled next
        (first one wins on ties).

        Returns:
            List of positional match indices.
        """
        print("Executando τ-maximizer schedule...")

        def calculate_tau(points):
            """τ = 1 / (1 + CV) scaled by (1 + share of teams within 80% of the leader)."""
            if len(points) < 2:
                return 0

            mean_points = np.mean(points)
            if mean_points == 0:
                return 1.0

            cv = np.std(points) / mean_points
            tau = 1 / (1 + cv)

            max_points = np.max(points)
            close_to_leader = np.sum(points >= max_points * 0.8) / len(points)

            return tau * (1 + close_to_leader)

        # Plain arrays instead of a DataFrame copy per candidate: same greedy
        # rule, but without n^2 DataFrame copies.
        team_position = {team: position for position, team in enumerate(self.teams)}
        home_position = [team_position[team] for team in self.matches['home_team']]
        away_position = [team_position[team] for team in self.matches['away_team']]
        home_points = self.matches['home_points'].tolist() if self.n_matches else []
        away_points = self.matches['away_points'].tolist() if self.n_matches else []

        points = np.zeros(self.n_teams, dtype=float)
        remaining = list(range(self.n_matches))
        schedule = []

        while remaining:
            best_match = None
            best_tau = -float('inf')

            for idx in remaining:
                trial = points.copy()
                trial[home_position[idx]] += home_points[idx]
                trial[away_position[idx]] += away_points[idx]

                tau = calculate_tau(trial)
                if tau > best_tau:
                    best_tau = tau
                    best_match = idx

            if best_match is not None:
                points[home_position[best_match]] += home_points[best_match]
                points[away_position[best_match]] += away_points[best_match]
            else:
                # Only reachable when every τ is NaN: take the next match
                # without touching the table, so the loop still terminates.
                best_match = remaining[0]

            schedule.append(best_match)
            remaining.remove(best_match)

        return schedule

    def iterative_maximum_weighted_matching(self):
        """Order matches block by block, heaviest first inside each block.

        The matches are cut into consecutive blocks of ``max(1, n // 10)``
        rows; each block is sorted by ``odds balance * temporal weight``, with
        the temporal weight growing from 1.0 to 1.5 along the season.

        NOTE(review): despite the name, no graph matching is solved here; this
        is a greedy per-block sort.

        Returns:
            List of positional match indices.
        """
        print("Executando Iterative Maximum Weighted-Matching Scheduler...")

        total_matches = self.n_matches

        def calculate_match_weight(odds, current_round):
            """Weight of one match given its three odds and its block start."""
            # Factor 1: balanced odds (low variance) weigh more.
            odds_balance = 1 / (1 + np.var(odds)) if not any(pd.isna(odds)) else 0.5

            # Factor 2: later blocks weigh more.
            temporal_weight = 1 + (current_round / total_matches) * 0.5

            # Factor 3: rivalry (placeholder, constant)
            rivalry_bonus = 1.0

            return odds_balance * temporal_weight * rivalry_bonus

        odds_rows = list(zip(
            self.matches['odds_home'], self.matches['odds_tie'], self.matches['odds_away']
        )) if total_matches else []

        schedule = []
        block_size = max(1, total_matches // 10)  # about 10 blocks

        for block_start in range(0, total_matches, block_size):
            block = range(block_start, min(block_start + block_size, total_matches))
            weights = {
                idx: calculate_match_weight(list(odds_rows[idx]), block_start)
                for idx in block
            }
            # Stable sort: equal weights keep index order.
            schedule.extend(sorted(block, key=weights.get, reverse=True))

        return schedule

In [14]:
# ================================
# 4. ANÁLISE E COMPARAÇÃO
# ================================

def compare_algorithms(matches_df, competition_id):
    """Run the original order and every optimizer on one competition.

    Args:
        matches_df: Prepared match DataFrame (needs ``competition_id`` and
            ``round_number`` plus the columns used by the simulation).
        competition_id: Value of ``competition_id`` to analyse.

    Returns:
        Dict ``algorithm name -> {'schedule', 'standings', 'metrics'}`` with
        the key ``'Original'`` always present, or ``None`` when the
        competition has no matches. An algorithm that raises is reported on
        stdout and left out of the result.
    """
    print(f"\n=== Analisando competição: {competition_id} ===")

    # Matches of this competition, ordered by round, with a 0..n-1 index
    belongs = matches_df['competition_id'] == competition_id
    comp_matches = matches_df[belongs].copy()
    comp_matches = comp_matches.sort_values('round_number').reset_index(drop=True)

    if not len(comp_matches):
        print(f"Nenhuma partida encontrada para {competition_id}")
        return None

    participants = set(comp_matches['home_team'].tolist() + comp_matches['away_team'].tolist())
    print(f"Total de partidas: {len(comp_matches)}")
    print(f"Times participantes: {len(participants)}")

    optimizer = FootballScheduleOptimizer(comp_matches)

    def evaluate(schedule):
        """Simulate ``schedule`` and bundle it with its standings and metrics."""
        standings = simulate_season(comp_matches, schedule)
        metrics = calculate_competitiveness_metrics(standings)
        return {
            'schedule': schedule,
            'standings': standings,
            'metrics': metrics
        }

    # Baseline: matches in their sorted (original) order
    print("\nCalculando métricas do cronograma original...")
    results = {'Original': evaluate(list(range(len(comp_matches))))}

    algorithms = {
        'REC': optimizer.rank_reversed_recursive_circle,
        'Turning Point': optimizer.expected_turning_point_algorithm,
        'Round Robin': optimizer.round_robin_randomization,
        'τ-maximizer': optimizer.tau_maximizer_schedule,
        'Max Matching': optimizer.iterative_maximum_weighted_matching
    }

    for name, build_schedule in algorithms.items():
        # Best effort: one failing algorithm must not abort the comparison.
        try:
            print(f"\nExecutando {name}...")
            results[name] = evaluate(build_schedule())
            print(f"✓ {name} concluído")
        except Exception as e:
            print(f"✗ Erro em {name}: {str(e)}")

    return results

def plot_competitiveness_comparison(results):
    """Draw a 2x3 figure comparing the algorithms round by round.

    The first five panels plot one metric each over the rounds (one line per
    algorithm); the sixth panel is a bar chart of a composite score computed
    from the final round.

    Args:
        results: Dict ``algorithm name -> {'metrics': {round: metric dict}}``
            as returned by ``compare_algorithms``.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    axes = axes.ravel()

    # (metric key, axis label) for the five time-series panels
    panels = [
        ('point_spread', 'Diferença de Pontos'),
        ('points_std', 'Desvio Padrão'),
        ('title_contenders', 'Candidatos ao Título'),
        ('cv_points', 'Coef. Variação'),
        ('balance_index', 'Índice de Equilíbrio'),
    ]

    colors = plt.cm.Set3(np.linspace(0, 1, len(results)))

    for ax, (metric, label) in zip(axes, panels):
        for color, (name, data) in zip(colors, results.items()):
            per_round = data['metrics']
            rounds = sorted(per_round.keys())
            values = [per_round[round_num][metric] for round_num in rounds]
            ax.plot(rounds, values, label=name, linewidth=2, color=color)

        ax.set_title(f'{label}', fontsize=12, fontweight='bold')
        ax.set_xlabel('Rodada')
        ax.set_ylabel(label)
        ax.legend()
        ax.grid(True, alpha=0.3)

    def composite_score(data):
        """Weighted final-round score: spread and std count against, the rest in favour."""
        per_round = data['metrics']
        last = per_round[max(per_round.keys())]
        return (
            -last['point_spread'] * 0.3 +
            -last['points_std'] * 0.2 +
            last['title_contenders'] * 0.3 +
            last['balance_index'] * 0.2
        )

    # Sixth panel: one bar per algorithm
    summary_ax = axes[5]
    algorithms = list(results.keys())
    final_scores = [composite_score(data) for data in results.values()]

    bars = summary_ax.bar(algorithms, final_scores, color=colors[:len(algorithms)])
    summary_ax.set_title('Score Final de Competitividade', fontsize=12, fontweight='bold')
    summary_ax.set_ylabel('Score Composto')
    summary_ax.tick_params(axis='x', rotation=45)

    # Value label on top of each bar
    for bar, score in zip(bars, final_scores):
        x_center = bar.get_x() + bar.get_width()/2.
        summary_ax.text(x_center, bar.get_height(),
                        f'{score:.2f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

def generate_final_report(results, competition_id):
    """Print a ranking of the algorithms and return it.

    The competitiveness score is the equally weighted (0.25 each) sum of
    ``1 / (1 + point_spread)``, ``1 / (1 + points_std)``, the share of teams
    that are title contenders and ``balance_index``, all taken from the final
    round.

    Args:
        results: Dict ``algorithm name -> {'standings', 'metrics'}`` as
            returned by ``compare_algorithms``.
        competition_id: Identifier shown in the report header.

    Returns:
        List of ``(name, summary dict)`` sorted by descending score; the
        summary has the keys ``score``, ``point_spread``, ``std_dev``,
        ``title_contenders`` and ``balance_index``. Empty list when
        ``results`` is empty.
    """
    print(f"\n{'='*60}")
    print(f"RELATÓRIO FINAL - {competition_id}")
    print(f"{'='*60}")

    # Nothing to rank: avoid indexing into an empty ranking further down.
    if not results:
        print("\nNenhum resultado disponível para gerar o relatório.")
        return []

    algorithm_scores = {}

    for name, data in results.items():
        final_round = max(data['metrics'].keys())
        final_metrics = data['metrics'][final_round]
        n_teams = len(set(data['standings'][final_round].index))

        competitiveness_score = (
            (1 / (1 + final_metrics['point_spread'])) * 0.25 +  # smaller spread
            (1 / (1 + final_metrics['points_std'])) * 0.25 +    # smaller deviation
            (final_metrics['title_contenders'] / n_teams) * 0.25 +  # more contenders
            final_metrics['balance_index'] * 0.25  # better balance
        )

        algorithm_scores[name] = {
            'score': competitiveness_score,
            'point_spread': final_metrics['point_spread'],
            'std_dev': final_metrics['points_std'],
            'title_contenders': final_metrics['title_contenders'],
            'balance_index': final_metrics['balance_index']
        }

    ranked_algorithms = sorted(algorithm_scores.items(),
                               key=lambda x: x[1]['score'], reverse=True)

    print("\nRANKING DOS ALGORITMOS:")
    print("-" * 60)

    for i, (name, metrics) in enumerate(ranked_algorithms, 1):
        print(f"{i}. {name}")
        print(f"   Score de Competitividade: {metrics['score']:.4f}")
        print(f"   Diferença de Pontos: {metrics['point_spread']:.1f}")
        print(f"   Desvio Padrão: {metrics['std_dev']:.2f}")
        print(f"   Candidatos ao Título: {metrics['title_contenders']}")
        print(f"   Índice de Equilíbrio: {metrics['balance_index']:.3f}")
        print()

    print("RECOMENDAÇÕES:")
    print("-" * 60)

    best_algorithm, best_metrics = ranked_algorithms[0]

    print(f"🏆 Melhor algoritmo: {best_algorithm}")

    # The comparison against the baseline only makes sense when the original
    # schedule is part of the results; skip it otherwise instead of raising.
    baseline = algorithm_scores.get('Original')
    if baseline is not None:
        print(f"   Melhoria na competitividade: {((best_metrics['score'] / baseline['score']) - 1) * 100:.1f}%")

        if best_metrics['point_spread'] < baseline['point_spread']:
            improvement = baseline['point_spread'] - best_metrics['point_spread']
            print(f"   Redução na diferença de pontos: {improvement:.1f} pontos")

        if best_metrics['title_contenders'] > baseline['title_contenders']:
            improvement = best_metrics['title_contenders'] - baseline['title_contenders']
            print(f"   Aumento de candidatos ao título: +{improvement} times")

    return ranked_algorithms

In [15]:
# ================================
# 5. FUNÇÃO PRINCIPAL DE EXECUÇÃO
# ================================

def main():
    """Run the whole analysis: load data, compare algorithms, report.

    Loads ``data/formatted/football.csv``, lists the ten competitions with the
    most matches, runs ``compare_algorithms`` / plotting / reporting for the
    top three, then prints and plots a consolidated ranking of the algorithms
    averaged over the processed competitions. Errors while loading the data
    end the run; errors in a single competition are reported and skipped.
    """

    print("🏈 OTIMIZADOR DE COMPETITIVIDADE EM CAMPEONATOS DE FUTEBOL")
    print("=" * 65)
    print()

    # Load data
    try:
        df = load_and_prepare_data('data/formatted/football.csv')
        print(f"✓ Dados carregados: {len(df)} partidas")
        print(f"✓ Competições encontradas: {df['competition_id'].nunique()}")
        print(f"✓ Período: {df['date'].min().strftime('%Y')} - {df['date'].max().strftime('%Y')}")
        print()
    except FileNotFoundError:
        print("❌ Erro: Arquivo 'data/formatted/football.csv' não encontrado!")
        print("   Certifique-se de que o arquivo existe no caminho especificado.")
        return
    except Exception as e:
        print(f"❌ Erro ao carregar dados: {str(e)}")
        return

    # Competitions with the most matches
    competitions = df['competition_id'].value_counts().head(10)
    print("COMPETIÇÕES COM MAIS DADOS:")
    print("-" * 40)
    for comp, count in competitions.items():
        print(f"  {comp}: {count} partidas")
    print()

    all_results = {}

    for competition_id in competitions.index[:3]:  # top 3 competitions only
        print(f"\n🔄 Processando: {competition_id}")

        # Best effort: a failing competition is reported and skipped.
        try:
            results = compare_algorithms(df, competition_id)
            if results:
                all_results[competition_id] = results

                print("📊 Gerando visualizações...")
                plot_competitiveness_comparison(results)

                generate_final_report(results, competition_id)

                print(f"✅ Análise concluída para {competition_id}")
                print("-" * 60)

        except Exception as e:
            print(f"❌ Erro ao processar {competition_id}: {str(e)}")
            continue

    # Consolidated analysis
    if all_results:
        print(f"\n{'='*80}")
        print("ANÁLISE CONSOLIDADA - TODAS AS COMPETIÇÕES")
        print(f"{'='*80}")

        # Score of every algorithm in every competition
        algorithm_performance = defaultdict(list)

        for comp_results in all_results.values():
            for alg_name, data in comp_results.items():
                final_round = max(data['metrics'].keys())
                final_metrics = data['metrics'][final_round]

                # Normalise by the real number of teams of this competition
                # (same formula as generate_final_report) instead of assuming
                # a 20-team league.
                n_teams = len(set(data['standings'][final_round].index))

                score = (
                    (1 / (1 + final_metrics['point_spread'])) * 0.25 +
                    (1 / (1 + final_metrics['points_std'])) * 0.25 +
                    (final_metrics['title_contenders'] / n_teams) * 0.25 +
                    final_metrics['balance_index'] * 0.25
                )
                algorithm_performance[alg_name].append(score)

        # Mean and spread per algorithm (spread is 0 with a single sample)
        avg_performance = {alg: np.mean(scores) for alg, scores in algorithm_performance.items()}
        score_spread = {
            alg: np.std(scores) if len(scores) > 1 else 0
            for alg, scores in algorithm_performance.items()
        }
        ranked_avg = sorted(avg_performance.items(), key=lambda x: x[1], reverse=True)

        print("\nRANKING MÉDIO DOS ALGORITMOS:")
        print("-" * 50)
        for i, (alg, avg_score) in enumerate(ranked_avg, 1):
            std_score = score_spread[alg]
            print(f"{i}. {alg:20} | Score: {avg_score:.4f} ± {std_score:.4f}")

        # Consolidated bar chart
        plt.figure(figsize=(12, 8))

        algorithms = list(avg_performance.keys())
        scores = [avg_performance[alg] for alg in algorithms]
        errors = [score_spread[alg] for alg in algorithms]

        bars = plt.bar(algorithms, scores, yerr=errors, capsize=5,
                       color=plt.cm.Set3(np.linspace(0, 1, len(algorithms))))

        plt.title('Performance Média dos Algoritmos\n(Todas as Competições)',
                  fontsize=16, fontweight='bold')
        plt.ylabel('Score de Competitividade', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.grid(True, alpha=0.3)

        # Value label above each error bar
        for bar, score, error in zip(bars, scores, errors):
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + error + 0.001,
                     f'{score:.3f}', ha='center', va='bottom', fontweight='bold')

        plt.tight_layout()
        plt.show()

        print(f"\n🏆 MELHOR ALGORITMO GERAL: {ranked_avg[0][0]}")
        print(f"   Score médio: {ranked_avg[0][1]:.4f}")

        original_score = avg_performance.get('Original', 0)
        if original_score > 0:
            improvement = ((ranked_avg[0][1] / original_score) - 1) * 100
            print(f"   Melhoria média: {improvement:.1f}%")

        print(f"\n✅ Análise completa finalizada!")
        print(f"   Total de competições analisadas: {len(all_results)}")
        print(f"   Algoritmos testados: {len(algorithm_performance)}")

    else:
        print("❌ Nenhuma competição foi processada com sucesso.")

In [16]:
# ================================
# 6. UTILITÁRIOS ADICIONAIS
# ================================

def export_optimal_schedule(results, competition_id, algorithm_name, original_matches):
    """Write the match order chosen by one algorithm to a CSV file.

    Args:
        results: Container keyed by algorithm name. The entry for
            ``algorithm_name`` must expose a ``'schedule'`` item, which is
            used as a positional indexer into ``original_matches``.
        competition_id: Value interpolated into the output file name.
        algorithm_name: Key looked up in ``results``; also part of the
            output file name.
        original_matches: DataFrame of matches with a ``'date'`` column.

    Returns:
        The reordered copy of ``original_matches`` with the extra columns
        ``new_round_number`` and ``new_date``, or ``None`` when
        ``algorithm_name`` is not present in ``results``.
    """
    if algorithm_name not in results:
        print(f"Algoritmo {algorithm_name} não encontrado nos resultados.")
        return None

    # Reorder positionally and copy so the caller's frame is left untouched.
    reordered = original_matches.iloc[results[algorithm_name]['schedule']].copy()
    match_count = len(reordered)

    # Illustrative calendar: one match per week, starting at the earliest
    # date found among the selected matches.
    first_day = reordered['date'].min()
    weekly_dates = []
    for week_offset in range(match_count):
        weekly_dates.append(first_day + timedelta(weeks=week_offset))

    # Each match gets its own sequential number, starting at 1.
    reordered['new_round_number'] = range(1, match_count + 1)
    reordered['new_date'] = weekly_dates

    # Persist to the current working directory.
    filename = f"optimized_schedule_{competition_id}_{algorithm_name}.csv"
    reordered.to_csv(filename, index=False)

    print(f"✅ Cronograma otimizado salvo em: {filename}")

    return reordered

def analyze_single_competition(df, competition_id):
    """Print summary statistics for one competition and return its matches.

    Args:
        df: DataFrame of matches. The columns read here are
            ``competition_id``, ``home_team``, ``away_team``, ``date``,
            ``odds_home``, ``odds_tie``, ``odds_away`` and ``winner``.
        competition_id: Value compared for equality against the
            ``competition_id`` column to select the matches.

    Returns:
        A copy of the selected matches with an added ``odds_balance`` column
        (population variance of the three odds; NaN when any odd is missing),
        or ``None`` when no row matches ``competition_id``.
    """
    comp_matches = df[df['competition_id'] == competition_id].copy()
    total_matches = len(comp_matches)

    if total_matches == 0:
        print(f"Competição {competition_id} não encontrada.")
        return

    print(f"\n📊 ANÁLISE DETALHADA: {competition_id}")
    print("=" * 60)

    # Basic statistics
    teams = set(comp_matches['home_team']) | set(comp_matches['away_team'])
    # Cast to str first: the .str accessor raises AttributeError on a numeric
    # column, which is what read_csv produces for all-digit identifiers.
    season_suffixes = comp_matches['competition_id'].astype(str).str[-4:]
    first_day = comp_matches['date'].min()
    last_day = comp_matches['date'].max()
    print(f"Times: {len(teams)}")
    print(f"Partidas: {total_matches}")
    print(f"Temporadas: {season_suffixes.nunique()}")
    print(f"Período: {first_day.strftime('%d/%m/%Y')} - {last_day.strftime('%d/%m/%Y')}")

    # Match balance from the odds: one vectorized variance per row instead of
    # a Python-level apply. np.var propagates NaN, so a row with any missing
    # odd yields NaN, exactly as the explicit isna check did.
    odds = comp_matches[['odds_home', 'odds_tie', 'odds_away']].to_numpy(
        dtype=float, na_value=np.nan)
    comp_matches['odds_balance'] = np.var(odds, axis=1)

    balance_cutoff = comp_matches['odds_balance'].quantile(0.3)
    balanced_matches = comp_matches[comp_matches['odds_balance'] < balance_cutoff]

    print(f"Partidas equilibradas (30% menores variâncias de odds): {len(balanced_matches)}")
    print(f"Percentual de equilíbrio: {len(balanced_matches)/total_matches*100:.1f}%")

    # Result distribution, using the 'h' / 'd' / 'a' codes of the winner column
    winner = comp_matches['winner']
    home_wins = int((winner == 'h').sum())
    draws = int((winner == 'd').sum())
    away_wins = int((winner == 'a').sum())

    print(f"\nDistribuição de resultados:")
    print(f"  Vitórias mandante: {home_wins} ({home_wins/total_matches*100:.1f}%)")
    print(f"  Empates: {draws} ({draws/total_matches*100:.1f}%)")
    print(f"  Vitórias visitante: {away_wins} ({away_wins/total_matches*100:.1f}%)")

    return comp_matches

In [17]:
# ================================
# 7. INTERFACE DE USUÁRIO
# ================================

def interactive_menu():
    """Show the option list on stdout and return what the user typed.

    Returns:
        The string returned by ``input()``, unmodified.
    """
    menu_lines = (
        "\n🔧 MENU INTERATIVO",
        "=" * 30,
        "1. Executar análise completa",
        "2. Analisar competição específica",
        "3. Comparar algoritmos específicos",
        "4. Exportar cronograma otimizado",
        "5. Visualizar métricas detalhadas",
        "0. Sair",
    )
    for menu_line in menu_lines:
        print(menu_line)

    return input("\nEscolha uma opção: ")

In [18]:
# ================================
# 8. EXECUÇÃO DO PROGRAMA
# ================================

if __name__ == "__main__":
    # Run the main analysis once before offering the menu.
    main()

    # Optional interactive menu: keep prompting until the user picks "0",
    # interrupts, or the input stream ends.
    while True:
        try:
            # Strip surrounding whitespace so " 1 " is treated like "1".
            choice = interactive_menu().strip()

            if choice == "0":
                print("👋 Saindo do programa...")
                break
            elif choice == "1":
                main()
            elif choice == "2":
                competition = input("Digite o ID da competição: ")
                try:
                    df = load_and_prepare_data('data/formatted/football.csv')
                    analyze_single_competition(df, competition)
                except Exception as e:
                    # A load/analysis failure is reported and the menu goes on.
                    print(f"Erro: {str(e)}")
            elif choice in ("3", "4", "5"):
                # Options listed in the menu but not implemented yet.
                print("Funcionalidade em desenvolvimento...")
            else:
                print("Opção inválida!")

        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError once stdin is exhausted or closed. If it
            # fell through to the generic handler below, every iteration would
            # fail the same way and the loop would never terminate.
            print("\n👋 Programa interrompido pelo usuário.")
            break
        except Exception as e:
            print(f"Erro inesperado: {str(e)}")

# Closing banner: how to run the analysis, which scheduling algorithms are
# implemented and which competitiveness metrics are reported. Emitted with a
# single print call, one argument per output line.
print(
    "\n" + "=" * 80,
    "🏈 FOOTBALL COMPETITION OPTIMIZER - PRONTO PARA EXECUÇÃO",
    "=" * 80,
    "",
    "Para executar, certifique-se de que o arquivo 'data/formatted/football.csv' existe",
    "e execute: main()",
    "",
    "Algoritmos implementados:",
    "✅ Rank-Reversed Recursive Circle Method (REC)",
    "✅ Expected Turning Point Algorithm",
    "✅ Round Robin Randomization",
    "✅ τ-maximizer schedule",
    "✅ Iterative Maximum Weighted-Matching Scheduler",
    "",
    "Métricas de competitividade:",
    "• Diferença de pontos entre 1º e último",
    "• Desvio padrão dos pontos",
    "• Número de candidatos ao título",
    "• Coeficiente de variação dos pontos",
    "• Índice de equilíbrio da tabela",
    "=" * 80,
    sep="\n",
)

🏈 OTIMIZADOR DE COMPETITIVIDADE EM CAMPEONATOS DE FUTEBOL

❌ Erro: Arquivo 'data/formatted/football.csv' não encontrado!
   Certifique-se de que o arquivo existe no caminho especificado.

🔧 MENU INTERATIVO
1. Executar análise completa
2. Analisar competição específica
3. Comparar algoritmos específicos
4. Exportar cronograma otimizado
5. Visualizar métricas detalhadas
0. Sair
Opção inválida!

🔧 MENU INTERATIVO
1. Executar análise completa
2. Analisar competição específica
3. Comparar algoritmos específicos
4. Exportar cronograma otimizado
5. Visualizar métricas detalhadas
0. Sair
Opção inválida!

🔧 MENU INTERATIVO
1. Executar análise completa
2. Analisar competição específica
3. Comparar algoritmos específicos
4. Exportar cronograma otimizado
5. Visualizar métricas detalhadas
0. Sair
Opção inválida!

🔧 MENU INTERATIVO
1. Executar análise completa
2. Analisar competição específica
3. Comparar algoritmos específicos
4. Exportar cronograma otimizado
5. Visualizar métricas detalhadas
0. Sa