In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
from pathlib import Path
import json
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Para FID e LPIPS
from scipy.linalg import sqrtm
from scipy import linalg
import lpips  # pip install lpips
from torchvision.models import inception_v3

In [None]:
# Banner: announce the evaluation run and list the metrics it reports.
print("üî¨ ADVANCED EVALUATION - CycleGAN Real2Cartoon")
print("üìä M√©tricas: FID, LPIPS, IS, Visual Analysis")
print("=" * 60)

# ================================
# CONFIGURAÇÕES
# ================================

In [None]:
class EvalConfig:
    """Static settings shared by every evaluation cell.

    Defining the class also creates the results directory as a side effect.
    """

    # --- Filesystem locations ---
    MODEL_PATH = "models_complex/cyclegan_perfection"
    DATASET_PATH = "dataset/real2cartoon"
    RESULTS_PATH = "evaluation_results"

    # --- Evaluation parameters ---
    IMG_SIZE = 256
    BATCH_SIZE = 8  # larger batches for throughput
    NUM_SAMPLES_FID = 1000  # sample budget for a robust FID estimate
    NUM_SAMPLES_VISUAL = 25  # sample budget for the visual analysis

    # --- Compute device: GPU when available, CPU otherwise ---
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Make sure the output directory exists before anything writes into it.
    os.makedirs(RESULTS_PATH, exist_ok=True)

# Module-wide settings object read by the cells below.
config = EvalConfig()

# Echo the key settings so the run log records what was evaluated.
print(f"üì± Device: {config.DEVICE}")
print(f"üìÅ Model path: {config.MODEL_PATH}")
print(f"üéØ FID samples: {config.NUM_SAMPLES_FID}")


# ================================
# DATASET PARA AVALIAÇÃO
# ================================

In [None]:
class EvaluationDataset(Dataset):
    """Index-aligned view over the two image domains of an unpaired dataset.

    Domain A (real photos) and domain B (cartoons) are read from
    ``testA``/``testB`` when ``mode == 'test'`` and from ``trainA``/``trainB``
    for any other mode. The domains may differ in size: the dataset length is
    the larger of the two and the shorter domain wraps around.
    """

    # File suffixes treated as images (compared case-insensitively).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_path, mode='test', transform=None):
        """Collect the image paths of both domains.

        Args:
            root_path: Dataset root containing the ``<split>A`` / ``<split>B`` folders.
            mode: ``'test'`` selects the test split; anything else falls back to train.
            transform: Optional callable applied to each loaded PIL image.

        Raises:
            FileNotFoundError: If either domain folder is missing or holds no images.
                Without this check the failure would surface later as a
                ``ZeroDivisionError`` inside ``__getitem__``.
        """
        self.transform = transform

        split = 'test' if mode == 'test' else 'train'  # non-test modes fall back to train
        self.path_A = Path(root_path) / f'{split}A'
        self.path_B = Path(root_path) / f'{split}B'

        self.images_A = self._list_images(self.path_A)
        self.images_B = self._list_images(self.path_B)

        for folder, found in ((self.path_A, self.images_A), (self.path_B, self.images_B)):
            if not found:
                raise FileNotFoundError(
                    f"No images with extensions {self.IMAGE_EXTENSIONS} found in '{folder}'"
                )

        self.length = max(len(self.images_A), len(self.images_B))

        print(f"üìä Dataset carregado: {len(self.images_A)} reais, {len(self.images_B)} cartoons")

    @classmethod
    def _list_images(cls, folder):
        """Return the sorted image files directly inside ``folder`` (empty if it is missing)."""
        if not folder.is_dir():
            return []
        return sorted(
            entry for entry in folder.iterdir()
            if entry.is_file() and entry.suffix.lower() in cls.IMAGE_EXTENSIONS
        )

    @staticmethod
    def _load_rgb(path):
        """Load ``path`` as an RGB image and release the file handle right away."""
        with Image.open(path) as handle:
            # convert() returns a fully loaded copy, so it stays valid after the file closes.
            return handle.convert('RGB')

    def __len__(self):
        """Number of index positions (size of the larger domain)."""
        return self.length

    def __getitem__(self, idx):
        """Return one image from each domain plus their source paths.

        Returns:
            dict with keys ``'A'``, ``'B'`` (images, transformed when a transform
            was given) and ``'path_A'``, ``'path_B'`` (paths as strings).
        """
        # Modulo indexing lets the shorter domain wrap around.
        img_A_path = self.images_A[idx % len(self.images_A)]
        img_B_path = self.images_B[idx % len(self.images_B)]

        img_A = self._load_rgb(img_A_path)
        img_B = self._load_rgb(img_B_path)

        if self.transform:
            img_A = self.transform(img_A)
            img_B = self.transform(img_B)

        return {'A': img_A, 'B': img_B, 'path_A': str(img_A_path), 'path_B': str(img_B_path)}

# Evaluation transform: resize to the configured square size, convert to a
# tensor, then shift/scale each channel with mean 0.5 and std 0.5.
eval_transform = transforms.Compose([
    transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # [-1, 1]
])

# FID transform: resize to 299x299 and apply ImageNet mean/std normalization.
# NOTE(review): the resulting tensors are ImageNet-standardized, NOT in [0, 1];
# any code converting them back to the generator range must undo this exact
# per-channel normalization.
fid_transform = transforms.Compose([
    transforms.Resize((299, 299)),  # InceptionV3 input size
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))  # ImageNet normalization
])

# ================================
# CARREGAR MODELOS TREINADOS
# ================================

In [None]:
def load_trained_models():
    """Load both CycleGAN generators from the saved checkpoint.

    Looks for ``best_model.pth`` under ``config.MODEL_PATH`` and falls back to
    ``latest_checkpoint.pth`` when it does not exist. The generator
    architecture is re-declared locally so that the checkpoint's
    ``G_AB_state_dict`` / ``G_BA_state_dict`` entries can be loaded into it.

    Returns:
        ``(G_AB, G_BA, checkpoint)`` with both generators in eval mode on
        ``config.DEVICE``, or ``(None, None, None)`` if anything fails (the
        error is printed, not raised).
    """
    
    print("üîÑ Carregando modelos treinados...")
    
    try:
        # Resolve the checkpoint file: prefer the best model, else the latest one.
        checkpoint_path = f"{config.MODEL_PATH}/best_model.pth"
        if not os.path.exists(checkpoint_path):
            checkpoint_path = f"{config.MODEL_PATH}/latest_checkpoint.pth"
        
        # NOTE(review): torch.load unpickles the file; only load checkpoints from
        # a trusted source. Consider weights_only=True if the checkpoint format
        # allows it -- TODO confirm against the training notebook.
        checkpoint = torch.load(checkpoint_path, map_location=config.DEVICE)
        print(f"‚úÖ Checkpoint carregado: √©poca {checkpoint['epoch']}")
        
        # ARCHITECTURE DEFINITIONS (copied from the training notebook).
        # The order of layers below determines the state_dict keys, so it must
        # match the training-time definition exactly.
        class ResidualBlock(nn.Module):
            """Two reflection-padded 3x3 conv layers with a skip connection."""

            def __init__(self, channels):
                super(ResidualBlock, self).__init__()
                self.block = nn.Sequential(
                    nn.ReflectionPad2d(1),
                    nn.Conv2d(channels, channels, 3),
                    nn.InstanceNorm2d(channels),
                    nn.ReLU(inplace=True),
                    nn.ReflectionPad2d(1),
                    nn.Conv2d(channels, channels, 3),
                    nn.InstanceNorm2d(channels)
                )
            
            def forward(self, x):
                # Residual connection: input plus the transformed input.
                return x + self.block(x)
        
        class Generator(nn.Module):
            """Encoder -> residual blocks -> decoder generator ending in Tanh."""

            def __init__(self, input_channels=3, output_channels=3, num_residual_blocks=6):
                super(Generator, self).__init__()
                
                # Encoder stem: 7x7 conv to 64 channels
                model = [
                    nn.ReflectionPad2d(3),
                    nn.Conv2d(input_channels, 64, 7),
                    nn.InstanceNorm2d(64),
                    nn.ReLU(inplace=True)
                ]
                
                # Downsampling: two stride-2 convs, doubling channels each time (64 -> 128 -> 256)
                in_features = 64
                out_features = in_features * 2
                for _ in range(2):
                    model += [
                        nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
                        nn.InstanceNorm2d(out_features),
                        nn.ReLU(inplace=True)
                    ]
                    in_features = out_features
                    out_features = in_features * 2
                
                # Residual blocks at the bottleneck width
                for _ in range(num_residual_blocks):
                    model += [ResidualBlock(in_features)]
                
                # Upsampling: bilinear upsample + 3x3 conv, halving channels each time
                out_features = in_features // 2
                for _ in range(2):
                    model += [
                        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
                        nn.ReflectionPad2d(1),
                        nn.Conv2d(in_features, out_features, kernel_size=3, stride=1, padding=0),
                        nn.InstanceNorm2d(out_features),
                        nn.ReLU(inplace=True)
                    ]
                    in_features = out_features
                    out_features = in_features // 2
                
                # Output layer: 7x7 conv back to image channels, Tanh bounds output to [-1, 1]
                model += [
                    nn.ReflectionPad2d(3),
                    nn.Conv2d(64, output_channels, 7),
                    nn.Tanh()
                ]
                
                self.model = nn.Sequential(*model)
            
            def forward(self, x):
                return self.model(x)
        
        # Instantiate both translation directions on the evaluation device
        G_AB = Generator(num_residual_blocks=6).to(config.DEVICE)
        G_BA = Generator(num_residual_blocks=6).to(config.DEVICE)
        
        # Load the trained weights
        G_AB.load_state_dict(checkpoint['G_AB_state_dict'])
        G_BA.load_state_dict(checkpoint['G_BA_state_dict'])
        
        # Switch to evaluation mode
        G_AB.eval()
        G_BA.eval()
        
        print("‚úÖ Modelos carregados e prontos para avalia√ß√£o")
        
        return G_AB, G_BA, checkpoint
        
    except Exception as e:
        # Best-effort loader: report the problem and let the caller check for None.
        print(f"‚ùå Erro ao carregar modelos: {e}")
        print("üí° Certifique-se que o treinamento foi conclu√≠do")
        return None, None, None

# ================================
# IMPLEMENTAÇÃO FID (Frechet Inception Distance)
# ================================

In [None]:
class FIDCalculator:
    """Frechet Inception Distance between two image sets.

    Features are taken from a pretrained InceptionV3 whose classification head
    is replaced by an identity, i.e. the pooled activations before ``fc``.
    Inputs are expected to be ImageNet-normalized image tensors.
    """

    def __init__(self, device):
        """Load the pretrained InceptionV3 feature extractor onto ``device``."""
        self.device = device
        # transform_input=True: torchvision's pretrained InceptionV3 weights expect
        # inputs scaled as (x - 0.5) / 0.5; this flag makes the model convert
        # ImageNet-normalized inputs (what this notebook feeds it) to that scale.
        self.inception = inception_v3(pretrained=True, transform_input=True)
        self.inception.fc = nn.Identity()  # drop the classifier, keep pooled features
        self.inception.eval().to(device)

        print("‚úÖ InceptionV3 carregado para FID")

    def get_activations(self, images):
        """Run ``images`` (a batch tensor) through Inception and return features as a NumPy array."""
        with torch.no_grad():
            features = self.inception(images)
        return features.cpu().numpy()

    def _extract_features(self, images, batch_size=32):
        """Stack ``images`` (a list of tensors) in batches and collect float64 features."""
        chunks = []
        for start in range(0, len(images), batch_size):
            batch = torch.stack(images[start:start + batch_size]).to(self.device)
            chunks.append(self.get_activations(batch))
        # float64 keeps the mean/covariance estimates numerically stable.
        return np.concatenate(chunks, axis=0).astype(np.float64)

    @staticmethod
    def _frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
        """Frechet distance between two Gaussians given their means and covariances."""
        mu1, mu2 = np.atleast_1d(mu1), np.atleast_1d(mu2)
        sigma1, sigma2 = np.atleast_2d(sigma1), np.atleast_2d(sigma2)
        diff = mu1 - mu2

        # `disp` is intentionally omitted: the default call returns the matrix
        # directly and works whether or not the installed SciPy still accepts
        # that flag (it is deprecated in recent releases).
        covmean = linalg.sqrtm(sigma1.dot(sigma2))
        if not np.isfinite(covmean).all():
            # Near-singular product: regularize the diagonals and retry.
            offset = np.eye(sigma1.shape[0]) * eps
            covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

        # Numerical error can introduce a small imaginary component.
        if np.iscomplexobj(covmean):
            covmean = covmean.real

        return float(
            diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2.0 * np.trace(covmean)
        )

    def calculate_fid(self, real_images, fake_images):
        """Compute the FID between two lists of image tensors.

        Args:
            real_images: List of image tensors from the target domain.
            fake_images: List of generated image tensors.

        Returns:
            The FID as a float (lower means the feature distributions are closer).

        Raises:
            ValueError: If either list has fewer than two images, since a
                covariance cannot be estimated from a single sample.
        """
        print(f"üîÑ Calculando FID... Real: {len(real_images)}, Fake: {len(fake_images)}")

        if len(real_images) < 2 or len(fake_images) < 2:
            raise ValueError(
                "FID needs at least two real and two fake images to estimate covariances"
            )

        real_features = self._extract_features(real_images)
        fake_features = self._extract_features(fake_images)

        # Gaussian statistics of each feature set
        mu_real = np.mean(real_features, axis=0)
        mu_fake = np.mean(fake_features, axis=0)
        sigma_real = np.cov(real_features, rowvar=False)
        sigma_fake = np.cov(fake_features, rowvar=False)

        return self._frechet_distance(mu_real, sigma_real, mu_fake, sigma_fake)

# ================================
# IMPLEMENTAÇÃO LPIPS
# ================================

In [None]:
class LPIPSCalculator:
    """Mean LPIPS perceptual distance over aligned image pairs (AlexNet backbone)."""

    def __init__(self, device):
        """Build the pretrained LPIPS network and move it to ``device``."""
        self.device = device
        perceptual_net = lpips.LPIPS(net='alex')  # AlexNet variant
        self.lpips_fn = perceptual_net.to(device)
        print("‚úÖ LPIPS (AlexNet) carregado")

    def calculate_lpips(self, real_images, fake_images):
        """Average the LPIPS distance over ``zip(real_images, fake_images)``.

        Each image is scored individually as a batch of one; pairing stops at
        the shorter of the two sequences.
        """
        print(f"üîÑ Calculando LPIPS... {len(real_images)} pares")

        def pair_distance(reference, candidate):
            # Add the batch dimension and move both images to the metric's device.
            ref_batch = reference.unsqueeze(0).to(self.device)
            cand_batch = candidate.unsqueeze(0).to(self.device)
            return self.lpips_fn(ref_batch, cand_batch).item()

        with torch.no_grad():
            distances = [
                pair_distance(reference, candidate)
                for reference, candidate in zip(real_images, fake_images)
            ]

        return np.mean(distances)

# ================================
# IMPLEMENTAÇÃO IS (Inception Score)
# ================================

In [None]:
class ISCalculator:
    """Inception Score of a set of generated images.

    Uses the full pretrained InceptionV3 classifier; inputs are expected to be
    ImageNet-normalized image tensors.
    """

    def __init__(self, device):
        """Load the pretrained InceptionV3 classifier onto ``device``."""
        self.device = device
        # transform_input=True: the pretrained weights expect (x - 0.5) / 0.5
        # scaling; this flag converts the ImageNet-normalized inputs this
        # notebook provides to that scale inside the model.
        self.inception = inception_v3(pretrained=True, transform_input=True)
        self.inception.eval().to(device)
        print("‚úÖ InceptionV3 carregado para IS")

    def calculate_is(self, fake_images, splits=10):
        """Compute the Inception Score over ``fake_images``.

        Args:
            fake_images: List of image tensors.
            splits: Number of groups the predictions are divided into. It is
                capped at the number of images so that no group is empty.

        Returns:
            ``(mean, std)`` of the per-split scores, as floats.

        Raises:
            ValueError: If ``fake_images`` is empty or ``splits`` is below 1.
        """
        print(f"üîÑ Calculando IS... {len(fake_images)} imagens")

        if len(fake_images) == 0:
            raise ValueError("Inception Score needs at least one image")
        if splits < 1:
            raise ValueError("splits must be >= 1")

        batch_probs = []
        with torch.no_grad():
            for start in range(0, len(fake_images), 32):
                batch = torch.stack(fake_images[start:start + 32]).to(self.device)
                probs = F.softmax(self.inception(batch), dim=1)
                batch_probs.append(probs.cpu().numpy())

        # float64 avoids precision loss in the logarithms below.
        all_preds = np.concatenate(batch_probs, axis=0).astype(np.float64)
        total = all_preds.shape[0]

        # An empty split would yield NaN, so never use more splits than images.
        splits = min(splits, total)
        eps = 1e-16  # keeps log() finite when a probability underflows to zero

        split_scores = []
        for i in range(splits):
            part = all_preds[i * total // splits: (i + 1) * total // splits]

            # Marginal class distribution p(y) within this split
            py = np.mean(part, axis=0, keepdims=True)

            # KL(p(y|x) || p(y)) for every image, vectorized over rows
            kl = np.sum(part * (np.log(part + eps) - np.log(py + eps)), axis=1)

            split_scores.append(np.exp(np.mean(kl)))

        return float(np.mean(split_scores)), float(np.std(split_scores))

# ================================
# GERAÇÃO DE AMOSTRAS PARA AVALIAÇÃO
# ================================

In [None]:
def _to_inception_input(images, size=299):
    """Convert a batch in [-1, 1] to ``size`` x ``size`` ImageNet-normalized Inception input."""
    imagenet_mean = images.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    imagenet_std = images.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    # [-1, 1] -> [0, 1]; clamp guards against tiny overshoots.
    unit_range = (images * 0.5 + 0.5).clamp(0.0, 1.0)
    resized = F.interpolate(unit_range, size=(size, size), mode='bilinear', align_corners=False)
    return (resized - imagenet_mean) / imagenet_std


def generate_evaluation_samples(G_AB, G_BA, num_samples=2000):
    """Translate test images in both directions and collect tensors for every metric.

    The generators run once, at the evaluation resolution and on [-1, 1]
    inputs. The Inception-ready tensors (``*_fid`` keys) are derived from those
    same real and generated images by one shared conversion
    (denormalize -> resize to 299 -> ImageNet-normalize), so real and fake
    images reach InceptionV3 on exactly the same scale and size.

    Args:
        G_AB: Generator mapping domain A to domain B.
        G_BA: Generator mapping domain B to domain A.
        num_samples: Maximum number of samples to collect.

    Returns:
        dict of lists of CPU tensors under ``real_A``, ``real_B``, ``fake_A``,
        ``fake_B``, ``cycle_A``, ``cycle_B`` (all in [-1, 1]) and
        ``real_A_fid``, ``real_B_fid``, ``fake_A_fid``, ``fake_B_fid``
        (ImageNet-normalized, 299x299), plus ``paths_A`` / ``paths_B`` (str).
    """

    print(f"üéØ Gerando {num_samples} amostras para avalia√ß√£o...")

    # Dataset in generator normalization ([-1, 1])
    eval_dataset = EvaluationDataset(config.DATASET_PATH, mode='test', transform=eval_transform)
    eval_loader = DataLoader(eval_dataset, batch_size=config.BATCH_SIZE, shuffle=False)

    samples = {
        'real_A': [], 'real_B': [],
        'fake_A': [], 'fake_B': [],
        'cycle_A': [], 'cycle_B': [],
        'real_A_fid': [], 'real_B_fid': [],
        'fake_A_fid': [], 'fake_B_fid': [],
        'paths_A': [], 'paths_B': []
    }

    count = 0

    with torch.no_grad():
        for batch_idx, batch in enumerate(eval_loader):
            if count >= num_samples:
                break

            real_A = batch['A'].to(config.DEVICE)
            real_B = batch['B'].to(config.DEVICE)

            # Translations
            fake_B = G_AB(real_A)  # A -> B
            fake_A = G_BA(real_B)  # B -> A

            # Cycle reconstructions
            cycle_A = G_BA(fake_B)  # A -> B -> A
            cycle_B = G_AB(fake_A)  # B -> A -> B

            batch_tensors = {
                'real_A': real_A, 'real_B': real_B,
                'fake_A': fake_A, 'fake_B': fake_B,
                'cycle_A': cycle_A, 'cycle_B': cycle_B,
                'real_A_fid': _to_inception_input(real_A),
                'real_B_fid': _to_inception_input(real_B),
                'fake_A_fid': _to_inception_input(fake_A),
                'fake_B_fid': _to_inception_input(fake_B),
            }

            # Keep only as many items as are still needed; store on CPU to save VRAM.
            keep = min(real_A.size(0), num_samples - count)
            for key, tensor in batch_tensors.items():
                samples[key].extend(tensor[:keep].cpu().unbind(0))
            samples['paths_A'].extend(batch['path_A'][:keep])
            samples['paths_B'].extend(batch['path_B'][:keep])
            count += keep

            if batch_idx % 20 == 0:
                progress = (count / num_samples) * 100
                print(f"   Progresso: {count}/{num_samples} ({progress:.1f}%)")

    print(f"‚úÖ {count} amostras geradas com sucesso!")
    return samples

# ================================
# ANÁLISE VISUAL AVANÇADA
# ================================

In [None]:
def create_detailed_comparison_grid(samples, num_examples=8):
    """Save a grid showing real, translated and cycle-reconstructed images.

    Each row holds one example with six columns: real A, A->B, A->B->A,
    real B, B->A, B->A->B. The figure is written to
    ``<config.RESULTS_PATH>/detailed_visual_comparison.png``.

    Args:
        samples: Dict of tensor lists with keys ``real_A``, ``fake_B``,
            ``cycle_A``, ``real_B``, ``fake_A``, ``cycle_B`` (values in [-1, 1]).
        num_examples: Number of rows requested; capped at the number of
            available samples.

    Raises:
        ValueError: If there is nothing to plot.
    """

    print("üé® Criando compara√ß√£o visual detalhada...")

    column_keys = ['real_A', 'fake_B', 'cycle_A', 'real_B', 'fake_A', 'cycle_B']
    headers = ['Real A', 'A‚ÜíB (Fake)', 'A‚ÜíB‚ÜíA (Cycle)', 'Real B', 'B‚ÜíA (Fake)', 'B‚ÜíA‚ÜíB (Cycle)']

    # Never request more rows than there are samples in every column.
    available = min(len(samples[key]) for key in column_keys)
    num_examples = min(num_examples, available)
    if num_examples <= 0:
        raise ValueError("no samples available for the comparison grid")

    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(num_examples, 6, figsize=(18, 3*num_examples), squeeze=False)
    fig.suptitle('CycleGAN Real2Cartoon - An√°lise Visual Detalhada', fontsize=16, fontweight='bold')

    for col, header in enumerate(headers):
        axes[0, col].set_title(header, fontweight='bold', fontsize=12)

    def denorm(tensor):
        # [-1, 1] CHW tensor -> [0, 1] HWC array for imshow
        return (tensor * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy()

    for row in range(num_examples):
        for col, key in enumerate(column_keys):
            ax = axes[row, col]
            ax.imshow(denorm(samples[key][row]))
            ax.set_xticks([])
            ax.set_yticks([])

    plt.tight_layout()
    plt.savefig(f"{config.RESULTS_PATH}/detailed_visual_comparison.png", dpi=150, bbox_inches='tight')
    plt.close(fig)

    print("‚úÖ Compara√ß√£o visual salva: detailed_visual_comparison.png")

def analyze_failure_cases(samples, num_worst=5):
    """Find and plot the samples with the largest cycle-reconstruction error.

    For every sample the mean L1 distance between the real image and its cycle
    reconstruction is computed in both directions (A->B->A and B->A->B). The
    ``num_worst`` largest in each direction are drawn (original, translation,
    reconstruction) and saved to
    ``<config.RESULTS_PATH>/failure_cases_analysis.png``.

    Args:
        samples: Dict of tensor lists with keys ``real_A``, ``real_B``,
            ``fake_A``, ``fake_B``, ``cycle_A``, ``cycle_B``.
        num_worst: Number of worst cases to show per direction.

    Returns:
        dict with the worst and average L1 loss per direction and the sample
        indices of the worst cases.

    Raises:
        ValueError: If ``samples`` holds no images or ``num_worst`` is below 1.
    """

    print("üîç Analisando failure cases...")

    num_items = len(samples['real_A'])
    if num_items == 0:
        raise ValueError("no samples available for failure-case analysis")
    if num_worst < 1:
        raise ValueError("num_worst must be >= 1")

    # (loss, index) pairs so that sorting keeps track of the sample position.
    l1_losses_A2B = [
        (F.l1_loss(samples['real_A'][i], samples['cycle_A'][i]).item(), i)
        for i in range(num_items)
    ]
    l1_losses_B2A = [
        (F.l1_loss(samples['real_B'][i], samples['cycle_B'][i]).item(), i)
        for i in range(num_items)
    ]

    # Largest reconstruction errors first
    worst_A2B = sorted(l1_losses_A2B, reverse=True)[:num_worst]
    worst_B2A = sorted(l1_losses_B2A, reverse=True)[:num_worst]

    fig, axes = plt.subplots(2, num_worst*3, figsize=(15, 6))
    fig.suptitle('Failure Cases Analysis', fontsize=16, fontweight='bold')

    def denorm(tensor):
        # [-1, 1] CHW tensor -> [0, 1] HWC array for imshow
        return (tensor * 0.5 + 0.5).clamp(0, 1).permute(1, 2, 0).numpy()

    # One figure row per direction: (worst cases, sample keys, panel titles).
    layout = [
        (worst_A2B, ('real_A', 'fake_B', 'cycle_A'), ('Real A', 'A‚ÜíB', 'A‚ÜíB‚ÜíA')),
        (worst_B2A, ('real_B', 'fake_A', 'cycle_B'), ('Real B', 'B‚ÜíA', 'B‚ÜíA‚ÜíB')),
    ]
    for fig_row, (worst_cases, keys, titles) in enumerate(layout):
        for i, (loss, idx) in enumerate(worst_cases):
            for offset, (key, title) in enumerate(zip(keys, titles)):
                ax = axes[fig_row, i*3 + offset]
                ax.imshow(denorm(samples[key][idx]))
                # Only the first panel of each triplet reports the loss.
                ax.set_title(f'{title}\nL1: {loss:.3f}' if offset == 0 else title)

    # Hide ticks on every panel, including unused ones
    for ax in axes.ravel():
        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()
    plt.savefig(f"{config.RESULTS_PATH}/failure_cases_analysis.png", dpi=150, bbox_inches='tight')
    plt.close(fig)

    # Plain Python floats keep the result JSON-serializable.
    failure_stats = {
        'worst_A2B_loss': worst_A2B[0][0],
        'avg_A2B_loss': float(np.mean([loss for loss, _ in l1_losses_A2B])),
        'worst_B2A_loss': worst_B2A[0][0],
        'avg_B2A_loss': float(np.mean([loss for loss, _ in l1_losses_B2A])),
        'worst_A2B_indices': [idx for _, idx in worst_A2B],
        'worst_B2A_indices': [idx for _, idx in worst_B2A]
    }

    print("‚úÖ Failure cases analisados:")
    print(f"   Pior A‚ÜíB‚ÜíA loss: {failure_stats['worst_A2B_loss']:.4f}")
    print(f"   Pior B‚ÜíA‚ÜíB loss: {failure_stats['worst_B2A_loss']:.4f}")

    return failure_stats


# ================================
# FUNÇÃO PRINCIPAL DE AVALIAÇÃO
# ================================

In [None]:
def run_complete_advanced_evaluation():
    """Run the full evaluation pipeline and persist the results.

    Steps: load the generators, generate samples, compute FID / LPIPS /
    Inception Score, produce the visual analyses, write everything to
    ``<config.RESULTS_PATH>/advanced_evaluation_metrics.json`` and print a
    summary.

    Returns:
        The metrics dict that was written to disk, or ``None`` when the models
        could not be loaded.
    """

    print("üöÄ INICIANDO AVALIA√á√ÉO AVAN√áADA COMPLETA")
    print("=" * 60)

    # Upper bounds on how many samples feed the slower / subset metrics.
    lpips_limit = 100
    is_limit = 1000

    # 1. Load models
    G_AB, G_BA, checkpoint = load_trained_models()
    if G_AB is None:
        return None

    # 2. Generate samples
    samples = generate_evaluation_samples(G_AB, G_BA, config.NUM_SAMPLES_FID)

    # 3. Build the metric calculators
    fid_calc = FIDCalculator(config.DEVICE)
    lpips_calc = LPIPSCalculator(config.DEVICE)
    is_calc = ISCalculator(config.DEVICE)

    print("\nüìä CALCULANDO M√âTRICAS AVAN√áADAS")
    print("=" * 40)

    # 4. FID: generated images against real images of the target domain
    fid_A2B = fid_calc.calculate_fid(samples['real_B_fid'], samples['fake_B_fid'])
    fid_B2A = fid_calc.calculate_fid(samples['real_A_fid'], samples['fake_A_fid'])

    # 5. LPIPS: each input against its own translation
    lpips_real_A = samples['real_A'][:lpips_limit]
    lpips_real_B = samples['real_B'][:lpips_limit]
    lpips_A2B = lpips_calc.calculate_lpips(lpips_real_A, samples['fake_B'][:lpips_limit])
    lpips_B2A = lpips_calc.calculate_lpips(lpips_real_B, samples['fake_A'][:lpips_limit])

    # 6. Inception Score of the generated images
    is_inputs_A = samples['fake_A_fid'][:is_limit]
    is_inputs_B = samples['fake_B_fid'][:is_limit]
    is_fake_A, is_std_A = is_calc.calculate_is(is_inputs_A)
    is_fake_B, is_std_B = is_calc.calculate_is(is_inputs_B)

    # 7. Visual analyses
    create_detailed_comparison_grid(samples, num_examples=8)
    failure_stats = analyze_failure_cases(samples, num_worst=5)

    # 8. Assemble results; sample counts report what was actually used,
    #    which can be lower than the limits on a small test set.
    advanced_metrics = {
        'metadata': {
            'timestamp': datetime.now().isoformat(),
            'evaluation_type': 'advanced_perceptual_analysis',
            'num_samples_fid': len(samples['real_A_fid']),
            'num_samples_lpips': len(lpips_real_A),
            'num_samples_is': len(is_inputs_A),
            'model_epoch': checkpoint['epoch']
        },
        'fid_scores': {
            'A_to_B': float(fid_A2B),
            'B_to_A': float(fid_B2A),
            'average': float((fid_A2B + fid_B2A) / 2)
        },
        'lpips_scores': {
            'A_to_B': float(lpips_A2B),
            'B_to_A': float(lpips_B2A),
            'average': float((lpips_A2B + lpips_B2A) / 2)
        },
        'inception_scores': {
            'fake_A_mean': float(is_fake_A),
            'fake_A_std': float(is_std_A),
            'fake_B_mean': float(is_fake_B),
            'fake_B_std': float(is_std_B)
        },
        'failure_analysis': failure_stats,
        # NOTE(review): reference values are hard-coded here; their source is
        # not recorded in this notebook -- verify before citing them.
        'literature_comparison_fid': {
            'CycleGAN_original': {'horse2zebra': 77.2, 'summer2winter': 75.8},
            'AttentionGAN': {'selfie2anime': 71.4},
            'UNIT': {'face_translation': 85.2},
            'your_model_A2B': float(fid_A2B),
            'your_model_B2A': float(fid_B2A)
        }
    }

    # 9. Save results (explicit UTF-8 because ensure_ascii=False may emit non-ASCII)
    results_path = f"{config.RESULTS_PATH}/advanced_evaluation_metrics.json"
    with open(results_path, 'w', encoding='utf-8') as f:
        json.dump(advanced_metrics, f, indent=2, ensure_ascii=False)

    # 10. Final report
    print("\nüéâ RESULTADOS FINAIS")
    print("=" * 30)
    print(f"üìä FID A‚ÜíB: {fid_A2B:.2f}")
    print(f"üìä FID B‚ÜíA: {fid_B2A:.2f}")
    print(f"üìä LPIPS A‚ÜíB: {lpips_A2B:.4f}")
    print(f"üìä LPIPS B‚ÜíA: {lpips_B2A:.4f}")
    print(f"üìä IS Fake A: {is_fake_A:.2f}¬±{is_std_A:.2f}")
    print(f"üìä IS Fake B: {is_fake_B:.2f}¬±{is_std_B:.2f}")

    # FID interpretation, same thresholds for both directions
    for direction, fid_value in (("A‚ÜíB", fid_A2B), ("B‚ÜíA", fid_B2A)):
        if fid_value < 50:
            print(f"‚úÖ FID {direction}: Excelente qualidade!")
        elif fid_value < 100:
            print(f"üëç FID {direction}: Boa qualidade")
        else:
            print(f"‚ö†Ô∏è FID {direction}: Qualidade moderada")

    print(f"\nüíæ Resultados salvos em: {results_path}")
    print(f"üé® Visualiza√ß√µes salvas em: {config.RESULTS_PATH}/")

    return advanced_metrics

# ================================
# EXECUTAR AVALIAÇÃO
# ================================

In [None]:
if __name__ == "__main__":
    # Dependency probe: report whether the LPIPS package can be imported.
    try:
        import lpips
    except ImportError:
        print("‚ùå LPIPS n√£o encontrado. Instale com: pip install lpips")
    else:
        print("‚úÖ LPIPS dispon√≠vel")

    # Run the whole pipeline; a falsy result signals that it did not complete.
    advanced_results = run_complete_advanced_evaluation()

    if not advanced_results:
        print("‚ùå Erro durante avalia√ß√£o. Verifique os modelos salvos.")
    else:
        success_lines = (
            "\nüèÜ AVALIA√á√ÉO AVAN√áADA CONCLU√çDA COM SUCESSO!",
            "üìä M√©tricas perceptuais calculadas",
            "üé® An√°lises visuais geradas",
            "üîç Failure cases identificados",
            "üíæ Resultados salvos em JSON",
        )
        for line in success_lines:
            print(line)