Análise Comparativa

In [2]:
import sys
import os
# Expose the parent of the current working directory to the import system.
sys.path.append(os.path.join(os.getcwd(), os.pardir))

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from skimage.metrics import structural_similarity as ssim 
import pandas as pd
from tqdm import tqdm

In [4]:
# Use CUDA when PyTorch reports an available GPU; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

Preparação dos Dados

In [5]:
# Root folder holding the MNIST files. The TCC_DATA_DIR environment variable
# takes precedence so the notebook can run on other machines; the original
# location is kept as the fallback so existing setups keep working.
data_dir = os.environ.get(
    'TCC_DATA_DIR',
    'C:\\Users\\anthonny.paz\\Documents\\GitHub\\TCC-AUTOENCONDER\\modelos_TCC\\data',
)

# Require the folder to exist already instead of letting the dataset loader
# create a new one somewhere unexpected.
if not os.path.isdir(data_dir):
    raise FileNotFoundError(f"A pasta '{data_dir}' não existe. Crie-a manualmente antes de rodar o código.")


# ToTensor yields pixel values in [0, 1]; Normalize with mean 0.5 / std 0.5
# rescales them to [-1, 1], the same range as the Tanh output of the decoders.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root=data_dir, train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root=data_dir, train=False, download=True, transform=transform)

batch_size = 256
# Only the training split is shuffled; the test split keeps its order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Definição dos Modelos

Autoencoder Linear

In [6]:
class LinearAutoencoder(nn.Module):
    """Fully connected autoencoder.

    The encoder flattens its input and maps 784 -> 512 -> 256 -> ``latent_dim``
    features. The decoder mirrors those widths, squashes the result with Tanh
    and reshapes each sample to ``(1, 28, 28)``.

    Args:
        latent_dim: Number of features in the bottleneck.
    """

    def __init__(self, latent_dim):
        super().__init__()

        encoder_layers = [
            nn.Flatten(),
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, latent_dim),
        ]
        decoder_layers = [
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 784),
            nn.Tanh(),
            nn.Unflatten(1, (1, 28, 28)),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the latent code and return its reconstruction."""
        latent = self.encoder(x)
        reconstruction = self.decoder(latent)
        return reconstruction

Autoencoder Convolucional

In [7]:
class ConvAutoencoder(nn.Module):
    """Two-stage convolutional autoencoder with batch normalisation.

    The encoder applies two stride-2 convolutions (1 -> 16 -> 32 channels) and
    projects the flattened 32x7x7 feature map to ``latent_dim`` values. The
    flattened size is hard-coded, so single-channel 28x28 inputs are expected.
    The decoder projects back to 32x7x7 and doubles the resolution twice with
    transposed convolutions, ending in Tanh.

    Args:
        latent_dim: Number of features in the bottleneck.
    """

    def __init__(self, latent_dim):
        super().__init__()

        # Size of the flattened 32-channel 7x7 map on both sides of the bottleneck.
        flat_features = 7*7*32

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(flat_features, latent_dim),
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, flat_features),
            nn.Unflatten(1, (32, 7, 7)),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` after the bottleneck."""
        code = self.encoder(x)
        return self.decoder(code)

Sparse Autoencoder

In [8]:
class SparseAutoencoder(nn.Module):
    """Fully connected autoencoder meant to be trained with a sparsity penalty.

    The encoder maps 784 -> 512 -> ``latent_dim`` and the decoder maps
    ``latent_dim`` -> 512 -> 784 followed by Tanh. Unlike the other models, the
    encoder has no Flatten layer: ``forward`` flattens the input itself, so
    calling ``self.encoder`` directly requires an input of shape ``(N, 784)``.

    The network applies no sparsity constraint on its own. ``nn.L1Loss`` is a
    two-argument criterion (input, target), not a layer, so it cannot be placed
    inside ``nn.Sequential``; any L1 penalty on the latent code has to be added
    to the training objective by the caller.

    Args:
        latent_dim: Number of features in the bottleneck.
    """

    def __init__(self, latent_dim):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, latent_dim)
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 784),
            nn.Tanh()
        )

    def forward(self, x):
        """Flatten ``x`` to ``(N, 784)``, encode, decode and reshape to ``(N, 1, 28, 28)``."""
        x_flat = x.view(-1, 784)
        z = self.encoder(x_flat)
        return self.decoder(z).view(-1, 1, 28, 28)

Denoising Autoencoder

In [9]:
class DenoisingAutoencoder(nn.Module):
    """Convolutional autoencoder that reconstructs from a noise-corrupted input.

    The encoder applies two stride-2 convolutions (1 -> 16 -> 32 channels) and
    projects the flattened 32x7x7 map to ``latent_dim`` values; the decoder
    mirrors it and ends in Tanh. ``forward`` corrupts its input on every call,
    regardless of whether the module is in training or evaluation mode.

    Args:
        latent_dim: Number of features in the bottleneck.
        noise_std: Standard deviation of the Gaussian noise added to the input.
            Defaults to 0.5, the value that used to be hard-coded.
    """

    def __init__(self, latent_dim, noise_std=0.5):
        super().__init__()
        self.noise_std = noise_std

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(7*7*32, latent_dim)
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 7*7*32),
            nn.Unflatten(1, (32, 7, 7)),
            nn.ConvTranspose2d(32, 16, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 3, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def add_noise(self, x):
        """Return ``x`` plus zero-mean Gaussian noise, clamped to ``[-1, 1]``."""
        noise = torch.randn_like(x) * self.noise_std
        return torch.clamp(x + noise, -1., 1.)

    def forward(self, x):
        """Corrupt ``x`` with noise and return the reconstruction of the noisy input."""
        x_noisy = self.add_noise(x)
        return self.decoder(self.encoder(x_noisy))

Deep Convolutional Autoencoder

In [10]:
class DeepConvAutoencoder(nn.Module):
    """Three-stage convolutional autoencoder with batch normalisation.

    The encoder applies three stride-2 convolutions (1 -> 32 -> 64 -> 128
    channels), which take a 28x28 input to 14x14, 7x7 and 4x4, and projects the
    flattened 128x4x4 map to ``latent_dim`` values. The decoder projects back
    to 128x4x4 and upsamples 4 -> 7 -> 14 -> 28 so the reconstruction has the
    same spatial size as the input.

    Args:
        latent_dim: Number of features in the bottleneck.
    """

    def __init__(self, latent_dim):
        super().__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(4*4*128, latent_dim)
        )

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 4*4*128),
            nn.Unflatten(1, (128, 4, 4)),
            # Transposed-conv output size is (in - 1) * 2 - 2 * 1 + 3 + output_padding.
            # With output_padding=0 this maps 4 -> 7; output_padding=1 would give
            # 8 and the final reconstruction would be 32x32 instead of 28x28.
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=0),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # 7 -> 14
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # 14 -> 28
            nn.ConvTranspose2d(32, 1, 3, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` after the bottleneck."""
        return self.decoder(self.encoder(x))


Variational Autoencoder

In [11]:
class VariationalAutoencoder(nn.Module):
    """Convolutional variational autoencoder.

    The encoder applies two stride-2 convolutions (1 -> 32 -> 64 channels),
    flattens the 64x7x7 map and projects it to a 512-feature hidden vector.
    Two linear heads turn that vector into the mean and log-variance of the
    latent Gaussian. The decoder maps a latent sample back to 64x7x7 and
    upsamples 7 -> 14 -> 28, ending in Tanh.

    Args:
        latent_dim: Dimensionality of the latent Gaussian.
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Flatten(),
            # The last convolution emits 64 channels at 7x7, and both heads
            # below expect 512 input features.
            nn.Linear(7*7*64, 512),
            nn.ReLU()
        )

        self.fc_mu = nn.Linear(512, latent_dim)
        self.fc_logvar = nn.Linear(512, latent_dim)

        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 7*7*64),
            nn.ReLU(),
            nn.Unflatten(1, (64, 7, 7)),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            # Regular (non-lazy) layer: the lazy variant takes no in_channels,
            # so passing (32, 1, 3, stride=2) would supply stride twice.
            nn.ConvTranspose2d(32, 1, 3, stride=2, padding=1, output_padding=1),
            nn.Tanh()
        )

    def reparameterize(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(0.5 * logvar)``."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for the input batch ``x``."""
        h = self.encoder(x)
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

Atualizando as Configurações

In [12]:
# One entry per architecture: display name, model class, the latent sizes
# listed under 'params', and a flag that is True only for the VAE.
models_config = [
    {
        'name': display_name,
        'model': model_class,
        'params': {'latent_dim': [64, 32, 16, 8, 4, 2]},
        'is_vae': vae_flag,
    }
    for display_name, model_class, vae_flag in (
        ('Linear', LinearAutoencoder, False),
        ('Conv Shallow', ConvAutoencoder, False),
        ('Conv Deep', DeepConvAutoencoder, False),
        ('Denoising', DenoisingAutoencoder, False),
        ('Sparse', SparseAutoencoder, False),
        ('VAE', VariationalAutoencoder, True),
    )
]

Treinamento

In [13]:
def train_model(model, train_loader, test_loader, epochs=20, is_vae=False, kl_weight=1.0):
    """Train an autoencoder with Adam (lr=1e-3) on a mean-squared-error objective.

    Args:
        model: Module to train. It is moved to the notebook-level ``device``.
            When ``is_vae`` is True its forward pass must return
            ``(reconstruction, mu, logvar)``; otherwise only the reconstruction.
        train_loader: Iterable of ``(batch, label)`` pairs; labels are ignored.
        test_loader: Currently unused; kept so existing calls stay valid.
        epochs: Number of passes over ``train_loader``.
        is_vae: Add the KL divergence term to the loss.
        kl_weight: Multiplier applied to the KL term (VAE only).

    Returns:
        List with the average training loss of each epoch.
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    train_losses = []
    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        total_kl = 0.0

        for batch, _ in tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs}'):
            batch = batch.to(device)
            optimizer.zero_grad()

            if is_vae:
                recon_batch, mu, logvar = model(batch)
                recon_loss = criterion(recon_batch, batch)
                # Closed-form KL(N(mu, sigma^2) || N(0, I)) summed over the batch,
                # then divided by the number of input elements so it sits on the
                # same per-element scale as the mean-reduced MSE term.
                kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
                kl_loss = kl_loss / batch.numel()
                loss = recon_loss + kl_weight * kl_loss
                total_kl += kl_loss.item()
            else:
                recon_batch = model(batch)
                loss = criterion(recon_batch, batch)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        avg_kl = total_kl / len(train_loader) if is_vae else 0
        train_losses.append(avg_loss)

        print(f'Loss: {avg_loss:.4f}', end='')
        if is_vae:
            print(f' | KL Loss: {avg_kl:.4f}')
        else:
            print()

    return train_losses

Visualização do Espaço Latente

In [14]:
def visualize_latent_space(model, test_loader, logvar=None, n_samples=1000):
    """Scatter-plot the latent codes of up to ``n_samples`` samples, coloured by label.

    Two-dimensional codes are plotted directly; higher-dimensional codes are
    first projected to 2-D with t-SNE. For a ``VariationalAutoencoder`` the
    posterior mean (``fc_mu`` applied to the encoder output) is used as the code.

    Args:
        model: Trained autoencoder exposing an ``encoder`` module. It is
            switched to evaluation mode.
        test_loader: Iterable of ``(batch, label)`` pairs.
        logvar: Unused. Kept, now with a default, so existing positional calls
            keep working.
        n_samples: Maximum number of samples to plot.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    latents = []
    labels = []
    collected = 0

    # An encoder whose first layer is Linear has no Flatten of its own, so the
    # images must be flattened before they are passed in.
    first_layer = next(iter(model.encoder.children()), None)
    needs_flat_input = isinstance(first_layer, nn.Linear)

    with torch.no_grad():
        for batch, label in test_loader:
            # Stop on the number of samples gathered, not the number of batches.
            if collected >= n_samples:
                break

            batch = batch.to(device)
            if needs_flat_input:
                batch = batch.flatten(start_dim=1)

            if isinstance(model, VariationalAutoencoder):
                z = model.fc_mu(model.encoder(batch))
            else:
                z = model.encoder(batch)

            latents.append(z.cpu().numpy())
            labels.append(torch.as_tensor(label).cpu().numpy())
            collected += len(z)

    if not latents:
        raise ValueError("test_loader yielded no samples to visualize.")

    latents = np.concatenate(latents)[:n_samples]
    labels = np.concatenate(labels)[:n_samples]

    plt.figure(figsize=(10, 8))
    if latents.shape[1] == 2:
        points_2d = latents
    else:
        from sklearn.manifold import TSNE
        points_2d = TSNE(n_components=2).fit_transform(latents)
    plt.scatter(points_2d[:,0], points_2d[:,1], c=labels, cmap='tab10', alpha=0.6)

    plt.colorbar()
    plt.title(f'Espaço Latente - {model.__class__.__name__}')
    plt.show()
