
# Tugas Individu: Eksplorasi Autoencoder

Notebook ini berisi eksperimen Autoencoder (Conv-AE, VAE, Beta-VAE) pada Fashion-MNIST.

Tujuan: memahami dampak arsitektur encoder-decoder, dimensi laten, dan regularisasi pada kualitas rekonstruksi dan struktur ruang laten.

Jalankan sel berurutan. Notebook menggunakan PyTorch.


In [None]:

# Setup imports and config
import os, math, random
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
from torchvision.utils import make_grid, save_image

# Reproducibility: seed the Python, NumPy and PyTorch RNGs with one shared value.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Device:', device)

# Folder that later cells write checkpoints and image grids into.
out_dir = Path('outputs_autoencoder')
out_dir.mkdir(exist_ok=True)


In [None]:

# Dataset: Fashion-MNIST
batch_size = 128
transform = transforms.Compose([transforms.ToTensor()])

# Both splits live under ./data; download=True asks torchvision to fetch them there.
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset  = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Page-locked host memory only speeds up host-to-GPU copies. Requesting it on a
# CPU-only machine gains nothing and makes DataLoader emit a warning, so it is
# enabled only when CUDA is actually available.
pin_memory = torch.cuda.is_available()

# Only the training split is shuffled; the test split keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=pin_memory)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=pin_memory)

# Visual check: show the first 16 images of one training batch as a 4x4 grid.
imgs, labels = next(iter(train_loader))
plt.figure(figsize=(4,4))
plt.axis('off')
plt.imshow(np.transpose(make_grid(imgs[:16], nrow=4).cpu(), (1,2,0)).squeeze(), cmap='gray')
plt.title('Sample Fashion-MNIST')
plt.show()


In [None]:

# Model definitions: ConvAutoencoder, VAE (and reuse VAE for Beta-VAE by adjusting beta)
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder with a linear bottleneck of size ``latent_dim``.

    The encoder is three stride-2 3x3 convolutions (1 -> 32 -> 64 -> 128
    channels, each followed by ReLU) whose output is flattened to
    ``128 * 4 * 4`` features and projected to the latent vector. The decoder
    projects back to ``128 * 4 * 4``, applies three stride-2 transposed
    convolutions ending in a Sigmoid, and crops the result to 28x28.

    The layer sizes are laid out for single-channel 28x28 inputs
    (28 -> 14 -> 7 -> 4 through the encoder).
    """

    def __init__(self, latent_dim=32):
        super().__init__()
        # Downsampling path: 28 -> 14 -> 7 -> 4 spatially.
        downsample = [
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*downsample)
        self.flatten = nn.Flatten()
        # Number of features in the flattened 128x4x4 encoder output.
        self._enc_out = 128 * 4 * 4
        self.fc_enc = nn.Linear(self._enc_out, latent_dim)
        self.fc_dec = nn.Linear(latent_dim, self._enc_out)
        # Upsampling path: every transposed conv doubles the side (4 -> 8 -> 16 -> 32).
        upsample = [
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder_conv = nn.Sequential(*upsample)

    def encode(self, x):
        """Map an image batch ``x`` to its latent codes."""
        features = self.flatten(self.encoder(x))
        return self.fc_enc(features)

    def decode(self, z):
        """Map latent codes ``z`` back to 28x28 images with values in [0, 1]."""
        feature_map = self.fc_dec(z).view(-1, 128, 4, 4)
        upsampled = self.decoder_conv(feature_map)
        # The decoder emits 32x32; keep the central 28x28 window.
        return upsampled[:, :, 2:30, 2:30]

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decode(self.encode(x))


class VAE(nn.Module):
    """Convolutional variational autoencoder with a diagonal Gaussian posterior.

    The convolutional encoder and decoder have the same layout as
    ``ConvAutoencoder``. Instead of one bottleneck layer, two linear heads
    produce the posterior mean and log-variance; a latent sample drawn via the
    reparameterization trick is what gets decoded.

    The same class serves as a Beta-VAE: the KL weight is applied in the loss,
    not in the model.
    """

    def __init__(self, latent_dim=16):
        super().__init__()
        downsample = [
            nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.encoder = nn.Sequential(*downsample)
        self.flatten = nn.Flatten()
        # Number of features in the flattened 128x4x4 encoder output.
        self._enc_out = 128 * 4 * 4
        self.fc_mu = nn.Linear(self._enc_out, latent_dim)
        self.fc_logvar = nn.Linear(self._enc_out, latent_dim)
        self.fc_dec = nn.Linear(latent_dim, self._enc_out)
        upsample = [
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        ]
        self.decoder_conv = nn.Sequential(*upsample)

    def encode(self, x):
        """Return ``(mu, logvar)`` of the approximate posterior for batch ``x``."""
        features = self.flatten(self.encoder(x))
        return self.fc_mu(features), self.fc_logvar(features)

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * std`` with ``eps ~ N(0, I)`` so sampling stays differentiable."""
        sigma = torch.exp(0.5 * logvar)
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map latent codes ``z`` to 28x28 images with values in [0, 1]."""
        feature_map = self.fc_dec(z).view(-1, 128, 4, 4)
        upsampled = self.decoder_conv(feature_map)
        # The decoder emits 32x32; keep the central 28x28 window.
        return upsampled[:, :, 2:30, 2:30]

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for batch ``x``."""
        mu, logvar = self.encode(x)
        sample = self.reparameterize(mu, logvar)
        return self.decode(sample), mu, logvar


In [None]:

# Loss functions and training loops
# Binary cross-entropy summed over every element; the loss functions below
# divide by the batch dimension themselves.
bce_loss = nn.BCELoss(reduction='sum')

def loss_ae(recon_x, x):
    """Return the summed BCE between ``recon_x`` and ``x`` divided by ``x.size(0)``."""
    n_items = x.size(0)
    summed_bce = bce_loss(recon_x, x)
    return summed_bce / n_items

def loss_vae(recon_x, x, mu, logvar, beta=1.0):
    """Return ``(total, bce, kld)``, each divided by ``x.size(0)``.

    ``bce`` is the summed reconstruction BCE, ``kld`` the closed-form KL
    divergence between N(mu, exp(logvar)) and a standard normal, and
    ``total = bce + beta * kld``. ``beta=1.0`` gives the plain VAE objective;
    larger values weight the KL term more heavily (Beta-VAE).
    """
    n_items = x.size(0)
    recon_term = bce_loss(recon_x, x)
    kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(kl_elements)
    total = (recon_term + beta*kl_term) / n_items
    return total, recon_term / n_items, kl_term / n_items

def train_ae(model, loader, optimizer, epoch, device):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    Args:
        model: autoencoder whose forward pass returns the reconstruction.
        loader: iterable of ``(inputs, labels)`` batches; labels are ignored.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the printed progress line.
        device: device each input batch is moved to.

    Returns:
        The ``loss_ae`` values of all batches, averaged over ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for inputs, _ in loader:
        inputs = inputs.to(device)
        optimizer.zero_grad()
        loss = loss_ae(model(inputs), inputs)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    avg = sum(batch_losses) / len(loader)
    print(f'Epoch {epoch} AE train loss: {avg:.4f}')
    return avg

def eval_ae(model, loader, device):
    """Return the mean per-batch ``loss_ae`` of ``model`` over ``loader``.

    The model is switched to eval mode and gradients are disabled, so no
    parameters are updated.
    """
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for inputs, _ in loader:
            inputs = inputs.to(device)
            batch_losses.append(loss_ae(model(inputs), inputs).item())
    return sum(batch_losses) / len(loader)

def train_vae(model, loader, optimizer, epoch, device, beta=1.0):
    """Train a VAE for one pass over ``loader``.

    Args:
        model: VAE whose forward pass returns ``(reconstruction, mu, logvar)``.
        loader: iterable of ``(inputs, labels)`` batches; labels are ignored.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the printed progress line.
        device: device each input batch is moved to.
        beta: weight of the KL term, passed through to ``loss_vae``.

    Returns:
        ``(loss, bce, kld)``, each averaged over ``len(loader)`` batches.
    """
    model.train()
    # Running sums of [total loss, reconstruction term, KL term].
    sums = [0.0, 0.0, 0.0]
    for inputs, _ in loader:
        inputs = inputs.to(device)
        optimizer.zero_grad()
        recon, mu, logvar = model(inputs)
        loss, recon_term, kl_term = loss_vae(recon, inputs, mu, logvar, beta)
        loss.backward()
        optimizer.step()
        for idx, term in enumerate((loss, recon_term, kl_term)):
            sums[idx] += term.item()
    n_batches = len(loader)
    avg_loss, avg_bce, avg_kld = (s / n_batches for s in sums)
    print(f'Epoch {epoch} VAE train loss: {avg_loss:.4f} BCE:{avg_bce:.4f} KLD:{avg_kld:.4f}')
    return avg_loss, avg_bce, avg_kld

def eval_vae(model, loader, device, beta=1.0):
    """Evaluate a VAE on ``loader`` without updating it.

    Returns:
        ``(loss, bce, kld)`` from ``loss_vae``, each averaged over
        ``len(loader)`` batches. ``beta`` weights the KL term in ``loss``.
    """
    model.eval()
    # Running sums of [total loss, reconstruction term, KL term].
    sums = [0.0, 0.0, 0.0]
    with torch.no_grad():
        for inputs, _ in loader:
            inputs = inputs.to(device)
            recon, mu, logvar = model(inputs)
            terms = loss_vae(recon, inputs, mu, logvar, beta)
            for idx, term in enumerate(terms):
                sums[idx] += term.item()
    n_batches = len(loader)
    avg_loss, avg_bce, avg_kld = (s / n_batches for s in sums)
    return avg_loss, avg_bce, avg_kld


In [None]:

# ConvAE baseline: a short run (few epochs) so the notebook stays reasonably fast.
latent_dim = 32
ae = ConvAutoencoder(latent_dim=latent_dim).to(device)
opt = optim.Adam(ae.parameters(), lr=1e-3)

n_epochs = 6
train_losses = []
val_losses = []
for e in range(1, n_epochs+1):
    # One training pass, then score the same model on the held-out test split.
    tr = train_ae(ae, train_loader, opt, e, device)
    val = eval_ae(ae, test_loader, device)
    train_losses.append(tr)
    val_losses.append(val)

# Persist the trained weights; the latent size is part of the file name.
torch.save(ae.state_dict(), out_dir / f'conv_ae_latent{latent_dim}.pt')

# Training vs. validation loss per epoch.
plt.figure()
plt.plot(train_losses, label='train')
plt.plot(val_losses, label='val')
plt.legend()
plt.title('ConvAE Loss')
plt.show()

# Show reconstructions
def show_recon(model, loader, device, n=8, is_vae=False):
    """Plot ``n`` inputs from the first batch of ``loader`` above their reconstructions.

    Args:
        model: autoencoder to visualise; it is switched to eval mode.
        loader: iterable of ``(inputs, labels)`` batches; only the first is used.
        device: device the inputs are moved to before the forward pass.
        n: number of images taken from the batch, also the grid row length.
        is_vae: set when ``model`` returns ``(reconstruction, mu, logvar)``
            instead of just the reconstruction.
    """
    model.eval()
    batch, _ = next(iter(loader))
    originals = batch[:n].to(device)
    with torch.no_grad():
        if not is_vae:
            recon = model(originals)
        else:
            recon, _, _ = model(originals)
    # First grid row: originals; second row: reconstructions.
    pairs = torch.cat([originals.cpu(), recon.cpu()])
    grid = make_grid(pairs, nrow=n)
    plt.figure(figsize=(12,3))
    plt.axis('off')
    plt.imshow(np.transpose(grid,(1,2,0)).squeeze(), cmap='gray')
    plt.show()

show_recon(ae, test_loader, device, n=8, is_vae=False)


In [None]:

# VAE experiment: train a plain VAE (beta=1.0) with a 16-dimensional latent space.
# NOTE: latent_dim and n_epochs are notebook globals and overwrite the values
# set by the ConvAE cell.
latent_dim = 16
vae = VAE(latent_dim=latent_dim).to(device)
optv = optim.Adam(vae.parameters(), lr=1e-3)
n_epochs = 8
for e in range(1, n_epochs+1):
    # train_vae prints the per-epoch loss, BCE and KLD itself; its return value is not kept.
    train_vae(vae, train_loader, optv, e, device, beta=1.0)

# Persist the trained weights; the latent size is part of the file name.
torch.save(vae.state_dict(), out_dir / f'vae_latent{latent_dim}.pt')
# Show VAE reconstructions
def show_recon_vae(model, loader, device, n=8):
    """Plot ``n`` inputs from the first batch of ``loader`` above their VAE reconstructions.

    ``model`` must return ``(reconstruction, mu, logvar)``; only the
    reconstruction is displayed. The model is switched to eval mode.
    """
    model.eval()
    batch, _ = next(iter(loader))
    originals = batch[:n].to(device)
    with torch.no_grad():
        recon, _mu, _logvar = model(originals)
    # First grid row: originals; second row: reconstructions.
    pairs = torch.cat([originals.cpu(), recon.cpu()])
    grid = make_grid(pairs, nrow=n)
    plt.figure(figsize=(12,3))
    plt.axis('off')
    plt.imshow(np.transpose(grid,(1,2,0)).squeeze(), cmap='gray')
    plt.show()

show_recon_vae(vae, test_loader, device, n=8)

# Beta-VAE (beta=4): same architecture as the VAE above, trained with a heavier KL weight.
beta = 4.0
bvae = VAE(latent_dim=latent_dim).to(device)
optb = optim.Adam(bvae.parameters(), lr=1e-3)
n_epochs = 8
for e in range(1, n_epochs+1):
    train_vae(bvae, train_loader, optb, e, device, beta=beta)

# Format beta with 'g' rather than int(): 4.0 still yields "beta4", but
# fractional values such as 0.5 or 1.5 are no longer truncated to 0 / 1, which
# would silently overwrite another run's checkpoint.
torch.save(bvae.state_dict(), out_dir / f'beta_vae_latent{latent_dim}_beta{beta:g}.pt')
show_recon_vae(bvae, test_loader, device, n=8)


In [None]:

# Latent interpolation utilities (works for AE and VAE)
def interpolate_and_show(model, loader, device, steps=10, is_vae=False):
    """Decode and plot a linear interpolation between two latent codes.

    The first two images of the first batch of ``loader`` are encoded, and
    ``steps`` evenly spaced points on the line between their codes
    (endpoints included) are decoded and shown in a single row.

    Args:
        model: model exposing ``decode(z)``. With ``is_vae=False`` it must
            also expose ``encode(x)`` returning the latent code; with
            ``is_vae=True`` its forward pass must return
            ``(reconstruction, mu, logvar)``.
        loader: iterable of ``(inputs, labels)`` batches; only the first is used.
        device: device the two endpoint images are moved to.
        steps: number of interpolation points.
        is_vae: interpolate between posterior means instead of plain codes.
    """
    # Inference only: switch to eval mode and skip building an autograd graph,
    # as the reconstruction helpers in this notebook do.
    model.eval()
    x, _ = next(iter(loader))
    a = x[0:1].to(device)
    b = x[1:2].to(device)
    with torch.no_grad():
        if is_vae:
            # Use the posterior means as endpoints so the path is deterministic.
            _, za, _ = model(a)
            _, zb, _ = model(b)
        else:
            za = model.encode(a)
            zb = model.encode(b)
        imgs = []
        for alpha in np.linspace(0, 1, steps):
            # Plain Python float keeps the blend in the tensors' own dtype.
            weight = float(alpha)
            z = (1 - weight) * za + weight * zb
            imgs.append(model.decode(z).cpu())
    grid = make_grid(torch.cat(imgs, dim=0), nrow=steps)
    plt.figure(figsize=(12,2))
    plt.axis('off')
    plt.imshow(np.transpose(grid,(1,2,0)).squeeze(), cmap='gray')
    plt.title('Latent interpolation')
    plt.show()

# Interpolation examples: both calls use the first two test images.
# The plain autoencoder interpolates between encoded codes; the VAE between posterior means.
interpolate_and_show(ae, test_loader, device, steps=12, is_vae=False)
interpolate_and_show(vae, test_loader, device, steps=12, is_vae=True)


In [None]:

# Quick latent dimension sweep for ConvAE (small training to compare trend)
latent_list = [8,16,32]
results = {}
for ld in latent_list:
    m = ConvAutoencoder(latent_dim=ld).to(device)
    optm = optim.Adam(m.parameters(), lr=1e-3)
    for e in range(1,4):
        train_ae(m, train_loader, optm, e, device)
    # Mean BCE per image over the whole test set. Dividing the summed BCE by
    # the number of images (not the number of batches) keeps the value
    # independent of batch_size, unaffected by a shorter final batch, and on
    # the same per-image scale as the losses printed by train_ae.
    m.eval()
    total = 0.0
    n_images = 0
    with torch.no_grad():
        for x,_ in test_loader:
            x = x.to(device)
            xrec = m(x)
            total += bce_loss(xrec, x).item()
            n_images += x.size(0)
    results[ld] = total / n_images
print('Sweep results (lower better):', results)


In [None]:

# Save a sample reconstruction image for the report.
# Takes the first 16 images of the first test batch and reconstructs them with
# the ConvAE baseline (`ae`).
x_sample, _ = next(iter(test_loader))
x_sample = x_sample[:16].to(device)
with torch.no_grad():
    xrec = ae(x_sample)
# 32 images at 8 per row: rows 1-2 are originals, rows 3-4 their reconstructions.
grid = make_grid(torch.cat([x_sample.cpu(), xrec.cpu()]), nrow=8)
save_image(grid, out_dir / 'ae_reconstruction_grid.png')
print('Saved reconstruction grid to', out_dir / 'ae_reconstruction_grid.png')



## Analisis & Refleksi

- **Perbandingan**: ConvAE biasanya menghasilkan rekonstruksi paling tajam; VAE cenderung lebih buram (blur) namun memberikan ruang laten (latent space) yang lebih teratur; Beta-VAE memperkuat regularisasi tetapi mengorbankan detail rekonstruksi.

- **Kesulitan**: Menyeimbangkan loss (BCE vs KLD) untuk VAE; menyesuaikan ukuran output decoder (cropping keluaran `ConvTranspose2d` dari 32x32 ke 28x28).

- **Bantuan AI**: Jika Anda menggunakan bantuan AI untuk debugging atau saran arsitektur, cantumkan di bagian refleksi laporan.

- **Saran pengembangan**: coba dataset lain (CelebA), augmentasi, dan eksperimen learning-rate schedule atau warmup untuk KLD.
