In [1]:
import random
import numpy as np
import mlflow
from config import RAW_DATA_DIR

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision
from torchvision.datasets import MNIST
from tqdm.auto import tqdm
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from IPython.display import HTML

[32m2025-07-18 08:37:54.618[0m | [1mINFO    [0m | [36mconfig[0m:[36m<module>[0m:[36m9[0m - [1mPROJ_ROOT path is: /home/arys/projects/unsupervized_ml_sandbox[0m


In [2]:
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)
random.seed(0)

### Prep Dataset ###
tensor_transforms = transforms.Compose(
    [
        transforms.Resize((28, 28)),
        transforms.ToTensor()
    ]
)

train_dataset = torchvision.datasets.MNIST(root=RAW_DATA_DIR / 'mnist', train=True, transform=tensor_transforms,
                                           download=True)
val_dataset = torchvision.datasets.MNIST(root=RAW_DATA_DIR / 'mnist', train=False, transform=tensor_transforms,
                                         download=True)
batch_size = 1024
train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=1024, shuffle=True, num_workers=8)
val_dl = torch.utils.data.DataLoader(val_dataset, batch_size=1024, shuffle=True, num_workers=8)

### Set Device ###
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
import math


class VAE(nn.Module):
    def __init__(self, latent_dim=2, input_size=28 * 28):
        super().__init__()
        self.input_size: int = input_size
        self.latent_dim: int = latent_dim


        self.encoder_conv = nn.Sequential(

            ### Convolutional Encoding ###
            nn.Conv2d(in_channels=input_size, out_channels=8, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),

            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(in_channels=16, out_channels=self.latent_dim, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.latent_dim),
            nn.ReLU(),


        )

        #########################################################
        ### The New Layers Added in from Original AutoEncoder ###
        self.conv_mu =  nn.Conv2d(in_channels=self.latent_dim, out_channels=self.latent_dim, kernel_size=3, stride=1, padding="same")
        self.conv_logvar = nn.Conv2d(in_channels=self.latent_dim, out_channels=self.latent_dim, kernel_size=3, stride=1, padding="same")
        #########################################################

        self.decoder_conv = nn.Sequential(
            nn.ConvTranspose2d(in_channels=self.latent_dim, out_channels=16, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=1, output_padding=1, bias=False),
            nn.BatchNorm2d(8),
            nn.ReLU(),

            nn.ConvTranspose2d(in_channels=8, out_channels=input_size, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )


    def forward_dec(self, x):
        return self.decoder(x)

    def forward_enc(self, x):
        conv_enc = self.encoder_conv(x)

        #############################################
        ### Compute Mu and Sigma ###
        mu = self.conv_mu(conv_enc)
        logvar = self.conv_logvar(conv_enc)

        ### Sample with Reparamaterization Trick ###
        sigma = torch.exp(0.5*logvar)
        noise = torch.randn_like(sigma, device=sigma.device)
        z = mu + sigma*noise
        ############################################

        return z, mu, logvar
    
    def forward(self, x):
        z, mu, logvar = self.forward_enc(x)

        return z, self.decoder(z), mu, logvar



In [4]:
def VAELoss(x, x_hat, mean, log_var, kl_weight=1, reconstruction_weight=1):
    pixel_mse = ((x - x_hat) ** 2)

    reconstruction_loss = pixel_mse.sum(axis=-1).mean()
    # reconstruction_loss = pixel_mse.mean()

    kl = (1 + log_var - mean ** 2 - torch.exp(log_var))

    kl_per_image = -0.5 * torch.sum(kl, dim=-1)

    kl_loss = torch.mean(kl_per_image)
    #print(reconstruction_loss, kl_loss)

    return reconstruction_loss * reconstruction_weight + kl_weight * kl_loss


x = torch.randn(4, 128)
x_hat = torch.randn(4, 128)

mean = torch.randn(4, 2)
log_var = torch.randn(4, 2)

VAELoss(x, x_hat, mean, log_var)


tensor(289.8410)

# Trainning

In [5]:
val_dataset.data.shape[1]

28

In [6]:
from loguru import logger

mlflow.set_experiment("unsupervized_ml_sandbox")

logger.info(f"model_name : VAE")



[32m2025-07-18 08:37:58.773[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mmodel_name : VAE[0m


In [None]:
with mlflow.start_run(run_name="vae"):
    kl_weight = 1
    input_size = val_dataset.data.shape[1] ** 2
    epochs = 1000
    model = VAE(latent_dim=2, input_size=input_size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    val_step = 10

    mlflow.log_param("kl_weight", kl_weight)
    mlflow.log_param("input_size", input_size)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("model", model)
    mlflow.log_param("optimizer", optimizer)
    mlflow.log_param("val_step", val_step)

    torch.set_float32_matmul_precision('high')

    train_loss = []

    encoded_data_per_eval = []
    train_losses = []
    val_losses = []

    model.train()

    train = True

    for epoch in tqdm(range(epochs)):
        train_loss_epoch = []

        for imgs, _ in train_dl:
            imgs = imgs.to(device)
            imgs = imgs.flatten(1)

            encoded, decoded, mu, logvar = model(imgs)

            loss = VAELoss(imgs, decoded, mu, logvar, kl_weight=kl_weight)
            train_loss_epoch.append(loss.item())

            optimizer.zero_grad()
            optimizer.step()
        avg_train_loss = np.mean(train_loss_epoch)

        if epoch % val_step == 0:
            model.eval()
            val_loss_epoch = []

            with torch.no_grad():
                for img, _ in val_dl:
                    img = img.to(device)
                    img = img.flatten(1)
                    encoded, decoded, mu, logvar = model(img)
                    loss = VAELoss(img, decoded, mu, logvar, kl_weight=kl_weight)
                    val_loss_epoch.append(loss.item())

            avg_val_loss = np.mean(val_loss_epoch)

            print(f"Epoch {epoch} — train_loss: {avg_train_loss:.6f} — val_loss: {avg_val_loss:.6f}")
            mlflow.log_metric("val_loss", avg_train_loss, step=epoch)
            mlflow.log_metric("train_loss", avg_val_loss, step=epoch)
            # on sauvegarde les moyennes
            train_losses.append(avg_train_loss)
            val_losses.append(avg_val_loss)

            model.train()





  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 0 — train_loss: 181.805996 — val_loss: 181.868721
Epoch 10 — train_loss: 181.805536 — val_loss: 181.866811
Epoch 20 — train_loss: 181.804031 — val_loss: 181.871133
Epoch 30 — train_loss: 181.806849 — val_loss: 181.870490
Epoch 40 — train_loss: 181.805868 — val_loss: 181.872772
Epoch 50 — train_loss: 181.805077 — val_loss: 181.870129
Epoch 60 — train_loss: 181.804157 — val_loss: 181.868692
Epoch 70 — train_loss: 181.806827 — val_loss: 181.867186
Epoch 80 — train_loss: 181.803819 — val_loss: 181.867885
Epoch 90 — train_loss: 181.804091 — val_loss: 181.874025
Epoch 100 — train_loss: 181.805730 — val_loss: 181.867329
Epoch 110 — train_loss: 181.806418 — val_loss: 181.872432
Epoch 120 — train_loss: 181.806146 — val_loss: 181.867928
Epoch 130 — train_loss: 181.805069 — val_loss: 181.868866
Epoch 140 — train_loss: 181.805601 — val_loss: 181.861818
Epoch 150 — train_loss: 181.804605 — val_loss: 181.873141
Epoch 160 — train_loss: 181.804865 — val_loss: 181.867365
Epoch 170 — train_loss: 1

In [9]:
model

VAE(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
  )
  (fn_mu): Linear(in_features=32, out_features=2, bias=True)
  (fn_logvar): Linear(in_features=32, out_features=2, bias=True)
  (decoder): Sequential(
    (0): Linear(in_features=2, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=784, bias=True)
    (7): Sigmoid()
  )
)