In [1]:
import numpy as np
import pandas as pd
import math

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tqdm.auto import tqdm

In [2]:
# Choose the compute backend in order of preference: CUDA, then Apple's MPS,
# then plain CPU. The conditional expression is evaluated lazily, so the MPS
# backend is only queried when CUDA is unavailable.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if (torch.backends.mps.is_available() and torch.backends.mps.is_built())
    else "cpu"
)

print(f'Actual device: {device}')

Actual device: mps


In [3]:
# Fit a min-max scaler on the Iris feature matrix and keep the scaled copy in X.
# The fitted scaler stays available as `min_max` so values can be mapped back later.
min_max = MinMaxScaler()
X = min_max.fit_transform(load_iris()['data'])

# Preview the first ten scaled rows.
X[:10]

array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
       [0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.05084746, 0.04166667],
       [0.08333333, 0.45833333, 0.08474576, 0.04166667],
       [0.19444444, 0.66666667, 0.06779661, 0.04166667],
       [0.30555556, 0.79166667, 0.11864407, 0.125     ],
       [0.08333333, 0.58333333, 0.06779661, 0.08333333],
       [0.19444444, 0.58333333, 0.08474576, 0.04166667],
       [0.02777778, 0.375     , 0.06779661, 0.04166667],
       [0.16666667, 0.45833333, 0.08474576, 0.        ]])

In [4]:
class IrisDataset(Dataset):
    """Map-style dataset that serves the rows of a feature matrix as float32 tensors."""

    def __init__(self, data):
        """Copy ``data`` (any array-like accepted by ``torch.tensor``) into a float32 tensor."""
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows (size of the first dimension)."""
        n_rows = len(self.data)
        return n_rows

    def __getitem__(self, idx):
        """Return the row(s) selected by ``idx``; there are no labels, only features."""
        row = self.data[idx]
        return row

# Wrap the feature matrix X and serve it in shuffled mini-batches of 32 rows.
dataset = IrisDataset(X)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# Pull a single mini-batch and show its shape and contents.
batch = next(iter(dataloader))
print(f'{batch.shape}\n{batch}')

torch.Size([32, 4])
tensor([[0.2500, 0.8750, 0.0847, 0.0000],
        [0.8333, 0.3750, 0.8983, 0.7083],
        [0.4167, 0.2500, 0.5085, 0.4583],
        [0.3056, 0.5833, 0.1186, 0.0417],
        [0.3056, 0.4167, 0.5932, 0.5833],
        [0.3333, 0.9167, 0.0678, 0.0417],
        [0.2778, 0.7083, 0.0847, 0.0417],
        [0.5556, 0.1250, 0.5763, 0.5000],
        [0.1111, 0.5000, 0.1017, 0.0417],
        [0.3611, 0.4167, 0.5932, 0.5833],
        [0.5833, 0.4583, 0.7627, 0.7083],
        [0.2222, 0.7500, 0.0847, 0.0833],
        [0.3889, 0.3750, 0.5424, 0.5000],
        [0.8611, 0.3333, 0.8644, 0.7500],
        [0.4444, 0.4167, 0.5424, 0.5833],
        [0.6667, 0.4583, 0.7797, 0.9583],
        [0.1389, 0.5833, 0.1017, 0.0417],
        [0.5556, 0.2083, 0.6610, 0.5833],
        [0.6667, 0.4167, 0.7119, 0.9167],
        [0.4722, 0.3750, 0.5932, 0.5833],
        [0.0833, 0.6667, 0.0000, 0.0417],
        [0.4722, 0.5833, 0.5932, 0.6250],
        [0.1111, 0.5000, 0.0508, 0.0417],
        [1.000

In [5]:
# Keep the first mini-batch yielded by the loader around as a sample input.
batch = next(iter(dataloader))

In [91]:
class VariationalAutoEncoder(nn.Module):
    """Fully-connected variational autoencoder with a halving/doubling layout.

    The encoder projects ``input_dim`` features to ``2 ** layers`` units and
    then keeps halving the width while it is larger than ``latent_space_dim``.
    Two linear heads turn the encoder output into the mean and log-variance of
    the latent Gaussian. The decoder starts at ``latent_space_dim``, doubles
    the width while it is smaller than ``2 ** layers``, and finally projects
    back to ``input_dim`` through a sigmoid.

    Args:
        input_dim: Number of input (and reconstructed) features.
        layers: Exponent of the widest hidden layer, which has ``2 ** layers`` units.
        dropout_rate: Dropout probability used after every hidden block.
        latent_space_dim: Size of the latent vector.

    Raises:
        ValueError: If ``layers`` is negative or ``latent_space_dim`` is below 1.
    """

    def __init__(self,
                 input_dim = 4,
                 layers = 5,
                 dropout_rate = 0.5,
                 latent_space_dim = 2):
        super().__init__()

        # A latent size below 1 would make the decoder's doubling loop never
        # terminate (0 * 2 == 0); a negative exponent yields a fractional width.
        if layers < 0 or latent_space_dim < 1:
            raise ValueError(
                "layers must be >= 0 and latent_space_dim must be >= 1, "
                f"got layers={layers}, latent_space_dim={latent_space_dim}"
            )

        max_neurons = 2 ** layers

        # Encoder: input projection followed by successive halvings.
        encoder_layers_list = self._hidden_block(input_dim, max_neurons, dropout_rate)
        current_dim = max_neurons
        while current_dim > latent_space_dim:
            next_dim = current_dim // 2
            encoder_layers_list.extend(
                self._hidden_block(current_dim, next_dim, dropout_rate)
            )
            current_dim = next_dim

        self.Encoder = nn.Sequential(*encoder_layers_list)

        # current_dim is always the encoder's output width. The loop-local
        # next_dim is unbound when the halving loop never runs
        # (2 ** layers <= latent_space_dim), which used to raise UnboundLocalError.
        self.fc_mu = nn.Linear(current_dim, latent_space_dim)
        self.fc_logvar = nn.Linear(current_dim, latent_space_dim)

        # Decoder: successive doublings starting from the latent vector.
        decoder_layers_list = []
        current_dim = latent_space_dim
        while current_dim < max_neurons:
            next_dim = current_dim * 2
            decoder_layers_list.extend(
                self._hidden_block(current_dim, next_dim, dropout_rate)
            )
            current_dim = next_dim

        # Final decoder layer: reconstruct the input; the sigmoid bounds it to (0, 1).
        decoder_layers_list.append(nn.Linear(current_dim, input_dim))
        decoder_layers_list.append(nn.Sigmoid())

        self.Decoder = nn.Sequential(*decoder_layers_list)

    @staticmethod
    def _hidden_block(in_features, out_features, dropout_rate):
        """Return the Linear -> LayerNorm -> ELU -> Dropout modules of one hidden block."""
        return [
            nn.Linear(in_features, out_features),
            nn.LayerNorm(out_features),
            nn.ELU(),
            nn.Dropout(dropout_rate),
        ]

    def reparameterize(self, mu, logvar):
        """
        Sample from the latent space using the reparameterization trick.
        Args:
            mu (torch.Tensor): Mean of the latent distribution (batch_size x latent_dim).
            logvar (torch.Tensor): Log-variance of the latent distribution (batch_size x latent_dim).
        Returns:
            z (torch.Tensor): Sample from the latent space (batch_size x latent_dim).
        """
        std = torch.exp(0.5 * logvar)  # standard deviation from the log-variance
        epsilon = torch.randn_like(std)  # noise drawn from a standard normal
        z = mu + epsilon * std  # shift and scale the noise; keeps mu/logvar differentiable
        return z

    def forward(self, x):
        """Encode ``x``, sample a latent vector, and decode it.

        Returns:
            tuple: ``(mu, log_var, reconstruction)`` in that order.
        """
        x = self.Encoder(x)
        mu, log_var = self.fc_mu(x), self.fc_logvar(x)
        z = self.reparameterize(mu, log_var)
        reconstruction = self.Decoder(z)

        return mu, log_var, reconstruction
        
        
# Build the model (widest layer 2 ** 5 = 32 units, 2-D latent space, 10% dropout)
# and smoke-test one forward pass on the sample batch; the cell displays the
# (mu, log_var, reconstruction) tuple.
vae = VariationalAutoEncoder(latent_space_dim=2, layers=5, dropout_rate=0.1)
vae(batch)

(tensor([[-0.1180, -1.0537],
         [-1.2860,  0.7071],
         [-1.2796,  0.7008],
         [-0.1179, -1.0540],
         [-0.1179, -1.0539],
         [-0.1150, -0.4757],
         [-0.1179, -1.0541],
         [-0.1179, -1.0541],
         [-0.2086, -0.8185],
         [-0.1179, -1.0541],
         [-0.2683, -0.3688],
         [-0.1179, -1.0542],
         [-0.1186, -1.0522],
         [-0.1179, -1.0542],
         [-0.1179, -1.0542],
         [-0.5728, -0.7368],
         [-0.1180, -1.0538],
         [-0.1179, -1.0542],
         [-0.1181, -1.0534],
         [-0.1179, -1.0542],
         [-0.1179, -1.0542],
         [-0.1179, -1.0541],
         [-1.2877,  0.7089],
         [-0.1179, -1.0541],
         [-0.1179, -1.0542],
         [-0.1150, -0.4757],
         [-1.2877,  0.7088],
         [-1.2470,  0.6680],
         [-0.1182, -1.0531],
         [-0.1179, -1.0542],
         [-1.2871,  0.7082],
         [-0.1183, -1.0529]], grad_fn=<AddmmBackward0>),
 tensor([[ 1.0137,  0.7909],
         [ 0.06

In [92]:
# Initial learning rate; passed to the AdamW optimizer created later in this cell.
lr = 1e-5

def loss_function(recon_x, x, mu, logvar):
    """Return the VAE objective: summed squared error plus KL divergence.

    Args:
        recon_x: Reconstruction produced by the decoder.
        x: Original input, same shape as ``recon_x``.
        mu: Mean of the latent distribution.
        logvar: Log-variance of the latent distribution.

    Returns:
        Scalar tensor ``sum((recon_x - x)^2) + KL``, where
        ``KL = -0.5 * sum(1 + logvar - mu^2 - exp(logvar))`` is the divergence
        between the latent Gaussian and a standard normal. Both terms are sums
        over every element, not means.
    """
    # Reconstruction term: squared error accumulated over all elements.
    squared_error = F.mse_loss(recon_x, x, reduction='sum')

    # Regularisation term: closed-form KL against N(0, I).
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_penalty = -0.5 * torch.sum(kl_terms)

    return squared_error + kl_penalty

# AdamW over every parameter of the VAE, starting from the base learning rate `lr`.
optimizer = optim.AdamW(vae.parameters(), lr=lr)

# Cosine learning-rate schedule with T_max=5000.
# NOTE(review): eta_min (1e-3) is larger than the initial lr (1e-5), so stepping
# this scheduler would raise the learning rate instead of annealing it — confirm intent.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5000, eta_min=1e-3)

In [93]:
# Number of full passes over the dataset.
epochs = 10000

for epoch in tqdm(range(epochs), desc='Training'):
    vae.train()  # make sure dropout is active for this epoch

    # Running total of the per-batch losses (each is a sum, not a mean).
    train_loss = 0
    for features in dataloader:
        features = features.float()

        # Standard optimisation step: reset grads, forward, loss, backward, update.
        optimizer.zero_grad()
        mu, log_var, rec = vae(features)
        loss = loss_function(rec, features, mu, log_var)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # NOTE: the learning-rate scheduler is not stepped in this loop, so the
    # optimizer keeps its initial learning rate throughout.

    # Batch losses are sums over samples, so dividing by the dataset size
    # gives an average loss per sample.
    avg_train_loss = train_loss / len(dataloader.dataset)

    # Report every 100th epoch, skipping epoch 0.
    should_report = epoch != 0 and epoch % 100 == 0
    if should_report:
        print(f'Epoch [{epoch}/{epochs}], Loss: {avg_train_loss:.4f}')

Training:   0%|          | 0/10000 [00:00<?, ?it/s]

Epoch [100/10000], Loss: 1.3016
Epoch [200/10000], Loss: 1.2669
Epoch [300/10000], Loss: 1.1849
Epoch [400/10000], Loss: 1.1557
Epoch [500/10000], Loss: 1.1340
Epoch [600/10000], Loss: 1.1180
Epoch [700/10000], Loss: 1.0517
Epoch [800/10000], Loss: 1.0766
Epoch [900/10000], Loss: 1.0208
Epoch [1000/10000], Loss: 0.9848
Epoch [1100/10000], Loss: 0.9674
Epoch [1200/10000], Loss: 0.9458
Epoch [1300/10000], Loss: 0.9213
Epoch [1400/10000], Loss: 0.8632
Epoch [1500/10000], Loss: 0.8451
Epoch [1600/10000], Loss: 0.8272
Epoch [1700/10000], Loss: 0.8006
Epoch [1800/10000], Loss: 0.7781
Epoch [1900/10000], Loss: 0.7684
Epoch [2000/10000], Loss: 0.7569
Epoch [2100/10000], Loss: 0.7158
Epoch [2200/10000], Loss: 0.7144
Epoch [2300/10000], Loss: 0.6812
Epoch [2400/10000], Loss: 0.6486
Epoch [2500/10000], Loss: 0.6271
Epoch [2600/10000], Loss: 0.6213
Epoch [2700/10000], Loss: 0.6020
Epoch [2800/10000], Loss: 0.5818
Epoch [2900/10000], Loss: 0.5708
Epoch [3000/10000], Loss: 0.5608
Epoch [3100/10000],

In [32]:
# Show the first 32 rows of the raw (unscaled) Iris feature matrix.
load_iris().data[:32]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4]])

In [36]:
# Run inference with dropout disabled and without building an autograd graph.
# The training loop leaves the model in train mode, where dropout would
# randomly perturb the reconstructions.
vae.eval()
with torch.no_grad():
    # The forward pass returns (mu, log_var, reconstruction); keep the reconstruction.
    reconstruction = vae(torch.randn(32, 4))[2]

# NOTE(review): this encodes input-space Gaussian noise. To draw new samples
# from the prior instead, decode z ~ N(0, I) with vae.Decoder — confirm intent.

# Map the model outputs back to the original feature units with the fitted scaler.
min_max.inverse_transform(reconstruction.cpu().numpy())

array([[5.86594  , 3.0473926, 3.80724  , 1.2023778],
       [5.840497 , 3.046419 , 3.7648084, 1.1697036],
       [5.90191  , 3.1055648, 3.7460694, 1.2322149],
       [5.8999605, 3.052249 , 3.7557404, 1.2146475],
       [5.8790903, 3.062522 , 3.8294742, 1.2064927],
       [5.9214964, 3.0560744, 3.7653651, 1.1950041],
       [5.804203 , 3.0792027, 3.8179514, 1.1977283],
       [5.879549 , 3.0820544, 3.665507 , 1.2187321],
       [5.884159 , 3.0740461, 3.8082573, 1.205911 ],
       [5.903711 , 3.0609593, 3.6797545, 1.2421105],
       [5.8490405, 3.0820832, 3.6767902, 1.2125052],
       [5.8668733, 3.0659554, 3.7314997, 1.195871 ],
       [5.852328 , 3.0316799, 3.8162563, 1.2233747],
       [5.932607 , 3.0657039, 3.8027287, 1.1813002],
       [5.8609977, 3.0715013, 3.8723722, 1.1570592],
       [5.8390565, 3.0444613, 3.749691 , 1.2066691],
       [5.84271  , 3.0425165, 3.7996316, 1.1879617],
       [5.9089704, 3.0193138, 3.8356688, 1.1979198],
       [5.8398366, 3.0700977, 3.8445954, 1.257