In [12]:
import numpy as np
import pandas as pd
import math

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, Dataset

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from tqdm.auto import tqdm

In [2]:
# Pick the compute device in priority order: CUDA first, then the MPS
# backend, and finally the CPU.  The conditional expression is evaluated
# lazily, so the MPS probes only run when CUDA is unavailable.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() and torch.backends.mps.is_built()
    else "cpu"
)

print(f'Actual device: {device}')

Actual device: mps


In [3]:
# Load the iris feature matrix and rescale it with a MinMaxScaler fitted on
# that same matrix; the scaler is kept in `min_max` for later reuse.
min_max = MinMaxScaler()
X = min_max.fit_transform(load_iris()['data'])
X[:10]

array([[0.22222222, 0.625     , 0.06779661, 0.04166667],
       [0.16666667, 0.41666667, 0.06779661, 0.04166667],
       [0.11111111, 0.5       , 0.05084746, 0.04166667],
       [0.08333333, 0.45833333, 0.08474576, 0.04166667],
       [0.19444444, 0.66666667, 0.06779661, 0.04166667],
       [0.30555556, 0.79166667, 0.11864407, 0.125     ],
       [0.08333333, 0.58333333, 0.06779661, 0.08333333],
       [0.19444444, 0.58333333, 0.08474576, 0.04166667],
       [0.02777778, 0.375     , 0.06779661, 0.04166667],
       [0.16666667, 0.45833333, 0.08474576, 0.        ]])

In [4]:
class IrisDataset(Dataset):
    """Map-style dataset that serves entries of a feature matrix as float32 tensors."""

    def __init__(self, data):
        """Convert ``data`` to a float32 tensor once and keep it on the instance."""
        features = torch.tensor(data, dtype=torch.float32)
        self.data = features

    def __len__(self):
        """Return the length of the stored tensor along its first dimension."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the entry of the stored tensor selected by ``idx``."""
        sample = self.data[idx]
        return sample

# Wrap the feature matrix in a dataset and serve it in shuffled mini-batches
# of 32 entries.
dataset = IrisDataset(X)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# Pull a single mini-batch from the loader to inspect its shape and contents.
batch = next(iter(dataloader))
print(f'{batch.shape}\n{batch}')

torch.Size([32, 4])
tensor([[0.6667, 0.5417, 0.7966, 1.0000],
        [0.1111, 0.5000, 0.0508, 0.0417],
        [0.6667, 0.4167, 0.7119, 0.9167],
        [0.2500, 0.2917, 0.4915, 0.5417],
        [0.1944, 0.5833, 0.1017, 0.1250],
        [0.5000, 0.4167, 0.6610, 0.7083],
        [0.1667, 0.4583, 0.0847, 0.0000],
        [0.3056, 0.7917, 0.0508, 0.1250],
        [0.9444, 0.3333, 0.9661, 0.7917],
        [0.3333, 0.1667, 0.4576, 0.3750],
        [0.3333, 0.2500, 0.5763, 0.4583],
        [0.2222, 0.7500, 0.1017, 0.0417],
        [0.1667, 0.2083, 0.5932, 0.6667],
        [0.8611, 0.3333, 0.8644, 0.7500],
        [0.3889, 0.3333, 0.5254, 0.5000],
        [0.1667, 0.4167, 0.0678, 0.0417],
        [0.6111, 0.5000, 0.6949, 0.7917],
        [0.7222, 0.4583, 0.6949, 0.9167],
        [0.1944, 0.4167, 0.1017, 0.0417],
        [0.0278, 0.5000, 0.0508, 0.0417],
        [0.5556, 0.5417, 0.6271, 0.6250],
        [0.7222, 0.4583, 0.7458, 0.8333],
        [0.3333, 0.1250, 0.5085, 0.5000],
        [0.305

In [98]:
# Drain the loader once; when the loop ends, `batch` holds the last
# mini-batch the loader produced.
for batch in dataloader:
    i = batch  # keep the loop alias `i` bound to the same tensor

In [136]:
class VariationalAutoEncoder(nn.Module):
    """Encoder half of a variational auto-encoder.

    The encoder is a stack of ``nn.Linear`` layers.  Hidden widths start at
    ``encoder_max_width`` and shrink by ``int(encoder_max_width / encoder_layers)``
    per layer for as long as they stay above ``latent_space_dim``.  Every
    linear layer except the last one is followed by ``nn.LeakyReLU`` and,
    when ``dropout`` is true, by ``nn.Dropout(dropout_rate)``.  Two linear
    heads then map the encoder output to the mean and the log-variance of the
    latent distribution.  No decoder is defined in this class.

    Args:
        enc_input_dim: Number of input features.
        encoder_layers: Divisor used to derive the width step between
            consecutive hidden layers.
        encoder_max_width: Width of the first hidden layer.
        dropout: Whether to add dropout after each hidden activation.
        dropout_rate: Probability passed to ``nn.Dropout``.
        latent_space_dim: Size of the latent space; also the exclusive lower
            bound for the hidden widths.

    Raises:
        ZeroDivisionError: If ``encoder_layers`` is zero.
        ValueError: If the derived width step is zero.
    """

    def __init__(self,
                 enc_input_dim = 4,
                 encoder_layers = 5,
                 encoder_max_width = 512,
                 dropout = True,
                 dropout_rate = 0.5,
                 latent_space_dim = 4):
        super().__init__()

        width_step = int(encoder_max_width / encoder_layers)
        if width_step == 0:
            # range() would reject a zero step anyway; fail with a message
            # that names the offending arguments instead.
            raise ValueError(
                "encoder_max_width / encoder_layers must be at least 1 "
                f"(got {encoder_max_width} / {encoder_layers})"
            )

        # Input width followed by the shrinking hidden widths.
        layer_widths = [enc_input_dim,
                        *range(encoder_max_width, latent_space_dim, -width_step)]

        # Separate local list so the `encoder_layers` argument is not shadowed.
        modules = []
        last_linear = len(layer_widths) - 2

        for i, (input_dim, output_dim) in enumerate(zip(layer_widths[:-1],
                                                        layer_widths[1:])):
            modules.append(nn.Linear(input_dim, output_dim))

            # The final linear layer feeds the two heads directly, without
            # activation or dropout.
            if i != last_linear:
                modules.append(nn.LeakyReLU())
                if dropout:
                    modules.append(nn.Dropout(dropout_rate))

        self.Encoder = nn.Sequential(*modules)

        # Read the encoder output width from the width list rather than from
        # the loop variable: when no hidden width is generated the loop body
        # never runs, the encoder is an empty (pass-through) Sequential, and
        # the heads must consume the raw input width.
        encoder_output_dim = layer_widths[-1]
        self.fc_mu = nn.Linear(encoder_output_dim, latent_space_dim)
        self.fc_logvar = nn.Linear(encoder_output_dim, latent_space_dim)

    def reparameterize(self, mu, logvar):
        """
        Apply the reparameterization trick to sample from the latent space.
        Args:
            mu (torch.Tensor): Mean of the latent distribution (batch_size x latent_dim).
            logvar (torch.Tensor): Log-variance of the latent distribution (batch_size x latent_dim).
        Returns:
            z (torch.Tensor): Sample from the latent space (batch_size x latent_dim).
        """
        std = torch.exp(0.5 * logvar)  # Standard deviation from the log-variance
        epsilon = torch.randn_like(std)  # Noise drawn from a standard normal distribution
        z = mu + epsilon * std  # Shift and scale the noise
        return z

    def forward(self, x):
        """Encode ``x`` and sample from the resulting latent distribution.

        Returns:
            A 4-tuple ``(mu, log_var, encoded, sample_shape)`` holding the
            latent mean, the latent log-variance, the encoder output and the
            shape of the sampled latent tensor.
        """
        x = self.Encoder(x)
        mu, log_var = self.fc_mu(x), self.fc_logvar(x)
        trick = self.reparameterize(mu, log_var)

        # NOTE(review): the last element is the *shape* of the latent sample,
        # not the sample itself; this looks like a debugging leftover, so
        # confirm before relying on it.
        return mu, log_var, x, trick.shape
        
        
# Build the model with its default hyper-parameters and run one forward pass
# on `batch` as a smoke test; the returned tuple is shown as the cell output.
# Neither the model nor `batch` is moved to `device` in this cell.
vae = VariationalAutoEncoder()
vae(batch)

(tensor([[-7.7135e-03, -5.9712e-02, -1.2771e-01,  1.5375e-01],
         [-1.4391e-04, -4.3714e-02, -1.1657e-01,  1.4200e-01],
         [ 2.3427e-04,  7.2498e-03, -1.2600e-01,  1.0298e-01],
         [-1.3898e-02, -4.1719e-02, -9.8445e-02,  1.4800e-01],
         [ 1.1355e-01, -3.0341e-02, -1.0669e-01,  1.2169e-01],
         [ 4.1417e-02, -3.6572e-02, -8.9376e-02,  9.5198e-02],
         [-8.0407e-03, -8.0758e-02, -1.4783e-01,  1.3479e-01],
         [ 1.6625e-02, -3.0793e-02, -1.2535e-01,  1.2252e-01],
         [ 2.2204e-02, -1.4559e-02, -1.2521e-01,  1.0978e-01],
         [ 9.5967e-03, -5.5966e-02, -8.8129e-02,  1.3020e-01],
         [ 2.0876e-02, -1.7032e-02, -1.6937e-01,  1.3410e-01],
         [ 2.3369e-02, -8.9876e-03, -1.4439e-01,  9.6828e-02],
         [ 7.1979e-02, -5.0008e-02, -1.1610e-01,  1.5891e-01],
         [-1.8326e-02, -3.1170e-02, -1.1865e-01,  1.0396e-01],
         [ 3.4209e-02,  2.2012e-02, -1.4198e-01,  1.0398e-01],
         [ 2.4021e-02, -7.9709e-02, -1.1930e-01,  8.715