In [1]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
from torch.distributions import Categorical
import torch.nn.functional as F
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
import pandas as pd
from torch.utils.data import Dataset, DataLoader

In [1]:
import math

In [11]:
math.ceil(512 * (7/8))

448

# Metadata e inizializzazione dataset

In [2]:
torch.manual_seed(0)

batch_size = 128
temperature = 1.0
seed = 0
log_interval = 600
log_interval_writer = 100
hard = False
latent_dim = 15
categorical_dim = 2
temp_min = 0.5
ANNEAL_RATE = 0.00003
n_start = 512
num_initial_bits = 512

g = np.log(2)

if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Utilizzo Apple Silicon GPU (MPS)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Utilizzo NVIDIA GPU (CUDA)")
else:
    device = torch.device("cpu")
    print("Utilizzo la CPU")


torch.manual_seed(seed)
if device.type == "cuda": 
    torch.cuda.manual_seed(seed)
elif device.type == "mps": 
    torch.mps.manual_seed(seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if device.type == "cuda" or device.type == "mps" else {} # pin_memory può essere utile anche per MPS

Utilizzo Apple Silicon GPU (MPS)


# Gumbel-softmax

In [3]:

def sample_gumbel(shape, eps=1e-20):
    # sample from a uniform distribution
    U = torch.rand(shape)
    return -torch.log(-torch.log(U.to(device) + eps) + eps)

def gumbel_softmax_sample(logits, temperature):
    y = logits + sample_gumbel(logits.size())
    return F.softmax(y / temperature, dim=-1)


def gumbel_softmax(logits, temperature, hard=False):
    y = gumbel_softmax_sample(logits, temperature)
    
    if not hard:
        return y.view(-1, latent_dim * categorical_dim)
    
    shape = y.size()
    _, ind = y.max(dim=-1)
    y_hard = torch.zeros_like(y).view(-1, shape[-1])
    y_hard.scatter_(1, ind.view(-1, 1), 1)
    y_hard = y_hard.view(*shape)
    # skip the gradient of y_hard
    y_hard = (y_hard - y).detach() + y 
    return y_hard.view(-1, latent_dim * categorical_dim)



# Class VAE

In [12]:
class VAE_model(nn.Module):
    def __init__(self):
        super(VAE_model, self).__init__()
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, latent_dim * categorical_dim)
        self.fc4 = nn.Linear(latent_dim * categorical_dim, 128)
        self.fc5 = nn.Linear(128, 256)
        self.fc6 = nn.Linear(256, 512)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def sample_img(self, img, temp, random=True):
        with torch.no_grad():
            logits_z = self.encode(img.view(-1, num_initial_bits))
            logits_z = logits_z.view(-1, latent_dim, categorical_dim)
            if random:
                latent_z = gumbel_softmax(logits_z, temp, True)
            else:
                latent_z = logits_z.view(-1, latent_dim * categorical_dim)
            logits_x = self.decode(latent_z)
            dist_x = torch.distributions.Bernoulli(probs=logits_x)
            sampled_img = dist_x.sample()
        return sampled_img

    def encode(self, x):
        h1 = self.relu(self.fc1(x))
        h2 = self.relu(self.fc2(h1))
        return self.relu(self.fc3(h2))

    def decode(self, z):
        h4 = self.relu(self.fc4(z))
        h5 = self.relu(self.fc5(h4))
        return self.sigmoid(self.fc6(h5))

    def forward(self, data, temp, hard):
        logits_z = self.encode(data.view(-1, num_initial_bits))
        logits_z = logits_z.view(-1, latent_dim, categorical_dim)

        probs_z = F.softmax(logits_z, dim=-1)
        posterior_distrib = torch.distributions.Categorical(probs=probs_z)
        probs_prior = torch.ones_like(logits_z)/categorical_dim
        prior_distrib = torch.distributions.Categorical(probs=probs_prior)

        latent_z = gumbel_softmax(logits_z, temp)
        latent_z = latent_z.view(-1, latent_dim * categorical_dim)

        probs_x = self.decode(latent_z)
        dist_x = torch.distributions.Bernoulli(probs=probs_x, validate_args=False)

        rec_loss = dist_x.log_prob(data.view(-1, num_initial_bits)).sum(dim=-1)
        logits_z_log = F.log_softmax(logits_z, dim=-1)

        KL = (posterior_distrib.probs * (logits_z_log - prior_distrib.probs.log())).view(-1, latent_dim * categorical_dim).sum(dim=-1)
        elbo = rec_loss - KL
        loss = -elbo.mean()
        return loss, KL.mean(), rec_loss.mean()


# Train function

In [None]:
def train(model, optimizer, epochs):
    global_batch_idx = 0
    
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        temp = temperature

        for batch_idx, data in enumerate(train_loader):
            global_batch_idx += 1
            # Sposta i dati sul device corretto
            data = data['example'].to(device)
            optimizer.zero_grad()
            loss, KL, rec_loss = model(data, temp, hard)
            loss.backward()
            train_loss += loss.item() * len(data)
            optimizer.step()

            if batch_idx % 100 == 1:
                temp = np.maximum(temp * np.exp(-ANNEAL_RATE * batch_idx), temp_min)

            '''
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                        100. * batch_idx / len(train_loader),
                        loss.item()))
                print("Temperature : ", temp)
            '''

            if global_batch_idx % log_interval_writer == 0:
                writer.add_scalar('KL/Train', KL, global_step=global_batch_idx)
                writer.add_scalar('rec_loss/Train', rec_loss, global_step=global_batch_idx)


        writer.add_scalar('Loss/Train', train_loss/len(train_loader.dataset), global_step=epoch)



        '''
        # Sposta l'immagine campionata sulla CPU per la visualizzazione con matplotlib
        sampled = model.sample_img(data[0].view(-1, 28*28), temp).view(28, 28).detach().cpu()
        fig, axs = plt.subplots(1, 2, figsize=(6,4))
        fig.suptitle('Reconstructed vs Real')
        axs[0].imshow(sampled.reshape(28,28))
        axs[0].axis('off')
        axs[1].imshow(data[0].reshape(28,28).detach().cpu())
        axs[1].axis('off')
        plt.show()
        '''

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_loader.dataset)))
        
        # Validation
        
        model.eval()
        val_loss_sum = 0.0
        with torch.no_grad():
            for batch_idx, data in enumerate(val_loader):
                data = data['example'].to(device)
                loss, KL, rec_loss = model(data, temp, hard=True)
                val_loss_sum += loss.item() * len(data)

        writer.add_scalar('Loss/Validation', val_loss_sum/len(val_loader.dataset), global_step=epoch)

        # Log histogram of weights and gradients
        for name, param in model.named_parameters():
            writer.add_histogram(f'Weights/{name}', param, global_step=epoch)
            if param.grad is not None:
                writer.add_histogram(f'Grads/{name}', param.grad, global_step=epoch)

    writer.close()
    print("Training completato e dati scritti su tensorboard")


# Dataset class

In [2]:
class Dataset_HFM(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.data['examples'] = self.data['examples'].apply(lambda x: torch.tensor(np.fromstring(x.strip("[]"), sep=' '), dtype=torch.float32))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        example = self.data.iloc[idx,1]
        sample = {'example': example}

        if self.transform:
            sample = self.transform(sample)

        return sample

In [7]:
writer = SummaryWriter(log_dir='runs/discrete_VAE_original_HFM_train/_0')

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.optim as optim

# Qui importi i tuoi moduli personalizzati
from dataset import Dataset_HFM  # se li hai in moduli separati
from model import VAE_model      # idem
from train import train          # funzione di training

# 🔍 Rilevamento automatico del device
def get_device():
    if torch.backends.mps.is_available():
        print("✅ Using MPS (Apple Silicon)")
        return torch.device("mps")
    elif torch.cuda.is_available():
        print(f"✅ Using CUDA (GPU {torch.cuda.get_device_name(0)})")
        return torch.device("cuda")
    else:
        print("⚠️ Using CPU (no GPU available)")
        return torch.device("cpu")

# ⚙️ Impostazione sicura dei DataLoader kwargs
def get_dataloader_kwargs(device):
    if device.type == "mps":
        return {'num_workers': 0, 'pin_memory': False}
    elif device.type == "cuda":
        return {'num_workers': 4, 'pin_memory': True}  # valori tipici per CUDA
    else:
        return {'num_workers': 2, 'pin_memory': False} # fallback per CPU

if __name__ == '__main__':
    # 📦 Hyperparametri
    batch_size = 64
    epochs = 20

    # 📱 Device & loader options
    device = get_device()
    kwargs = get_dataloader_kwargs(device)

    # 📚 Dataset & DataLoader
    dataset_HFM = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_60000.csv',
                              root_dir='data')
    train_loader = DataLoader(dataset_HFM, batch_size=batch_size, shuffle=True, **kwargs)

    dataset_HFM_val = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_10000.csv',
                                  root_dir='data')
    val_loader = DataLoader(dataset_HFM_val, batch_size=batch_size, shuffle=False, **kwargs)

    # 🧠 Modello & ottimizzatore
    model = VAE_model().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # 🚀 Training
    train(model, optimizer, train_loader, val_loader, device=device, epochs=epochs)

# Qui importi i tuoi moduli personalizzati
from dataset import Dataset_HFM  # se li hai in moduli separati
from model import VAE_model      # idem
from train import train          # funzione di training

# 🔍 Rilevamento automatico del device
def get_device():
    if torch.backends.mps.is_available():
        print("✅ Using MPS (Apple Silicon)")
        return torch.device("mps")
    elif torch.cuda.is_available():
        print(f"✅ Using CUDA (GPU {torch.cuda.get_device_name(0)})")
        return torch.device("cuda")
    else:
        print("⚠️ Using CPU (no GPU available)")
        return torch.device("cpu")

# ⚙️ Impostazione sicura dei DataLoader kwargs
def get_dataloader_kwargs(device):
    if device.type == "mps":
        return {'num_workers': 0, 'pin_memory': False}
    elif device.type == "cuda":
        return {'num_workers': 4, 'pin_memory': True}  # valori tipici per CUDA
    else:
        return {'num_workers': 2, 'pin_memory': False} # fallback per CPU

if __name__ == '__main__':
    # 📦 Hyperparametri
    batch_size = 64
    epochs = 20

    # 📱 Device & loader options
    device = get_device()
    kwargs = get_dataloader_kwargs(device)

    # 📚 Dataset & DataLoader
    dataset_HFM = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_60000.csv',
                              root_dir='data')
    train_loader = DataLoader(dataset_HFM, batch_size=batch_size, shuffle=True, **kwargs)

    dataset_HFM_val = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_10000.csv',
                                  root_dir='data')
    val_loader = DataLoader(dataset_HFM_val, batch_size=batch_size, shuffle=False, **kwargs)

    # 🧠 Modello & ottimizzatore
    model = VAE_model().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # 🚀 Training
    train(model, optimizer, train_loader, val_loader, device=device, epochs=epochs)


In [None]:
if __name__ == '__main__':
    kwargs = {'num_workers': 0, 'pin_memory': False} if torch.backends.mps.is_available() else {}
    if torch.backends.mps.is_available():
        print("Running on Apple Silicon (MPS). num_workers set to 0 to avoid pickling issues.")
        kwargs = {'num_workers': 0, 'pin_memory': False} # pin_memory a False per MPS con num_workers=0


    # Inizializzazione del dataset e dei DataLoader
    dataset_HFM = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_60000.csv',
                            root_dir='data')
    train_loader = DataLoader(
        dataset_HFM,
        batch_size=batch_size,
        shuffle=True,
        **kwargs
    )
    dataset_HFM_val = Dataset_HFM(csv_file='data/feat_512_g_log2_numex_10000.csv',
                                root_dir='data')
    val_loader = DataLoader(
        dataset_HFM_val, # Importante: usa dataset_HFM_val qui, non dataset_HFM
        batch_size=batch_size,
        shuffle=False,
        **kwargs
    )

    my_model = VAE_model()
    my_model.to(device)
    optimizer = optim.Adam(my_model.parameters(), lr=1e-3)

    # Chiamata alla funzione train
    train(my_model, optimizer, epochs=20)


Running on Apple Silicon (MPS). num_workers set to 0 to avoid pickling issues.
====> Epoch: 0 Average loss: 259.4679
====> Epoch: 1 Average loss: 257.0390
====> Epoch: 2 Average loss: 256.9899
====> Epoch: 3 Average loss: 256.9596
====> Epoch: 4 Average loss: 256.9341
====> Epoch: 5 Average loss: 256.8949
====> Epoch: 6 Average loss: 256.8497
====> Epoch: 7 Average loss: 256.8235
====> Epoch: 8 Average loss: 256.7887
====> Epoch: 9 Average loss: 256.7370
====> Epoch: 10 Average loss: 256.6819
====> Epoch: 11 Average loss: 256.6329
====> Epoch: 12 Average loss: 256.6014
====> Epoch: 13 Average loss: 256.5752
====> Epoch: 14 Average loss: 256.5535
====> Epoch: 15 Average loss: 256.5384
====> Epoch: 16 Average loss: 256.5250
====> Epoch: 17 Average loss: 256.5127
====> Epoch: 18 Average loss: 256.5019
====> Epoch: 19 Average loss: 256.4953
Training completato e dati scritti su tensorboard


In [26]:

my_model = VAE_model().to(device)

optimizer = optim.Adam(my_model.parameters(), lr=1e-3)

train(my_model, optimizer, epochs=15)



====> Epoch: 0 Average loss: 195.8797
====> Epoch: 1 Average loss: 161.6164
====> Epoch: 2 Average loss: 145.2841
====> Epoch: 3 Average loss: 137.5701
====> Epoch: 4 Average loss: 133.0866
====> Epoch: 5 Average loss: 129.0748
====> Epoch: 6 Average loss: 126.0739
====> Epoch: 7 Average loss: 123.8237
====> Epoch: 8 Average loss: 121.8725
====> Epoch: 9 Average loss: 120.0578
====> Epoch: 10 Average loss: 118.0473
====> Epoch: 11 Average loss: 116.4868
====> Epoch: 12 Average loss: 115.2364
====> Epoch: 13 Average loss: 114.0681
====> Epoch: 14 Average loss: 112.9505
Training completato e dati scritti su tensorboard
