In [2]:
# Environment check: restrict this process to GPU 0 and report what PyTorch sees.
import os

# Set the device mask *before* importing torch so it is guaranteed to be in
# place before any CUDA initialisation can happen.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch  # noqa: E402  (intentionally imported after the env var is set)

print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES"))
# NOTE(review): a value of None here presumably means a CPU-only PyTorch build,
# in which case the device mask above has no effect - confirm the install.
print("Built with CUDA:", torch.version.cuda)
print("CUDA available?:", torch.cuda.is_available())
print("Device count:", torch.cuda.device_count())

CUDA_VISIBLE_DEVICES: 0
Built with CUDA: None
CUDA available?: False
Device count: 0


In [1]:
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch import nn
from semisupervised import SemiSupervisedAutoEncoderOptions
from dimensionality_reduction import DimensionalityReductionAAE

In [3]:
from torch.utils.data import random_split

def _stack_samples(dataset):
    """Materialise ``dataset`` into an ``(inputs, labels)`` tensor pair in one pass.

    Iterating once matters because every ``dataset[i]`` access re-runs the
    transform pipeline; the previous two-comprehension form did that twice.
    """
    inputs, labels = [], []
    for sample, label in dataset:
        inputs.append(sample)
        labels.append(label)
    return torch.stack(inputs), torch.tensor(labels)


def configure_mnist(batch_size=100, val_size=10000, seed=None):
    """Download MNIST and build train/validation/test tensors and DataLoaders.

    Every image is converted with ``ToTensor`` and flattened to a 1-D vector.
    The official training set is randomly split into a training part and a
    validation part of ``val_size`` samples; the official test set is kept as is.

    Args:
        batch_size: Batch size used by all three DataLoaders.
        val_size: Number of training-set samples held out for validation.
            Must satisfy ``0 < val_size < len(training set)``.
        seed: Optional integer. When given, the train/validation split and the
            shuffling order of the training loader are driven by a dedicated
            ``torch.Generator`` seeded with this value, so they are
            reproducible. ``None`` (default) keeps the previous behaviour of
            using the global RNG.

    Returns:
        A 9-tuple ``(X_train, X_val, X_test, Y_train, Y_val, Y_test,
        train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: If ``val_size`` would leave the training or validation
            split empty.
    """
    # transform: ToTensor + flatten. torch.flatten is used instead of a lambda
    # so the transform stays picklable if worker processes are ever enabled.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(torch.flatten),
    ])

    # full train + test datasets
    full_train = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_ds = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

    # Fail early with a clear message instead of an obscure error further down.
    if not 0 < val_size < len(full_train):
        raise ValueError(
            f"val_size must be in (0, {len(full_train)}), got {val_size}"
        )

    # split full_train -> train_ds (rest) and val_ds (val_size)
    train_size = len(full_train) - val_size
    generator = None if seed is None else torch.Generator().manual_seed(seed)
    split_kwargs = {} if generator is None else {"generator": generator}
    train_ds, val_ds = random_split(full_train, [train_size, val_size], **split_kwargs)

    # DataLoaders: only the training loader shuffles.
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, generator=generator)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

    # Raw tensors for code that wants the whole split in memory at once.
    X_train, Y_train = _stack_samples(train_ds)
    X_val, Y_val = _stack_samples(val_ds)
    X_test, Y_test = _stack_samples(test_ds)

    return (X_train, X_val, X_test,
            Y_train, Y_val, Y_test,
            train_loader, val_loader, test_loader)

In [4]:
# Build the MNIST tensors and loaders with the default batch and validation sizes.
(
    X_train, X_val, X_test,
    Y_train, Y_val, Y_test,
    train_loader, val_loader, test_loader,
) = configure_mnist()

# Sanity check on the label range of the training split (largest, then smallest).
print(Y_train.max(), Y_train.min(), sep="\n")

tensor(9)
tensor(0)


In [5]:
# ---- Run length and data sizes ----
NUM_EPOCHS = 300
BATCH_SIZE = 100
INPUT_DIM = 784  # length of one flattened input vector

# ---- Network widths ----
AE_HIDDEN = DC_HIDDEN = 1000  # autoencoder / discriminator hidden sizes

# ---- Latent space ----
LATENT_DIM_CAT = LATENT_DIM_STYLE = 10  # categorical / style latent sizes
PRIOR_STD = 1.0

# ---- Losses and initial learning rates ----
# The trailing comments record the alternative values left by the author.
recon_loss = nn.MSELoss()
init_recon_lr = 1e-3  # alt: 0.01

semi_sup_loss = nn.CrossEntropyLoss()
init_semi_sup_lr = 1e-3  # alt: 0.1

# Generator and discriminator start from the same rate.
init_disc_lr = 1e-3  # alt: 0.1
init_gen_lr = init_disc_lr

# ---- Decoder output ----
use_decoder_sigmoid = True

In [7]:
# Collect every hyperparameter from the config cell into one options object,
# then build the model from it and print its layer structure.
options = SemiSupervisedAutoEncoderOptions(
    # -- architecture --
    input_dim=INPUT_DIM,
    ae_hidden_dim=AE_HIDDEN,
    disc_hidden_dim=DC_HIDDEN,
    latent_dim_categorical=LATENT_DIM_CAT,
    latent_dim_style=LATENT_DIM_STYLE,
    use_decoder_sigmoid=use_decoder_sigmoid,
    # -- loss functions --
    recon_loss_fn=recon_loss,
    semi_supervised_loss_fn=semi_sup_loss,
    # -- initial learning rates --
    init_recon_lr=init_recon_lr,
    init_semi_sup_lr=init_semi_sup_lr,
    init_gen_lr=init_gen_lr,
    # both discriminators share the same starting rate
    init_disc_categorical_lr=init_disc_lr,
    init_disc_style_lr=init_disc_lr,
)
model = DimensionalityReductionAAE(options)

print(model)

DimensionalityReductionAAE(
  (encoder): Encoder(
    (fc): Sequential(
      (0): Linear(in_features=784, out_features=1000, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1000, out_features=1000, bias=True)
      (3): ReLU()
      (4): Linear(in_features=1000, out_features=20, bias=True)
    )
  )
  (decoder): Decoder(
    (fc): Sequential(
      (0): Linear(in_features=10, out_features=1000, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1000, out_features=1000, bias=True)
      (3): ReLU()
      (4): Linear(in_features=1000, out_features=784, bias=True)
      (5): Sigmoid()
    )
  )
  (cat_softmax): Softmax(dim=1)
  (discriminator_categorical): Discriminator(
    (fc): Sequential(
      (0): Linear(in_features=10, out_features=1000, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1000, out_features=1000, bias=True)
      (3): ReLU()
      (4): Linear(in_features=1000, out_features=1, bias=True)
    )
  )
  (discriminator_style): Discriminator(
  

In [8]:
# Train the model on the training loader, evaluating on the validation loader.
# NOTE(review): the captured log shows validation accuracy fixed at 9.69% for
# the first three epochs - worth checking before committing to a full run.
model.train_mbgd(
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=NUM_EPOCHS,
    prior_std=PRIOR_STD,
    # Derive the folder name from NUM_EPOCHS so the results path cannot drift
    # out of sync with the configured epoch count (same path when it is 300).
    result_folder=f"dimensionality_reduction_results/{NUM_EPOCHS}_epochs_adam_results",
)

Epoch [1/300]:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch 1/300 — Recon: 0.1663, Disc_Cat: 0.4315, Gen_Cat: 2.9634, Disc_Style: 0.2832, Gen_Style: 5.1353, SemiSup: 6.5205
Validation Accuracy: 9.69%

Weights saved to dimensionality_reduction_results/300_epochs_adam_results/weights_*.pth


Epoch [2/300]:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch 2/300 — Recon: 0.1546, Disc_Cat: 0.3033, Gen_Cat: 3.1830, Disc_Style: 0.0678, Gen_Style: 6.8512, SemiSup: 8.3056
Validation Accuracy: 9.69%

Weights saved to dimensionality_reduction_results/300_epochs_adam_results/weights_*.pth


Epoch [3/300]:   0%|          | 0/500 [00:00<?, ?it/s]

Epoch 3/300 — Recon: 0.1522, Disc_Cat: 0.2939, Gen_Cat: 2.7088, Disc_Style: 0.0429, Gen_Style: 7.6554, SemiSup: 7.3824
Validation Accuracy: 9.69%

Weights saved to dimensionality_reduction_results/300_epochs_adam_results/weights_*.pth


Epoch [4/300]:   0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: 