In [1]:
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:07<00:00, 3691859.39it/s] 


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 272942.28it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4998477.39it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 4484377.36it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [2]:
import random
from torch.utils.data import DataLoader, Subset
import torch

def get_class_indices(dataset):
    class_indices = {i: [] for i in range(10)}
    for idx, (_, label) in enumerate(dataset):
        class_indices[label].append(idx)
    return class_indices

class_indices = get_class_indices(train_dataset)

def sample_labeled_data(class_indices, num_samples_per_class):
    selected_indices = []
    for label, indices in class_indices.items():
        selected_indices.extend(random.sample(indices, num_samples_per_class))
    return selected_indices

# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [3]:
labeled_indices_100 = sample_labeled_data(class_indices, num_samples_per_class=10)
labeled_indices_600 = sample_labeled_data(class_indices, num_samples_per_class=60)
labeled_indices_1000 = sample_labeled_data(class_indices, num_samples_per_class=100)
labeled_indices_3000 = sample_labeled_data(class_indices, num_samples_per_class=300)


In [4]:
labeled_subset_100 = Subset(train_dataset, labeled_indices_100)
labeled_subset_600 = Subset(train_dataset, labeled_indices_600)
labeled_subset_1000 = Subset(train_dataset, labeled_indices_1000)
labeled_subset_3000 = Subset(train_dataset, labeled_indices_3000)

labeled_loader_100 = DataLoader(labeled_subset_100, batch_size=64, shuffle=True)
labeled_loader_600 = DataLoader(labeled_subset_600, batch_size=64, shuffle=True)
labeled_loader_1000 = DataLoader(labeled_subset_1000, batch_size=64, shuffle=True)
labeled_loader_3000 = DataLoader(labeled_subset_3000, batch_size=64, shuffle=True)

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
import numpy as np
import torch

random.seed(42)
torch.manual_seed(42)
np.random.seed(42)

In [7]:
from torch import nn

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        # Encoder
        self.fc1 = nn.Linear(28 * 28, 400)
        self.fc21 = nn.Linear(400, 20)  # Latent mean
        self.fc22 = nn.Linear(400, 20)  # Latent log variance

        # Decoder
        self.fc3 = nn.Linear(20, 400)
        self.fc4 = nn.Linear(400, 28 * 28)

    def encode(self, x):
        h1 = torch.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = torch.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 28 * 28))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

def loss_function(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x.view(-1, 28 * 28), reduction='sum')
    # KL divergence term
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD


In [11]:
device

device(type='cuda')

# 100 labels

In [17]:
vae = VAE()
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
epochs = 30

for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(labeled_loader_100):
        data = data.to(device)
        data = (data + 1) / 2
        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(data)
        # print(f"recon_batch shape: {recon_batch.shape}")
        # print(f"data shape: {data.shape}")

        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / len(labeled_loader_100.dataset)}")


Epoch 1/30, Train Loss: 541.53623046875
Epoch 2/30, Train Loss: 510.6644921875
Epoch 3/30, Train Loss: 478.0264453125
Epoch 4/30, Train Loss: 452.6604296875
Epoch 5/30, Train Loss: 433.676328125
Epoch 6/30, Train Loss: 417.58892578125
Epoch 7/30, Train Loss: 405.746435546875
Epoch 8/30, Train Loss: 396.358701171875
Epoch 9/30, Train Loss: 384.392900390625
Epoch 10/30, Train Loss: 379.639580078125
Epoch 11/30, Train Loss: 371.408642578125
Epoch 12/30, Train Loss: 365.121875
Epoch 13/30, Train Loss: 357.329599609375
Epoch 14/30, Train Loss: 352.20021484375
Epoch 15/30, Train Loss: 347.741220703125
Epoch 16/30, Train Loss: 345.69125
Epoch 17/30, Train Loss: 340.885546875
Epoch 18/30, Train Loss: 337.504619140625
Epoch 19/30, Train Loss: 334.14634765625
Epoch 20/30, Train Loss: 333.400673828125
Epoch 21/30, Train Loss: 328.3477734375
Epoch 22/30, Train Loss: 325.49017578125
Epoch 23/30, Train Loss: 318.94568359375
Epoch 24/30, Train Loss: 319.188837890625
Epoch 25/30, Train Loss: 319.53266

In [11]:
def get_latent_representations(data_loader, model):
    model.eval()
    latents = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            mu, logvar = model.encode(data.view(-1, 28*28))
            z = model.reparameterize(mu, logvar)
            latents.append(z.cpu().numpy())
            labels.append(target.cpu().numpy())
    return np.concatenate(latents), np.concatenate(labels)


In [18]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

latent_train, label_train = get_latent_representations(labeled_loader_100, vae)
latent_test, label_test = get_latent_representations(test_loader, vae)

# Scale the features and train the SVM
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='auto'))
clf.fit(latent_train, label_train)


In [19]:
test_accuracy = clf.score(latent_test, label_test)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.4614


In [20]:
torch.save(vae.state_dict(), 'vae_fashionmnist_100.pth')
import joblib
joblib.dump(clf, 'svm_fashionmnist_100.pkl')


['svm_fashionmnist_100.pkl']

# 600 labels

In [40]:
vae = VAE()
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
epochs = 50

for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(labeled_loader_600):
        data = data.to(device)
        data = (data + 1) / 2
        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(data)
        # print(f"recon_batch shape: {recon_batch.shape}")
        # print(f"data shape: {data.shape}")

        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / len(labeled_loader_600.dataset)}")


Epoch 1/50, Train Loss: 485.489541015625
Epoch 2/50, Train Loss: 403.5531868489583
Epoch 3/50, Train Loss: 361.6667765299479
Epoch 4/50, Train Loss: 334.09887044270835
Epoch 5/50, Train Loss: 323.1739689127604
Epoch 6/50, Train Loss: 314.6561604817708
Epoch 7/50, Train Loss: 310.0723380533854
Epoch 8/50, Train Loss: 304.17679931640623
Epoch 9/50, Train Loss: 300.4039021809896
Epoch 10/50, Train Loss: 295.42454020182294
Epoch 11/50, Train Loss: 292.3208186848958
Epoch 12/50, Train Loss: 290.139130859375
Epoch 13/50, Train Loss: 286.79291259765625
Epoch 14/50, Train Loss: 284.03236897786456
Epoch 15/50, Train Loss: 282.538056640625
Epoch 16/50, Train Loss: 280.724501953125
Epoch 17/50, Train Loss: 278.56652180989585
Epoch 18/50, Train Loss: 277.34888346354165
Epoch 19/50, Train Loss: 276.1142586263021
Epoch 20/50, Train Loss: 274.0466813151042
Epoch 21/50, Train Loss: 273.35492919921876
Epoch 22/50, Train Loss: 272.29987955729166
Epoch 23/50, Train Loss: 271.11586588541667
Epoch 24/50, T

In [41]:
def get_latent_representations(data_loader, model):
    model.eval()
    latents = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            mu, logvar = model.encode(data.view(-1, 28*28))
            z = model.reparameterize(mu, logvar)
            latents.append(z.cpu().numpy())
            labels.append(target.cpu().numpy())
    return np.concatenate(latents), np.concatenate(labels)


In [42]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

latent_train, label_train = get_latent_representations(labeled_loader_600, vae)
latent_test, label_test = get_latent_representations(test_loader, vae)

# Scale the features and train the SVM
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='auto'))
clf.fit(latent_train, label_train)


In [43]:
test_accuracy = clf.score(latent_test, label_test)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.6981


In [44]:
torch.save(vae.state_dict(), 'vae_fashionmnist_600.pth')
import joblib
joblib.dump(clf, 'svm_fashionmnist_600.pkl')


['svm_fashionmnist_600.pkl']

# 1000 labels

In [45]:
vae = VAE()
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
epochs = 50

for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(labeled_loader_1000):
        data = data.to(device)
        data = (data + 1) / 2
        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(data)
        # print(f"recon_batch shape: {recon_batch.shape}")
        # print(f"data shape: {data.shape}")

        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / len(labeled_loader_1000.dataset)}")


Epoch 1/50, Train Loss: 454.446751953125
Epoch 2/50, Train Loss: 361.364978515625
Epoch 3/50, Train Loss: 330.37807421875
Epoch 4/50, Train Loss: 314.8826435546875
Epoch 5/50, Train Loss: 305.596462890625
Epoch 6/50, Train Loss: 298.6144248046875
Epoch 7/50, Train Loss: 292.9660859375
Epoch 8/50, Train Loss: 287.136
Epoch 9/50, Train Loss: 283.6436669921875
Epoch 10/50, Train Loss: 280.0709658203125
Epoch 11/50, Train Loss: 277.4173798828125
Epoch 12/50, Train Loss: 275.1598232421875
Epoch 13/50, Train Loss: 272.9734521484375
Epoch 14/50, Train Loss: 272.302849609375
Epoch 15/50, Train Loss: 270.769216796875
Epoch 16/50, Train Loss: 270.014130859375
Epoch 17/50, Train Loss: 268.025501953125
Epoch 18/50, Train Loss: 267.3729130859375
Epoch 19/50, Train Loss: 266.9888115234375
Epoch 20/50, Train Loss: 266.13758203125
Epoch 21/50, Train Loss: 265.458162109375
Epoch 22/50, Train Loss: 264.5616640625
Epoch 23/50, Train Loss: 264.064015625
Epoch 24/50, Train Loss: 263.34376953125
Epoch 25/50

In [46]:
def get_latent_representations(data_loader, model):
    model.eval()
    latents = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            mu, logvar = model.encode(data.view(-1, 28*28))
            z = model.reparameterize(mu, logvar)
            latents.append(z.cpu().numpy())
            labels.append(target.cpu().numpy())
    return np.concatenate(latents), np.concatenate(labels)


In [47]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

latent_train, label_train = get_latent_representations(labeled_loader_1000, vae)
latent_test, label_test = get_latent_representations(test_loader, vae)

# Scale the features and train the SVM
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='auto'))
clf.fit(latent_train, label_train)


In [48]:
test_accuracy = clf.score(latent_test, label_test)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.711


In [49]:
torch.save(vae.state_dict(), 'vae_fashionmnist_1000.pth')
import joblib
joblib.dump(clf, 'svm_fashionmnist_1000.pkl')


['svm_fashionmnist_1000.pkl']

# 3000 labels

In [55]:
vae = VAE()
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)
epochs = 10

for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(labeled_loader_3000):
        data = data.to(device)
        data = (data + 1) / 2
        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(data)
        # print(f"recon_batch shape: {recon_batch.shape}")
        # print(f"data shape: {data.shape}")

        loss = loss_function(recon_batch, data, mu, logvar)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss / len(labeled_loader_3000.dataset)}")


Epoch 1/10, Train Loss: 384.62847005208334
Epoch 2/10, Train Loss: 308.0201197916667
Epoch 3/10, Train Loss: 291.01540201822917
Epoch 4/10, Train Loss: 280.5287451171875
Epoch 5/10, Train Loss: 274.51184895833336
Epoch 6/10, Train Loss: 271.818099609375
Epoch 7/10, Train Loss: 269.21191373697917
Epoch 8/10, Train Loss: 267.82515006510414
Epoch 9/10, Train Loss: 265.4774169921875
Epoch 10/10, Train Loss: 263.67031868489585


In [56]:
def get_latent_representations(data_loader, model):
    model.eval()
    latents = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            mu, logvar = model.encode(data.view(-1, 28*28))
            z = model.reparameterize(mu, logvar)
            latents.append(z.cpu().numpy())
            labels.append(target.cpu().numpy())
    return np.concatenate(latents), np.concatenate(labels)


In [57]:
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

latent_train, label_train = get_latent_representations(labeled_loader_3000, vae)
latent_test, label_test = get_latent_representations(test_loader, vae)

# Scale the features and train the SVM
clf = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='auto'))
clf.fit(latent_train, label_train)


In [58]:
test_accuracy = clf.score(latent_test, label_test)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.7374


In [59]:
torch.save(vae.state_dict(), 'vae_fashionmnist_3000.pth')
import joblib
joblib.dump(clf, 'svm_fashionmnist_3000.pkl')


['svm_fashionmnist_3000.pkl']

# TEST the model

In [1]:
import torch
from torch import nn
import numpy as np

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        # Encoder
        self.fc1 = nn.Linear(28 * 28, 400)
        self.fc21 = nn.Linear(400, 20)  # Latent mean
        self.fc22 = nn.Linear(400, 20)  # Latent log variance

        # Decoder
        self.fc3 = nn.Linear(20, 400)
        self.fc4 = nn.Linear(400, 28 * 28)

    def encode(self, x):
        h1 = torch.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = torch.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(h3))

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 28 * 28))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

def loss_function(recon_x, x, mu, logvar):
    BCE = nn.functional.binary_cross_entropy(recon_x, x.view(-1, 28 * 28), reduction='sum')
    # KL divergence term
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + KLD



In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
def get_latent_representations(data_loader, model):
    model.eval()
    latents = []
    labels = []
    with torch.no_grad():
        for data, target in data_loader:
            data = data.to(device)
            mu, logvar = model.encode(data.view(-1, 28*28))
            z = model.reparameterize(mu, logvar)
            latents.append(z.cpu().numpy())
            labels.append(target.cpu().numpy())
    return np.concatenate(latents), np.concatenate(labels)


In [4]:
model = VAE()

model.load_state_dict(torch.load('vae_fashionmnist_3000.pth'))

model.eval()

import joblib

clf = joblib.load('svm_fashionmnist_3000.pkl')

  model.load_state_dict(torch.load('vae_fashionmnist_3000.pth'))


In [6]:
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:03<00:00, 8492778.89it/s] 


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 133040.39it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:02<00:00, 1786770.43it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 5954847.49it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [7]:
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

In [8]:
latent_test, label_test = get_latent_representations(test_loader, model)
predicted_labels = clf.predict(latent_test)

In [9]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(label_test, predicted_labels)
print(f'Test accuracy: {accuracy}')


Test accuracy: 0.7382
