In [1]:
import os
import pickle
import time

import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from matplotlib import gridspec
from torch.autograd import Variable

In [2]:
# Whether PyTorch reports CUDA as available; read later to decide on GPU transfers.
is_cuda = torch.cuda.is_available()

In [3]:
# Seed used for the random number generators.
seed = 10

# Number of label classes.
n_classes = 10
# Dimension of z, the latent representation.
z_dimension = 10
# Dimension of X, the (flattened) input data.
X_dimension = 784
# Dimension of the data labels.
y_dimension = 10

TRAIN_BATCH_SIZE = 100
VALID_BATCH_SIZE = 1000
EPOCHS = 1000
# Width of the hidden layers of every network.
N = 1000
# Small constant added inside the loss expressions.
TINY_ERROR = 1e-15
DATA_PATH = "/floyd/input/skripsi_datasets_2/"
# A torch.device object is always truthy, so `if cuda:` used to succeed even on
# CPU-only machines and the subsequent `.cuda(...)` calls crashed. Use None
# when CUDA is unavailable so that truthiness reflects real GPU availability.
cuda = torch.device('cuda') if torch.cuda.is_available() else None

# Histories filled in during training (one entry per logged epoch).
training_reconstruction_loss = []
training_generator_loss = []
training_discriminator_loss = []
# Encoded validation batches captured at each evaluation step.
training_generator_sample = []

In [4]:
class Encoder_net(nn.Module):
    """Fully connected encoder mapping an input of size X_dimension to a latent
    code of size z_dimension through two hidden layers of width N."""

    def __init__(self):
        super(Encoder_net, self).__init__()
        # Registration order (layer1, layer2, layer3) is kept so that parameter
        # initialisation and state_dict keys stay the same.
        self.layer1 = nn.Linear(X_dimension, N)
        self.layer2 = nn.Linear(N, N)
        self.layer3 = nn.Linear(N, z_dimension)

    def forward(self, x):
        """Return the latent code for `x` (no activation on the output layer)."""
        hidden = x
        # Both hidden layers share the same pattern: linear -> dropout -> ReLU.
        for linear in (self.layer1, self.layer2):
            hidden = F.relu(F.dropout(linear(hidden), p=0.5, training=self.training))
        return self.layer3(hidden)


# Decoder
class Decoder_net(nn.Module):
    """Fully connected decoder mapping a vector of size z_dimension + n_classes
    to an output of size X_dimension squashed into (0, 1) by a sigmoid."""

    def __init__(self):
        super(Decoder_net, self).__init__()
        self.layer1 = nn.Linear(z_dimension + n_classes, N)
        self.layer2 = nn.Linear(N, N)
        self.layer3 = nn.Linear(N, X_dimension)

    def forward(self, x):
        """Decode `x` into a reconstruction with values in (0, 1)."""
        hidden = F.relu(F.dropout(self.layer1(x), p=0.5, training=self.training))
        # NOTE(review): unlike the other networks in this file there is no ReLU
        # after the second hidden layer - confirm whether that is intentional.
        hidden = F.dropout(self.layer2(hidden), p=0.5, training=self.training)
        return F.sigmoid(self.layer3(hidden))

class Discriminator_net_gauss(nn.Module):
    """Fully connected discriminator over latent codes of size z_dimension;
    outputs one sigmoid score per sample."""

    def __init__(self):
        super(Discriminator_net_gauss, self).__init__()
        self.layer1 = nn.Linear(z_dimension, N)
        self.layer2 = nn.Linear(N, N)
        self.layer3 = nn.Linear(N, 1)

    def forward(self, x):
        """Return a score in (0, 1) for every latent code in `x`."""
        hidden = x
        # Hidden layers: linear -> dropout -> ReLU, applied twice.
        for linear in (self.layer1, self.layer2):
            hidden = F.relu(F.dropout(linear(hidden), p=0.5, training=self.training))
        score = self.layer3(hidden)
        return F.sigmoid(score)

In [5]:
def _load_pickled_dataset(file_name):
    """Unpickle the dataset stored at DATA_PATH + file_name.

    The file is opened in a `with` block so the handle is closed right after
    loading (the previous `pickle.load(open(...))` form never closed it).
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only load dataset files that come from a trusted source.
    with open(DATA_PATH + file_name, "rb") as dataset_file:
        return pickle.load(dataset_file)


trainset_labeled = _load_pickled_dataset("train_labeled.p")
trainset_unlabeled = _load_pickled_dataset("train_unlabeled.p")
# Set -1 as labels for unlabeled data
# NOTE(review): this assigns the attribute `_train_labels`; whether the dataset's
# item lookup actually reads that attribute is not visible here - verify that the
# labels yielded by the loader are the ones intended. 47000 is presumably the
# number of samples in the unlabeled set - confirm.
trainset_unlabeled._train_labels = torch.from_numpy(np.array([-1] * 47000))
validset = _load_pickled_dataset("validation.p")

train_labeled_loader = torch.utils.data.DataLoader(trainset_labeled,
                                                   batch_size=TRAIN_BATCH_SIZE,
                                                   shuffle=True)

train_unlabeled_loader = torch.utils.data.DataLoader(trainset_unlabeled,
                                                     batch_size=TRAIN_BATCH_SIZE,
                                                     shuffle=True)

valid_loader = torch.utils.data.DataLoader(validset, batch_size=VALID_BATCH_SIZE, shuffle=True)


3000
750


In [6]:
def train_one_epoch(decoder, encoder, discriminator_gauss, decoder_optimizer, encoder_optimizer, generator_optimizer, discriminator_optimizer, data_loader):
    """Run one pass over `data_loader`, updating all three networks per batch.

    Each batch goes through three phases:
      1. reconstruction - encoder + decoder minimise the MSE between the input
         and its reconstruction (decoder input = one-hot label ++ latent code);
      2. discriminator  - least-squares loss pushing scores of N(0, 1) samples
         towards 1 and scores of encoded inputs towards 0;
      3. generator      - the encoder is updated so that the discriminator
         scores its codes towards 1.

    Args:
        decoder, encoder, discriminator_gauss: the networks to train.
        decoder_optimizer, encoder_optimizer: optimizers stepped in phase 1.
        generator_optimizer: optimizer stepped in phase 3.
        discriminator_optimizer: optimizer stepped in phase 2.
        data_loader: iterable yielding `(X, target)` batches; `target` holds
            integer class ids in `[0, n_classes)`.

    Returns:
        `(discriminator_loss, generator_loss, reconstruction_loss)` of the last
        batch, as detached scalar tensors.

    Raises:
        ValueError: if `data_loader` yields no batch, or a label lies outside
            `[0, n_classes)`.
    """
    # Constants used to undo the input normalisation before flattening.
    # presumably the MNIST mean/std applied when the datasets were built - confirm.
    mnist_mean, mnist_std = 0.1307, 0.3081

    encoder.train()
    decoder.train()
    discriminator_gauss.train()

    # Follow the device the model lives on instead of a global flag, so the
    # same code runs on CPU and GPU.
    device = next(encoder.parameters()).device
    # Row i is the one-hot vector of class i; indexing it with the labels
    # replaces the former tensor -> list -> numpy -> tensor round trip.
    one_hot_table = torch.eye(n_classes, device=device)

    def _zero_all_grads():
        decoder.zero_grad()
        encoder.zero_grad()
        discriminator_gauss.zero_grad()

    discriminator_loss = generator_loss = reconstruction_loss = None

    for X, target in data_loader:
        # Use the actual batch size: the in-place resize_ to TRAIN_BATCH_SIZE
        # padded a shorter final batch with uninitialised memory.
        batch_size = X.size(0)

        # Indexing the one-hot table with a negative label would silently wrap
        # around to the last class, so reject out-of-range labels explicitly.
        if target.min().item() < 0 or target.max().item() >= n_classes:
            raise ValueError(
                "labels must lie in [0, {}); got values in [{}, {}]".format(
                    n_classes, target.min().item(), target.max().item()))

        X = (X * mnist_std + mnist_mean).reshape(batch_size, -1).to(device)
        z_category = one_hot_table[target.to(device).long()]

        # --- Phase 1: reconstruction -------------------------------------
        _zero_all_grads()
        z_gauss = encoder(X)
        X_sample = decoder(torch.cat((z_category, z_gauss), 1))
        reconstruction_loss = F.mse_loss(X_sample + TINY_ERROR, X + TINY_ERROR)
        reconstruction_loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()
        _zero_all_grads()

        # --- Phase 2: discriminator --------------------------------------
        encoder.eval()
        # Sampled on the CPU generator and then moved, as before, so that a
        # fixed seed keeps producing the same prior samples.
        z_real_gauss = torch.randn(batch_size, z_gauss.size(1)).to(device)
        # Only the discriminator is updated here; skip building the encoder graph.
        with torch.no_grad():
            z_fake_gauss = encoder(X)

        discriminator_real_gauss = discriminator_gauss(z_real_gauss)
        discriminator_fake_gauss = discriminator_gauss(z_fake_gauss)
        discriminator_loss = 0.5 * (
            torch.mean((discriminator_real_gauss + TINY_ERROR - 1) ** 2)
            + torch.mean((discriminator_fake_gauss + TINY_ERROR) ** 2))
        discriminator_loss.backward()
        discriminator_optimizer.step()
        _zero_all_grads()

        # --- Phase 3: generator (encoder) --------------------------------
        encoder.train()
        generator_fake_gauss = discriminator_gauss(encoder(X))
        generator_loss = 0.5 * torch.mean((generator_fake_gauss + TINY_ERROR - 1) ** 2)
        generator_loss.backward()
        generator_optimizer.step()
        _zero_all_grads()

    if reconstruction_loss is None:
        raise ValueError("data_loader yielded no batches")

    # Detach so callers that keep the losses around do not hold on to autograd state.
    return discriminator_loss.detach(), generator_loss.detach(), reconstruction_loss.detach()

In [7]:
def _save_latent_scatter(latent_codes, labels, path):
    """Scatter the first two latent dimensions, coloured by class, and save to `path`.

    `latent_codes` is a 2-D NumPy array (one row per sample) and `labels` the
    matching list of integer class ids.
    """
    # One colour per possible class, indexed by the class id itself, so colours
    # stay correct even when a batch does not contain every class.
    color_map = plt.cm.rainbow(np.linspace(0, 1, n_classes))
    figure = plt.figure()
    axis = figure.add_subplot(111, aspect='equal')
    box = axis.get_position()
    axis.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    handles = [mpatches.Circle((0, 0), label=class_, color=color_map[class_])
               for class_ in sorted(set(labels))]
    axis.legend(handles=handles, shadow=True, bbox_to_anchor=(1.05, 0.45), fancybox=True, loc='center left')
    axis.scatter(latent_codes[:, 0], latent_codes[:, 1], s=2, alpha=0.8,
                 c=[color_map[label] for label in labels])
    axis.set_xlim([-20, 20])
    axis.set_ylim([-20, 20])
    figure.savefig(path)
    plt.close(figure)


def _save_reconstructions(originals, reconstructions, path):
    """Save a two-row figure: original images on top, reconstructions below."""
    n_digits = len(originals)
    figure = plt.figure(figsize=(20, 4))
    for column in range(n_digits):
        for row, image in enumerate((originals[column], reconstructions[column])):
            axis = figure.add_subplot(2, n_digits, column + 1 + row * n_digits)
            axis.imshow(image, cmap='gray')
            axis.get_xaxis().set_visible(False)
            axis.get_yaxis().set_visible(False)
    figure.savefig(path)
    plt.close(figure)


def _save_class_samples(decoder, device, path, n_styles=10):
    """Decode `n_styles` random latent codes under every class label and save the grid.

    Each row of the grid shares one latent code; each column one class.
    """
    styles = np.random.randn(n_styles, z_dimension).astype('float32')
    one_hot = np.identity(n_classes, dtype='float32')
    # Row k of the batch pairs class (k % n_classes) with style (k // n_classes),
    # so a single decoder call replaces n_styles * n_classes separate calls.
    decoder_input = np.concatenate(
        (np.tile(one_hot, (n_styles, 1)), np.repeat(styles, n_classes, axis=0)), axis=1)
    with torch.no_grad():
        images = decoder(torch.from_numpy(decoder_input).to(device)).cpu().numpy()
    images = images.reshape(-1, 28, 28)

    figure = plt.figure()
    grid_spec = gridspec.GridSpec(n_styles, n_classes, hspace=0.05, wspace=0.05)
    for cell, image in enumerate(images):
        axis = figure.add_subplot(grid_spec[cell])
        axis.imshow(image, cmap='gray')
        axis.set_xticks([])
        axis.set_yticks([])
        axis.set_aspect('auto')
    figure.savefig(path)
    plt.close(figure)


def train_model(train_labeled_loader, train_unlabeled_loader, valid_loader):
    """Train the encoder, decoder and discriminator for EPOCHS epochs.

    Every 10 epochs the last-batch losses are appended to the module-level
    history lists and printed. Every 20 epochs one validation batch is encoded
    and three figures are written (latent scatter, reconstructions, and samples
    per class) into their respective output directories.

    Args:
        train_labeled_loader: NOTE(review): accepted but not used - training
            iterates `train_unlabeled_loader`; confirm this is intended.
        train_unlabeled_loader: loader passed to `train_one_epoch`.
        valid_loader: loader from which one batch is drawn at each evaluation.

    Returns:
        `(encoder, decoder)` - the trained modules, left in training mode.
    """
    # presumably the MNIST mean/std used when the datasets were built - confirm.
    mnist_mean, mnist_std = 0.1307, 0.3081
    n_digits = 20
    latent_dir = 'latent_space_supervised_aae_least'
    reconstruction_dir = 'reconstruction_supervised_aae_least'
    sampling_dir = 'sampling_supervised_aae_least'

    # Use the configured seed (previously a hard-coded 10) and seed NumPy as
    # well, since the sampling figure draws from np.random.
    torch.manual_seed(seed)
    np.random.seed(seed)

    # savefig fails if the target directory does not exist.
    for directory in (latent_dir, reconstruction_dir, sampling_dir):
        os.makedirs(directory, exist_ok=True)

    # A single device decision for the whole function; the previous mix of
    # `cuda` / `is_cuda` checks and an unconditional `.cuda()` broke CPU runs.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    encoder = Encoder_net().to(device)
    decoder = Decoder_net().to(device)
    discriminator_gauss = Discriminator_net_gauss().to(device)

    # learning rates for optimization
    learning_rate_1 = 0.0001
    learning_rate_2 = 0.00005

    # reconstruction phase optimizers
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate_1)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate_1)

    # adversarial phase optimizers (the encoder acts as the generator)
    generator_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate_2)
    discriminator_optimizer = optim.Adam(discriminator_gauss.parameters(), lr=learning_rate_2)

    one_hot_table = torch.eye(n_classes, device=device)

    for epoch in range(EPOCHS):
        start_time = time.time()
        discriminator_loss, generator_loss, reconstruction_loss = train_one_epoch(
            decoder, encoder, discriminator_gauss,
            decoder_optimizer, encoder_optimizer, generator_optimizer,
            discriminator_optimizer, train_unlabeled_loader)
        epoch_time = time.time() - start_time

        if epoch % 10 == 0:
            # Detach so the histories never keep autograd state alive.
            training_reconstruction_loss.append(reconstruction_loss.detach())
            training_generator_loss.append(generator_loss.detach())
            training_discriminator_loss.append(discriminator_loss.detach())
            # `{:.2f}` instead of `{:.2}`: the latter printed times as "1.5e+01".
            print('Epoch-{}, Time-{:.2f}, Discriminator_loss-{:.4}, Generator_loss-{:.4}, reconstruction_loss-{:.4}'.format(epoch, epoch_time, discriminator_loss.item(), generator_loss.item(), reconstruction_loss.item()))

        if epoch % 20 == 0:
            encoder.eval()
            decoder.eval()
            discriminator_gauss.eval()

            # Only the first batch of the validation loader is used.
            X_test, y_test = next(iter(valid_loader))
            # Apply the same de-normalisation as train_one_epoch so the encoder
            # sees inputs on the scale it was trained on.
            # assumes the validation set was normalised like the training set - TODO confirm
            X_test = (X_test * mnist_std + mnist_mean).reshape(X_test.size(0), -1).to(device)
            target_list = y_test.tolist()

            # Evaluation needs no gradients; this also keeps the stored
            # encodings free of autograd graphs.
            with torch.no_grad():
                encoded_X_test = encoder(X_test)
                z_category_test = one_hot_table[y_test[:n_digits].to(device).long()]
                decoded_X_test = decoder(torch.cat((z_category_test, encoded_X_test[:n_digits]), 1))
            training_generator_sample.append(encoded_X_test)

            _save_latent_scatter(encoded_X_test.cpu().numpy(), target_list,
                                 latent_dir + '/epoch_%d.png' % epoch)

            originals = X_test[:n_digits].cpu().numpy().reshape(-1, 28, 28)
            reconstructions = decoded_X_test.cpu().numpy().reshape(-1, 28, 28) * 255
            _save_reconstructions(originals, reconstructions,
                                  reconstruction_dir + '/epoch_%d.png' % epoch)

            _save_class_samples(decoder, device, sampling_dir + '/epoch_%d.png' % epoch)

            encoder.train()
            decoder.train()
            discriminator_gauss.train()

    return encoder, decoder

In [None]:
# Run the full training loop; train_model returns the (encoder, decoder) pair.
trained_encoder, trained_decoder = train_model(train_labeled_loader, train_unlabeled_loader, valid_loader)




Epoch-0, Time-1.5e+01, Discriminator_loss-0.113, Generator_loss-0.285, reconstruction_loss-0.05025
Epoch-10, Time-1.5e+01, Discriminator_loss-0.2418, Generator_loss-0.1329, reconstruction_loss-0.02688
Epoch-20, Time-1.5e+01, Discriminator_loss-0.2392, Generator_loss-0.1304, reconstruction_loss-0.02591
Epoch-30, Time-1.5e+01, Discriminator_loss-0.249, Generator_loss-0.1265, reconstruction_loss-0.02421
Epoch-40, Time-1.5e+01, Discriminator_loss-0.2465, Generator_loss-0.1331, reconstruction_loss-0.02377
Epoch-50, Time-1.5e+01, Discriminator_loss-0.2475, Generator_loss-0.125, reconstruction_loss-0.02446
Epoch-60, Time-1.5e+01, Discriminator_loss-0.2439, Generator_loss-0.1232, reconstruction_loss-0.02291
Epoch-70, Time-1.5e+01, Discriminator_loss-0.244, Generator_loss-0.1311, reconstruction_loss-0.02141
Epoch-80, Time-1.5e+01, Discriminator_loss-0.2471, Generator_loss-0.1271, reconstruction_loss-0.02295
Epoch-90, Time-1.5e+01, Discriminator_loss-0.2441, Generator_loss-0.1236, reconstruction

In [None]:
# Persist the state_dict of each trained network to its own file.
file_location_encoder, file_location_decoder = (
    "least-supervised-encoder.pt",
    "least-supervised-decoder.pt",
)
for _module, _location in (
    (trained_encoder, file_location_encoder),
    (trained_decoder, file_location_decoder),
):
    torch.save(_module.state_dict(), _location)