## Setup

We begin by importing our dependencies.

In [1]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

import time
from random import random

Set our seed and other configurations for reproducibility.

In [2]:
# Seed every random number generator the notebook may draw from, so that a
# fresh "Restart & Run All" reproduces the same results.
# The stdlib module is imported under an alias because the bare name `random`
# is already bound to the function `random.random` by the imports cell.
import random as _py_random

seed = 42
_py_random.seed(seed)    # stdlib RNG behind the `random()` calls used later
np.random.seed(seed)     # NumPy global RNG, in case helper code draws from it
torch.manual_seed(seed)  # PyTorch RNG

# cuDNN: request deterministic kernels and turn off the auto-tuner, which may
# otherwise select different convolution algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# File the trained weights are saved to and later loaded from.
PATH = './cifar_net.pth'

## Dataset

We load our CIFAR dataset using the `torchvision` package.

In [3]:
from model import *
from data import *
from pre_processing import *

In [4]:
# Number of images per mini-batch; the same value is handed to the model via
# net.batchSize() further down.
batch_size = 1024
# `Loader` comes from one of the star imports above; this notebook reads its
# `trainloader`, `testloader` and `classes` attributes.
dataset = Loader(batch_size=batch_size)
trainloader = dataset.trainloader

def examples(trainloader):
    """Show the first four images of one batch drawn from ``trainloader``.

    Args:
        trainloader: Iterable yielding ``(images, labels)`` pairs; only the
            images of the first batch it produces are used.
    """
    # Use the built-in next(): iterator objects are not guaranteed to expose a
    # ``.next()`` method (current PyTorch DataLoader iterators do not).
    images, _ = next(iter(trainloader))

    # tile the first four images into one grid and display it
    imshow(torchvision.utils.make_grid(images[:4]))

def imshow(img):
    """Display an image tensor with matplotlib.

    The values are first mapped through ``x / 2 + 0.5`` (presumably undoing a
    mean-0.5 / std-0.5 normalisation -- confirm against the data loader), then
    the first axis is moved to the end before plotting.

    Args:
        img: Tensor with three axes that supports ``.numpy()``; assumed to be
            laid out channels-first -- TODO confirm.
    """
    unnormalized = img / 2 + 0.5
    # axis 0 becomes the last axis
    pixels = unnormalized.numpy().transpose((1, 2, 0))
    plt.imshow(pixels)
    plt.show()


# Only the heading is printed: the call that would render sample images is
# left commented out below.
print('Examples:')
# examples(trainloader)

Wait for both datasets to be downloaded and verified.
Files already downloaded and verified
Files already downloaded and verified
Examples:


## Autoencoder

An autoencoder is a type of neural network that finds the function mapping the features x to itself. This objective is known as reconstruction, and an autoencoder accomplishes this through the following process: (1) an encoder learns the data representation in lower-dimension space, i.e. extracting the most salient features of the data, and (2) a decoder learns to reconstruct the original data based on the learned representation by the encoder.

Our autoencoder class uses convolutional layers in its encoder, a fully connected latent layer, and a fully connected layer followed by transposed-convolution layers in its decoder.

Before using our defined autoencoder class, we have the following things to do:
    1. We configure which device we want to run on.
    2. We instantiate an `AE` object.
    3. We define our optimizer.
    4. We define our reconstruction loss.

In [5]:
"""
*******************************************************
****** skip this step if you dont want to train *******
*******************************************************
"""

# Pick the compute device once: CUDA when PyTorch can see a GPU, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the `AE` autoencoder and move its parameters onto that device.
net = AE()
net = net.to(device)
print('Network:', net)

# Hand the loader's batch size to the model (AE.batchSize is defined outside
# this notebook).
net.batchSize(batch_size)

# Release cached GPU memory before training when running on CUDA.
if device.type == "cuda":
    torch.cuda.empty_cache()

# Adam with learning rate 1e-3, minimising the mean-squared reconstruction error.
optimizer = optim.Adam(net.parameters(), lr=1e-3)
criterion = nn.MSELoss()

Network:AE(
  (encoder): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (5): ReLU(inplace=True)
  )
  (latent): Linear(in_features=12800, out_features=10, bias=True)
  (decoder): Sequential(
    (0): Linear(in_features=10, out_features=12800, bias=True)
    (1): ConvTranspose2d(32, 16, kernel_size=(5, 5), stride=(1, 1))
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(16, 6, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): ConvTranspose2d(6, 3, kernel_size=(5, 5), stride=(1, 1))
    (6): ReLU(inplace=True)
  )
)


We train our autoencoder for our specified number of epochs.

In [6]:
"""
*******************************************************
****** skip this step if you dont want to train *******
*******************************************************
"""
# Passed as the second argument to batch_gaussian_noise() inside
# transformTensor(); presumably the number of parallel workers -- confirm
# against that helper's definition.
workers = 14

def transformTensor(tensor, epoch):
    """Return a noise-augmented float copy of ``tensor`` on ``device``.

    The batch is copied into NumPy, passed through ``batch_gaussian_noise``
    with a last argument of ``0.001 * epoch`` (so epoch 0 passes 0), and the
    result is converted back into a ``torch.float`` tensor on the global
    ``device``.

    Args:
        tensor: Batch of images as a torch tensor (CPU or GPU resident).
        epoch: Zero-based epoch index; scales the last noise argument linearly.

    Returns:
        A new ``torch.float`` tensor located on ``device``.
    """
    # detach() drops autograd tracking and cpu() makes the NumPy conversion
    # valid for GPU-resident tensors as well; copy() ensures the helper works
    # on its own buffer rather than on the caller's storage.
    images = tensor.detach().cpu().numpy().copy()

    # add noise
    # NOTE(review): the arguments 0 and 0.001 * epoch look like the mean and
    # standard deviation of the noise -- confirm against batch_gaussian_noise.
    noisy_images = np.asarray(batch_gaussian_noise(images, workers, 0, 0.001 * epoch))

    # convert to tensor again
    return torch.from_numpy(noisy_images).to(device, dtype=torch.float)

# NOTE: this message is printed when the cell is executed, i.e. before the
# train() function below has even been defined.
print("start training")

def train(net, trainloader, NUM_EPOCHS):
    """Train ``net`` as an autoencoder and return the mean loss per epoch.

    Relies on the module-level ``optimizer``, ``criterion`` and
    ``transformTensor`` defined in earlier cells.

    Args:
        net: Model to optimise; called as ``net(batch)``.
        trainloader: Sized iterable yielding ``(images, labels)`` batches; the
            labels are ignored.
        NUM_EPOCHS: Number of full passes over ``trainloader``.

    Returns:
        List with one float per epoch: the batch losses averaged over the epoch.
    """
    train_loss = []
    num_batches = len(trainloader)
    start_time = time.time()
    for epoch in range(NUM_EPOCHS):
        running_loss = 0.0
        for img, _ in trainloader:
            # transformTensor already returns a float tensor on the target
            # device, so no further .to(device) is needed here.
            img = transformTensor(img, epoch)
            optimizer.zero_grad()
            outputs = net(img)
            # NOTE(review): the reconstruction target is the noise-augmented
            # batch, not the clean input -- confirm this is intended.
            loss = criterion(outputs, img)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        lossEp = running_loss / num_batches
        train_loss.append(lossEp)

        # Remaining time = average time per finished epoch * epochs still to
        # run. After this epoch, (epoch + 1) epochs are done.
        epochs_done = epoch + 1
        timePerEpoch = (time.time() - start_time) / epochs_done
        left = timePerEpoch * (NUM_EPOCHS - epochs_done)
        print('Epoch {} of {}, Train Loss: {:.3f}'.format(epochs_done, NUM_EPOCHS, lossEp), ' aprox. ' + str(round(left / 60)) + ' min left')

    print('Finished Training')
    elapsed_time = time.time() - start_time
    print('final duration: ' + str(round(elapsed_time / 60)) + 'min')
    return train_loss

# Train for 5 epochs on the training split, then persist the learned weights
# to PATH so the evaluation cells can reload them.
train_loss = train(net, dataset.trainloader, 5)
torch.save(net.state_dict(), PATH) 

start training
torch.Size([1024, 32, 20, 20])
torch.Size([4, 3276800])


RuntimeError: size mismatch, m1: [4 x 3276800], m2: [12800 x 10] at C:/cb/pytorch_1000000000000/work/aten/src\THC/generic/THCTensorMathBlas.cu:283

In [7]:
"""
*******************************************************
****** skip this step if you dont want to train *******
*******************************************************
"""

# Plot the mean training loss recorded for each epoch.
fig, ax = plt.subplots()
ax.plot(train_loss)
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.grid()
plt.draw()

NameError: name 'train_loss' is not defined

In [8]:
from eval import *

# load one batch from the test set
# (built-in next(): DataLoader iterators in current PyTorch have no .next() method)
dataset_iter = iter(dataset.testloader)
images, classes = next(dataset_iter)
length = len(images)
classNames = dataset.classes

# load network
# NOTE(review): the checkpoint at PATH must have been written by the current
# AE architecture; otherwise load_state_dict() raises on missing/unexpected keys.
net2 = AE()
net2.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))
net2.eval()  # the model is only used for inference from here on

# show reconstructed images (at most the first ten of the batch)
outputs = net2(images[:10])

# iterate over what was actually produced so a batch with fewer than ten
# images does not index out of range
for i in range(len(outputs)):
    imshow(torchvision.utils.make_grid([images[i], outputs[i]]).detach())


RuntimeError: Error(s) in loading state_dict for AE:
	Missing key(s) in state_dict: "latent.weight", "latent.bias", "decoder.1.weight", "decoder.1.bias", "decoder.3.weight", "decoder.3.bias", "decoder.5.weight", "decoder.5.bias". 
	Unexpected key(s) in state_dict: "decoder.2.weight", "decoder.2.bias", "decoder.4.weight", "decoder.4.bias". 
	size mismatch for decoder.0.weight: copying a param with shape torch.Size([32, 16, 5, 5]) from checkpoint, the shape in current model is torch.Size([12800, 10]).
	size mismatch for decoder.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([12800]).

In [9]:
# model should output latent space and not reconstruction
net2.getLatentSpace(True)

# process images; no_grad() avoids building an autograd graph for this
# inference-only pass over the whole batch
with torch.no_grad():
    outputs = net2(images).detach().numpy()
outputShape = np.shape(outputs)

# flatten latent space to one row per image
outputs = outputs.reshape(length, -1)


torch.Size([10000, 32, 20, 20])
torch.Size([4, 32000000])


RuntimeError: size mismatch, m1: [4 x 32000000], m2: [12800 x 10] at C:\cb\pytorch_1000000000000\work\aten\src\TH/generic/THTensorMath.cpp:41

In [10]:
# get 20 random images with closest matches
for i in range(20):
    print('\n')
    # int() floors, so the index stays within [0, length - 1]; round() could
    # yield `length` itself and index past the end of the batch.
    srcImgId = int(random() * length)
    srcImgClass = classes[srcImgId]
    closest = findNClosest(outputs[srcImgId], outputs, 6)

    acc = 0
    closeImages = []
    closeClasses = []
    for img in closest:
        # the last element of each match is read as the image index
        matchId = int(img[-1])
        closeImages.append(images[matchId])
        closeClasses.append(classNames[classes[matchId]])
        acc += 1 if classes[matchId] == srcImgClass else 0

    # show images
    print(str(i + 1) + ": " + classNames[srcImgClass])
    imshow(torchvision.utils.make_grid(closeImages))

    # show classes: one "name: count" entry per distinct class among the matches
    unique, count = np.unique(closeClasses, return_counts=True)
    res = "".join(name + ": " + str(n) + ",   " for name, n in zip(unique, count))

    # NOTE(review): `acc - 1` out of 5 presumably discounts the query image
    # being returned as its own closest match -- confirm against findNClosest.
    print('closest: ' + res + ' (accuracy: ' + str(acc - 1) + '/5)')





NameError: name 'outputs' is not defined