In [1]:
import torch
import torch.optim
import torch.nn as nn
import torchvision
import numpy as np 
import matplotlib.pyplot as plt
from collections import OrderedDict
import Autoencoder as AE
import os

# use GPU if available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Reproducibility  
torch.manual_seed(0);

  import matplotlib.pyplot as plt


In [2]:
### Load Data ### 

#Use create a transform that uses bilinear interpolation to resize images to 64 square
im_size = 64
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(im_size),
    torchvision.transforms.ToTensor()
])

#load dataset and init dataloader (THIS WILL DOWNLOAD THE DATASET)
#dataloader will automatically batch and handle data
#The MNIST dataset is "premade," we will likely need to use TensorDataset to load our own datasets
mnist_dset_train = torchvision.datasets.MNIST(root = './data/', download=True, train = True, transform = transform)
batch_size = 128
dataloader = torch.utils.data.DataLoader(mnist_dset_train, shuffle=True, batch_size=batch_size) 


#Create test dataset and load into dataloader
mnist_dset_test = torchvision.datasets.MNIST(root = './data/MNIST/', download=True, train = False, transform = transform)
num_samples = 500
dataloader_test = torch.utils.data.DataLoader(mnist_dset_test, shuffle=True, batch_size=num_samples)


In [3]:
optimizers = [
    'SGD',
    'SGD_momentum',
    'Adam',
    'Adam_beta0.5',
    'RMSprop'
]

# Training parameters
lrs = [1e-3, 1e-3, 1e-4, 1e-5]
EPOCHS = 10
SAVE_DIR = './train_results_MNIST/trial1/'

val_losses = {}
for opt in optimizers:
    for lr in lrs:
        #create directory for results
        out_dir = SAVE_DIR + opt + '/'
        os.makedirs(out_dir, exist_ok=True)

        print('\nTraining with optimizer: {}    lr = {}'.format(opt, lr))
        #Init network
        n_latent = int(0.01* 28**2) #use 1% of original amount of data
        n_hidden = 5 #number of hidden layers 
        NN_width = 1024*4  #width of the hidden layers
        error = nn.MSELoss()
        net = AE.AutoEncoder(im_size**2, n_latent, n_hidden, NN_width, taper = True, square=True).to(device)

        if opt == 'SGD':
            optimizer = torch.optim.SGD(net.parameters(), lr=lr)
        elif opt == 'SGD_momentum':
            optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9, nesterov=True)
        elif opt == 'Adam':
            optimizer = torch.optim.Adam(net.parameters(), betas=(0.9, 0.999), lr=lr)
        elif opt == 'Adam_beta0.5':
            optimizer = torch.optim.Adam(net.parameters(), betas=(0.5, 0.999), lr=lr)
        elif opt == 'RMSprop':
            optimizer = torch.optim.RMSprop(net.parameters(), lr=lr)
        elif opt == 'LBFGS':
            optimizer = torch.optim.LBFGS(net.parameters(), lr=lr)

        #train
        train_out = AE.train_mnist(net, optimizer, dataloader, dataloader_test, epochs=EPOCHS, error = error)

        #save results
        out_fname = out_dir + 'train_results_{}_lr={}.png'.format(train_out['optimizer'], lr)
        AE.plot_mnist_reconst(out_fname, train_out['net'], dataloader_test)

        #plot loss curve 
        plt.figure(figsize = (8,6))
        plt.plot(train_out['losses'])
        plt.xlabel('Iterations')
        plt.ylabel('MSE Loss')
        plt.yscale('log')
        plt.savefig(out_dir + 'loss_curve_{}_lr={}.png'.format(train_out['optimizer'], lr))
        plt.close()

        #save losses
        np.save(out_dir + 'losses_{}_lr={}.npy'.format(train_out['optimizer'], lr), train_out['losses'])

        val_losses[opt + '_' + str(lr)] = train_out['test_error']

        #empty cuda cache
        torch.cuda.empty_cache()


Training with optimizer: SGD    lr = 0.001
Epoch: 0/10     Iter: 0     Loss: 0.10676886141300201
Epoch: 2/10     Iter: 1000     Loss: 0.10398663580417633
Epoch: 4/10     Iter: 2000     Loss: 0.10393942892551422
Epoch: 6/10     Iter: 3000     Loss: 0.11133380234241486
Epoch: 8/10     Iter: 4000     Loss: 0.10645522177219391

Training with optimizer: SGD    lr = 0.001
Epoch: 0/10     Iter: 0     Loss: 0.10695052146911621
Epoch: 2/10     Iter: 1000     Loss: 0.10405895859003067
Epoch: 4/10     Iter: 2000     Loss: 0.10415985435247421
Epoch: 6/10     Iter: 3000     Loss: 0.11142878234386444
Epoch: 8/10     Iter: 4000     Loss: 0.10662300139665604

Training with optimizer: SGD    lr = 0.0001
Epoch: 0/10     Iter: 0     Loss: 0.10695052146911621
Epoch: 2/10     Iter: 1000     Loss: 0.10415934026241302
Epoch: 4/10     Iter: 2000     Loss: 0.10435932874679565
Epoch: 6/10     Iter: 3000     Loss: 0.1117313951253891
Epoch: 8/10     Iter: 4000     Loss: 0.10701459646224976

Training with optimiz

In [4]:
# print sorted val losses dictionary
sorted_val_losses = {k: v for k, v in sorted(val_losses.items(), key=lambda item: item[1])}
np.save(SAVE_DIR+'val_losses.npy', sorted_val_losses) #np.load(SAVE_DIR+'val_losses.npy', allow_pickle=True).item()
sorted_val_losses


{'Adam_0.001': 0.021386262401938437,
 'Adam_beta0.5_0.001': 0.021894904691725968,
 'RMSprop_0.001': 0.02386805769056082,
 'Adam_beta0.5_0.0001': 0.04258222114294767,
 'RMSprop_0.0001': 0.04259118717163801,
 'Adam_0.0001': 0.04299421682953834,
 'RMSprop_1e-05': 0.05262085497379303,
 'Adam_beta0.5_1e-05': 0.052843798883259296,
 'Adam_1e-05': 0.05284702740609646,
 'SGD_momentum_0.001': 0.1035735435783863,
 'SGD_0.001': 0.10681958533823491,
 'SGD_momentum_0.0001': 0.106820347905159,
 'SGD_0.0001': 0.10727736800909042,
 'SGD_momentum_1e-05': 0.10727750062942505,
 'SGD_1e-05': 0.10732478089630604}