In [10]:
import sys, os
sys.path.append('/home/A00512318/TCN')
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from TCN.mnist_pixel.model import TCN
import numpy as np

In [11]:
from torchvision import datasets, transforms

def data_generator(root, batch_size):
    train_set = datasets.FashionMNIST(root=root, train=True, download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307,), (0.3081,))
                               ]))
    test_set = datasets.FashionMNIST(root=root, train=False, download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ]))

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
    return train_loader, test_loader

In [30]:
def trainTCN(ep):
    global steps
    train_loss = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda: 
            data, target = data.to(device), target.to(device)
        data = data.view(-1, input_channels, seq_length)
        if permutee:
            data = data[:, :, permute]
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        if parameters['clip'] > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), parameters['clip'])
        optimizer.step()
        train_loss += loss.item()
        train_losses_.append(train_loss)
        steps += seq_length
        if batch_idx > 0 and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tSteps: {}'.format(
                ep, batch_idx * parameters['batch_size'], len(train_loader.dataset),
                100. * batch_idx / len(train_loader), train_loss/log_interval, steps))
            train_loss = 0  

In [31]:
def testTCN():
    test_loss = 0
    correct = 0
    correct_class = list(0. for i in range(10))
    correct_total = list(0. for i in range(10))
    tot = 0
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.to(device), target.to(device)
            data = data.view(-1, input_channels, seq_length)
            if permutee:
                data = data[:, :, permute]
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            _, pred = torch.max(output, 1)
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
#             print(len(target.data.view_as(pred)))
            c = (pred == target).squeeze()
            tot += 1
#             if tot != 313:
#             for i in range(len(test_loader.dataset) // batch_size):
# #                     print(pred[i], target.data.view_as(pred)[i])
#                 print(i)
#                 label = pred[i]
#                 if (pred[i] == target.data.view_as(pred)[i]):
#                     correct_class[label] += c[i].item()
#                 correct_total[label] += 1
                    
                
#     print(tot)
#     for i in range(10):
#         print('Accuracy of %5s : %2d %%' % (
#             classes[i], 100 * correct_class[i] / correct_total[i]))

    test_loss /= len(test_loader.dataset)
#     print(correct.item())
    accuracies_.append(correct.item() / 10000.)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss

In [32]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


# Print model's state_dict
def models_state_dict(model):
    print("Model's state_dict:")
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
def optimizers_state_dict():
    print("Optimizer's state_dict:")
    for var_name in optimizer.state_dict():
        print(var_name, "\t", optimizer.state_dict()[var_name])

In [33]:
import pickle

def save_list_params(obj):
    with open('list_params.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_list_params():
    with open('list_params.pkl', 'rb') as f:
        return pickle.load(f)

In [41]:
import random
num_models = 50
seen_hyper = set()
list_parameters = [{} for i in range(0, num_models)]
for i in range(0, num_models):
    while(True):
        list_parameters[i]['batch_size'] = random.choice([16, 32, 64, 128])
        list_parameters[i]['dropout'] = np.random.uniform(0., 1)
        list_parameters[i]['clip'] = np.random.uniform(0.3, 1)
        list_parameters[i]['lr'] = pow(10, np.random.uniform(-6, 1))
        list_parameters[i]['ksize'] = np.random.randint(5, 8)
        list_parameters[i]['levels'] = np.random.randint(5, 9)
        list_parameters[i]['optim'] = 'Adam'
        list_parameters[i]['nhid'] = np.random.randint(15, 30)
        list_parameters[i]['epochs'] = np.random.randint(1, 6)
        k = "".join(str(v) + '_' for k, v in list_parameters[i].items())
        if k not in seen_hyper:
            seen_hyper.add(k)
            break
save_list_params(list_parameters)
[param for param in list_parameters]

[{'batch_size': 128,
  'dropout': 0.5901130817628256,
  'clip': 0.5487486927464348,
  'lr': 5.891266424581271,
  'ksize': 7,
  'levels': 5,
  'optim': 'Adam',
  'nhid': 16,
  'epochs': 1},
 {'batch_size': 16,
  'dropout': 0.48585040418657977,
  'clip': 0.31031668450281513,
  'lr': 0.13275497835764283,
  'ksize': 5,
  'levels': 8,
  'optim': 'Adam',
  'nhid': 17,
  'epochs': 2},
 {'batch_size': 128,
  'dropout': 0.9480511363413594,
  'clip': 0.7681822557707426,
  'lr': 6.528991996109323e-06,
  'ksize': 5,
  'levels': 7,
  'optim': 'Adam',
  'nhid': 29,
  'epochs': 4},
 {'batch_size': 64,
  'dropout': 0.9462518398031703,
  'clip': 0.3716806137992855,
  'lr': 1.16771578236534,
  'ksize': 5,
  'levels': 8,
  'optim': 'Adam',
  'nhid': 24,
  'epochs': 1},
 {'batch_size': 128,
  'dropout': 0.5073239528142397,
  'clip': 0.7670871424546122,
  'lr': 0.00015223143959839703,
  'ksize': 7,
  'levels': 5,
  'optim': 'Adam',
  'nhid': 20,
  'epochs': 1},
 {'batch_size': 16,
  'dropout': 0.5819818993

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
input_channels = 1 # just one for the image
classes = ('T-shirt/Top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')
n_classes = len(classes) # 10 classes for fashion-mnist
log_interval = 100
seed = 1111
torch.manual_seed(seed)
permutee = False
input_channels = 1
seq_length = int(784 / input_channels)

cuda = True

for model_indx, parameters in enumerate(list_parameters):
    print(parameters)
    steps = 0
#     permute = torch.Tensor(np.random.permutation(784).astype(np.float64)).long()
#     permute = permute.to(device)
    
    train_loader, test_loader = data_generator('../data/fashion_mnist', parameters['batch_size'])
    
    model = TCN(input_channels, n_classes, hidden_units=parameters['nhid'], levels=parameters['levels'], kernel_size=parameters['ksize'], dropout=parameters['dropout'])
    model = nn.DataParallel(model)
    model.to(device)

    print(count_parameters(model))
    lr = parameters['lr']
    optimizer = getattr(optim, parameters['optim'])(model.parameters(), lr=lr)
    accuracies_ = []
    train_losses_ = []
    test_losses_ = []
    file_name = 'model_{0}.pt'.format(model_indx)
   
    for epoch in range(1, parameters['epochs']+1):
        trainTCN(epoch)
        testTCN()
        if epoch % 5 == 0: 
            print('Saving checkpoint for model.....')
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.module.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_losses': train_losses_,
                'test_losses': test_losses_,
                'accuracies': accuracies_,
                'curr_lr': lr,
            }, file_name)
        if epoch % 10 == 0:
            lr /= 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
    torch.save({
        'model_state_dict': model.module.state_dict(),
        'accuracies': 
    }, file_name)
    print('Saved as %s' % file_name)    

{'batch_size': 128, 'dropout': 0.5901130817628256, 'clip': 0.5487486927464348, 'lr': 5.891266424581271, 'ksize': 7, 'levels': 5, 'optim': 'Adam', 'nhid': 16, 'epochs': 1}
16762





Test set: Average loss: 2.6862, Accuracy: 1000/10000 (10%)

Saved as model_0.pt
{'batch_size': 16, 'dropout': 0.48585040418657977, 'clip': 0.31031668450281513, 'lr': 0.13275497835764283, 'ksize': 5, 'levels': 8, 'optim': 'Adam', 'nhid': 17, 'epochs': 2}
22518

Test set: Average loss: 2.3274, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.3274, Accuracy: 1000/10000 (10%)

Saved as model_1.pt
{'batch_size': 128, 'dropout': 0.9480511363413594, 'clip': 0.7681822557707426, 'lr': 6.528991996109323e-06, 'ksize': 5, 'levels': 7, 'optim': 'Adam', 'nhid': 29, 'epochs': 4}
55980

Test set: Average loss: 2297.0765, Accuracy: 1028/10000 (10%)


Test set: Average loss: 1889.8382, Accuracy: 954/10000 (9%)


Test set: Average loss: 1478.1817, Accuracy: 972/10000 (9%)


Test set: Average loss: 1152.8744, Accuracy: 986/10000 (9%)

Saved as model_2.pt
{'batch_size': 64, 'dropout': 0.9462518398031703, 'clip': 0.3716806137992855, 'lr': 1.16771578236534, 'ksize': 5, 'levels': 8, 'optim': 'Adam', 'n


Test set: Average loss: 1.8728, Accuracy: 2854/10000 (28%)

Saved as model_4.pt
{'batch_size': 16, 'dropout': 0.5819818993622836, 'clip': 0.847429772417585, 'lr': 0.05157739931286471, 'ksize': 5, 'levels': 6, 'optim': 'Adam', 'nhid': 27, 'epochs': 3}
41212

Test set: Average loss: 2.0314, Accuracy: 1817/10000 (18%)


Test set: Average loss: 2.0367, Accuracy: 1846/10000 (18%)


Test set: Average loss: 2.0555, Accuracy: 1796/10000 (17%)

Saved as model_5.pt
{'batch_size': 16, 'dropout': 0.9359662309541344, 'clip': 0.550138516719525, 'lr': 4.052374179715319, 'ksize': 6, 'levels': 5, 'optim': 'Adam', 'nhid': 26, 'epochs': 5}
37502



Test set: Average loss: 2.9636, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.9639, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.9639, Accuracy: 1000/10000 (10%)




Test set: Average loss: 2.9639, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.9639, Accuracy: 1000/10000 (10%)

Saving checkpoint for model.....
Saved as model_6.pt
{'batch_size': 16, 'dropout': 0.9703799763451182, 'clip': 0.38176531220238613, 'lr': 8.262931635944093e-05, 'ksize': 5, 'levels': 5, 'optim': 'Adam', 'nhid': 17, 'epochs': 1}
13644

Test set: Average loss: 5.8062, Accuracy: 1001/10000 (10%)

Saved as model_7.pt
{'batch_size': 128, 'dropout': 0.14947872916874339, 'clip': 0.9785268997460439, 'lr': 1.7364279060050786e-06, 'ksize': 6, 'levels': 8, 'optim': 'Adam', 'nhid': 24, 'epochs': 2}
53050

Test set: Average loss: 2.4313, Accuracy: 1111/10000 (11%)


Test set: Average loss: 2.3714, Accuracy: 1158/10000 (11%)

Saved as model_8.pt
{'batch_size': 16, 'dropout': 0.8924517236176205, 'clip': 0.5735127668271558, 'lr': 1.6483387794252606e-05, 'ksize': 6, 'levels': 6, 'optim': 'Adam', 'nhid': 29, 'epochs': 1}
56734



Test set: Average loss: 2.9868, Accuracy: 963/10000 (9%)

Saved as model_9.pt
{'batch_size': 16, 'dropout': 0.8584678686992067, 'clip': 0.40927755813491395, 'lr': 0.0002357793473902143, 'ksize': 5, 'levels': 5, 'optim': 'Adam', 'nhid': 25, 'epochs': 4}
29060

Test set: Average loss: 2.2060, Accuracy: 1899/10000 (18%)


Test set: Average loss: 2.0108, Accuracy: 2273/10000 (22%)




Test set: Average loss: 1.9304, Accuracy: 2401/10000 (24%)


Test set: Average loss: 1.8243, Accuracy: 2739/10000 (27%)

Saved as model_10.pt
{'batch_size': 16, 'dropout': 0.05118951370514613, 'clip': 0.31994881392220775, 'lr': 0.000251527199959743, 'ksize': 6, 'levels': 7, 'optim': 'Adam', 'nhid': 17, 'epochs': 3}
23334

Test set: Average loss: 0.5761, Accuracy: 7965/10000 (79%)




Test set: Average loss: 0.4923, Accuracy: 8235/10000 (82%)


Test set: Average loss: 0.4571, Accuracy: 8370/10000 (83%)

Saved as model_11.pt
{'batch_size': 64, 'dropout': 0.5425488120869835, 'clip': 0.8092360709576101, 'lr': 4.141605299345055e-06, 'ksize': 7, 'levels': 8, 'optim': 'Adam', 'nhid': 18, 'epochs': 2}
34948

Test set: Average loss: 2.3828, Accuracy: 1076/10000 (10%)


Test set: Average loss: 2.3427, Accuracy: 1135/10000 (11%)

Saved as model_12.pt
{'batch_size': 16, 'dropout': 0.6057602991308068, 'clip': 0.41433924012286244, 'lr': 0.008962401103785466, 'ksize': 6, 'levels': 8, 'optim': 'Adam', 'nhid': 17, 'epochs': 4}
26870

Test set: Average loss: 1.1354, Accuracy: 5561/10000 (55%)




Test set: Average loss: 1.0175, Accuracy: 6109/10000 (61%)


Test set: Average loss: 1.0034, Accuracy: 6145/10000 (61%)


Test set: Average loss: 0.9753, Accuracy: 6286/10000 (62%)

Saved as model_13.pt
{'batch_size': 128, 'dropout': 0.23400079647375893, 'clip': 0.8809301954153927, 'lr': 0.002886432900681828, 'ksize': 5, 'levels': 6, 'optim': 'Adam', 'nhid': 23, 'epochs': 1}
30048

Test set: Average loss: 0.8400, Accuracy: 6859/10000 (68%)

Saved as model_14.pt
{'batch_size': 32, 'dropout': 0.03344934581766823, 'clip': 0.37756529279140755, 'lr': 5.399624493963521e-05, 'ksize': 5, 'levels': 8, 'optim': 'Adam', 'nhid': 23, 'epochs': 2}
40812



Test set: Average loss: 0.8468, Accuracy: 6937/10000 (69%)


Test set: Average loss: 0.6740, Accuracy: 7578/10000 (75%)

Saved as model_15.pt
{'batch_size': 16, 'dropout': 0.9900438491724513, 'clip': 0.3033740574062547, 'lr': 2.974999017966558e-06, 'ksize': 6, 'levels': 6, 'optim': 'Adam', 'nhid': 20, 'epochs': 4}
27250

Test set: Average loss: 64688.3761, Accuracy: 1021/10000 (10%)


Test set: Average loss: 28658.6538, Accuracy: 1015/10000 (10%)




Test set: Average loss: 9668.7820, Accuracy: 989/10000 (9%)


Test set: Average loss: 9164.3307, Accuracy: 995/10000 (9%)

Saved as model_16.pt
{'batch_size': 16, 'dropout': 0.10590504331156836, 'clip': 0.48592889050596755, 'lr': 1.4429847735337994e-06, 'ksize': 7, 'levels': 5, 'optim': 'Adam', 'nhid': 20, 'epochs': 2}
25990

Test set: Average loss: 2.3713, Accuracy: 1000/10000 (10%)




Test set: Average loss: 2.3353, Accuracy: 1000/10000 (10%)

Saved as model_17.pt
{'batch_size': 32, 'dropout': 0.057350003758114165, 'clip': 0.9710753465106388, 'lr': 6.372287464160273, 'ksize': 7, 'levels': 7, 'optim': 'Adam', 'nhid': 23, 'epochs': 5}
49230

Test set: Average loss: 5869255648968401400788331201036288.0000, Accuracy: 1017/10000 (10%)


Test set: Average loss: 5879232600914758408817055558008832.0000, Accuracy: 985/10000 (9%)


Test set: Average loss: 5879120571302611395772183618781184.0000, Accuracy: 1026/10000 (10%)




Test set: Average loss: 5853045534236365596102794116333568.0000, Accuracy: 1014/10000 (10%)


Test set: Average loss: 5866912276010931636525015123361792.0000, Accuracy: 1012/10000 (10%)

Saving checkpoint for model.....
Saved as model_18.pt
{'batch_size': 32, 'dropout': 0.11867975945823495, 'clip': 0.7609097129050828, 'lr': 6.587968106121537e-05, 'ksize': 5, 'levels': 5, 'optim': 'Adam', 'nhid': 26, 'epochs': 3}
31392

Test set: Average loss: 1.2984, Accuracy: 4813/10000 (48%)


Test set: Average loss: 1.0914, Accuracy: 5669/10000 (56%)


Test set: Average loss: 1.0072, Accuracy: 6071/10000 (60%)

Saved as model_19.pt
{'batch_size': 16, 'dropout': 0.14358312064295675, 'clip': 0.5558071479307589, 'lr': 0.018478457802452417, 'ksize': 6, 'levels': 5, 'optim': 'Adam', 'nhid': 26, 'epochs': 5}
37502



Test set: Average loss: 1.2753, Accuracy: 5044/10000 (50%)


Test set: Average loss: 1.2559, Accuracy: 5106/10000 (51%)


Test set: Average loss: 1.2379, Accuracy: 5143/10000 (51%)




Test set: Average loss: 1.2305, Accuracy: 5196/10000 (51%)


Test set: Average loss: 1.2123, Accuracy: 5263/10000 (52%)

Saving checkpoint for model.....
Saved as model_20.pt
{'batch_size': 128, 'dropout': 0.4844239468027499, 'clip': 0.3540157564863804, 'lr': 7.586564277287263e-06, 'ksize': 6, 'levels': 6, 'optim': 'Adam', 'nhid': 17, 'epochs': 1}
19798

Test set: Average loss: 2.3794, Accuracy: 1014/10000 (10%)

Saved as model_21.pt
{'batch_size': 64, 'dropout': 0.45630023608472403, 'clip': 0.6862334576815152, 'lr': 1.3025631431070124, 'ksize': 5, 'levels': 6, 'optim': 'Adam', 'nhid': 28, 'epochs': 3}
44278

Test set: Average loss: 2.4377, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.4373, Accuracy: 1000/10000 (10%)


Test set: Average loss: 2.4374, Accuracy: 1000/10000 (10%)

Saved as model_22.pt
{'batch_size': 16, 'dropout': 0.20341730039966932, 'clip': 0.3933274732559375, 'lr': 0.011672000871670073, 'ksize': 7, 'levels': 5, 'optim': 'Adam', 'nhid': 28, 'epochs': 5}
50494


Test set: Average loss: 1.0250, Accuracy: 5964/10000 (59%)

