In [1]:
import sys, os
sys.path.append('/home/A00512318/TCN')
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
from TCN.mnist_pixel.utils import data_generator
from TCN.mnist_pixel.model import TCN
import numpy as np
import argparse

# set default values for model
batch_size = 64
cuda = True
dropout = 0.05
clip = -1
epochs = 10
kernel_size = 7
levels = 8
log_interval = 100
lr = 2e-3
optimm = 'Adam'
nhid = 25
seed = 1111
permutee = False
root = '../data/fashion_mnist'
save_filename = './checkpoints/fashionmnist/paperModel.pt'
input_channels = 1
n_classes = 10
seq_length = int(784 / input_channels)
steps = 0

In [2]:
from torchvision import datasets, transforms

def data_generator(root, batch_size):
    train_set = datasets.FashionMNIST(root=root, train=True, download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307,), (0.3081,))
                               ]))
    test_set = datasets.FashionMNIST(root=root, train=False, download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ]))

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
    return train_loader, test_loader

train_loader, test_loader = data_generator(root, batch_size)

classes = ('T-shirt/Top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')
# print(classes)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision

# functions to show an image


def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


# get some random training images
dataiter = iter(train_loader)
images, labels = dataiter.next()

unique_images = {}
seen = set()
for i in range(batch_size):
    if labels[i] not in seen:
        seen.add(labels[i].item())
#         print(seen)
        unique_images[labels[i].item()] = images[i]

# print([label[0] for label in unique_images])
# print(seen)
# print(labels)
# print()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))


In [None]:
%matplotlib inline
def plot_sample_predictions(classes, images):
    """
    This function displays images randomly and tries to predict the correct class
    stolen from here: https://github.com/cmasch/zalando-fashion-mnist/blob/master/Simple_Convolutional_Neural_Network_Fashion-MNIST.ipynb
    """

    class_ = 0
    images_per_row = 5
    rows = len(classes) // images_per_row
#     print(rows)
    ii = 0;
    for i in range(rows):
        fig, axis = plt.subplots(1, images_per_row)
     
        for i, axis in enumerate(axis):
#             print(axis, i)
            fig.set_size_inches(15, 25)
            axis.text(0,36,classes[ii])
            axis.imshow(images[ii][0] / 2 + 0.5, cmap='gray')
            axis.axis('off')
            class_ +=1
            ii += 1
    plt.savefig('./classes.png')
    plt.show()
    

In [None]:
# fig = plt.figure()
# for i in range(6):
#   plt.subplot(2,3,i+1)
#   plt.tight_layout()
#   plt.imshow(example_data[i][0], cmap='gray', interpolation='none')
#   plt.title("Ground Truth: {}".format(example_targets[i]))
#   plt.xticks([])
#   plt.yticks([])
# fig

dataiter = iter(train_loader)
images, labels = dataiter.next()
plot_sample_predictions(classes, unique_images)

In [None]:
# sanity check for validation data
dataiter = iter(test_loader)
images, labels = dataiter.next()
print(labels)
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))


In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

permute = torch.Tensor(np.random.permutation(784).astype(np.float64)).long()
channel_sizes = [nhid] * levels
model = TCN(input_channels, n_classes, channel_sizes, kernel_size=kernel_size, dropout=dropout)

model = nn.DataParallel(model)
model.to(device)

# permute = permute.cuda()
permute = permute.to(device)
optimizer = getattr(optim, optimm)(model.parameters(), lr=lr)

In [4]:
def trainTCN(ep):
    global steps
    train_loss = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda: 
            data, target = data.to(device), target.to(device)
        data = data.view(-1, input_channels, seq_length)
        if permutee:
            data = data[:, :, permute]
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        if clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), clip)
        optimizer.step()
        train_loss += loss.item()
#         loss_.append(train_loss)
        steps += seq_length
        if batch_idx > 0 and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tSteps: {}'.format(
                ep, batch_idx * batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), train_loss/log_interval, steps))
            train_loss = 0

In [5]:
def testTCN():
    test_loss = 0
    correct = 0
    correct_class = list(0. for i in range(10))
    correct_total = list(0. for i in range(10))
    tot = 0
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.to(device), target.to(device)
            data = data.view(-1, input_channels, seq_length)
            if permutee:
                data = data[:, :, permute]
            output = model(data)
            test_loss += F.nll_loss(output, target, size_average=False).item()
            _, pred = torch.max(output, 1)
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
#             print(len(target.data.view_as(pred)))
            c = (pred == target).squeeze()
            tot += 1
#             if tot != 313:
#             for i in range(len(test_loader.dataset) // batch_size):
# #                     print(pred[i], target.data.view_as(pred)[i])
#                 print(i)
#                 label = pred[i]
#                 if (pred[i] == target.data.view_as(pred)[i]):
#                     correct_class[label] += c[i].item()
#                 correct_total[label] += 1
                    
                
#     print(tot)
#     for i in range(10):
#         print('Accuracy of %5s : %2d %%' % (
#             classes[i], 100 * correct_class[i] / correct_total[i]))

    test_loss /= len(test_loader.dataset)
#     print(correct.item())
    accuracy_.append(correct.item() / 10000.)
    
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss

In [6]:
def save(model):
    torch.save(model.state_dict(), save_filename)
    print('Saved as %s' % save_filename)

In [7]:
accuracy_ = []
loss_ = []
# def runModel():
for epoch in range(1, epochs+1):
    trainTCN(epoch)
    testTCN()
#     if epoch % 5 == 0: 
#         print('Saving model.....')
#         save(model) 
    if epoch % 10 == 0:
        lr /= 10
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
save(model.module)
            






Test set: Average loss: 0.4374, Accuracy: 8397/10000 (83%)


Test set: Average loss: 0.3840, Accuracy: 8566/10000 (85%)


Test set: Average loss: 0.3594, Accuracy: 8695/10000 (86%)


Test set: Average loss: 0.3379, Accuracy: 8730/10000 (87%)


Test set: Average loss: 0.3223, Accuracy: 8813/10000 (88%)


Test set: Average loss: 0.3226, Accuracy: 8793/10000 (87%)


Test set: Average loss: 0.3117, Accuracy: 8834/10000 (88%)


Test set: Average loss: 0.3124, Accuracy: 8835/10000 (88%)


Test set: Average loss: 0.3067, Accuracy: 8881/10000 (88%)


Test set: Average loss: 0.3000, Accuracy: 8889/10000 (88%)

Saved as ./checkpoints/fashionmnist/paperModel.pt


In [9]:
loaded_model = TCN()
with open(save_filename, 'rb') as f:
        loaded_model.load_state_dict(torch.load(save_filename, map_location="cuda:0"))
        print(model)
        model.to(device)
        
def count_parameters(loaded_model):
    return sum(p.numel() for p in loaded_model.parameters() if p.requires_grad)
count_parameters(model)

DataParallel(
  (module): TCN(
    (tcn): TemporalConvNet(
      (network): Sequential(
        (0): TemporalBlock(
          (conv1): Conv1d(1, 25, kernel_size=(7,), stride=(1,), padding=(6,))
          (chomp1): Chomp1d()
          (relu1): ReLU()
          (dropout1): Dropout(p=0.05)
          (conv2): Conv1d(25, 25, kernel_size=(7,), stride=(1,), padding=(6,))
          (chomp2): Chomp1d()
          (relu2): ReLU()
          (dropout2): Dropout(p=0.05)
          (net): Sequential(
            (0): Conv1d(1, 25, kernel_size=(7,), stride=(1,), padding=(6,))
            (1): Chomp1d()
            (2): ReLU()
            (3): Dropout(p=0.05)
            (4): Conv1d(25, 25, kernel_size=(7,), stride=(1,), padding=(6,))
            (5): Chomp1d()
            (6): ReLU()
            (7): Dropout(p=0.05)
          )
          (downsample): Conv1d(1, 25, kernel_size=(1,), stride=(1,))
          (relu): ReLU()
        )
        (1): TemporalBlock(
          (conv1): Conv1d(25, 25, kernel_size

66910

In [None]:
# loaded_model.to(device)
# # kernel = loaded_model.module.tcn.network[0].net[0].weight.data.clone()
# # kernel.shape
# # kernel
# # loaded_model.module.tcn.network
# loaded_model.module.tcn.network[0].net

In [None]:
# fig = plt.figure()
# plt.figure(figsize=(10,10))
# for idx, filt  in enumerate(kernel):
# #     print(filt[0, :])
# #     print(filt[0, :])
# #     plt.subplot(4,7, idx + 1)
#     plt.imshow(filt, cmap="gray")
#     plt.axis('off')
    
    
# fig.show()

In [None]:
# import matplotlib
# import matplotlib.pyplot as plt
# import numpy as np

# # Data for plotting
# t = np.arange(1, 21)
# # s = 1 + np.sin(2 * np.pi * t)

# fig, ax = plt.subplots()
# ax.plot(t, accuracy_)

# ax.set(xlabel='Epoch', ylabel='Accuracy',
#        title='TCN')
# ax.grid()

# fig.savefig("test.png")
# plt.show()

In [10]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in loaded_model.state_dict():
    print(param_tensor, "\t", loaded_model.state_dict()[param_tensor].size())

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Model's state_dict:
tcn.network.0.conv1.bias 	 torch.Size([25])
tcn.network.0.conv1.weight_g 	 torch.Size([25, 1, 1])
tcn.network.0.conv1.weight_v 	 torch.Size([25, 1, 7])
tcn.network.0.conv2.bias 	 torch.Size([25])
tcn.network.0.conv2.weight_g 	 torch.Size([25, 1, 1])
tcn.network.0.conv2.weight_v 	 torch.Size([25, 25, 7])
tcn.network.0.net.0.bias 	 torch.Size([25])
tcn.network.0.net.0.weight_g 	 torch.Size([25, 1, 1])
tcn.network.0.net.0.weight_v 	 torch.Size([25, 1, 7])
tcn.network.0.net.4.bias 	 torch.Size([25])
tcn.network.0.net.4.weight_g 	 torch.Size([25, 1, 1])
tcn.network.0.net.4.weight_v 	 torch.Size([25, 25, 7])
tcn.network.0.downsample.weight 	 torch.Size([25, 1, 1])
tcn.network.0.downsample.bias 	 torch.Size([25])
tcn.network.1.conv1.bias 	 torch.Size([25])
tcn.network.1.conv1.weight_g 	 torch.Size([25, 1, 1])
tcn.network.1.conv1.weight_v 	 torch.Size([25, 25, 7])
tcn.network.1.conv2.bias 	 torch.Size([25])
tcn.network.1.conv2.weight_g 	 torch.Size([25, 1, 1])
tcn.network.1