In [1]:
import numpy as np

import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Adam
from torchvision import datasets, transforms

verbose = False
USE_CUDA = True
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
USE_MNIST = False
USE_CIFAR100 = False
NUM_CLASSES = 10
num_ensemble = 7

In [None]:
if USE_CIFAR100:
        #Classes :
        #aquatic mammals	beaver=0, dolphin=1, otter=2, seal=3, whale=4
        #fish	aquarium fish=5, flatfish=6, ray=7, shark=8, trout=9
        #food containers	bottles=15, bowls=16, cans=17, cups=18, plates=19
        #people	baby=70, boy=71, girl=72, man=73, woman=74
    label_class = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 70, 71, 72, 73, 74]

In [2]:
class Cifar10:
    def __init__(self, batch_size):
        dataset_transform = transforms.Compose([
            transforms.ToTensor(),
            #transforms.Normalize((0.1307,), (0.3081,))
            #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)            
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
        ])

        train_dataset = datasets.CIFAR10('../data', train=True, download=True, transform=dataset_transform)
        test_dataset = datasets.CIFAR10('../data', train=False, download=True, transform=dataset_transform)
        
        self.train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True) 
        

In [None]:
def get_same_index(target, label):
    label_indices = []

    for i in range(len(target)):
        if target[i] in label:
            label_indices.append(i)

    return label_indices
class Cifar100:
    def __init__(self, batch_size):
        dataset_transform = transforms.Compose([
            transforms.ToTensor(),
            #transforms.Normalize((0.1307,), (0.3081,))
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
        ])
        
        train_dataset = datasets.CIFAR100('../data', train=True, download=True, transform=dataset_transform)
        test_dataset = datasets.CIFAR100('../data', train=False, download=True, transform=dataset_transform)
                
        # Get indices of label_class
        train_indices = get_same_index(train_dataset.train_labels, label_class)
        test_indices = get_same_index(test_dataset.test_labels, label_class)
    
        print("Training data {}, Test data {}, Num_classes {}".format(len(train_indices),len(test_indices),NUM_CLASSES))
    
        self.train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices))
        self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(test_indices)) 

In [None]:
class Mnist:
    def __init__(self, batch_size):
        dataset_transform = transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])

        train_dataset = datasets.MNIST('../data', train=True, download=True, transform=dataset_transform)
        test_dataset = datasets.MNIST('../data', train=False, download=True, transform=dataset_transform)
        
        self.train_loader  = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True) 
        


In [3]:
class ConvLayer(nn.Module):
    #MNIST 
    def __init__(self, MNIST = USE_MNIST):
        if MNIST:
            in_channels=1
            out_channels=256
            kernel_size=9
        else:
            in_channels=3
            out_channels=256
            kernel_size=9
            
        super(ConvLayer, self).__init__()

        self.conv = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=kernel_size,
                               stride=1
                             )

    def forward(self, x):
        if verbose: print( "Conv {}".format(x.size()))
        return F.relu(self.conv(x))

In [4]:
class PrimaryCaps(nn.Module):
    #MNIST  
    def __init__(self, MNIST = USE_MNIST):
        if MNIST:
            num_capsules=8
            in_channels=256
            out_channels=32
            kernel_size=9
        else:
            num_capsules=8
            in_channels=256
            out_channels=32
            kernel_size= 9

        super(PrimaryCaps, self).__init__()

        self.capsules = nn.ModuleList([
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=2, padding=0) 
                          for _ in range(num_capsules)])
    
    def forward(self, x, MNIST = USE_MNIST):
        u = [capsule(x) for capsule in self.capsules]
        u = torch.stack(u, dim=1)
        if verbose: print( "PrimaryCaps {}".format(u.size()))
        if MNIST: 
            u = u.view(x.size(0), 32 * 6 * 6, -1)
        else:
            u = u.view(x.size(0), 32 * 8 * 8, -1)
        if verbose: print(u.size())
        return self.squash(u)
    
    def squash(self, input_tensor):
        squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
        output_tensor = squared_norm *  input_tensor / ((1. + squared_norm) * torch.sqrt(squared_norm))
        if verbose: print(output_tensor.size())
        return output_tensor

In [5]:
class DigitCaps(nn.Module):
    #MNIST 
    def __init__(self, MNIST = USE_MNIST):
        if MNIST:
            num_capsules=10
            num_routes=32 * 6 * 6
            in_channels=8
            out_channels=16
        else:
            num_capsules=NUM_CLASSES 
            num_routes=32 * 8 * 8 
            in_channels=8
            out_channels=16  

        super(DigitCaps, self).__init__()

        self.in_channels = in_channels
        self.num_routes = num_routes
        self.num_capsules = num_capsules

        self.W = nn.Parameter(torch.randn(1, num_routes, num_capsules, out_channels, in_channels))

    def forward(self, x):
        batch_size = x.size(0)
        x = torch.stack([x] * self.num_capsules, dim=2).unsqueeze(4)
        if verbose: print( "DigitCaps {},{}".format(x.size(),self.W.size()))
        if verbose: print(len(([self.W][0])))
        W = torch.cat([self.W] * batch_size, dim=0)
        if verbose: print(W.size())
        u_hat = torch.matmul(W, x)

        b_ij = Variable(torch.zeros(1, self.num_routes, self.num_capsules, 1))
        if USE_CUDA:
            b_ij = b_ij.to(device)#cuda()

        num_iterations = 3
        for iteration in range(num_iterations):
            c_ij = F.softmax(b_ij)
            c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)

            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)
            v_j = self.squash(s_j)
            
            if iteration < num_iterations - 1:
                a_ij = torch.matmul(u_hat.transpose(3, 4), torch.cat([v_j] * self.num_routes, dim=1))
                b_ij = b_ij + a_ij.squeeze(4).mean(dim=0, keepdim=True)

        return v_j.squeeze(1)
    
    def squash(self, input_tensor):
        squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
        output_tensor = squared_norm *  input_tensor / ((1. + squared_norm) * torch.sqrt(squared_norm))
        return output_tensor

In [6]:
class Decoder(nn.Module):
    def __init__(self, MNIST = USE_MNIST):
        super(Decoder, self).__init__()
        
        if MNIST:
            self.reconstraction_layers = nn.Sequential(
                nn.Linear(16 * NUM_CLASSES, 512),
                nn.ReLU(inplace=True),
                nn.Linear(512, 1024),
                nn.ReLU(inplace=True),
                nn.Linear(1024, 784),
                nn.Sigmoid()
            )            
        else:
            self.reconstraction_layers = nn.Sequential(
                nn.Linear(16 * NUM_CLASSES, 512),
                nn.ReLU(inplace=True),
                nn.Linear(512, 1024),
                nn.ReLU(inplace=True),
                nn.Linear(1024, 3072),
                nn.Sigmoid()
            )
        
    def forward(self, x, data, MNIST = USE_MNIST):
        classes = torch.sqrt((x ** 2).sum(2))
        classes = F.softmax(classes)
        
        _, max_length_indices = classes.max(dim=1)
        masked = Variable(torch.eye(NUM_CLASSES))
        if USE_CUDA:
            masked = masked.to(device)#cuda()
        masked = masked.index_select(dim=0, index=max_length_indices.squeeze(1).data)
        
        reconstructions = self.reconstraction_layers((x * masked[:, :, None, None]).view(x.size(0), -1))
        if MNIST:
            reconstructions = reconstructions.view(-1,1,28,28)
        else:
            
            reconstructions = reconstructions.view(-1,3,32,32)
        
        return reconstructions, masked

In [7]:
class CapsNet(nn.Module):
    def __init__(self):
        super(CapsNet, self).__init__()
        self.conv_layer = ConvLayer()
        self.primary_capsules = PrimaryCaps()
        self.digit_capsules = DigitCaps()
        self.decoder = Decoder()
        
        self.mse_loss = nn.MSELoss()
        
    def forward(self, data):
        output = self.digit_capsules(self.primary_capsules(self.conv_layer(data)))
        reconstructions, masked = self.decoder(output, data)
        return output, reconstructions, masked
    
    def loss(self, data, x, target, reconstructions):
        return self.margin_loss(x, target) + self.reconstruction_loss(data, reconstructions)
    
    def margin_loss(self, x, labels, size_average=True):
        batch_size = x.size(0)

        v_c = torch.sqrt((x**2).sum(dim=2, keepdim=True))

        left = F.relu(0.9 - v_c).view(batch_size, -1)
        right = F.relu(v_c - 0.1).view(batch_size, -1)

        loss = labels * left + 0.5 * (1.0 - labels) * right
        loss = loss.sum(dim=1).mean()

        return loss
    
    def reconstruction_loss(self, data, reconstructions):
        loss = self.mse_loss(reconstructions.view(reconstructions.size(0), -1), data.view(reconstructions.size(0), -1))
        return loss * 0.0005

In [8]:
batch_size = 100

if USE_MNIST:
    mnist = Mnist(batch_size) 
    if verbose: print('MNIST dataset')
else:
    if USE_CIFAR100:
        mnist = Cifar100(batch_size)
        if verbose: print('CIFAR100 dataset')
    else:
        mnist = Cifar10(batch_size)
        if verbose: print('CIFAR10 dataset')
#for batch_id,(data, target) in  enumerate(mnist.train_loader):
 #       data = Variable(data)

  #      if USE_CUDA:
   #         data = data.to(device)#.cuda()
    
#plot_images_separately(data[:6,0].data.cpu().numpy())    

Files already downloaded and verified
Files already downloaded and verified


In [None]:
def generate_dictionary():

    dictionary = {}
    for i in range(NUM_CLASSES):
        dictionary[i] = label_class[i]
    
    return dictionary

def reformat_targetTensor(target):
    dictionary_labels = generate_dictionary()
    if verbose: print("Dictionary labels generated: ",dictionary_labels)
    if verbose: print("Target Tensor before reformat:", target)
    for i in range(NUM_CLASSES):
        target[target==dictionary_labels[i]] = i
        
    if verbose: print("Target Tensor after reformat:", target)
    return target

In [None]:
print(torch.version)
import matplotlib
import matplotlib.pyplot as plt



n_epochs = 15
x = range(0,n_epochs)
mean_batch_accuracy = []
loss_train = []

ensemble_output = []
target_output = []

for ensemble in range(num_ensemble):
    capsule_net = CapsNet()
    if USE_CUDA:
        capsule_net = capsule_net.to(device)#cuda()
        print('cuda')
    optimizer = Adam(capsule_net.parameters(),lr = 0.001)#, weight_decay = 0.96)
    start = time.time()
    for epoch in range(n_epochs): 
        capsule_net.train() 
        train_loss = 0 

        batch_accuracy = []

        print('epoch {}:{} - of {}'.format(epoch+1, n_epochs, ensemble)) 
        for batch_id, (data, target) in enumerate(mnist.train_loader):

            if USE_CIFAR100:
                target = reformat_targetTensor(target)

            target =torch.eye(NUM_CLASSES).index_select(dim=0, index=target)
            data, target = Variable(data), Variable(target)

            if USE_CUDA:
                data, target = data.to(device), target.to(device)#.cuda()

            optimizer.zero_grad()
            output, reconstructions, masked = capsule_net(data)
            loss = capsule_net.loss(data, output, target, reconstructions)
            loss.backward()
            optimizer.step()

            train_loss += loss.data[0]

            if batch_id % 100 == 0:
                print("train accuracy:", sum(np.argmax(masked.data.cpu().numpy(), 1) == 
                                       np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size))
                batch_accuracy.append(sum(np.argmax(masked.data.cpu().numpy(), 1) == 
                                       np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size))
        mean_batch_accuracy.append(np.mean(batch_accuracy))
        del batch_accuracy
        loss_train.append(train_loss/len(mnist.train_loader))
    end = time.time()
    print("Training time execution {}".format(end-start))
    
   # fig = plt.figure(1)
   # ax = plt.axes()
   # plt.plot(x,mean_batch_accuracy)
   # fig2 = plt.figure(2)
   # ax = plt.axes()
   # plt.plot(x,loss_train)
   # print(train_loss / len(mnist.train_loader))
   # print(len(mnist.train_loader))    
    capsule_net.eval()
    test_loss = 0
    start = time.time()
    for batch_id, (data, target) in enumerate(mnist.test_loader):
        if USE_CIFAR100:
            target = reformat_targetTensor(target)

        target = torch.eye(NUM_CLASSES).index_select(dim=0, index=target)
        data, target = Variable(data), Variable(target)

        if USE_CUDA:
            data, target = data.to(device), target.to(device)#cuda()

        output, reconstructions, masked = capsule_net(data)
        loss = capsule_net.loss(data, output, target, reconstructions)

        test_loss += loss.data[0]

        if batch_id % 100 == 0:
            print("test accuracy:", sum(np.argmax(masked.data.cpu().numpy(), 1) == 
                                   np.argmax(target.data.cpu().numpy(), 1)) / float(batch_size)) 
        output_np = output.cpu().detach().numpy()
        ensemble_output = np.append(ensemble_output, [output_np])
        if ensemble == 0:
            target_output = np.append(target_output, target.cpu().detach().numpy())
    end = time.time()



    print("Test time execution {}".format(end-start))
    print(test_loss / len(mnist.test_loader))
    print(len(mnist.test_loader))
   
    torch.cuda.empty_cache()

<module 'torch.version' from '/home/rita/.local/lib/python3.6/site-packages/torch/version.py'>
cuda
epoch 1:15 - of 0




train accuracy: 0.08
train accuracy: 0.22
train accuracy: 0.37
train accuracy: 0.32


In [None]:
predictions = ((ensemble_output.reshape(num_ensemble,batch_id+1, batch_size,NUM_CLASSES,16)).sum(axis=0)).max(axis=3)
targets = target_output.reshape(batch_id+1,batch_size,NUM_CLASSES)
total_wrong = np.sum(np.not_equal(predictions,targets))
print('Total wrong predictions: {}, wrong percent: {}'.format(
        total_wrong, total_wrong /(batch_id+1) * 100))

In [None]:
def plot_images_separately(images):
    "Plot the six MNIST images separately."
    fig = plt.figure()
    for j in range(1, 7):
    
        ax = fig.add_subplot(1, 6, j)
        if USE_MNIST:
            ax.matshow(images[j-1], cmap = matplotlib.cm.binary)
        else:
            min = (np.min(images[j-1],(1,2,0)))
            max = (np.max(images[j-1],(1,2,0)))
            img = ((np.transpose(images[j-1],(1,2,0))-min)/(max-min)).astype(np.float)
            ax.imshow(img)   
        plt.xticks(np.array([]))
        plt.yticks(np.array([]))
    plt.show()

In [None]:
print(data.size())
print((data[2,:,:,:]).size())
print(np.transpose(data[2,:,:,:].data.cpu().numpy(),(1,2,0)).shape)
min = (np.min(data[2,:,:,:].data.cpu().numpy(),(1,2,0)))
max = (np.max(data[2,:,:,:].data.cpu().numpy(),(1,2,0)))
fig = plt.figure(1)
plt.imshow(((np.transpose(data[2,:,:,:].data.cpu().numpy(),(1,2,0))-min)/(max-min)).astype(np.float))
plt.show()

In [None]:
plot_images_separately(data[:6,:,:,:].data.cpu().numpy())

In [None]:
image = reconstructions[4,:,:,:].data.cpu().numpy()
fig = plt.figure()
minimum = np.min(image)
maximum = np.max(image)
plt.imshow(np.transpose(image,(1,2,0)))
plt.show()
print(image)

In [None]:
plot_images_separately(reconstructions[:6,:,:,:].data.cpu().numpy())

In [None]:
torch.cuda.empty_cache()