In [None]:
from __future__ import print_function

import pandas as pd
import numpy as np
import csv
import os
import torch
import time
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim import Adam

import matplotlib
import matplotlib.pyplot as plt

from PIL import Image
from skimage import io
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

# --- Notebook-wide configuration -------------------------------------------
# When True, the layer classes print intermediate tensor sizes from forward().
verbose = False
# Use the GPU only when one is actually present; hard-coding True makes every
# `.to(device)` call fail on CPU-only machines. Set to False to force CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
# Number of target classes (size of the one-hot labels and of the capsule output).
NUM_CLASSES = 26

In [None]:
class MARVEL_dataset(Dataset):
    """Image dataset driven by a comma-separated MARVEL index (``.dat``) file.

    Every index line is split on ``,`` and read positionally; the columns are
    named ``counter, set, class, label, location`` when written out. Field 1
    (``set``) selects the split (``'1'`` = train, ``'2'`` = test) and field 4
    (``location``) is the image path; records whose location is ``'-'`` are
    dropped.

    Side effect: the selected records are written to ``data_Train.csv`` or
    ``data_Test.csv`` next to ``dat_file`` (overwriting any previous file) and
    read back with pandas, which also infers the column dtypes.

    Args:
        dat_file: path to the index file.
        train: ``True`` selects set ``'1'``, ``False`` selects set ``'2'``.
        transform: optional callable applied to the PIL image of each sample.
    """

    _TRAIN_SET = '1'
    _TEST_SET = '2'
    _MISSING_LOCATION = '-'
    _NUM_FIELDS = 5

    def __init__(self, dat_file, train=True, transform=None):
        self.root_dir = os.path.dirname(dat_file)
        wanted_set = self._TRAIN_SET if train else self._TEST_SET
        csv_name = "data_Train.csv" if train else "data_Test.csv"
        csv_file = os.path.join(self.root_dir, csv_name)

        # Read the index inside a context manager so the handle is closed.
        with open(dat_file) as index_file:
            records = [line.strip().split(',') for line in index_file]

        # newline='' is what the csv module asks for when handing it a file object.
        with open(csv_file, "w", newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(["counter", "set", "class", "label", "location"])
            for record in records:
                # Blank or truncated lines cannot be indexed positionally; skip them
                # instead of failing with IndexError.
                if len(record) < self._NUM_FIELDS:
                    continue
                if record[1] == wanted_set and record[4] != self._MISSING_LOCATION:
                    writer.writerow(record)

        self.MARVEL_datafile = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of records kept for this split."""
        return len(self.MARVEL_datafile)

    def __getitem__(self, idx):
        """Return ``(sample, target)`` for row ``idx``.

        ``target`` is the raw value of the ``class`` column (column 2).
        ``sample`` is the transformed PIL image when a transform was given,
        otherwise the image array returned by the loader.
        """
        img_name = self.MARVEL_datafile.iloc[idx, 4]
        image = self.__loadfile(img_name)
        target = self.MARVEL_datafile.iloc[idx, 2]
        if self.transform:
            sample = self.transform(Image.fromarray(image))
        else:
            sample = image
        return (sample, target)

    def __loadfile(self, data_file):
        """Read an image file and return it as an ``(H, W, 3)`` array."""
        image = io.imread(data_file)
        if len(image.shape) < 3:
            # Grayscale: replicate the single channel three times.
            image = np.stack((image,) * 3, axis=-1)
        elif image.shape[-1] > 3:
            # Drop the alpha channel (RGBA); downstream code expects 3 channels.
            image = image[..., :3]
        return image

In [None]:
class MARVEL26():
    """Builds the MARVEL train/test datasets and their ``DataLoader`` objects.

    Images are resized to 32x32, converted to tensors and normalised per channel
    with mean 0.5 and std 0.5.

    Args:
        batch_size: mini-batch size used by both loaders.
        dat_file: optional path to the MARVEL index file; defaults to
            ``DEFAULT_DAT_FILE``.

    Attributes:
        train_dataset, test_dataset: ``MARVEL_dataset`` instances.
        train_loader, test_loader: shuffling ``DataLoader`` objects over them.
    """

    # NOTE(review): machine-specific absolute path; pass `dat_file` to override.
    DEFAULT_DAT_FILE = "/home/rita/JupyterProjects/EYE-SEA/DataSets/marveldataset2016-master/FINAL.dat"

    def __init__(self, batch_size, dat_file=None):
        dataset_transform = transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        if dat_file is None:
            dat_file = self.DEFAULT_DAT_FILE

        self.train_dataset = MARVEL_dataset(dat_file, train=True, transform=dataset_transform)
        self.test_dataset = MARVEL_dataset(dat_file, train=False, transform=dataset_transform)
        # Honour the requested batch size (it used to be ignored in favour of a
        # hard-coded 100).
        self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=True)
        self.test_loader = DataLoader(self.test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
class ConvLayer(nn.Module):
    """Single 2-D convolution followed by ReLU.

    Args:
        in_channels: number of input channels (default 3).
        out_channels: number of convolution filters (default 256).
        kernel_size: side of the square kernel (default 9).
        stride: convolution stride (default 1).
    """

    def __init__(self, in_channels=3, out_channels=256, kernel_size=9, stride=1):
        super(ConvLayer, self).__init__()

        # `stride` is now forwarded; it used to be accepted but replaced by a literal 1.
        self.conv = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              kernel_size=kernel_size,
                              stride=stride)

    def forward(self, x):
        """Apply the convolution and ReLU to ``x``."""
        if verbose: print("Conv {}".format(x.size()))
        return F.relu(self.conv(x))

In [None]:
class PrimaryCaps(nn.Module):
    """Primary capsule layer: 8 parallel 9x9 stride-2 convolutions (256 -> 32 channels).

    The j-th convolution supplies the j-th component of every capsule, so each
    output capsule is the 8-vector of the convolutions' responses at one
    (channel, row, column) location.
    """

    def __init__(self):
        super(PrimaryCaps, self).__init__()

        num_capsules = 8
        in_channels = 256
        out_channels = 32
        kernel_size = 9

        self.capsules = nn.ModuleList([
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=2, padding=0)
            for _ in range(num_capsules)])

    def forward(self, x):
        """Return squashed capsules of shape ``(batch, channels * H * W, num_convs)``."""
        # Stack on a NEW LAST axis so the capsule components are contiguous:
        # (batch, 32, H, W, 8). Stacking on dim=1 and then calling view() would
        # instead group 8 neighbouring pixels of a single convolution map.
        u = torch.stack([capsule(x) for capsule in self.capsules], dim=-1)
        if verbose: print("PrimaryCaps {}".format(u.size()))
        u = u.view(x.size(0), -1, len(self.capsules))
        if verbose: print(u.size())
        return self.squash(u)

    def squash(self, input_tensor):
        """Squash along the last axis: ``|s|^2 / (1 + |s|^2) * s / |s|``."""
        squared_norm = (input_tensor ** 2).sum(-1, keepdim=True)
        scale = squared_norm / (1. + squared_norm)
        # Small epsilon keeps an all-zero vector from producing 0/0 = NaN.
        output_tensor = scale * input_tensor / torch.sqrt(squared_norm + 1e-8)
        if verbose: print(output_tensor.size())
        return output_tensor

In [None]:
class DigitCaps(nn.Module):
    """Class-capsule layer with three iterations of routing-by-agreement.

    Maps ``num_routes`` input capsules of size ``in_channels`` (8) to
    ``num_capsules`` (``NUM_CLASSES``) output capsules of size 16 through the
    learned transform ``W`` of shape ``(1, num_routes, num_capsules, 16, 8)``.
    """

    def __init__(self):
        super(DigitCaps, self).__init__()

        num_capsules = NUM_CLASSES
        num_routes = 32 * 8 * 8
        in_channels = 8
        out_channels = 16

        self.in_channels = in_channels
        self.num_routes = num_routes
        self.num_capsules = num_capsules

        self.W = nn.Parameter(torch.randn(1, num_routes, num_capsules, out_channels, in_channels))

    def forward(self, x):
        """Route ``x`` of shape ``(batch, num_routes, in_channels)``.

        Returns:
            Tensor of shape ``(batch, num_capsules, out_channels, 1)``.
        """
        # (batch, routes, in) -> (batch, routes, 1, in, 1); matmul broadcasts W over
        # the batch axis and x over the capsule axis, so neither needs to be tiled.
        x = x.unsqueeze(2).unsqueeze(4)
        if verbose: print("DigitCaps {},{}".format(x.size(), self.W.size()))
        u_hat = torch.matmul(self.W, x)  # (batch, routes, capsules, out, 1)
        if verbose: print(u_hat.size())

        # Routing logits, created on the input's device rather than a global one.
        # They are shared by the whole batch (agreement is averaged over dim 0 below).
        b_ij = torch.zeros(1, self.num_routes, self.num_capsules, 1,
                           device=x.device, dtype=x.dtype)

        num_iterations = 3
        for iteration in range(num_iterations):
            # Coupling coefficients: each input capsule distributes its output over
            # the OUTPUT capsules, so normalise over dim=2. (An implicit-dim softmax
            # on this 4-D tensor would normalise over the routes instead.)
            c_ij = F.softmax(b_ij, dim=2).unsqueeze(4)  # (1, routes, capsules, 1, 1)

            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)  # (batch, 1, capsules, out, 1)
            # The capsule vector lives on dim=3; the last axis is a singleton.
            v_j = self.squash(s_j, dim=3)

            if iteration < num_iterations - 1:
                # Agreement <u_hat, v_j> per (route, capsule): (batch, routes, capsules, 1)
                a_ij = (u_hat * v_j).sum(dim=3)
                b_ij = b_ij + a_ij.mean(dim=0, keepdim=True)

        return v_j.squeeze(1)

    def squash(self, input_tensor, dim=-1):
        """Squash vectors along ``dim``: ``|s|^2 / (1 + |s|^2) * s / |s|``.

        ``dim`` defaults to the last axis, matching the previous one-argument call.
        """
        squared_norm = (input_tensor ** 2).sum(dim, keepdim=True)
        scale = squared_norm / (1. + squared_norm)
        # Small epsilon keeps an all-zero vector from producing 0/0 = NaN.
        return scale * input_tensor / torch.sqrt(squared_norm + 1e-8)

In [None]:
class Decoder(nn.Module):
    """Reconstructs a 3x32x32 image from the capsule of the predicted class.

    A three-layer MLP (``16 * NUM_CLASSES`` -> 512 -> 1024 -> 3072) ending in a
    sigmoid, so reconstructions lie in [0, 1].
    """

    def __init__(self):
        super(Decoder, self).__init__()
        # NOTE: the attribute name (including its spelling) is part of the
        # module's state_dict keys, so it is kept as is.
        self.reconstraction_layers = nn.Sequential(
            nn.Linear(16 * NUM_CLASSES, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 3072),
            nn.Sigmoid()
        )

    def forward(self, x, data):
        """Mask all but the longest capsule of each sample and decode it.

        Args:
            x: capsule outputs of shape ``(batch, num_classes, 16, 1)``.
            data: unused; kept so existing callers keep working.

        Returns:
            ``(reconstructions, masked)`` where ``reconstructions`` has shape
            ``(batch, 3, 32, 32)`` and ``masked`` is the one-hot
            ``(batch, num_classes)`` encoding of the predicted class.
        """
        classes = torch.sqrt((x ** 2).sum(2))  # capsule lengths, (batch, num_classes, 1)
        # Normalise over the CLASS axis. An implicit-dim softmax on this 3-D tensor
        # normalises over the batch axis, which made each sample's predicted class
        # depend on the other samples in the batch.
        classes = F.softmax(classes, dim=1)

        _, max_length_indices = classes.max(dim=1)
        # Identity rows act as one-hot vectors; build them on x's device/dtype
        # instead of relying on a global device.
        one_hot_rows = torch.eye(x.size(1), device=x.device, dtype=x.dtype)
        masked = one_hot_rows.index_select(dim=0, index=max_length_indices.view(-1))

        reconstructions = self.reconstraction_layers((x * masked[:, :, None, None]).view(x.size(0), -1))
        reconstructions = reconstructions.view(-1, 3, 32, 32)

        return reconstructions, masked

In [None]:
class CapsNet(nn.Module):
    """Capsule network: conv layer -> primary capsules -> class capsules -> decoder."""

    def __init__(self):
        super(CapsNet, self).__init__()
        self.conv_layer = ConvLayer(in_channels=3, out_channels=256, kernel_size=9)
        self.primary_capsules = PrimaryCaps()
        self.digit_capsules = DigitCaps()
        self.decoder = Decoder()

        self.mse_loss = nn.MSELoss()

    def forward(self, data):
        """Return ``(output, reconstructions, masked)`` for a batch of images.

        ``output`` is the class-capsule tensor; ``reconstructions`` and ``masked``
        are whatever the decoder returns for it.
        """
        output = self.digit_capsules(self.primary_capsules(self.conv_layer(data)))
        reconstructions, masked = self.decoder(output, data)
        return output, reconstructions, masked

    def loss(self, data, x, target, reconstructions):
        """Total loss: margin loss plus the scaled reconstruction loss."""
        return self.margin_loss(x, target) + self.reconstruction_loss(data, reconstructions)

    def margin_loss(self, x, labels, size_average=True):
        """Margin loss on capsule lengths.

        Per class ``k``:
        ``T_k * max(0, 0.9 - |v_k|)^2 + 0.5 * (1 - T_k) * max(0, |v_k| - 0.1)^2``.

        Args:
            x: capsule outputs, ``(batch, num_classes, capsule_dim, 1)``.
            labels: one-hot targets, ``(batch, num_classes)``.
            size_average: average over the batch when True (default), otherwise
                sum over the batch.
        """
        batch_size = x.size(0)

        v_c = torch.sqrt((x ** 2).sum(dim=2, keepdim=True))

        # Both hinge terms are squared, as in the CapsNet margin loss definition.
        left = (F.relu(0.9 - v_c) ** 2).view(batch_size, -1)
        right = (F.relu(v_c - 0.1) ** 2).view(batch_size, -1)

        per_sample = (labels * left + 0.5 * (1.0 - labels) * right).sum(dim=1)
        return per_sample.mean() if size_average else per_sample.sum()

    def reconstruction_loss(self, data, reconstructions):
        """Mean-squared error between flattened inputs and reconstructions, times 0.0005.

        NOTE(review): the decoder ends in a sigmoid (range [0, 1]); if ``data`` was
        normalised to [-1, 1] by the input transform the two ranges do not match.
        Confirm the intended target range.
        """
        loss = self.mse_loss(reconstructions.view(reconstructions.size(0), -1), data.view(reconstructions.size(0), -1))
        return loss * 0.0005

In [None]:
# Build the network, move it to the GPU when CUDA is enabled, and attach Adam.
capsule_net = CapsNet()
if USE_CUDA:
    capsule_net = capsule_net.to(device)
    print('cuda')

trainable_params = capsule_net.parameters()
optimizer = Adam(trainable_params, lr=0.001)

In [None]:
# Mini-batch size; the training/evaluation cell also reads this global.
batch_size = 100

# Train/test datasets and loaders.
marvel = MARVEL26(batch_size=batch_size)

In [None]:
print(torch.__version__)

n_epochs = 3
x = range(0, n_epochs)
mean_batch_accuracy = []
loss_train = []


def _one_hot(labels):
    """One-hot encode 1-based integer class labels into ``(batch, NUM_CLASSES)`` floats."""
    return torch.eye(NUM_CLASSES).index_select(dim=0, index=labels - 1)


def _batch_accuracy(masked, target):
    """Fraction of samples whose predicted one-hot row matches the target row.

    Uses the real number of rows, so a short final batch is not under-reported.
    """
    predicted = np.argmax(masked.data.cpu().numpy(), 1)
    expected = np.argmax(target.data.cpu().numpy(), 1)
    return float(np.mean(predicted == expected))


# ----------------------------- training -----------------------------
start = time.time()

for epoch in range(n_epochs):
    capsule_net.train()
    train_loss = 0

    batch_accuracy = []

    print('epoch {}:{}'.format(epoch + 1, n_epochs))
    for batch_id, (data, target) in enumerate(marvel.train_loader):
        target = _one_hot(target)
        # `device` is the CPU when CUDA is disabled, so this is safe unconditionally.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output, reconstructions, masked = capsule_net(data)
        loss = capsule_net.loss(data, output, target, reconstructions)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float; indexing a 0-dim tensor with [0] fails.
        train_loss += loss.item()

        # Sample the accuracy every 100th batch.
        if batch_id % 100 == 0:
            accuracy = _batch_accuracy(masked, target)
            print("train accuracy:", accuracy)
            batch_accuracy.append(accuracy)
    mean_batch_accuracy.append(np.mean(batch_accuracy))
    del batch_accuracy
    loss_train.append(train_loss / len(marvel.train_loader))
end = time.time()
print("Training time execution {}".format(end - start))

fig = plt.figure(1)
ax = plt.axes()
ax.plot(x, mean_batch_accuracy)
ax.set_title("Mean sampled train accuracy")
ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
fig2 = plt.figure(2)
ax = plt.axes()
ax.plot(x, loss_train)
ax.set_title("Mean train loss")
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
print(train_loss / len(marvel.train_loader))
print(len(marvel.train_loader))

# ----------------------------- evaluation -----------------------------
capsule_net.eval()
test_loss = 0
start = time.time()
# No gradients are needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for batch_id, (data, target) in enumerate(marvel.test_loader):

        target = _one_hot(target)
        data, target = data.to(device), target.to(device)

        output, reconstructions, masked = capsule_net(data)
        loss = capsule_net.loss(data, output, target, reconstructions)

        test_loss += loss.item()

        if batch_id % 100 == 0:
            print("test accuracy:", _batch_accuracy(masked, target))
end = time.time()
print("Test time execution {}".format(end - start))
print(test_loss / len(marvel.test_loader))
print(len(marvel.test_loader))

In [None]:
# Rebuild the loaders and print every test batch's shape. After the loop,
# `data` / `target` hold the last test batch, which the cells below display.
marvel = MARVEL26(batch_size=100)

for batch_id, (data, target) in enumerate(marvel.test_loader):
    batch_shape = data.shape
    print(batch_shape)

In [None]:
def plot_images_separately(images):
    """Plot up to the first six images of ``images`` side by side.

    Args:
        images: sequence/array of channel-first images ``(C, H, W)``. Each image
            is min-max rescaled to [0, 1] independently before being shown.
    """
    fig = plt.figure()
    n_shown = min(6, len(images))
    for j in range(n_shown):
        ax = fig.add_subplot(1, 6, j + 1)
        # Channel-first -> channel-last for imshow. The builtin float replaces
        # np.float, which no longer exists in current NumPy releases.
        img = np.transpose(images[j], (1, 2, 0)).astype(float)
        lowest = img.min()
        value_range = img.max() - lowest
        if value_range > 0:
            img = (img - lowest) / value_range
        else:
            # Constant image: avoid dividing by zero.
            img = np.zeros_like(img)
        ax.imshow(img)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()

In [None]:
# Inspect one input image (index 2 of the current `data` batch).
print(data.size())
print((data[2, :, :, :]).size())
sample_image = np.transpose(data[2, :, :, :].data.cpu().numpy(), (1, 2, 0))
print(sample_image.shape)
# Named pixel_min / pixel_max so the builtins min() and max() are not shadowed
# for the rest of the notebook.
pixel_min = np.min(sample_image)
pixel_max = np.max(sample_image)
fig = plt.figure(1)
# Min-max rescale to [0, 1] for display; builtin float replaces the removed np.float.
plt.imshow(((sample_image - pixel_min) / (pixel_max - pixel_min)).astype(float))
plt.show()

In [None]:
# Show the first six input images of the current batch.
plot_images_separately(data[:6].data.cpu().numpy())

In [None]:
# Display reconstruction #4 of the most recent forward pass, then dump its raw values.
image = reconstructions[4].data.cpu().numpy()
fig = plt.figure()
minimum, maximum = np.min(image), np.max(image)
channel_last = np.transpose(image, (1, 2, 0))  # (C, H, W) -> (H, W, C) for imshow
plt.imshow(channel_last)
plt.show()
print(image)

In [None]:
# Show the first six reconstructions of the most recent forward pass.
plot_images_separately(reconstructions[:6].data.cpu().numpy())

In [None]:
# Ask PyTorch's CUDA caching allocator to release its unused cached memory.
torch.cuda.empty_cache()