In [1]:
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.optim import Adam
import torch.nn.functional as F

import csv
from skimage import io

from PIL import Image
import pandas as pd

import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable

import matplotlib.pyplot as plt
import time
import os
import copy

import import_ipynb
import ResNetCaps

verbose = False
USE_CUDA = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

MNIST_USE = False
CIFAR10_USE = True
MARVEL_USE = False

#matrix product for bilinear function
euclidean = False
kronecker = False
outer_m = True

In [2]:
class MARVEL_dataset(Dataset):
    def __init__(self, dat_file,train = True, transform = None):   
        self.root_dir = os.path.dirname(dat_file)
        datContent = [i.strip().split(',') for i in open(dat_file).readlines()]
        if train:
            csv_file = os.path.join(self.root_dir, "data_Train.csv")
        else:
            csv_file = os.path.join(self.root_dir, "data_Test.csv")
        with open(csv_file, "w") as f:
            writer = csv.writer(f,delimiter=',')
            writer.writerow(["counter", "set", "class", "label","location"])
            for line in datContent:
                if train and line[1]=='1':
                    if not(line[4] == '-'):
                        writer.writerows([line])  
                if not(train) and line[1] == '2':
                    if not(line[4]=='-'):
                        writer.writerows([line]) 
                
        self.MARVEL_datafile = pd.read_csv(csv_file)       
        self.transform = transform
        
    def __len__(self):
        return len(self.MARVEL_datafile)
    
    def __getitem__(self,idx):
        img_name = self.MARVEL_datafile.iloc[idx,4]
        image = self.__loadfile(img_name)
        target = self.MARVEL_datafile.iloc[idx,2]
        if self.transform:
            image = Image.fromarray(image)
            sample = self.transform(image)
        else:
            sample = image
        return (sample,target)
    
    def __loadfile(self, data_file):
        image = io.imread(data_file)
        if len(image.shape)<3:
            image = np.stack((image,)*3, axis=-1)
        return image

In [3]:
dataset_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),        
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])


batch_size = 32


if CIFAR10_USE: 
    NUM_CLASSES = 10
    print("CIFAR10")
    image_datasets = {'train': datasets.CIFAR10('../data', train=True, download=True, transform=dataset_transform),'val': datasets.CIFAR10('../data', train=False, download=True, transform=dataset_transform)}
    print("Initializing Datasets and Dataloaders...")

    dataloaders = {'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=batch_size, shuffle=True) , 'val': torch.utils.data.DataLoader(image_datasets['val'], batch_size=batch_size, shuffle=True) }
    print("Initializing Datasets and Dataloaders...")

if MARVEL_USE: 
    NUM_CLASSES = 26
    print("MARVEL")
    dat_file = "/home/rita/JupyterProjects/EYE-SEA/DataSets/marveldataset2016-master/FINAL.dat"

    image_datasets = {'train': MARVEL_dataset(dat_file,train = True,transform=dataset_transform),'val': MARVEL_dataset(dat_file,train = False,transform=dataset_transform)}
    print("Initializing Datasets and Dataloaders...")

    dataloaders = {'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=batch_size, shuffle=True) , 'val': torch.utils.data.DataLoader(image_datasets['val'], batch_size=batch_size, shuffle=True) }
    print("Initializing Datasets and Dataloaders...")

if MNIST_USE: 
    dataset_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),        
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
    ])
    NUM_CLASSES = 10
    print("MNIST")
    image_datasets = {'train': datasets.MNIST('../data', train=True, download=True, transform=dataset_transform),'val': datasets.MNIST('../data', train=False, download=True, transform=dataset_transform)}
    print("Initializing Datasets and Dataloaders...")

    dataloaders = {'train': torch.utils.data.DataLoader(image_datasets['train'], batch_size=batch_size, shuffle=True) , 'val': torch.utils.data.DataLoader(image_datasets['val'], batch_size=batch_size, shuffle=True) }
    print("Initializing Datasets and Dataloaders...")

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

CIFAR10
Files already downloaded and verified
Files already downloaded and verified
Initializing Datasets and Dataloaders...
Initializing Datasets and Dataloaders...


In [4]:
class Bilinear(nn.Module):
    def __init__(self,NUM_CLASSES):
        super(Bilinear, self).__init__()
        #Features function :
        self.modelCaps1 = ResNetCaps.ResNetCaps(NUM_CLASSES)
        self.modelCaps2 = ResNetCaps.ResNetCaps(NUM_CLASSES)
        #Classification function:
        self.modelLin = nn.Linear(NUM_CLASSES, NUM_CLASSES)
        
    def forward(self,inputs):
        #1) Extract features functions vectors (I need to do MATRIX OUTER PRODUCT)
        digit1, masked = self.modelCaps1(inputs)
        digit2, masked = self.modelCaps2(inputs)
        #2) Classification Function    
        output = F.softmax(self.modelLin(self.bilinear(digit1, digit2)),dim=1)
        
        return output, masked
 
    def bilinear(self, A, B):
        self.A = A
        self.B = B
        
        Z_list = []
        labels_size = A.size()[1]
        batch = min(A.size()[0],batch_size)
        for i in range(batch):
        
            a = A[i,:,:,0]
            b = B[i,:,:,0]
            #1.1) Pooling for aggregation of features vectors 
            if euclidean:
                #EUCLIDEAN MATRIX PRODUCT
                if verbose: print("Dim A {} B {}".format(a.shape,b.shape))
                x = torch.mm(a,torch.transpose(b,0,1))
                x = torch.sum(x, dim=1)
            if outer_m:
                #OUTER MATRIX PRODUCT
                k = torch.sum(a,dim=1)
                j = torch.sum(b,dim=1)
                x = torch.ger(k,j)
                x = torch.sum(x, dim=1)          
            if kronecker:
                #KRONECKER MATRIX PRODUCT
                x = torch.kron(a.cpu().numpy(),b.cpu().numpy())
                x = torch.from_numpy(x).float().to(device)
                
            y = torch.sign(x)*torch.sqrt(torch.FloatTensor.abs_(x))
            z = y/torch.norm(y)       
            
            Z_list.append(z)
            
        Z = torch.cat(Z_list,0)
        Z = Z.view(batch,labels_size)
        return Z    

In [7]:
#Load model
model = Bilinear(NUM_CLASSES)
model = model.to(device)

#optimizers
#criterion = nn.CrossEntropyLoss()
criterion = nn.L1Loss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),lr = 0.001)
n_epochs = 30
#train
start = time.time()
#batch_id = 100
#inputs, labels = next(iter(dataloaders['train']))
accuracy_train = []
loss_train = []


for epoch in range(n_epochs): 
    model.train() 
    train_loss = 0
    train_accuracy = 0

    print('epoch {}:{}'.format(epoch+1, n_epochs)) 
    for batch_id, (inputs, labels) in enumerate(dataloaders['train']):
        if MARVEL_USE: labels = labels-1
        labels =torch.eye(NUM_CLASSES).index_select(dim=0, index=labels)
        inputs, labels = Variable(inputs), Variable(labels)
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs, masked = model(inputs)
        print(outputs)
        loss = criterion(outputs, labels)
        print(loss)
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        train_accuracy += (sum(np.argmax(outputs.data.cpu().numpy(), 1) == np.argmax(labels.data.cpu().numpy(), 1)) / float(batch_size))

        if batch_id % 100 == 0:
            print("train accuracy:", sum(np.argmax(outputs.data.cpu().numpy(), 1) == 
                                   np.argmax(labels.data.cpu().numpy(), 1)) / float(batch_size))
            if verbose: print("masked {}".format(np.argmax(masked.data.cpu().numpy(), 1)))
            if verbose: print("labels {}".format(np.argmax(labels.data.cpu().numpy(), 1)))
        #                batch_accuracy.append(sum(np.argmax(preds.data.cpu().numpy(), 1) == 
        #                                       np.argmax(labels.data.cpu().numpy(), 1)) / float(batch_size))
    accuracy_train.append(train_accuracy/len(dataloaders['train']))
    loss_train.append(train_loss/len(dataloaders['train']))
end = time.time()
print("Training time execution {}".format(end-start))
print("Loss value for training phase: {}".format(train_loss / len(dataloaders['train'])))
print("Accuracy value for training phase: {}".format(train_accuracy / len(dataloaders['train'])))


epoch 1:30
tensor([[ 0.0782,  0.0824,  0.1153,  0.1411,  0.0871,  0.1087,  0.1085,
          0.0726,  0.0600,  0.1461],
        [ 0.1781,  0.0892,  0.1025,  0.0810,  0.0566,  0.1077,  0.1410,
          0.0769,  0.0988,  0.0681],
        [ 0.1439,  0.0803,  0.0757,  0.0667,  0.0716,  0.1386,  0.1137,
          0.0783,  0.1303,  0.1009],
        [ 0.1634,  0.0877,  0.1046,  0.0794,  0.0613,  0.1338,  0.1293,
          0.0752,  0.0918,  0.0735],
        [ 0.1158,  0.0877,  0.0848,  0.1136,  0.0641,  0.1493,  0.1030,
          0.0797,  0.1132,  0.0889],
        [ 0.1520,  0.1015,  0.1173,  0.0866,  0.0476,  0.1099,  0.1418,
          0.0820,  0.0944,  0.0669],
        [ 0.0957,  0.1018,  0.1088,  0.1173,  0.0609,  0.1173,  0.1058,
          0.0889,  0.0941,  0.1093],
        [ 0.1723,  0.0813,  0.0779,  0.0729,  0.0747,  0.1244,  0.1328,
          0.0668,  0.1055,  0.0914],
        [ 0.0679,  0.0599,  0.0829,  0.1002,  0.1053,  0.1333,  0.0928,
          0.0797,  0.0665,  0.2116],
        

RuntimeError: Cublas_Sgemv only supports m, n, lda, incx, incyin the range 0 < [val] <= 2147483647 at /pytorch/aten/src/THC/THCBlas.cu:111

In [None]:
epochs = np.arange(1,n_epochs+1)
plt.plot(epochs, loss_train, color='g')
plt.plot(epochs, accuracy_train, color='orange')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training phase')
plt.show() 

In [None]:
model.eval()
test_loss = 0
test_accuracy = 0 
start = time.time()
for batch_id, (inputs, labels) in enumerate(dataloaders['val']):

    labels = torch.eye(NUM_CLASSES).index_select(dim=0, index=labels)
    inputs, labels = Variable(inputs), Variable(labels)

    if USE_CUDA: inputs, labels = inputs.to(device), labels.to(device)#cuda()

    outputs, masked = model(inputs)
    _,label = torch.max(labels, 1)
    loss = criterion(outputs, label.long())

    test_loss += loss.data[0]
    test_accuracy += (sum(np.argmax(outputs.data.cpu().numpy(), 1) == np.argmax(labels.data.cpu().numpy(), 1)) / float(batch_size))

    if batch_id % 100 == 0:
        print("test accuracy:", sum(np.argmax(outputs.data.cpu().numpy(), 1) == 
                               np.argmax(labels.data.cpu().numpy(), 1)) / float(batch_size))
end = time.time()
print("Test time execution {}".format(end-start))
print("Loss value for test phase: {}".format(test_loss /  len(dataloaders['val']))) 
print("Accuracy value for test phase: {}".format(test_accuracy /  len(dataloaders['val'])))


In [None]:
torch.cuda.empty_cache()