In [1]:
import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [2]:
import torch.nn.functional as F
import math

from torch.optim import lr_scheduler

In [3]:
import argparse
import torch.optim as optim
from torchvision import datasets, transforms

In [4]:

# MNIST digits stored under ./data/; every image is converted to a tensor on access.
# Only the training split asks torchvision to download the files.
train_dataset = dsets.MNIST('./data/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = dsets.MNIST('./data/', train=False, transform=transforms.ToTensor())

# Mini-batches of 100 images: shuffled for training, fixed order for testing.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

In [5]:
def squash(x, dim=-1, eps=1e-8):
    """Shrink capsule vectors to a length in [0, 1) while keeping their direction.

    Every vector ``v`` taken along ``dim`` is scaled by
    ``|v|^2 / ((1 + |v|^2) * |v|)``.

    Args:
        x: tensor of capsule vectors; the vector components live along ``dim``.
        dim: axis holding the vector components. The default (last axis) matches
            the previous hard-coded ``dim=2`` for 3-D inputs.
        eps: small constant added under the square root so that an all-zero
            vector maps to zero (and gets a finite gradient) instead of
            producing 0/0 = NaN.

    Returns:
        A tensor with the same shape as ``x``.
    """
    # keepdim=True lets the scale broadcast against x for any input rank.
    lengths2 = x.pow(2).sum(dim=dim, keepdim=True)
    # eps goes inside the sqrt: sqrt(0) has an infinite derivative, which would
    # turn into NaN gradients during backward even if the forward value were fixed.
    lengths = (lengths2 + eps).sqrt()
    return x * (lengths2 / (1 + lengths2) / lengths)

In [6]:
class AgreementRouting(nn.Module):
    """Iteratively combine per-input-capsule predictions into output capsules.

    Coupling coefficients are a softmax over the output capsules of a logit
    matrix ``b``. Each iteration adds to the logits the dot product between
    every prediction and the current output capsule, then recomputes the outputs.

    Args:
        input_caps: number of input capsules.
        output_caps: number of output capsules.
        n_iterations: number of refinement iterations run after the initial pass.
    """

    def __init__(self, input_caps, output_caps, n_iterations):
        super(AgreementRouting, self).__init__()
        self.n_iterations = n_iterations
        # Routing logits are a Parameter, so they are shared by all samples and
        # returned by ``parameters()``. Zero init makes the first softmax uniform.
        self.b = nn.Parameter(torch.zeros((input_caps, output_caps)))

    def forward(self, u_predict):
        """Route predictions to output capsules.

        Args:
            u_predict: tensor of shape
                ``(batch_size, input_caps, output_caps, output_dim)``.

        Returns:
            Tensor of shape ``(batch_size, output_caps, output_dim)``.
        """
        batch_size, input_caps, output_caps, output_dim = u_predict.size()

        # Initial pass with the shared logits. ``dim`` is explicit: relying on the
        # implicit softmax dimension is deprecated and emits a warning per call.
        c = F.softmax(self.b, dim=1)
        s = (c.unsqueeze(2) * u_predict).sum(dim=1)
        v = squash(s)

        if self.n_iterations > 0:
            # Per-sample copy of the logits, refined independently for each sample.
            b_batch = self.b.expand((batch_size, input_caps, output_caps))
            for _ in range(self.n_iterations):
                # Agreement: dot product of every prediction with its output capsule.
                v = v.unsqueeze(1)
                b_batch = b_batch + (u_predict * v).sum(-1)

                # Softmax over output capsules; trailing axis broadcasts over output_dim.
                c = F.softmax(b_batch, dim=2).unsqueeze(3)
                s = (c * u_predict).sum(dim=1)
                v = squash(s)

        return v

In [7]:
class Encode_layer(nn.Module):
    """Turn a flat input vector into a set of squashed capsules.

    A single linear layer produces ``output_caps * output_dim`` activations,
    which are rectified, grouped into ``output_caps`` vectors of ``output_dim``
    values each, and passed through ``squash``.
    """

    def __init__(self, input_dim, output_dim, output_caps):
        super().__init__()
        self.output_caps = output_caps
        self.output_dim = output_dim
        self.layer = nn.Linear(input_dim, output_caps * output_dim)

    def forward(self, input):
        """Return capsules of shape ``(batch, output_caps, output_dim)``."""
        activations = F.relu(self.layer(input))
        # Split the flat feature axis into (capsule, component).
        capsules = activations.view(activations.shape[0], self.output_caps, self.output_dim)
        return squash(capsules)

In [8]:
class Neuron_layer(nn.Module):
    """Capsule-to-capsule layer: per-input-capsule linear maps followed by routing.

    Each input capsule owns a weight matrix that maps its ``input_dim`` vector
    to one ``output_dim`` prediction per output capsule. The predictions are
    handed to ``routing``, whose result is returned unchanged.
    """

    def __init__(self, input_neurons, input_dim, output_dim, output_neurons, routing):
        super().__init__()
        self.input_neurons, self.input_dim = input_neurons, input_dim
        self.output_neurons, self.output_dim = output_neurons, output_dim
        self.routing = routing
        # One (input_dim x output_neurons*output_dim) matrix per input capsule.
        self.weights = nn.Parameter(
            torch.Tensor(input_neurons, input_dim, output_neurons * output_dim))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill the weights uniformly in ``[-1/sqrt(input_neurons), 1/sqrt(input_neurons)]``."""
        bound = 1. / math.sqrt(self.input_neurons)
        self.weights.data.uniform_(-bound, bound)

    def forward(self, input):
        """Compute predictions for every (input, output) capsule pair and route them."""
        # The extra axis makes each capsule a 1 x input_dim row for the batched
        # matmul; input is presumably (batch, input_neurons, input_dim).
        votes = torch.matmul(input.unsqueeze(2), self.weights)
        votes = votes.view(
            votes.size(0), self.input_neurons, self.output_neurons, self.output_dim)
        return self.routing(votes)
        

In [15]:
class Learning(nn.Module):
    """Three-stage capsule classifier over flattened 784-value inputs.

    Stages: an encoder producing 4 capsules of 128 values, a routed layer
    producing 3 capsules of 32 values, and a routed layer producing 10
    capsules of 16 values. The forward pass returns the length of each of the
    10 final capsules.
    """

    def __init__(self, routing_iterations):
        super().__init__()
        self.network1 = Encode_layer(784, 128, 4)
        self.network2 = Neuron_layer(
            4, 128, 32, 3, AgreementRouting(4, 3, routing_iterations))
        self.network3 = Neuron_layer(
            3, 32, 16, 10, AgreementRouting(3, 10, routing_iterations))

    def forward(self, input):
        """Return the Euclidean length of every final capsule, one row per sample."""
        capsules = input
        for stage in (self.network1, self.network2, self.network3):
            capsules = stage(capsules)

        squared_length = capsules.pow(2).sum(dim=2)
        return squared_length.sqrt()

In [16]:
# Build the network with three routing iterations per routed layer.
model = Learning(routing_iterations=3)

In [17]:
class MarginLoss(nn.Module):
    """Margin loss on capsule lengths.

    For the target class the penalty is ``relu(m_pos - length)^2``; for every
    other class it is ``lambda_ * relu(length - m_neg)^2``.
    """

    def __init__(self, m_pos, m_neg, lambda_):
        super().__init__()
        self.m_pos, self.m_neg, self.lambda_ = m_pos, m_neg, lambda_

    def forward(self, lengths, targets, size_average=True):
        """Return the mean (or the sum when ``size_average`` is false) of all per-class terms."""
        # One-hot encode the integer labels with the same shape as `lengths`.
        one_hot = torch.zeros(lengths.size()).long()
        if targets.is_cuda:
            one_hot = one_hot.cuda()
        one_hot.scatter_(1, targets.data.view(-1, 1), 1)
        is_target = Variable(one_hot).float()

        present_term = is_target * F.relu(self.m_pos - lengths).pow(2)
        absent_term = self.lambda_ * (1. - is_target) * F.relu(lengths - self.m_neg).pow(2)
        losses = present_term + absent_term

        if size_average:
            return losses.mean()
        return losses.sum()


In [18]:
# Adam over every model parameter.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Plateau-based learning-rate schedule with a floor of 1e-6.
# NOTE(review): no cell shown here calls scheduler.step(...), so as written the
# learning rate never changes - confirm whether that is intended.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=15,
    min_lr=1e-6,
    verbose=True,
)

# Margin loss: positive margin 0.9, negative margin 0.1, negative-term weight 0.5.
loss_fn = MarginLoss(m_pos=0.9, m_neg=0.1, lambda_=0.5)

In [19]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and print progress.

    Uses the notebook-level ``model``, ``optimizer``, ``loss_fn`` and
    ``train_loader``. Every tenth batch prints the running position and that
    batch's loss; after the pass the training accuracy is printed.

    Args:
        epoch: epoch index, used only in the progress message.
    """
    model.train()
    correct = 0
    n_batches = len(train_loader)
    n_samples = len(train_loader.dataset)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Flatten every image to a single feature vector.
        data = data.view(data.shape[0], -1)
        optimizer.zero_grad()
        probs = model(data)

        loss = loss_fn(probs, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            # .item() instead of .data[0]: the loss is a 0-dim tensor, and indexing
            # it raises IndexError on current PyTorch releases.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), n_samples,
                100. * batch_idx / n_batches, loss.item()))

        pred = probs.data.max(1, keepdim=True)[1]  # index of the longest output capsule
        # Accumulate a Python int; summing tensors would leave `correct` as a tensor
        # and make the final percentage depend on tensor division semantics.
        correct += pred.eq(target.data.view_as(pred)).sum().item()

    print('Accuracy : {:.2f}%'.format(100. * correct / n_samples))

In [20]:
import numpy as np

In [21]:
# Ten passes over the training set; the epoch index is echoed before each pass.
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    print(epoch)
    train(epoch)

0


  # Remove the CWD from sys.path while we load stuff.


Accuracy : 82.44%
1
Accuracy : 94.91%
2


Accuracy : 96.44%
3
Accuracy : 97.31%
4
Accuracy : 97.81%
5


Accuracy : 98.23%
6
Accuracy : 98.56%
7
Accuracy : 98.86%
8


Accuracy : 99.03%
9
Accuracy : 99.23%


In [23]:
# Test: classification accuracy of `model` over `test_loader`.
model.eval()
total = 0
correct = 0
# Original author's open question: does testing fail when the batch size differs?
# Gradients are not needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(images.shape[0], -1)  # flatten each image
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a Python int so the percentage below is plain
        # Python arithmetic rather than (possibly integer) tensor division.
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))


  # Remove the CWD from sys.path while we load stuff.


Test Accuracy of the model on the 10000 test images: 97 %


In [24]:
# One test image per batch, in dataset order, for the per-example adversarial loop.
adv_test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

In [26]:
# Fast-gradient-sign attack on the first test images, one image at a time.
# For each image: take the gradient of the loss w.r.t. the input pixels, move
# every pixel by `epsilon` in the direction of that gradient's sign, clamp back
# to [0, 1], and compare the model's prediction before and after.
epsilon = 0.1                       # per-pixel perturbation size
max_examples = 100                  # number of test images to attack
criterion = nn.CrossEntropyLoss()   # built once instead of once per image

dummy = 0   # number of examples processed so far
s = 0.      # examples whose prediction changed under the perturbation
t = 0.      # examples misclassified before the perturbation

for (x, y) in adv_test_loader:
    x, y = Variable(x, requires_grad=True), Variable(y, requires_grad=False)
    # Parameter gradients are irrelevant here; clear them so they do not pile up.
    model.zero_grad()
    probs = model(x.view(x.shape[0], -1))
    y_pred = np.argmax(probs.data.numpy())
    loss = criterion(probs, y)
    loss.backward()
    x_grad = torch.sign(x.grad.data)
    x_adversarial = torch.clamp(x.data + epsilon * x_grad, 0, 1)
    adversarial_probs = model(Variable(x_adversarial).view(x.shape[0], -1))
    y_pred_adversarial = np.argmax(adversarial_probs.data.numpy())

    dummy += 1
    print("{0} th example ".format(dummy))
    print ("True value: "+ str(y.data.numpy()[0])+"\nPredicted value : "+str(y_pred)+ "\nAdversarial :" + str(y_pred_adversarial)+"\n" )

    if y.data.numpy()[0] != y_pred:
        t += 1

    if y_pred != y_pred_adversarial:
        s += 1

    if dummy == max_examples:
        break

# Accuracy is the share of correctly classified clean examples (the counter `t`
# holds the errors), and both rates are divided by the number of examples
# actually processed.
if dummy:
    print("Accuracy of test_model : {0:.3f} , Adversarials : {1:.3f}".format(((dummy - t)/dummy)*100, (s/dummy)*100))

  # Remove the CWD from sys.path while we load stuff.


1 th example 
True value: 7
Predicted value : 7
Adversarial :2

2 th example 
True value: 2
Predicted value : 2
Adversarial :5

3 th example 
True value: 1
Predicted value : 1
Adversarial :8

4 th example 
True value: 0
Predicted value : 0
Adversarial :2

5 th example 
True value: 4
Predicted value : 4
Adversarial :7

6 th example 
True value: 1
Predicted value : 1
Adversarial :7

7 th example 
True value: 4
Predicted value : 4
Adversarial :8

8 th example 
True value: 9
Predicted value : 9
Adversarial :3

9 th example 
True value: 5
Predicted value : 6
Adversarial :6

10 th example 
True value: 9
Predicted value : 9
Adversarial :4

11 th example 
True value: 0
Predicted value : 0
Adversarial :0

12 th example 
True value: 6
Predicted value : 6
Adversarial :8

13 th example 
True value: 9
Predicted value : 9
Adversarial :4

14 th example 
True value: 0
Predicted value : 0
Adversarial :7

15 th example 
True value: 1
Predicted value : 1
Adversarial :3

16 th example 
True value: 5
Predi