In [1]:
%matplotlib inline

import numpy as np 
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

In [2]:
# Definir a arquitetura MLP

class MLP(nn.Module):

    def __init__(self, layers):
        super(MLP, self).__init__()
        self.layers = layers
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 4)
        self.activation_function = torch.nn.LeakyReLU()
        
    def forward(self, x):
        x = x.view(-1, 2)
        x = self.activation_function(self.fc1(x))
        x = self.activation_function(self.fc2(x))
        return x

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MLP([]).to(device)
print(device)
print(model)

# Definir otimizador e loss
# Nota: testar outros otimizadores e funções de loss (em particular cross entropy)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.1)
loss_fn = torch.nn.CrossEntropyLoss().cuda()

cuda:0
MLP(
  (fc1): Linear(in_features=2, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=4, bias=True)
  (activation_function): ReLU()
)


In [4]:

labels = [[0], [1], [2], [3]]
inputs = [[1,1], [1,0], [0,1], [0,0]]
train = []
for i in range(4):
    train.append((inputs[i],labels[i]))

In [5]:
import random

for epoch in range(1000):
    error = 0
#     random.shuffle(train)
    for i, (inputs, labels) in enumerate(train):
        inputs = torch.Tensor(np.array(inputs)).to(device)
        labels = torch.LongTensor(np.array(labels)).to(device)
        optimizer.zero_grad()
        y_pred = model(inputs)
        loss = loss_fn(y_pred, labels)
        loss.backward()
        optimizer.step()
        error+=loss

    print(epoch, error/4)

0 tensor(1.4635, device='cuda:0', grad_fn=<DivBackward0>)
1 tensor(1.4307, device='cuda:0', grad_fn=<DivBackward0>)
2 tensor(1.4068, device='cuda:0', grad_fn=<DivBackward0>)
3 tensor(1.3852, device='cuda:0', grad_fn=<DivBackward0>)
4 tensor(1.3666, device='cuda:0', grad_fn=<DivBackward0>)
5 tensor(1.3518, device='cuda:0', grad_fn=<DivBackward0>)
6 tensor(1.3376, device='cuda:0', grad_fn=<DivBackward0>)
7 tensor(1.3230, device='cuda:0', grad_fn=<DivBackward0>)
8 tensor(1.3121, device='cuda:0', grad_fn=<DivBackward0>)
9 tensor(1.3030, device='cuda:0', grad_fn=<DivBackward0>)
10 tensor(1.2936, device='cuda:0', grad_fn=<DivBackward0>)
11 tensor(1.2841, device='cuda:0', grad_fn=<DivBackward0>)
12 tensor(1.2745, device='cuda:0', grad_fn=<DivBackward0>)
13 tensor(1.2647, device='cuda:0', grad_fn=<DivBackward0>)
14 tensor(1.2547, device='cuda:0', grad_fn=<DivBackward0>)
15 tensor(1.2447, device='cuda:0', grad_fn=<DivBackward0>)
16 tensor(1.2341, device='cuda:0', grad_fn=<DivBackward0>)
17 tens

142 tensor(0.7045, device='cuda:0', grad_fn=<DivBackward0>)
143 tensor(0.7007, device='cuda:0', grad_fn=<DivBackward0>)
144 tensor(0.7005, device='cuda:0', grad_fn=<DivBackward0>)
145 tensor(0.7003, device='cuda:0', grad_fn=<DivBackward0>)
146 tensor(0.7001, device='cuda:0', grad_fn=<DivBackward0>)
147 tensor(0.7000, device='cuda:0', grad_fn=<DivBackward0>)
148 tensor(0.6998, device='cuda:0', grad_fn=<DivBackward0>)
149 tensor(0.6997, device='cuda:0', grad_fn=<DivBackward0>)
150 tensor(0.6996, device='cuda:0', grad_fn=<DivBackward0>)
151 tensor(0.6994, device='cuda:0', grad_fn=<DivBackward0>)
152 tensor(0.6993, device='cuda:0', grad_fn=<DivBackward0>)
153 tensor(0.6992, device='cuda:0', grad_fn=<DivBackward0>)
154 tensor(0.6991, device='cuda:0', grad_fn=<DivBackward0>)
155 tensor(0.6990, device='cuda:0', grad_fn=<DivBackward0>)
156 tensor(0.6989, device='cuda:0', grad_fn=<DivBackward0>)
157 tensor(0.6988, device='cuda:0', grad_fn=<DivBackward0>)
158 tensor(0.6987, device='cuda:0', grad

291 tensor(0.6952, device='cuda:0', grad_fn=<DivBackward0>)
292 tensor(0.6952, device='cuda:0', grad_fn=<DivBackward0>)
293 tensor(0.6957, device='cuda:0', grad_fn=<DivBackward0>)
294 tensor(0.6974, device='cuda:0', grad_fn=<DivBackward0>)
295 tensor(0.6957, device='cuda:0', grad_fn=<DivBackward0>)
296 tensor(0.6957, device='cuda:0', grad_fn=<DivBackward0>)
297 tensor(0.6956, device='cuda:0', grad_fn=<DivBackward0>)
298 tensor(0.6956, device='cuda:0', grad_fn=<DivBackward0>)
299 tensor(0.6956, device='cuda:0', grad_fn=<DivBackward0>)
300 tensor(0.6956, device='cuda:0', grad_fn=<DivBackward0>)
301 tensor(0.6955, device='cuda:0', grad_fn=<DivBackward0>)
302 tensor(0.6955, device='cuda:0', grad_fn=<DivBackward0>)
303 tensor(0.6955, device='cuda:0', grad_fn=<DivBackward0>)
304 tensor(0.6955, device='cuda:0', grad_fn=<DivBackward0>)
305 tensor(0.6954, device='cuda:0', grad_fn=<DivBackward0>)
306 tensor(0.6954, device='cuda:0', grad_fn=<DivBackward0>)
307 tensor(0.6954, device='cuda:0', grad

430 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
431 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
432 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
433 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
434 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
435 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
436 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
437 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
438 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
439 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
440 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
441 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
442 tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
443 tensor(0.6943, device='cuda:0', grad_fn=<DivBackward0>)
444 tensor(0.6943, device='cuda:0', grad_fn=<DivBackward0>)
445 tensor(0.6943, device='cuda:0', grad_fn=<DivBackward0>)
446 tensor(0.6943, device='cuda:0', grad

576 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
577 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
578 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
579 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
580 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
581 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
582 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
583 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
584 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
585 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
586 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
587 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
588 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
589 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
590 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
591 tensor(0.6940, device='cuda:0', grad_fn=<DivBackward0>)
592 tensor(0.6940, device='cuda:0', grad

714 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
715 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
716 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
717 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
718 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
719 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
720 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
721 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
722 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
723 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
724 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
725 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
726 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
727 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
728 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
729 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
730 tensor(0.6938, device='cuda:0', grad

855 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
856 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
857 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
858 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
859 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
860 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
861 tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
862 tensor(0.6939, device='cuda:0', grad_fn=<DivBackward0>)
863 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
864 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
865 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
866 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
867 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
868 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
869 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
870 tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
871 tensor(0.6938, device='cuda:0', grad

In [6]:
for i, (inputs, labels) in enumerate(train):
    inputs = torch.Tensor(np.array(inputs)).to(device)
    labels = torch.Tensor(np.array(labels)).to(device)
    print(model(inputs))
    print(labels)
    print()

tensor([[0., 0., 0., 0.]], device='cuda:0', grad_fn=<ThresholdBackward0>)
tensor([0.], device='cuda:0')

tensor([[0.0000, 8.0314, 0.0000, 0.0000]], device='cuda:0',
       grad_fn=<ThresholdBackward0>)
tensor([1.], device='cuda:0')

tensor([[0.0000, 0.0000, 8.3271, 0.0000]], device='cuda:0',
       grad_fn=<ThresholdBackward0>)
tensor([2.], device='cuda:0')

tensor([[0., 0., 0., 0.]], device='cuda:0', grad_fn=<ThresholdBackward0>)
tensor([3.], device='cuda:0')

