In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt

# sigmoid function
# we don't use the logistic itself since it is slightly more costly
from scipy.special import expit 

import pandas as pd

In [21]:
class MLP():
    def __init__(self, hidden_layers=1, dimensions=[],
                classification=True, learning_rate=1, use_bias=True):
        if not dimensions:
            raise ValueError("Must pass input dimensions!")
        if len(dimensions) < 2:
            raise ValueError("At least 2 layers needed")
            
        self.classification = classification
        self.use_bias = use_bias
        self.lr = learning_rate
        
        # initialize weights - we suposse our bias is built into the weights
        self.weights = []
        
        for idx, (input_dim, output_dim) in enumerate(dimensions):
            if use_bias:
                input_dim += 1
                if idx != len(dimensions) - 1:
                    output_dim += 1
                    
            print(idx, input_dim, output_dim)    
            self.weights.append(np.random.uniform(low=-0.1, high=0.100001, 
                                                      size=(input_dim, output_dim)))
            if use_bias:
                if idx != 0:
                    self.weights[idx] = self.extend_with_bias(self.weights[idx])
                    
                     
    def extend_with_bias(self, matrix):
        # Add bias term to a dataset
        new = np.ones((matrix.shape[0], 
                            matrix.shape[1]+1))
        new[:, :-1] = matrix
        return new
    
    def forward_pass(self, x_train):
        # Output of each layer
        output1 = expit(x_train.dot(self.weights[0]))
        output2 = expit(output1.dot(self.weights[1]))
        return output1, output2
    
    def fit(self, original_x, original_y, n_iter=100, testing=False,
           verbose=False):
        
        if self.use_bias:
            x_train = self.extend_with_bias(original_x)
        else:
            x_train = original_x
            
        y_train = original_y.reshape(len(original_y), 1)
           
        for _ in range(n_iter):   
            output1, output2 = self.forward_pass(x_train)
            
            # Error and derivate of 2nd layer
            layer2_error = (y_train - output2)
            layer2_delta = layer2_error * (output2*(1-output2))

            # Error and derivate of 1st layer
            #layer1_error = layer2_delta.dot(self.weights[1].T)
            layer1_error = np.dot(layer2_delta, self.weights[1].T)
            layer1_delta = layer1_error * output1*(1-output1)

            # Update weights with learning rate
            self.grad1 = x_train.T.dot(layer1_delta)
            self.grad2 = output1.T.dot(layer2_delta)
            if verbose:
                print("Total loss:", layer2_error)
                
            self.weights[0] += self.lr*self.grad1
            self.weights[1] += self.lr*self.grad2
            
    def predict(self, x_test):
        if self.use_bias:
            new_x_test = self.extend_with_bias(x_test)
        else:
            new_x_test = x_test
        _, output = self.forward_pass(new_x_test)
        
        if self.classification:
            return (output > 0.5).astype(int)
        else:
            return output
    

In [22]:
x = []
y = []

for i in [0, 1]:
    for j in [0, 1]:
        x.append([i, j])
        y.append(i != j)

x = np.array(x)
y = np.array(y)
# Use this if you want more than 1 output node:
#y = np.vstack((y, y)).T
y = y.astype(int)

In [23]:
dimensions = [(2,5), (5,1)]
#mlp = MLP(dimensions=input_dim=2, middle_dim=5, output_dim=1, learning_rate=10, 
#         use_bias=False)
mlp = MLP(dimensions=dimensions, learning_rate=10, 
         use_bias=True)

mlp.fit(x, y, n_iter=10000, verbose=False)

0 3 6
1 6 1


In [26]:
preds = mlp.predict(x)#.reshape(1, -1)
#pd.DataFrame(np.vstack((preds, y)).T, columns=["MLP predito", "Resposta"])

In [27]:
preds

array([[0, 0],
       [1, 1],
       [1, 1],
       [0, 0]])

**RESULTADOS:**

Conforme podemos observar acima, conseguimos aprender o XOR, dado a não a maior capacidade do modelo ao incluir 2 camadas, diferente de um Perceptron/Adalaide simples.



- - - 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>

# Pergunta 2 - Autoencoders

In [6]:
# Define sizes for auto-encoder
input_size = 8
#input_dim = input_size*input_size
#middle_dim = int(np.log2(input_dim))
#output_dim = input_dim
input_dim = input_size
middle_dim = int(np.log2(input_dim))
output_dim = input_dim

print(f"{input_dim}->{middle_dim}->{output_dim}")

x = np.identity(input_size).reshape(input_size, -1)
y = x.copy()

print(x.shape)

8->3->8
(8, 8)


In [7]:
def autoencode():
    mlp = MLP(input_dim=input_dim+1, middle_dim=middle_dim, output_dim=output_dim, 
              learning_rate=10, use_bias=True, classification=False)
    mlp.fit(x, y, n_iter=100000, verbose=False, )

    preds = mlp.predict(x)
    preds[preds < 0.5] = 0
    preds[preds >= 0.5] = 1
    return preds

In [8]:
autoencode()

array([[0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.]])

In [9]:
# Define sizes for auto-encoder
input_size = 15
#input_dim = input_size*input_size
#middle_dim = int(np.log2(input_dim))
#output_dim = input_dim
input_dim = input_size
middle_dim = int(np.log2(input_dim))
output_dim = input_dim

print(f"{input_dim}->{middle_dim}->{output_dim}")

x = np.identity(input_size).reshape(input_size, -1)
y = x.copy()

print(x.shape)

15->3->15
(15, 15)


In [10]:
# Agora para 15 elementos
x = np.identity(input_size).reshape(input_size, -1)
y = x.copy()

print(x.shape)

(15, 15)


In [11]:
autoencode()

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0.

**RESULTADOS:**

Nossa rede tem dificuldades em aprender o padrão dos dados, dado apenas 1 camada escondida, e não consegue passar do "chute" trivial de falar que todos os elementos são 0.


- - - 

<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>