Implementação


In [17]:
import math
import numpy as np
import operator

class MLP():
  def __init__(self, input_nodes=1, learning_rate=1e-03, function="sigmoid") -> None:
    self.number_of_nodes = []
    self.number_of_nodes.append(input_nodes)
    self.weights = []
    self.biases = []
    self.function = function
    self.learning_rate = learning_rate


  # Definição das funções custo
  @staticmethod
  def soft_plus(x):
      sp = np.vectorize(lambda y: math.log(1 + math.exp(y)))
      return sp(x)

  @staticmethod
  def relu(x):
      re = np.vectorize(lambda y: max(0, y))
      return re(x)

  @staticmethod
  def sigmoid(x):
      sig = np.vectorize(lambda y:  (1 - 1 / (1 + math.exp(y))) if y < 0 else  (1 / (1 + math.exp(-y))))
      return sig(x)

  # Método para aplicar as funções sigmoidais, soft_plus e relu
  @staticmethod
  def apply_function(x,function):
    if function == "sigmoid":
        return MLP.sigmoid(x)
    elif function == "soft_plus":
        return MLP.soft_plus(x)
    elif function == "relu":
        return MLP.relu(x)

  # Método de cálculo da derivada
  @staticmethod
  def derivative(x, function):
     if function == "sigmoid":
        return np.multiply(x, (1-x))
     elif function == "soft_plus":
        return MLP.sigmoid(x)
     elif function == "relu":
        d_relu = np.vectorize(lambda y: 1 if y > 0 else 0)
        return d_relu(x)

  # Método auxiliar para adicionar camadas ocultas
  def add_layer(self, number_of_nodes: int, weights=None, bias=None):
    self.number_of_nodes.append(number_of_nodes)
    if not weights is None:
        self.weights.append(weights)
    elif len(self.number_of_nodes) > 1:
        self.weights.append(np.random.randn(self.number_of_nodes[-1], self.number_of_nodes[-2]) * np.sqrt(2 / (self.number_of_nodes[-1] + self.number_of_nodes[-2])))

    if not bias is None:
        self.biases.append(bias)
    elif len(self.number_of_nodes) > 1:
        self.biases.append(np.random.uniform(0, 0, size=(number_of_nodes, 1)))


  # Definição da função de feed_forward
  def feed_forward(self, inp):
    out = [np.matrix(inp).T]

    for i in range(len(self.number_of_nodes) - 1):
      y_hat = np.dot(self.weights[i], out[-1]) + self.biases[i]
      out.append(MLP.apply_function(y_hat, self.function))
    return out


  # Aplica o gradiente descendente no passo de trainemtn
  def train_gradient(self, y, out, errors):
      for i in range(len(self.weights)):
          # Calcula o gradiente e a correção dos pesos
          grad = np.multiply(errors[-1-i], MLP.derivative(out[-1-i], self.function))
          grad *= self.learning_rate
          self.biases[-1-i] += grad
          delta_w  = np.dot(grad, out[-2-i].T)
          self.weights[-1-i] += delta_w

      return self.weights


  # Método de treinamento
  def train(self, inp, y, type='gradient'):
    y = np.matrix(y).T
    out = self.feed_forward(inp)
    errors = [np.subtract(y, out[-1])]

    for i in range(len(self.weights) - 1):
        errors.insert(0, np.dot(self.weights[-1-i].T, errors[0]))

    if type == 'gradient':
      return self.train_gradient(y, out, errors)

  # Método de predição
  def predict(self, inp):
    out = self.feed_forward(inp)[-1]
    out = dict(enumerate(out.A1))
    out_class = max(out.items(), key=operator.itemgetter(1))[0]
    out_prob = out[out_class]
    return out_class, out_prob


Explicação do uso


In [18]:
# Inicializando a rede com dois nós de entrada e taxa de aprendizagem igual a 0.2
# Adicionando duas camadas com 2 nós cada.
mlp = MLP(input_nodes=2, learning_rate=.2)
mlp.add_layer(2)
mlp.add_layer(2)

# Carregue o array de entrada e saída aqui
input = []
y = []

epoch = 2000
# Depois faça as iterações de treinamento pelas épocas
for i in range(epoch):
  data = 'random data'
  mlp.train(input, y)

# Vetor xi com entradas não conhecidas
xi = []

# Predição de uma saída yi a partir de uma entrada xi
# Fornece a classe de saída e a probabilidade de acerto
out_class, out_prob = mlp.predict(xi)

ValueError: ignored

Exemplo com a tarefa XOR


In [19]:
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
training_xor = [
    {
        "input": [0, 0],
        "output": [0]
    },
    {
        "input": [1, 0],
        "output": [1]
    },
    {
        "input": [0, 1],
        "output": [1]
    },
    {
        "input": [1, 1],
        "output": [0]
    }
]
def xor():
    # Create a MLP with 2 input, a hidden layer with 2 nodes a single output node
    # Criando uma MLP 
    nn = MLP(input_nodes=2, learning_rate=.2)
    nn.add_layer(2)
    nn.add_layer(4)


    print("Treinando a rede")
    for i in range(20000):
        data = random.choice(training_xor)
        [ws, e] = nn.train(data["input"], data["output"])
        
    
    for i in range(2):
        for j in range(2):
            out_class, out_prob = nn.predict([i, j])
            print("Predição do XOR entre {} e {} obtendo {} e o resultado real é  {} (% de acerto: {:.2f})"
                  .format(i, j, out_prob > .5, bool(i) ^ bool(j), out_prob))
            
xor()

Treinando a rede
Predição do XOR entre 0 e 0 obtendo False e o resultado real é  False (% de acerto: 0.02)
Predição do XOR entre 0 e 1 obtendo True e o resultado real é  True (% de acerto: 0.51)
Predição do XOR entre 1 e 0 obtendo True e o resultado real é  True (% de acerto: 0.98)
Predição do XOR entre 1 e 1 obtendo True e o resultado real é  False (% de acerto: 0.51)


Exemplo com mnist dataset


In [20]:
def filter_pixel(x):
    return x / 255


def process_df(df, type="test"):
    if type == "train":
      label = "6"
    else:
      label = "7"
    labels = df[label]
    df = df.drop([label], axis=1)
    df = df.apply(np.vectorize(filter_pixel))
    df = pd.concat([labels, df], axis=1)
    return df

In [21]:
import pandas as pd

def ocr(training_population=5000, testing_population=1000):
    print("Loading data...")
    train = pd.read_csv('sample_data/mnist_train_small.csv')
    train = process_df(train, "train")
    test_set = pd.read_csv('sample_data/mnist_test.csv')
    test_set = process_df(test_set)
    print("Loaded {} rows for training.".format(train.shape[0]))
    print("Loaded {} rows for testing.".format(test_set.shape[0]))
    nn = MLP(input_nodes=784, learning_rate=.05)
    nn.add_layer(300)
    nn.add_layer(150)
    nn.add_layer(10)

    print("Training the network with {} samples...".format(training_population))
    for i in range(training_population):
        data = train.sample(n=1)
        label = data["6"].tolist()[0]
        inputs = list(data.iloc[0, 1:])
        outputs = [0] * 10
        outputs[label] = 1
        nn.train(inputs, outputs)

    print("Trained successfully.")
    # nn.save("ocr.mlp")
    print("Testing with {} samples...".format(testing_population))
    c_m = np.zeros(shape=(10, 10))
    for i in range(testing_population):
        data = test_set.sample(n=1)
        inputs = list(data.iloc[0, 1:])
        label = data["7"].tolist()[0]
        out_class, out_prob = nn.predict(inputs)
        c_m[label][out_class] += 1

    print("Results:")

    correct_guesses = np.sum(np.diagonal(c_m))
    total_guesses = c_m.sum()
    accuracy = correct_guesses / total_guesses

    recall = 0
    precision = 0
    c_m_t = c_m.T

    for i in range(10):
        correct_guesses = c_m[i][i]
        total_row = np.sum(c_m[i])
        total_col = np.sum(c_m_t[i])
        recall += (correct_guesses / total_row) if total_row > 0 else 0
        precision += (correct_guesses / total_col) if total_col > 0 else 0
    
    recall = recall / 10
    precision = precision / 10

    print("\tRecall: {0:.2f}\n\tPrecision: {0:.2f}\n\tAccuracy: {0:.2f}".format(recall, precision, accuracy))

In [22]:
ocr(training_population=50000, testing_population=5000)

Loading data...
Loaded 19999 rows for training.
Loaded 9999 rows for testing.
Training the network with 50000 samples...
Trained successfully.
Testing with 5000 samples...
Results:
	Recall: 0.96
	Precision: 0.96
	Accuracy: 0.96


Baseado na implementação https://github.com/Fodark/mlp-python
