In [3309]:
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [3364]:
class Neuron:
    def __init__(self, input_shape):
      self.input_shape = input_shape
      self.bias = 1      
      self.bias_weight = np.random.uniform(low=-1, high=1.0, size=1)[0]
      self.weight_matrix = np.random.uniform(low=-1, high=1.0, size=self.input_shape)

    def sigmoid(self,x):
      return 1/(1+np.exp(-x))
    
    def threshold(self, x):
      if x >= 0:
        return 1
      else:
        return -1
    
    def dot_product(self, input_pattern): # todo: move inside layer class
      return np.dot(input_pattern, self.weight_matrix)
        
    def activate(self, x, activation):
      if activation == 'linear':
        return x
      if activation == 'sigmoid':
        return self.sigmoid(x)
      if activation == 'tanh':
        return np.tanh(x)
      if activation == 'threshold':
        return self.threshold(x)

class Layer():
  def __init__(self, input_shape): 
    self.neurons = []
    self.input_shape = input_shape
    for i in range(input_shape[0]):
      self.neurons.append(Neuron(input_shape=input_shape[1]))
        
class MultiLayerPerceptron: 
  
  def build_layers(self, input_shape):
    for i in range(0, len(input_shape)-1):
      self.layers.append(Layer(input_shape=(input_shape[i+1],input_shape[i])))
  
  '''
      input_shape: Array or tuple with the corresponding
      number of neurons per layer.
      
  '''
  def __init__(self, input_shape):
    self.layers = []
    self.build_layers(input_shape)
    self.weight_matrix = [None] * len(input_shape)
    self.weight_matrix[0] = np.random.uniform(low=-1, high=1.0, size=(input_shape[0]))
    self.input_shape = input_shape

  def derivative(self, x, activation):
    if activation == 'tanh':
      return (1 - (np.tanh(x)**2))
    
  def forward_pass(self, input_pattern):
    first_out = []
    inact_out = []
    for neuron in self.layers[0].neurons:
      dot = neuron.bias * neuron.bias_weight + neuron.dot_product(input_pattern) # 
      induced_local_field = neuron.activate(dot, 'tanh')
      first_out.append(induced_local_field)
      inact_out.append(dot)
    output_by_layer = [first_out]
    inactivated_outputs = [inact_out]
    for i in range(len(self.layers)-1):
      layer_out = []
      inact_out = []
      for neuron in self.layers[i+1].neurons:
        dot = neuron.bias * neuron.bias_weight + neuron.dot_product(inactivated_outputs[i]) 
        inact_out.append(dot)
        dot = neuron.bias * neuron.bias_weight + neuron.dot_product(output_by_layer[i])
        induced_local_field = neuron.activate(dot, 'tanh')
        layer_out.append(induced_local_field)
      output_by_layer.append(layer_out)    
      inactivated_outputs.append(inact_out)
    return output_by_layer, inactivated_outputs 
  
  def backpropagate(self, output_by_layer, inact_output_by_layer, learning_rate, input_pattern, desired_output):
    total_error = []
    lr = learning_rate # 0.01 # test 
    alpha = 0.9
    for l in reversed(range(len(self.layers))):
      # If it is an output layer:
      if (l+1) == len(self.layers):
        #print('Output layer')
        for j in range(len(output_by_layer[l])):
          neuron_output = output_by_layer[l][j]
          error = (desired_output[j] - neuron_output) # (error**2)/2 
          total_error.append(error)
          #print('Error by neuron:', error, 'OUTPUT_LAYER')
          # Update bias weight:
          current_neuron = self.layers[l].neurons[j]
          local_field_neuron_j = inact_output_by_layer[l][j]
          delta = error * self.derivative(local_field_neuron_j, 'tanh')
          momentum = alpha * delta
          current_neuron.bias_weight = current_neuron.bias_weight + (lr * delta) + momentum
          # Update weights to previous layer neurons:
          for i in range(len(current_neuron.weight_matrix)):
            y_sub_i = output_by_layer[l-1][i]
            nabla_sub_ji = lr * delta * y_sub_i # nabla
            current_neuron.weight_matrix[i] = current_neuron.weight_matrix[i] + nabla_sub_ji
          # nabla_ji = eta * delta_j(n) * y_i(n)
          # where delta_j(n) equals to: e_j(n) * fi'_j(v_j(n)) which corresponds to the local gradient
          # where y_i(n) equals to fi_i(v_i(n)) which corresponds to the input signal of neuron j           
      else:
        for j in range(len(output_by_layer[l])): 
          accumulated_delta_k = 0
          # Code below assumes that the (next) layer l+1 is the output layer therefore should only work for 2 layers networks at first.
          # Despite that, making it generalizable should be straightforward since implies 
          # only testing whether layer 'l+1' is an output node or not (TO-DO).
          for k in range(len(output_by_layer[l+1])):  # For each neuron k (if output layer) do:
            neuron_output = output_by_layer[l+1][k]
            error = (desired_output[k] - neuron_output)
            local_field_neuron_k = inact_output_by_layer[l+1][k]
            delta_sub_k = error * self.derivative(local_field_neuron_k, 'tanh') # Get the weighted sum of the local gradients by the
            w_sub_kj = self.layers[l+1].neurons[k].weight_matrix[j] # corresponding weight connections between neurons j and k.
            accumulated_delta_k += delta_sub_k * w_sub_kj  # Propagate them back as error for updating the weights on neuron j.
          # Weight Update Rule:
          local_field_neuron_j = inact_output_by_layer[l][j]
          fi_sub_j = self.derivative(local_field_neuron_j, 'tanh')
          delta_j  =  fi_sub_j * accumulated_delta_k
          # Update weights:
          neuron_j = self.layers[l].neurons[j]
          for i in range(len(neuron_j.weight_matrix)):
            # TODO: add momentum 
            nabla = lr * (delta_j * input_pattern[i])
            # w -> (neuron_j.weight_matrix[i]) + nabla
            momentum = alpha * nabla
            neuron_j.weight_matrix[i] = neuron_j.weight_matrix[i] + nabla + momentum
          # Update bias:
          neuron_j.bias_weight += delta_j * lr
    total_energy = 0
    for error in total_error:
      total_energy += (error**2)
    return total_energy/2
          
  def test(self, X, y):
    predictions = []
    labels = []
    for (x_test, y_test) in zip(X,y):
      out, inact = self.forward_pass(x_test)
      predictions.append(np.argmax(out[len(out)-1]))
      labels.append(np.argmax(y_test))
    acc = accuracy_score(y_true=labels, y_pred=predictions)
    return acc    
  
  def train(self, X, y, learning_rate, error):
    prev_mse = 999999999
    current_mse  = 0
    epochs = 0
    while (abs(current_mse - prev_mse) > error):
      prev_mse = current_mse
      mse = 0
      for (X_input, y_input) in zip(X,y):
        out, inactivated_out = self.forward_pass(X_input)
        mse += self.backpropagate(out, inactivated_out, learning_rate, X_input, y_input)
      current_mse = (mse / len(X))
      epochs += 1
    print('Épocas:', epochs, ' | | Final MSE:', current_mse)
    print('Training Accuracy:', self.test(X,y))    
#nn = MultiLayerPerceptron(input_shape=[4,5,3])

In [3321]:
def assembly_dataset():
  f = open("iris.data", "r")
  data_x = []
  data_y = []
  for line in f:
    if len(line) != 1:
      data = line.replace('\n', '')
      t = data.split(',')
      data_y.append(t.pop(4))
      data_x.append(np.array(t,dtype=np.float32))
  return data_x, data_y

def normalize_row(row, max_val, min_val):
  for i in range(len(row)):
    row[i] = (2 * ((row[i] - min_val)/(max_val - min_val))) - 1 

def find_max_min(features):
  max_val = features[0][0]
  min_val = features[0][0]
  for i in range(len(features)):
    max_test = features[i][np.argmax(features[i])] 
    if max_test > max_val:
      max_val = max_test
      
    min_test = features[i][np.argmin(features[i])] 
    if min_test < min_val:
      min_val = min_test
  return max_val, min_val
    
def feature_normalization(features):
  max_val, min_val = find_max_min(features)
  for feature in features:
    normalize_row(feature, max_val, min_val)
    


In [None]:
def preprocess_dataset():  
  features, labels = assembly_dataset()
  feature_normalization(features)  

  data_dictionary = {'Iris-virginica' : [], 'Iris-versicolor': [], 'Iris-setosa': []}

  one_hot_label_dictionary = {'Iris-virginica' : [1,0,0], 'Iris-versicolor': [0,1,0], 'Iris-setosa': [0,0,1]}

  for t in zip(features,labels):
    data_dictionary[t[1]].append(t[0])
  return data_dictionary, one_hot_label_dictionary
data, one_hot_label_dictionary = preprocess_dataset()
print(data)
print(one_hot_label_dictionary)

In [3367]:
 # stratified random sampling
labels = list(one_hot_label_dictionary.keys())
X = []
y = []
for i in range(len(labels)):
  for x_sample in data[labels[i]]:
    X.append(x_sample)
    y.append(i)
    
labels = ['Iris-virginica', 'Iris-versicolor', 'Iris-setosa']
skf = StratifiedKFold(n_splits=10) # shuffle and configure seed.

avg = []

#skf.get_n_splits(X, y)
for i, (train_index, test_index) in enumerate(skf.split(X, y)):
  nn = MultiLayerPerceptron(input_shape=[4,5,3])
  print(f"Fold {i}:")
  #print(f"  Train: index={train_index}")
  #print(f"  Test:  index={test_index}")
  train_X = []
  train_y = []
  for j in train_index:
    train_X.append(X[j])
    train_y.append(one_hot_label_dictionary[labels[y[j]]])

  validation_X = []
  validation_y = []
  for j in test_index:
    validation_X.append(X[j])
    validation_y.append((one_hot_label_dictionary[labels[y[j]]]))
  
  nn.train(train_X, train_y, learning_rate=0.1, error=1.0e-6)
  val_acc = nn.test(validation_X, validation_y)
  print('Validation Accuracy:', val_acc)
  avg.append(val_acc)
print('Average validation accuracy:', np.average(avg))
print('Standard Deviation:', np.std(avg))

Fold 0:
Épocas: 208  | | Final MSE: 0.04108329551403673
Training Accuracy: 0.8740740740740741
Validation Accuracy: 1.0
Fold 1:
Épocas: 162  | | Final MSE: 0.04258182273513295
Training Accuracy: 0.8592592592592593
Validation Accuracy: 0.9333333333333333
Fold 2:
Épocas: 181  | | Final MSE: 0.03749008001017502
Training Accuracy: 0.8740740740740741
Validation Accuracy: 0.8666666666666667
Fold 3:
Épocas: 229  | | Final MSE: 0.040252677316659255
Training Accuracy: 0.8222222222222222
Validation Accuracy: 0.8
Fold 4:
Épocas: 792  | | Final MSE: 0.33203747761997354
Training Accuracy: 0.9555555555555556
Validation Accuracy: 0.9333333333333333
Fold 5:
Épocas: 4369  | | Final MSE: 0.4054485706039565
Training Accuracy: 0.9851851851851852
Validation Accuracy: 0.9333333333333333
Fold 6:
Épocas: 174  | | Final MSE: 0.16056142373783264
Training Accuracy: 0.674074074074074
Validation Accuracy: 0.6666666666666666
Fold 7:
Épocas: 3188  | | Final MSE: 0.4005499965744236
Training Accuracy: 0.918518518518518