# Lab2 - backpropagation

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn import metrics

## Dodano uczenie metodą propagacji błędu wraz z podejściem batchowym 

In [2]:
class MLP:
    """Fully connected feed-forward network trained with mini-batch backpropagation.

    Every hidden layer applies the configured activation function; the output
    layer is always linear (its pre-activation is returned unchanged).

    Attributes:
        weights: list of 2-D arrays; ``weights[i]`` has shape
            ``(neurons in layer i, neurons in layer i + 1)``.
        bias: list of arrays, one per non-input layer.
        activation_function: name of the hidden-layer activation
            (``"sigmoid"`` or ``"linear"``).
        num_neurons: layer sizes derived from the weight shapes.
        num_inputs / num_outputs / num_hidden: derived from ``num_neurons``.
    """

    def __init__(self, weights, bias, num_outputs=1, num_hidden=1, activation_function="sigmoid"):
        """Store the parameters and derive the layer sizes from ``weights``.

        ``num_outputs`` and ``num_hidden`` are accepted for backward
        compatibility only; the actual values are always taken from the
        shapes of ``weights``.
        """
        self.weights = weights
        self.bias = bias
        self.activation_function = activation_function
        self.num_hidden = len(weights) - 1
        self.num_neurons = [weights[0].shape[0]]

        for i in range(len(weights)):
            self.num_neurons.append(weights[i].shape[1])
        self.num_inputs = self.num_neurons[0]
        self.num_outputs = self.num_neurons[-1]

        # verify if dimensions are correct (best effort: only a warning is printed)
        for i in range(len(weights) - 1):
            if weights[i].shape[1] != weights[i + 1].shape[0]:
                print("Weights' dimensions between layers " + str(i) + " and " + str(i + 1) + " are incorrect!")
        for i in range(len(bias)):
            if weights[i].shape[1] != bias[i].size:
                print("Bias size in layer " + str(i) + " is incorrect!")

        # derivatives of weights and biases used in backpropagation
        self.dw = []
        self.db = []

    def forward_prop(self, inputs):
        """Run a forward pass and cache the intermediate values.

        Args:
            inputs: 2-D array of shape ``(batch, num_inputs)``.

        Returns:
            The linear output of the last layer, shape ``(batch, num_outputs)``.

        Side effects:
            ``self.z`` holds the pre-activation of every layer and ``self.a``
            holds the inputs followed by the output of every layer; both are
            read by ``backpropagation``.
        """
        # activations and linear combinations passed to activation function
        self.a = []
        self.z = []

        activation_function = self.getActivationFunction(self.activation_function)
        activations = inputs
        self.a.append(activations)
        for i in range(self.num_hidden):
            outputs = np.dot(activations, self.weights[i]) + self.bias[i]
            self.z.append(outputs)
            activations = activation_function(outputs)
            self.a.append(activations)

        # the output layer is linear: no activation is applied
        results = np.dot(activations, self.weights[self.num_hidden]) + self.bias[self.num_hidden]
        self.z.append(results)
        self.a.append(results)
        return results

    def backpropagation(self, y):
        """Compute batch-averaged update directions for weights and biases.

        Must be called after ``forward_prop`` on the matching inputs, because
        it reads the cached ``self.a`` and ``self.z``.

        Args:
            y: 2-D array of targets, shape ``(batch, num_outputs)``.

        Returns:
            ``(dw, db)``: lists aligned with ``self.weights`` / ``self.bias``.
            ``dw[i]`` has the shape of ``weights[i]`` and ``db[i]`` has shape
            ``(1, neurons in layer i + 1)``. The output error is taken as
            ``y - prediction``, so the values are meant to be *added* to the
            parameters (as ``train`` does).
        """
        derivative = self.getDerivitiveActivationFunction(self.activation_function)

        deltas = [None] * len(self.weights)
        deltas[-1] = y - self.a[-1]
        for i in reversed(range(len(deltas) - 1)):
            # propagate the error one layer back and scale by the local slope
            deltas[i] = deltas[i + 1].dot(self.weights[i + 1].T) * derivative(self.z[i])

        batch_size = y.shape[0]
        db = [d.sum(axis=0, keepdims=True) / float(batch_size) for d in deltas]
        dw = [self.a[i].T.dot(d) / float(batch_size) for i, d in enumerate(deltas)]
        return dw, db

    def train(self, x, y, batch_size = 20, epochs = 500, learning_rate = 0.1, verbose = False):
        """Train the network with mini-batch updates.

        Args:
            x: 2-D array of inputs, shape ``(samples, num_inputs)``.
            y: 2-D array of targets, shape ``(samples, num_outputs)``.
            batch_size: number of consecutive samples per update.
            epochs: number of full passes over the data.
            learning_rate: step size applied to the update directions.
            verbose: when true the batch error is printed after every update;
                otherwise only during the first and the last epoch.
        """
        for epoch in range(epochs):
            i = 0
            while i < len(y):
                x_batch = x[i:i + batch_size]
                y_batch = y[i:i + batch_size]
                i = i + batch_size
                self.forward_prop(x_batch)
                dw, db = self.backpropagation(y_batch)
                # dw/db already point downhill (error is y - prediction), hence "+"
                self.weights = [w + learning_rate * dweight for w, dweight in zip(self.weights, dw)]
                self.bias = [w + learning_rate * dbias for w, dbias in zip(self.bias, db)]
                if verbose or epoch in [0,epochs-1]:
                    print("Error in epoch {} = {}".format(epoch, np.linalg.norm(self.a[-1] - y_batch)))

    @staticmethod
    def _sigmoid(x):
        """Logistic function evaluated without overflow.

        Only ``exp(-|x|)`` is computed, which never exceeds 1, so large
        positive or negative inputs yield 1.0 / 0.0 instead of ``nan``.
        """
        x = np.asarray(x, dtype=float)
        e = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    @staticmethod
    def getActivationFunction(name):
        """Return the activation function registered under ``name``.

        Unknown names fall back to the linear (identity) activation after
        printing a warning.
        """
        if name == 'sigmoid':
            return MLP._sigmoid
        elif name == 'linear':
            return lambda x: x
        else:
            print('Unknown activation function. linear is used')
            return lambda x: x

    @staticmethod
    def getDerivitiveActivationFunction(name):
        """Return the derivative of the activation registered under ``name``.

        The returned callable always produces an array shaped like its input,
        so callers may safely use array attributes on the result. Unknown
        names fall back to the linear derivative after printing a warning.
        """
        def sigmoid_derivative(x):
            s = MLP._sigmoid(x)
            return s * (1.0 - s)

        def linear_derivative(x):
            # an array of ones (not the scalar 1) so the result has array attributes
            return np.ones_like(x, dtype=float)

        if name == 'sigmoid':
            return sigmoid_derivative
        elif name == 'linear':
            return linear_derivative
        else:
            print('Unknown activation function. linear is used')
            return linear_derivative

    def show_attributes(self):
        """Print basic information about the neural network."""
        print("Neural Network attributes:")
        print("-------------------------")
        print("Number of neurons in layers: {}".format(self.num_neurons))
        print("Number of predictors: {}".format(self.num_inputs))
        print("Number of hidden layers: {}".format(self.num_hidden))
        print("Number of targets: {}".format(self.num_outputs))
        print("Activations function used: {}".format(self.activation_function))
        return


def generate_random_mlp(num_inputs, num_hidden, num_targets, start=-10, stop=10):
    """
    Build an MLP whose weights and biases are drawn uniformly from [start, stop).

    num_inputs  -- number of input neurons
    num_hidden  -- sequence with the size of every hidden layer
    num_targets -- number of output neurons
    """
    def draw(count, shape):
        # one flat uniform sample, folded into the requested shape
        return np.random.uniform(start, stop, count).reshape(shape)

    layer_weights = [draw(num_inputs * num_hidden[0], (num_inputs, -1))]
    layer_biases = []

    # hidden-to-hidden connections, plus the bias of the layer they start from
    for width, next_width in zip(num_hidden, num_hidden[1:]):
        layer_weights.append(draw(width * next_width, (width, -1)))
        layer_biases.append(draw(width, (1, -1)))

    # last hidden layer -> output layer
    layer_weights.append(draw(num_hidden[-1] * num_targets, (-1, num_targets)))
    layer_biases.append(draw(num_hidden[-1], (1, -1)))
    layer_biases.append(draw(num_targets, (1, -1)))

    return MLP(layer_weights, layer_biases)

Zainicjujmy losową sieć i sprawdźmy jak nauczy się wzoru paraboli

In [3]:
# 1 input -> hidden layers of 5, 10 and 5 neurons -> 1 output
mymlp = generate_random_mlp(num_inputs=1, num_hidden=[5, 10, 5], num_targets=1)

In [4]:
# Print the layer sizes and the activation of the freshly generated network.
mymlp.show_attributes()

Neural Network attributes:
-------------------------
Number of neurons in layers: [1, 5, 10, 5, 1]
Number of predictors: 1
Number of hidden layers: 3
Number of targets: 1
Activations function used: sigmoid


In [5]:
# Both splits are read the same way: the first CSV column becomes the index.
train_df, test_df = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '~/Documents/Sem6/MIO/datasets/regression/square-simple-training.csv',
        '~/Documents/Sem6/MIO/datasets/regression/square-simple-test.csv',
    )
)

In [6]:
# Tag every row with the split it came from, then stack both splits for a quick look.
train_df = train_df.assign(split='train')
test_df = test_df.assign(split='test')
df = pd.concat([train_df, test_df])
df

Unnamed: 0,x,y,split
1,-0.171543,-127.351580,train
2,0.025201,-129.942844,train
3,-1.368991,38.672367,train
4,1.907390,197.432191,train
5,0.011129,-129.988852,train
...,...,...,...
96,1.315377,25.719403,test
97,-1.196968,-1.054107,test
98,1.544766,84.767303,test
99,0.441051,-112.492699,test


In [None]:
# Scatter of the training data; the title is set on the Axes seaborn returns.
ax = sns.scatterplot(x='x', y='y', data=train_df)
ax.set_title('Train split', fontsize=20)

Text(0.5, 1.0, 'Train split')

In [None]:
# Random 1 -> 10 -> 20 -> 1 network with initial parameters drawn from [-100, 100).
# NOTE(review): no RNG seed is set, so every run starts from different weights.
mymlp = generate_random_mlp(num_inputs=1, num_hidden=[10, 20], num_targets=1, start=-100, stop=100)
mymlp.show_attributes()

# The network expects column vectors of shape (samples, 1).
train_x = np.asarray(train_df['x']).reshape(-1, 1)
train_y = np.asarray(train_df['y']).reshape(-1, 1)
mymlp.train(train_x, train_y, batch_size=200, epochs=10000, learning_rate=0.02)

In [None]:
# Predict on the test inputs, passed as a (samples, 1) column vector.
test_inputs = test_df['x'].to_numpy().reshape(-1, 1)
result = mymlp.forward_prop(test_inputs)

In [None]:
# Ground truth and predictions drawn on the same Axes.
ax = sns.scatterplot(x='x', y='y', data=test_df)
sns.scatterplot(x=test_df['x'], y=result.ravel(), ax=ax)
ax.set_title('Test performance', fontsize=20)

Sieć uczy się prawidłowo, jednak do dobrego ustawienia wag potrzebny jest mały krok oraz duża liczba iteracji.

Sprawdźmy, w jakim stopniu propagacja wsteczna błędu przyda się do ulepszenia sieci z poprzedniego tygodnia. Wyglądała wtedy następująco:

In [None]:
# Hand-set 1 -> 5 -> 1 network: unit input weights, hidden biases -2..2.
weights_1 = [
    np.ones((1, 5), dtype=int),
    np.array([900, 200, 0, -200, -900]).reshape(-1, 1),
]
bias_1 = [np.arange(-2, 3), np.array([650])]
mlp_1 = MLP(weights=weights_1, bias=bias_1)
mlp_1.show_attributes()

In [None]:
# Predictions of the hand-set network on the training inputs, before any training.
res = mlp_1.forward_prop(train_df['x'].to_numpy().reshape(-1, 1))
ax = sns.scatterplot(x='x', y='y', data=train_df)
sns.scatterplot(x=train_df['x'], y=res.ravel(), ax=ax)
ax.set_title('Train performance', fontsize=20)

Po użyciu zaimplementowanej techniki:

In [None]:
# Fine-tune the hand-set network: one batch of up to 200 samples per update, very small step.
mlp_1.train(
    train_df['x'].to_numpy().reshape(-1, 1),
    train_df['y'].to_numpy().reshape(-1, 1),
    batch_size=200,
    epochs=1000,
    learning_rate=0.00001,
)

In [None]:
# Predictions of the fine-tuned network on the test inputs, drawn over the ground truth.
res = mlp_1.forward_prop(test_df['x'].to_numpy().reshape(-1, 1))
ax = sns.scatterplot(x='x', y='y', data=test_df)
sns.scatterplot(x=test_df['x'], y=res.ravel(), ax=ax)
ax.set_title('Test performance', fontsize=20)

Test wypadł zdecydowanie dobrze. Przy 1000 epokach sieć lepiej przewiduje obserwacje na "ogonach" paraboli.