In [44]:
import numpy as np
import math
import pandas as pd
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.datasets import load_digits

In [45]:
class Perceptron:
    weights: np.array
    output: float
    sum_output: float
    dimension: int
    error: float
    derivative: int
    learning_rate: float
    af_map: dict = {'relu': False, 'sigmoid': False, 'tanh': False, 'binary': False}

    def __init__(self,dimension: int, std_deviation: float, learning_rate: float = 0.0001, activation_function='relu'):
        self.weights = np.array([random.gauss(0,std_deviation) for _ in range(dimension)])
        self.dimension = dimension
        self.learning_rate = learning_rate
        self.error = 0
        self.af_map[activation_function] = True


    def update_weights(self, error):
        overall_error = error*self.learning_rate*self.derivative
        self.weights += overall_error
        self.error = overall_error

    def relu(self):
        if self.sum_output < 0:
            self.derivative = 1
            return 0.01*self.sum_output

        self.derivative = 1

        return self.sum_output

    def sigmoid(self):
        sigmoid = 1/(1-math.pow(math.e, self.sum_output))

        self.derivative = sigmoid*(1-sigmoid)

        return sigmoid

    def binary(self):
        self.derivative = 0
        if self.sum_output > 0:
            return 1

        return 0

    def tanh(self):
        self.derivative = 1 - (math.tanh(self.sum_output))**2

        return math.tanh(self.sum_output)

    def activation_function(self):
        if self.af_map['relu']:
            return self.relu()

        if self.af_map['sigmoid']:
            return self.sigmoid()

        if self.af_map['binary']:
            return self.binary()

        if self.af_map['tanh']:
            return self.tanh()

    def get_output(self, sample):
        _ = self.get_weighted_sum(sample)

        return self.output

    def get_weighted_sum(self, sample):
        self.sum_output = np.dot(self.weights, sample)
        self.output = self.activation_function()

        return self.sum_output

    def get_weights(self):
        return self.weights

    def get_error(self):
        return self.error

In [46]:
class Layer:
    perceptrons: list
    common_input: list
    output_vector: list
    previous_layer = None
    next_layer = None

    def __init__(self, n_features: int, n_perceptrons: int, std_deviation, learning_rate, activation_function='relu'):
        self.perceptrons = list()
        for n in range(n_perceptrons):
            self.perceptrons.append(Perceptron(n_features, std_deviation, learning_rate, activation_function))

    def get_perceptrons(self):
        return self.perceptrons

    def set_previous_layer(self, layer):
        self.previous_layer = layer

    def get_previous_layer(self):
        return self.previous_layer

    def set_next_layer(self, layer):
        self.next_layer = layer

    def get_next_layer(self):
        return self.next_layer

    def set_common_input(self, features):
        self.common_input = features

    def get_common_input(self):
        return self.common_input

    def set_output_vector(self, output_vector):
        self.output_vector = output_vector

    def get_output_vector(self):
        return self.output_vector

    def get_len(self):
        return len(self.perceptrons)

    def make_connections(self, previous_layer, next_layer):
        self.previous_layer = previous_layer
        self.next_layer = next_layer

    def get_weights_sum(self):
        weights_sum = 0

        for perceptron in self.perceptrons:
            weights_sum += np.sum(perceptron.get_weights())

        return weights_sum


In [47]:
class MLP:
    layers: list
    input_len: int
    output_len: int
    output_layer = None
    learning_rate: float
    activation_function: str
    lmb: float
    reg_map: dict = {'l1': False, 'l2': False}
    regularization_method: str

    def __init__(self, n_features, n_labels, learning_rate, activation_function='relu', regularization_method=None, lmb=0):
        self.layers = list()
        self.input_len = n_features
        self.output_len = n_labels
        self.learning_rate = learning_rate
        self.activation_function = activation_function
        self.regularization_method = regularization_method
        self.lmb = lmb

    def add_layer(self, n_perceptrons):
        if self.layers == list():
            last_layer = None
            n_features = self.input_len
        else:
            last_layer = self.layers[-1]
            n_features = last_layer.get_len()

        if self.activation_function != 'relu':
            numerator = 1
        else:
            numerator = 2

        deviation = math.sqrt(numerator/n_features)
        output_deviation = math.sqrt(numerator/n_perceptrons)

        new_layer = Layer(n_features, n_perceptrons, deviation, self.learning_rate, self.activation_function)

        self.output_layer = Layer(n_perceptrons, self.output_len, output_deviation, self.learning_rate, self.activation_function)

        new_layer.make_connections(last_layer, self.output_layer)

        if last_layer != None:
            last_layer.set_next_layer(new_layer)

        self.layers.append(new_layer)

    def forward_propagation(self):
        for layer in self.layers:
            input_vector = layer.get_common_input()
            output_vector = []

            perceptrons = layer.get_perceptrons()
            for perceptron in perceptrons:
                output = perceptron.get_output(np.array(input_vector))
                output_vector.append(output)

            layer.set_output_vector(output_vector)

            next_layer = layer.get_next_layer()
            if next_layer != None:
                next_layer.set_common_input(output_vector)

        return output_vector

    def softmax(self, output_vector):
        output_vector = np.array(output_vector)

        numerator = np.exp(output_vector)
        denominator = sum(numerator)

        probabilities = numerator/denominator
        #print('soft', probabilities)
        return probabilities.tolist()

    def back_propagation(self, errors):
        reversed_order_layers = self.layers[::-1]

        for index, perceptron in enumerate(reversed_order_layers[0].get_perceptrons()):
            perceptron.update_weights(errors[index])

        for layer in reversed_order_layers[1:]:
            current_layer_p = layer.get_perceptrons()
            next_layer_p = layer.get_next_layer().get_perceptrons()

            for index_c, perceptron_c in enumerate(current_layer_p):
                sum_error = 0

                for index_n, perceptron_n in enumerate(next_layer_p):
                    error_n = perceptron_n.get_error()

                    weights = perceptron_n.get_weights()
                    weight_c = weights[index_c]

                    sum_error += error_n*weight_c

                perceptron_c.update_weights(sum_error)

    def l1_regularization(self, weights_sum, ms_error):
        regularization_term = self.lmb * abs(weights_sum)

        return ms_error + regularization_term

    def l2_regularization(self, weights_sum, ms_error):
        regularization_term = self.lmb * np.square((weights_sum))

        return ms_error + regularization_term

    def get_all_weights_sum(self):
        weights_sum = 0

        for layer in self.layers:
            weights_sum += layer.get_weights_sum()

        return weights_sum

    def calculate_loss(self, ms_error, weights_sum):
        loss = ms_error

        if self.regularization_method != None:
            if self.reg_map['l1']:
                loss = self.l1_regularization(weights_sum, ms_error)

            if self.reg_map['l2']:
                loss = self.l2_regularization(weights_sum, ms_error)

        return loss

    def train_model(self, features: pd.DataFrame, labels:pd.DataFrame, epochs=50):
        unique_labels = labels.unique()
        self.layers.append(self.output_layer)
        epoch_counter = 0

        while epoch_counter != epochs:
            sizes = np.zeros(self.output_len)
            hits = np.zeros(self.output_len)
            sum_error = np.zeros(self.output_len)
            predictions = np.zeros(self.output_len)

            for row, label in zip(features.values, labels.values):
                true_proba = [1 if lb == label else 0 for lb in unique_labels]
                true_pred = true_proba.index(1)
                sizes[true_pred] += 1

                self.layers[0].set_common_input(row)

                output_vector = self.forward_propagation()
                proba = self.softmax(output_vector)

                prediction = proba.index(max(proba))

                if prediction == true_pred:
                    hits[true_pred] += 1

                predictions[prediction] += 1

                error = np.subtract(true_proba, proba)
                sum_error += np.sign(error) * np.square(error)

            ms_error = sum_error/sizes
            weights_sum = self.get_all_weights_sum()

            loss = self.calculate_loss(ms_error, weights_sum)

            self.back_propagation(loss)
            epoch_counter += 1

            print(np.sum(sizes), hits, np.sum(hits), predictions)

    def predict(self, features: pd.DataFrame, labels: pd.DataFrame):
        unique_labels = labels.unique()
        hits = 0
        misses = 0

        for row, label in zip(features.values, labels.values):
            true_proba = [1 if lb == label else 0 for lb in unique_labels]
            true_index = true_proba.index(1)

            self.layers[0].set_common_input(row)
            output_vector = self.forward_propagation()

            proba = self.softmax(output_vector)
            pred_index = proba.index(max(proba))

            prediction = unique_labels[pred_index]

            if prediction == label:
                hits += 1
            else:
                misses += 1

        print(hits, misses)


In [48]:
digits = load_digits()

data = digits.data
label = digits.target

df = {str(x): [] for x in range(64)}
for row in data:
    for index, gs_number in enumerate(row):
        df[str(index)].append(gs_number)
df['target'] = label

df = pd.DataFrame(data=df)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


In [49]:
features = df.drop(columns='target')
labels = df['target']

x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2)

In [50]:
mlp = MLP(64,10,0.005,'relu','l1',0.5)

mlp.add_layer(4)

In [51]:
mlp.train_model(x_train, y_train, 1000)

1437.0 [54. 80.  0.  0. 82.  0.  2.  6.  0.  0.] 224.0 [476. 656.   0.  29. 190.   0.   9.  76.   0.   1.]
1437.0 [52. 76.  0.  0. 84.  0.  2.  6.  0.  0.] 220.0 [474. 646.   1.  29. 201.   0.   9.  76.   0.   1.]
1437.0 [49. 74.  0.  0. 85.  0.  2.  6.  0.  0.] 216.0 [468. 641.   1.  30. 210.   0.  10.  76.   0.   1.]
1437.0 [46. 71.  0.  0. 88.  0.  2.  7.  0.  0.] 214.0 [461. 634.   1.  30. 222.   0.  10.  78.   0.   1.]
1437.0 [43. 70.  0.  0. 88.  0.  3.  7.  0.  0.] 211.0 [453. 630.   1.  29. 232.   0.  12.  78.   0.   2.]
1437.0 [43. 70.  0.  0. 90.  0.  3.  7.  0.  0.] 213.0 [448. 624.   1.  29. 242.   0.  13.  78.   0.   2.]
1437.0 [42. 70.  0.  0. 89.  0.  3.  7.  0.  0.] 211.0 [446. 617.   1.  29. 248.   0.  15.  79.   0.   2.]
1437.0 [41. 69.  0.  0. 90.  0.  4.  7.  0.  0.] 211.0 [441. 611.   1.  29. 255.   0.  17.  81.   0.   2.]
1437.0 [40. 69.  0.  0. 89.  0.  6.  7.  0.  0.] 211.0 [435. 604.   1.  30. 264.   0.  21.  80.   0.   2.]
1437.0 [39. 67.  0.  0. 88.  0.  6.  

In [52]:
mlp.predict(x_test, y_test)

21 339
