#Reading data
https://github.com/ASaid7/MLP-from-Scratch/blob/master/multilayer%20perceptron.ipynb


In [16]:
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.data import loadlocal_mnist
import import_ipynb
import images_data
from functools import partial

import nbimporter
from Layer import Layer

PATH_TRAIN_IMAGES = "data/train-images-idx3-ubyte"
PATH_TRAIN_LABELS = "data/train-labels-idx1-ubyte"

PATH_TEST_IMAGES = "data/t10k-images-idx3-ubyte"
PATH_TEST_LABELS = "data/t10k-labels-idx1-ubyte"

COL_SIZE = 28
%matplotlib inline


In [9]:
# do innego pliku
def sigmoid(x):
    return 1/(1 + np.exp(-x))

def deriative_sigmoid(x):
    return sigmoid(x) * (1-sigmoid(x))

In [25]:
class Network:
    def __init__(self, list_of_numbers_of_neurons, learning_rate, activation, deriative):
        self.number_of_neurons = list_of_numbers_of_neurons
        self.learning_rate = learning_rate
        self.layers = []
        self.erors = None
        for index in range(1, len(list_of_numbers_of_neurons)):
            layer = Layer(list_of_numbers_of_neurons[index], list_of_numbers_of_neurons[index - 1], activation, deriative)
            self.layers.append(layer)
            

    def feed_forward(self, inputs: np.array) -> np.array:
        last_out = inputs.copy()
        for index in range(len(self.layers)):
            print(last_out.shape, self.layers[index].biases.shape, "shape", self.layers[index].number_of_neurons)
            last_out = self.layers[index].activation(last_out)
        return last_out
    
    
    def feed_back(self, inputs: np.array, y: np.array):
        result_from_network = self.feed_forward(inputs)
        
        last_layer = self.layers[-1]
        cost_derivative = inputs - y #różnica między oczekiwanym a rzeczywistym
        sigmoid_derivative = deriative_sigmoid(Network.last_layer.z) #z to whynik przed sigmoidą

        errors = []
        delta = cost_derivative * sigmoid_derivative 
        errors.insert(0, delta) #wrzucanie na początek listy

        for layer in self.layers.reverse():
            cost_derivative = np.dot(layer.weights.T, delta) # czemy Transpose
            sigmoid_derivative = layer.deriative(layer.z)
            delta = cost_derivative * sigmoid_derivative
            errors.insert(0, delta)
        return errors
    
    
    def train(self, mini_batch):
        delta_biases = [np.zeros(layer.biases.shape) for layer in self.layers]
        delta_weights = [np.zeros(layer.weights.shape) for layer in self.layers]

        for sample in mini_batch: # obrazek wektor wyjść
            x, y = sample

            errors = self.feed_back(x, y)
            (
                delta_weights_backprop,
                delta_biases_backprop,
            ) = self.get_delta_weights_and_biases_from_errors(x, errors)

            for layer_index in range(len(self.layers)):
                delta_biases[layer_index] += delta_biases_backprop[layer_index]
                delta_weights[layer_index] += delta_weights_backprop[layer_index]
#oco biega
        for layer_index in range(len(self.layers)):
            delta_biases[layer_index] *= 1.0 / len(mini_batch)
            delta_biases[layer_index] *= self.learning_rate

            delta_weights[layer_index] *= 1.0 / len(mini_batch)
            delta_weights[layer_index] *= self.learning_rate

        self.update_weights_and_biases(delta_weights, delta_biases)
        

    def get_delta_weights_and_biases_from_errors(self, inputs: np.array, errors):
        to_change_biases = list()
        to_change_weights = list()

        for layer_index in range(len(self.layers) - 1, -1, -1):
            delta = errors[layer_index]
            to_change_biases.insert(0, delta.copy())

            if layer_index == 0:
                to_change_weights.insert(0, np.dot(delta, inputs.T))
            else:
                to_change_weights.insert(0, np.dot(delta, self.layers[layer_index - 1].a.T))

        return to_change_weights, to_change_biases
    

    def update_weights_and_biases(
        self, delta_weights, delta_biases):
        
        for index, layer in enumerate(self.layers):
            layer.set_biases(layer.biases - delta_biases[index])
            layer.set_weights(layer.weights - delta_weights[index])

    def cost(self, y: np.array):
        return 0.5 * (y - self.layers[-1].a) ** 2

    def final_cost(self, y: np.array):
        return sum(x for x in self.cost(y))


    def dump_to_file(self, folder_name: str = "model_w_b"):
        weights = np.array([layer.weights for layer in self.layers])
        biases = np.array([layer.biases for layer in self.layers])
        np.save(f'{folder_name}/weights.npy', weights)
        np.save(f'{folder_name}/biases.npy', biases)

    
    def loss(self, y_true, y_pred):
        return np.mean((y_true - y_pred)**2)
    
    def dloss(self, y_true, y_pred):
        return 2 * (y_pred - y_true) / y_true.size



In [26]:
def prepare_data(images, labels):
    network_input_size = images[0].flatten().size

    return [
        ((image.flatten() / 255).reshape((network_input_size, 1)), int(label))
        for image, label in zip(images, labels)
    ]

def make_output(number: int):
    n = np.zeros((10, 1))
    n[number][0] = 1.0
    return n

def main():
    # number of pixels in photo
    train_images, train_labels = images_data.load_datasets() 
    network_input_size = train_images[0].flatten().size

    # numbers in <0, 9>
    network_output_size = 10

    learning_rate = 0.1

    network = Network([network_input_size, 5, network_output_size],
                      0.1, sigmoid, deriative_sigmoid)

    train_data = prepare_data(train_images, train_labels)
    np.random.shuffle(train_data)

    test_images, test_labels = images_data.load_datasets(set_name="test") 
    test_data = prepare_data(test_images, test_labels)
    np.random.shuffle(test_data)

    number_of_epochs = 5
    batch_size = 15

  
    for epoch in range(number_of_epochs):
        print(f"Epoch {epoch}")

        for i in range(0, len(train_data) - batch_size, batch_size):
            mini_batch: List[Tuple[np.array, np.array]] = []

            for _, number in enumerate(train_data[i: i + batch_size]):
                pixels, number_label = number
                results: np.array = make_output(number_label)
                mini_batch.append((pixels, results))

            network.train(mini_batch)



In [27]:
main()

Epoch 0
(784, 1) (784,) shape 784


ValueError: shapes (784,10) and (784,1) not aligned: 10 (dim 1) != 784 (dim 0)

In [None]:
Network