In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the MNIST table from disk; the preview below shows the pixel columns
# followed by a `label` column.
mnist_csv = 'data/mnist.csv'
data = pd.read_csv(mnist_csv)

data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5


In [2]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array of the same shape
        as ``x``, with every value in ``[0, 1]``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can never overflow regardless of how
    # large |x| is (the naive exp(-x) overflows for very negative x).
    decay = np.exp(-np.abs(x))
    out = np.where(
        x >= 0,
        1.0 / (1.0 + decay),      # same expression as the textbook form for x >= 0
        decay / (1.0 + decay),    # algebraically equal form, safe for x < 0
    )
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

def sigmoid_prime(z):
    """Derivative of the sigmoid function, ``sigmoid(z) * (1 - sigmoid(z))``.

    Args:
        z: scalar or array accepted by ``sigmoid``.

    Returns:
        The derivative evaluated element-wise at ``z``.
    """
    s = sigmoid(z)  # evaluate the sigmoid once and reuse it
    return s * (1 - s)

In [59]:
import tqdm

class NNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    The network is trained by mini-batch gradient descent on a squared-error
    cost. It relies on the module-level ``sigmoid`` and ``sigmoid_prime``
    helpers.

    Attributes:
        num_layers: number of layers, input layer included.
        sizes: the layer widths passed to the constructor.
        biases: one ``(n_out, 1)`` column vector per non-input layer.
        weights: one ``(n_out, n_in)`` matrix per pair of consecutive layers.
    """

    def __init__(self, layers_sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            layers_sizes: sequence of layer widths, e.g. ``[784, 30, 10]``.
        """
        self.num_layers = len(layers_sizes)
        self.sizes = layers_sizes
        self.biases = [np.random.randn(y, 1) for y in layers_sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(layers_sizes[:-1], layers_sizes[1:])]

    def cost_derivative(self, output_activations, y):
        """Derivative of the squared-error cost w.r.t. the output activations."""
        return 2 * (output_activations - y)

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input.

        ``a`` is flattened to a column vector first; the result is a column
        vector with one row per output neuron.
        """
        a = a.reshape(-1, 1)
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def backpropogation(self, x, y):
        """Compute the cost gradient for a single training example.

        Args:
            x: input sample; flattened to a column vector.
            y: target output; flattened to a column vector.

        Returns:
            A tuple ``(gradiente_b, gradiente_w)`` of lists whose arrays have
            the same shapes as ``self.biases`` and ``self.weights``.
        """
        x = x.reshape(-1, 1)
        y = y.reshape(-1, 1)

        # Every slot is assigned during the backward pass below.
        gradiente_b = [None] * len(self.biases)
        gradiente_w = [None] * len(self.weights)

        # Forward pass: remember every weighted input z and every activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass, output layer: delta = dC/dz.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        gradiente_b[-1] = delta                              # dC/db
        gradiente_w[-1] = np.dot(delta, activations[-2].T)   # dC/dw

        # Hidden layers, walking from the last hidden layer towards the input.
        # l counts layers from the end, so -l indexes "l-th layer from the back".
        for l in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-l])
            delta = np.dot(self.weights[-l + 1].T, delta) * sp
            gradiente_b[-l] = delta
            gradiente_w[-l] = np.dot(delta, activations[-l - 1].T)

        return (gradiente_b, gradiente_w)

    def update_weigts_from_batch(self, batch, learning_rate):
        """Apply one gradient-descent step using the mean gradient of ``batch``.

        Args:
            batch: sized collection of ``(x, y)`` pairs.
            learning_rate: step size applied to the averaged gradient.

        An empty batch leaves the parameters untouched instead of dividing
        by zero.
        """
        if len(batch) == 0:
            return

        gradiente_b = [np.zeros(b.shape) for b in self.biases]
        gradiente_w = [np.zeros(w.shape) for w in self.weights]

        # Sum the per-example gradients over the whole batch.
        for x, y in batch:
            delta_gradiente_b, delta_gradiente_w = self.backpropogation(x, y)
            for total, part in zip(gradiente_b, delta_gradiente_b):
                total += part
            for total, part in zip(gradiente_w, delta_gradiente_w):
                total += part

        step = learning_rate / len(batch)  # averages the summed gradient
        self.weights = [w - step * nw for w, nw in zip(self.weights, gradiente_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, gradiente_b)]

    def fit_from_data(self, data, epochs, batch_size, learning_rate):
        """Train the network with mini-batch gradient descent.

        Args:
            data: iterable of ``(x, y)`` training pairs. It is copied into a
                private list, so the caller's collection is never reordered.
            epochs: number of full passes over ``data``.
            batch_size: number of examples per mini-batch (the last batch of
                an epoch may be smaller).
            learning_rate: step size forwarded to ``update_weigts_from_batch``.
        """
        samples = list(data)  # shuffle our own copy, not the caller's object
        for j in tqdm.tqdm(range(epochs)):
            np.random.shuffle(samples)
            for k in range(0, len(samples), batch_size):
                self.update_weigts_from_batch(samples[k:k + batch_size], learning_rate)
            print(f"Epoch {j} complete")

    def make_prediction(self, data):
        """Return, for each sample in ``data``, the index of the strongest output neuron."""
        return [np.argmax(self.feedforward(x)) for x in data]
    


In [61]:
# Train the neural network
from sklearn.model_selection import train_test_split

# Weight initialisation and per-epoch shuffling both draw from NumPy's global
# RNG, so seed it to make the training run repeatable.
np.random.seed(42)
nn = NNetwork([784, 30, 10])

X = data.drop('label', axis=1)
y = data['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.values
X_test = X_test.values

# One-hot encode the digit labels as floats so they can be subtracted from
# the network's output activations.
y_train = pd.get_dummies(y_train, dtype=float).values
y_test = pd.get_dummies(y_test, dtype=float).values

# get_dummies only creates a column for labels present in the split; make
# sure both splits line up with the output layer.
assert y_train.shape[1] == y_test.shape[1] == nn.sizes[-1], \
    "one-hot width must match the output layer"

data_train = list(zip(X_train, y_train))

nn.fit_from_data(data_train, epochs=10, batch_size=100, learning_rate=1)

 10%|█         | 1/10 [00:08<01:19,  8.81s/it]

Epoch 0 complete


 20%|██        | 2/10 [00:17<01:10,  8.81s/it]

Epoch 1 complete


 30%|███       | 3/10 [00:26<01:01,  8.82s/it]

Epoch 2 complete


 40%|████      | 4/10 [00:35<00:52,  8.82s/it]

Epoch 3 complete


 50%|█████     | 5/10 [00:44<00:44,  8.82s/it]

Epoch 4 complete


 60%|██████    | 6/10 [00:52<00:35,  8.83s/it]

Epoch 5 complete


 70%|███████   | 7/10 [01:01<00:26,  8.90s/it]

Epoch 6 complete


 80%|████████  | 8/10 [01:10<00:17,  8.86s/it]

Epoch 7 complete


 90%|█████████ | 9/10 [01:19<00:08,  8.95s/it]

Epoch 8 complete


100%|██████████| 10/10 [01:28<00:00,  8.89s/it]

Epoch 9 complete





In [62]:
from sklearn.metrics import classification_report

# y_test is one-hot encoded, so recover the digit labels before scoring.
y_true = np.argmax(y_test, axis=1)
y_pred = nn.make_prediction(X_test)

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.96      0.95      1343
           1       0.95      0.96      0.96      1600
           2       0.89      0.86      0.87      1380
           3       0.88      0.87      0.87      1433
           4       0.89      0.90      0.90      1295
           5       0.88      0.81      0.85      1273
           6       0.91      0.94      0.92      1396
           7       0.90      0.92      0.91      1503
           8       0.84      0.87      0.85      1357
           9       0.88      0.87      0.87      1420

    accuracy                           0.90     14000
   macro avg       0.90      0.90      0.90     14000
weighted avg       0.90      0.90      0.90     14000

