In [1]:
import numpy as np
import matplotlib.pyplot as plt
# from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook

In [2]:
!pip install numba





In [9]:
from numba import jit

In [3]:
from tensorflow.keras.datasets import mnist

In [90]:
# Загрузка данных MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()
y_train = y_train[:16_000]
y_test = y_test[:4_000]

# Преобразование данных в нужный формат
X_train = X_train.reshape(60_000, 784)[:16_000]
X_test = X_test.reshape(10_000, 784)[:4_000]
X_train = X_train / 255
X_test = X_test / 255
y_train_onehot = np.zeros((y_train.size, y_train.max() + 1))
y_train_onehot[np.arange(y_train.size), y_train] = 1

In [92]:
# Определим параметры модели
n_inputs = X_train.shape[1] # Количество входных нейронов
n_hidden = 522  # Количество нейронов в скрытом слое
n_outputs = len(np.unique(y_train))  # Количество выходных нейронов
learning_rate = 0.1
n_epochs = 1001


In [93]:
# Инициализация весов
weights_input_hidden = np.random.uniform(-0.5, 0.5, size=(n_inputs, n_hidden))
bias_hidden = np.zeros(n_hidden)

weights_hidden_output = np.random.uniform(-0.5, 0.5, size=(n_hidden, n_outputs))
bias_output = np.zeros(n_outputs)


In [94]:
# Определение функций активации.
@jit(nopython=True, fastmath=True)
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

@jit(nopython=True, fastmath=True)
def make_dot(x,y):
    return np.dot(x, y)

In [95]:

@jit(cache=True, fastmath=True)
def make_loss(y_pred, X_train, y_train):
    loss = 0.0
    for i in range(len(X_train)):
        loss -= np.log(y_pred[i, y_train[i]])
    return loss / len(X_train)

In [83]:

@jit(cache=True)#(nopython=True)#(fastmath=True)
def softmax(x):
    exp_x = np.exp(x)
    return exp_x / np.sum(exp_x, axis=1)[..., np.newaxis] #exp_x / np.sum(exp_x, axis=1, keepdims=True) #


In [96]:
%%time
# @jit(nopython=True)
def perceptron_train(X_train, y_train, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output, n_epochs, learning_rate, n_outputs):
    for epoch in tqdm_notebook(range(n_epochs)):
        # Forward pass
        hidden_inputs = make_dot(X_train, weights_input_hidden) + bias_hidden
        hidden_outputs = sigmoid(hidden_inputs)
        output_inputs = make_dot(hidden_outputs, weights_hidden_output) + bias_output
        y_pred = softmax(output_inputs)

        # Compute loss
        loss = make_loss(y_pred,X_train,y_train)#-np.sum(np.log(y_pred[np.arange(len(X_train)), y_train])) / len(X_train)

        # Compute gradients
        dL_dOutputInputs = y_pred
        dL_dOutputInputs[np.arange(len(X_train)), y_train] -= 1
        dL_dOutputInputs /= len(X_train)

        dL_dWeightsHiddenOutput = make_dot(hidden_outputs.T, dL_dOutputInputs)
        dbiasOutput = np.sum(dL_dOutputInputs, axis=0)

        dL_dHiddenInputs = make_dot(dL_dOutputInputs, weights_hidden_output.T) * sigmoid(hidden_inputs)
        dL_dWeightsInputHidden = make_dot(X_train.T, dL_dHiddenInputs)
        dbiasHidden = np.sum(dL_dHiddenInputs, axis=0)

        # Update weights and biases
        weights_hidden_output -= learning_rate * dL_dWeightsHiddenOutput
        bias_output -= learning_rate * dbiasOutput
        weights_input_hidden -= learning_rate * dL_dWeightsInputHidden
        bias_hidden -= learning_rate * dbiasHidden

    return weights_input_hidden, bias_hidden, weights_hidden_output, bias_output


# [00:57<00:00, 14.52it/s] y_train = y_train[:6000] n_epochs = 500 Accuracy: 0.853
# [04:39<00:00, 11.01it/s] y_train = y_train[:16000] n_epochs = 1000 Accuracy: 0.8725

# 5/5  y_train = y_train[:] n_epochs = 5 Accuracy: 0.9098
# 500/500 [00:16<00:00, 15.01it/s] y_train = y_train[:1000] n_epochs = 500 Accuracy: 0.84
# 500/500 [00:42<00:00, 12.261it/s] y_train = y_train[:6000] n_epochs = 500 Accuracy: 0.912
# 1500/1500 [02:07<00:00, 11.01it/s] y_train = y_train[:6000] n_epochs = 1500 Accuracy: 0.879
# 1000/1000 [03:37<00:00, 11.01it/s] y_train = y_train[:16000] n_epochs = 1000 Accuracy: 0.87
# 1000/1000 [14:24<00:00, 11.01it/s] y_train = y_train[:] n_epochs = 1000 Accuracy: 0.9096

Wall time: 0 ns


In [97]:
weights_input_hidden, bias_hidden, weights_hidden_output, bias_output = perceptron_train(X_train, y_train, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output, n_epochs, learning_rate, n_outputs)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/1001 [00:00<?, ?it/s]

In [98]:
hidden_inputs = np.dot(X_test, weights_input_hidden) + bias_hidden
hidden_outputs = sigmoid(hidden_inputs)

output_inputs = np.dot(hidden_outputs, weights_hidden_output) + bias_output
y_pred = np.argmax(output_inputs, axis=1)

In [99]:
accuracy = np.mean(y_pred == y_test)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8725
