In [498]:
import numpy as np
import gzip
import os

In [499]:
# Загрузка данных
data_sources = {
    "training_images": "train-images-idx3-ubyte.gz", 
    "test_images": "t10k-images-idx3-ubyte.gz",  
    "training_labels": "train-labels-idx1-ubyte.gz",  
    "test_labels": "t10k-labels-idx1-ubyte.gz",}  
data_dir = "../_data"
os.makedirs(data_dir, exist_ok=True)
mnist_dataset = {}
# Данные
for key in ("training_images", "test_images"):
    with gzip.open(os.path.join(data_dir, data_sources[key]), "rb") as mnist_file:
        mnist_dataset[key] = np.frombuffer(
            mnist_file.read(), np.uint8, offset=16
        ).reshape(-1, 28 * 28)
# Лейблы
for key in ("training_labels", "test_labels"):
    with gzip.open(os.path.join(data_dir, data_sources[key]), "rb") as mnist_file:
        mnist_dataset[key] = np.frombuffer(mnist_file.read(), np.uint8, offset=8)

x_train, y_train, x_test, y_test = (
    mnist_dataset["training_images"],
    mnist_dataset["training_labels"],
    mnist_dataset["test_images"],
    mnist_dataset["test_labels"])


In [500]:
# Подготовка данных
training_sample, test_sample = 1000, 1000
training_images = x_train[0:training_sample] / 255
test_images = x_test[0:test_sample] / 255

def one_hot_encoding(labels, dimension=10):
    one_hot_labels = labels[..., None] == np.arange(dimension)[None]
    return one_hot_labels.astype(np.float64)

training_labels = one_hot_encoding(y_train[:training_sample])
test_labels = one_hot_encoding(y_test[:test_sample])

In [502]:
# Функции активации
def relu(x):
    return (x >= 0) * x
    
def softmax(x):
    expx = np.exp(x-x.max())
    return expx / expx.sum()

In [503]:
# Иницализация параметров
seed = 80112
rng = np.random.default_rng(seed)
learning_rate = 0.005 # Скорость обучения
epochs = 5 # Количество эпох
h = 100 # Длина скрытого слоя
Ih=np.ones((1,h))
pixels_per_image = 784
num_labels = 10
weights_1 = 0.2 * rng.random((pixels_per_image, h)) - 0.1
weights_2 = 0.2 * rng.random((h, num_labels)) - 0.1


In [504]:
# Обучение сети
for j in range(epochs):
    for i in range(len(training_images)):
        layer_0 = training_images[i]
        layer_1 = np.dot(layer_0, weights_1)
        layer_1 = relu(layer_1)
        layer_2 = np.dot(layer_1, weights_2)
        layer_2 = softmax(layer_2)
        layer_2_delta = layer_2 - training_labels[i]
        layer_2_gradient = np.dot(layer_1.reshape((h,1)),layer_2_delta.reshape((1,10))) 
        layer_1_delta = (layer_1*(Ih-layer_1))*np.dot(layer_2_delta,weights_2.T)
        layer_1_gradient = delta2 = np.dot(training_images[i].reshape(784,1),layer_1_delta) 
        weights_1 -= learning_rate * layer_1_gradient
        weights_2 -= learning_rate * layer_2_gradient
    results_1 = relu(np.dot(test_images,weights_1))
    results_2 = softmax(np.dot(results_1,weights_2))
    test_accurate_predictions = np.sum(np.argmax(results_2, axis=1) == np.argmax(test_labels, axis=1))
    print("Epoch: "+ str(j)+ " Accuracy: "+ str(test_accurate_predictions / float(len(test_images))))

Epoch: 0 Accuracy: 0.571
Epoch: 1 Accuracy: 0.663
Epoch: 2 Accuracy: 0.716
Epoch: 3 Accuracy: 0.741
Epoch: 4 Accuracy: 0.746
