In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from time import time

In [2]:
# Функции активации
def relu(Z):
    return np.maximum(0, Z)

def relu_derivative(Z):
    return Z > 0

def softmax(Z):
    exp_Z = np.exp(Z - np.max(Z, axis=1, keepdims=True))  # Нормализация для числовой стабильности
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

In [3]:
# Функция ошибки (кросс-энтропия)
def cross_entropy_loss(Y_true, Y_pred):
    m = Y_true.shape[0]
    log_likelihood = -np.log(Y_pred[range(m), Y_true.argmax(axis=1)])
    loss = np.sum(log_likelihood) / m
    return loss


In [4]:
# Обратное распространение ошибки
def backprop(X, Y_true, params, cache):
    m = X.shape[0]
    # Извлечение параметров
    W1, b1, W2, b2 = params
    Z1, A1, Z2, A2 = cache
    # Градиенты
    dZ2 = A2 - Y_true
    dW2 = (1 / m) * np.dot(A1.T, dZ2)
    db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = (1 / m) * np.dot(X.T, dZ1)
    db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)

    grads = (dW1, db1, dW2, db2)
    return grads


In [5]:
# Обновление параметров
def update_params(params, grads, learning_rate):
    W1, b1, W2, b2 = params
    dW1, db1, dW2, db2 = grads

    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

    params = (W1, b1, W2, b2)
    return params


In [6]:
# Прямое распространение
def forward_propagation(X, params):
    W1, b1, W2, b2 = params

    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)

    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)

    cache = (Z1, A1, Z2, A2)
    return A2, cache

In [7]:
# Функция для тренировки модели
def train(X_train, Y_train, X_test, Y_test, hidden_neurons=300, epochs=20, batch_size=32, learning_rate=0.1):
    input_size = X_train.shape[1]
    output_size = Y_train.shape[1]

    # Инициализация параметров
    W1 = np.random.randn(input_size, hidden_neurons) * 0.01
    b1 = np.zeros((1, hidden_neurons))
    W2 = np.random.randn(hidden_neurons, output_size) * 0.01
    b2 = np.zeros((1, output_size))

    params = (W1, b1, W2, b2)


In [8]:
# Функция для тренировки модели
def train(X_train, Y_train, X_test, Y_test, hidden_neurons=300, epochs=20, batch_size=32, learning_rate=0.1):
    input_size = X_train.shape[1]
    output_size = Y_train.shape[1]

    # Инициализация параметров
    W1 = np.random.randn(input_size, hidden_neurons) * 0.01
    b1 = np.zeros((1, hidden_neurons))
    W2 = np.random.randn(hidden_neurons, output_size) * 0.01
    b2 = np.zeros((1, output_size))

    params = (W1, b1, W2, b2)

    # Обучение
    for epoch in range(epochs):
        epoch_loss = 0
        start_time = time()

        # Перемешиваем данные
        permutation = np.random.permutation(X_train.shape[0])
        X_train_shuffled = X_train[permutation]
        Y_train_shuffled = Y_train[permutation]

        # Мини-пакетная обработка
        for i in range(0, X_train.shape[0], batch_size):
            X_batch = X_train_shuffled[i:i + batch_size]
            Y_batch = Y_train_shuffled[i:i + batch_size]

            # Прямое распространение
            A2, cache = forward_propagation(X_batch, params)

            # Вычисление ошибки
            loss = cross_entropy_loss(Y_batch, A2)
            epoch_loss += loss

            # Обратное распространение
            grads = backprop(X_batch, Y_batch, params, cache)

            # Обновление параметров
            params = update_params(params, grads, learning_rate)

        end_time = time()

        # Вывод статистики по эпохе
        print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss/X_train.shape[0]:.4f} - Time: {end_time - start_time:.2f}s")

    # Оценка точности
    accuracy = evaluate(X_test, Y_test, params)
    print(f"Accuracy on test data: {accuracy:.2f}%")


In [9]:
# Оценка точности на тестовом наборе
def evaluate(X, Y_true, params):
    A2, _ = forward_propagation(X, params)
    predictions = np.argmax(A2, axis=1)
    labels = np.argmax(Y_true, axis=1)
    accuracy = np.mean(predictions == labels) * 100
    return accuracy

In [10]:
# Загрузка и предобработка данных MNIST
def load_data():
    # Загрузка набора данных MNIST
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

    # Нормализация данных
    X = X / 255.0

    # Преобразование меток в массив NumPy
    y = np.array(y)

    # Разделение данных на тренировочные и тестовые выборки
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Преобразование меток в one-hot encoding
    encoder = OneHotEncoder(sparse_output=False)  # Используем sparse_output
    y_train = encoder.fit_transform(y_train.reshape(-1, 1))  # Преобразуем в массив
    y_test = encoder.transform(y_test.reshape(-1, 1))        # Преобразуем в массив

    # Преобразуем X_train и X_test в массивы NumPy (если это не было сделано автоматически)
    X_train = np.array(X_train)
    X_test = np.array(X_test)

    return X_train, X_test, y_train, y_test


In [11]:
# Основной запуск
X_train, X_test, Y_train, Y_test = load_data()
train(X_train, Y_train, X_test, Y_test, hidden_neurons=300, epochs=20, batch_size=32, learning_rate=0.1)

Epoch 1/20 - Loss: 0.0124 - Time: 8.07s
Epoch 2/20 - Loss: 0.0052 - Time: 5.42s
Epoch 3/20 - Loss: 0.0036 - Time: 7.85s
Epoch 4/20 - Loss: 0.0027 - Time: 5.36s
Epoch 5/20 - Loss: 0.0022 - Time: 7.99s
Epoch 6/20 - Loss: 0.0018 - Time: 5.46s
Epoch 7/20 - Loss: 0.0015 - Time: 7.71s
Epoch 8/20 - Loss: 0.0013 - Time: 5.62s
Epoch 9/20 - Loss: 0.0011 - Time: 5.67s
Epoch 10/20 - Loss: 0.0009 - Time: 6.95s
Epoch 11/20 - Loss: 0.0008 - Time: 5.26s
Epoch 12/20 - Loss: 0.0007 - Time: 7.85s
Epoch 13/20 - Loss: 0.0006 - Time: 5.34s
Epoch 14/20 - Loss: 0.0005 - Time: 7.67s
Epoch 15/20 - Loss: 0.0004 - Time: 5.40s
Epoch 16/20 - Loss: 0.0004 - Time: 7.70s
Epoch 17/20 - Loss: 0.0003 - Time: 5.31s
Epoch 18/20 - Loss: 0.0003 - Time: 6.89s
Epoch 19/20 - Loss: 0.0003 - Time: 5.82s
Epoch 20/20 - Loss: 0.0002 - Time: 5.40s
Accuracy on test data: 97.82%
