In [1]:
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical






In [2]:
# ReLU activation function
def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        The element-wise maximum of 0 and ``x``, so negative entries become 0.
    """
    lower_bound = 0
    return np.maximum(lower_bound, x)

# Softmax activation function
def softmax(x):
    """Turn scores into probabilities along the last axis.

    Args:
        x: Array-like of scores; normalisation runs over the last axis.

    Returns:
        Array of the same shape whose entries along the last axis sum to 1.
    """
    # Subtracting the per-row maximum keeps the largest exponent at exp(0),
    # which prevents overflow without changing the result.
    row_peak = np.max(x, axis=-1, keepdims=True)
    exponentials = np.exp(x - row_peak)
    normalizer = np.sum(exponentials, axis=-1, keepdims=True)
    return exponentials / normalizer

# Cross-entropy loss
def cross_entropy_loss(predictions, targets):
    """Compute the cross-entropy averaged over the first axis of ``predictions``.

    Args:
        predictions: Array-like of predicted probabilities, one row per sample.
        targets: Array-like of target weights, broadcastable against
            ``predictions`` (the training loop passes one-hot rows).

    Returns:
        The summed ``-targets * log(predictions)`` divided by the number of rows.
    """
    # Clamp away exact zeros so that log() never returns -inf.
    tiny = 1e-15
    safe_probs = np.clip(predictions, tiny, 1.0)
    sample_count = safe_probs.shape[0]
    weighted_log_sum = np.sum(targets * np.log(safe_probs))
    return -weighted_log_sum / sample_count

# One-hot encoding of integer class labels
def one_hot_encode(labels, num_classes):
    """Build a one-hot matrix from a sequence of integer class labels.

    Args:
        labels: Sequence of integer class indices, one per sample.
        num_classes: Number of columns in the result.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` with a single 1 in
        each row at the column given by that row's label.
    """
    row_ids = np.arange(len(labels))
    encoded = np.zeros((len(labels), num_classes))
    # Fancy indexing pairs row i with column labels[i].
    encoded[row_ids, labels] = 1
    return encoded

# Train a one-hidden-layer neural network with mini-batch gradient descent
def train_neural_network(X, y, hidden_size, output_size, learning_rate, num_epochs, batch_size, seed=None):
    """Train an input -> ReLU hidden layer -> softmax output network.

    Prints the mean per-sample loss and the training accuracy after every epoch.

    Args:
        X: 2-D array of shape (num_samples, input_size).
        y: One-hot targets of shape (num_samples, output_size).
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output classes.
        learning_rate: Step size applied to the batch-summed gradients.
        num_epochs: Number of full passes over the data.
        batch_size: Number of samples per mini-batch; must be positive.
        seed: Optional seed for weight initialisation and shuffling. When
            ``None`` (the default) the global ``np.random`` state is used.

    Returns:
        Tuple ``(weights_input_hidden, biases_hidden, weights_hidden_output,
        biases_output)`` holding the trained parameters.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``X`` has no samples.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    input_size = X.shape[1]
    num_samples = X.shape[0]
    if num_samples == 0:
        raise ValueError("X must contain at least one sample")

    # A private generator keeps a seeded run from touching global RNG state;
    # without a seed, fall back to the module-level functions.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Weight initialisation: standard normal weights, zero biases
    weights_input_hidden = rng.randn(input_size, hidden_size)
    biases_hidden = np.zeros((1, hidden_size))
    weights_hidden_output = rng.randn(hidden_size, output_size)
    biases_output = np.zeros((1, output_size))

    for epoch in range(num_epochs):
        # Shuffle the sample order for this epoch
        indices = np.arange(num_samples)
        rng.shuffle(indices)

        total_loss = 0.0
        correct_predictions = 0

        for batch_start in range(0, num_samples, batch_size):
            # Select the mini-batch (the last one may be shorter than batch_size)
            batch_indices = indices[batch_start:batch_start + batch_size]
            X_batch = X[batch_indices]
            y_batch = y[batch_indices]

            # Forward pass
            hidden_layer_input = np.dot(X_batch, weights_input_hidden) + biases_hidden
            hidden_layer_output = relu(hidden_layer_input)
            output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + biases_output
            output_layer_output = softmax(output_layer_input)

            # Loss: cross_entropy_loss returns the batch mean, so weight it by the
            # batch length to keep a short final batch from skewing the epoch mean.
            loss = cross_entropy_loss(output_layer_output, y_batch)
            total_loss += loss * len(batch_indices)

            # Accuracy bookkeeping
            predictions = np.argmax(output_layer_output, axis=1)
            true_labels = np.argmax(y_batch, axis=1)
            correct_predictions += np.sum(predictions == true_labels)

            # Backward pass. (softmax output - one-hot target) is the gradient of
            # the cross-entropy with respect to the output-layer pre-activations.
            output_error = output_layer_output - y_batch
            # The boolean mask is the ReLU derivative: 1 where the unit was active.
            hidden_error = np.dot(output_error, weights_hidden_output.T) * (hidden_layer_output > 0)

            # NOTE: gradients are summed over the batch, not averaged, so the
            # effective step size grows with batch_size.
            weights_hidden_output -= learning_rate * np.dot(hidden_layer_output.T, output_error)
            biases_output -= learning_rate * np.sum(output_error, axis=0, keepdims=True)
            weights_input_hidden -= learning_rate * np.dot(X_batch.T, hidden_error)
            biases_hidden -= learning_rate * np.sum(hidden_error, axis=0, keepdims=True)

        # Mean per-sample loss over the whole epoch
        average_loss = total_loss / num_samples
        accuracy = correct_predictions / num_samples

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}")

    return weights_input_hidden, biases_hidden, weights_hidden_output, biases_output

# Load the MNIST data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each image into a 1-D vector and scale pixel values by 1/255
X_train = X_train.reshape((X_train.shape[0], -1)) / 255.0
X_test = X_test.reshape((X_test.shape[0], -1)) / 255.0

# Convert class labels to one-hot encoding
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# Network hyperparameters
hidden_size = 300
output_size = 10
learning_rate = 0.01
num_epochs = 10
min_batch_size = 64
max_batch_size = 64
batch_size_step = 8
# Fixed seed so that Restart & Run All reproduces the same numbers
random_seed = 42


# Train the network with different batch sizes
for batch_size in range(min_batch_size, max_batch_size + 1, batch_size_step):
    print(f"\nTraining with batch size {batch_size}\n{'=' * 40}")
    # Re-seed before every run so each batch size starts from the same
    # initial weights and the comparison is not confounded by the RNG.
    np.random.seed(random_seed)
    train_neural_network(X_train, y_train_one_hot, hidden_size, output_size, learning_rate, num_epochs, batch_size)


Training with batch size 64
Epoch 1/10, Loss: 1.1697, Accuracy: 0.8206
Epoch 2/10, Loss: 0.3778, Accuracy: 0.9018
Epoch 3/10, Loss: 0.2814, Accuracy: 0.9217
Epoch 4/10, Loss: 0.2298, Accuracy: 0.9350
Epoch 5/10, Loss: 0.1946, Accuracy: 0.9441
Epoch 6/10, Loss: 0.1756, Accuracy: 0.9485
Epoch 7/10, Loss: 0.1552, Accuracy: 0.9543
Epoch 8/10, Loss: 0.1413, Accuracy: 0.9579
Epoch 9/10, Loss: 0.1306, Accuracy: 0.9603
Epoch 10/10, Loss: 0.1186, Accuracy: 0.9648


In [3]:
# Network hyperparameters
hidden_size = 300
output_size = 10
learning_rate = 0.01
num_epochs = 10
min_batch_size = 8
max_batch_size = 64
batch_size_step = 8
# Fixed seed so that Restart & Run All reproduces the same numbers
random_seed = 42


# Train the network with different batch sizes
for batch_size in range(min_batch_size, max_batch_size + 1, batch_size_step):
    print(f"\nTraining with batch size {batch_size}\n{'=' * 40}")
    # Re-seed before every run so each batch size starts from the same
    # initial weights and the comparison is not confounded by the RNG.
    np.random.seed(random_seed)
    train_neural_network(X_train, y_train_one_hot, hidden_size, output_size, learning_rate, num_epochs, batch_size)


Training with batch size 8
Epoch 1/10, Loss: 1.4488, Accuracy: 0.8840
Epoch 2/10, Loss: 0.3669, Accuracy: 0.9274
Epoch 3/10, Loss: 0.2401, Accuracy: 0.9448
Epoch 4/10, Loss: 0.1752, Accuracy: 0.9561
Epoch 5/10, Loss: 0.1349, Accuracy: 0.9641
Epoch 6/10, Loss: 0.1062, Accuracy: 0.9705
Epoch 7/10, Loss: 0.0878, Accuracy: 0.9749
Epoch 8/10, Loss: 0.0729, Accuracy: 0.9780
Epoch 9/10, Loss: 0.0596, Accuracy: 0.9820
Epoch 10/10, Loss: 0.0506, Accuracy: 0.9844

Training with batch size 16
Epoch 1/10, Loss: 1.3739, Accuracy: 0.8754
Epoch 2/10, Loss: 0.3486, Accuracy: 0.9252
Epoch 3/10, Loss: 0.2301, Accuracy: 0.9441
Epoch 4/10, Loss: 0.1735, Accuracy: 0.9555
Epoch 5/10, Loss: 0.1364, Accuracy: 0.9634
Epoch 6/10, Loss: 0.1097, Accuracy: 0.9691
Epoch 7/10, Loss: 0.0915, Accuracy: 0.9738
Epoch 8/10, Loss: 0.0746, Accuracy: 0.9780
Epoch 9/10, Loss: 0.0645, Accuracy: 0.9806
Epoch 10/10, Loss: 0.0552, Accuracy: 0.9833

Training with batch size 24
Epoch 1/10, Loss: 1.3171, Accuracy: 0.8705
Epoch 2/1