In [1]:
import numpy as np
import torch
from torch.nn.functional import mse_loss

### 1. Реализация классов слоев
#### Линейный слой

In [4]:
class LinearLayer:
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weights = np.random.randn(input_dim, output_dim) * 0.01
        self.bias = np.zeros((1, output_dim))
        self.input = None
        self.grad_weights = None
        self.grad_bias = None

    def forward(self, x):
        self.input = x  # Запоминаем для backward
        return np.dot(x, self.weights) + self.bias

    def backward(self, grad_output, learning_rate=0.01):
        # Градиенты
        self.grad_weights = np.dot(self.input.T, grad_output) / self.input.shape[0]
        self.grad_bias = np.sum(grad_output, axis=0, keepdims=True) / self.input.shape[0]

        # Градиент по входу
        grad_input = np.dot(grad_output, self.weights.T)

        # Обновляем параметры
        self.weights -= learning_rate * self.grad_weights
        self.bias -= learning_rate * self.grad_bias

        return grad_input

#### ReLU активация

In [5]:
class ReLU:
    def __init__(self):
        self.input = None

    def forward(self, x):
        self.input = x
        return np.maximum(0, x)

    def backward(self, grad_output):
        return grad_output * (self.input > 0)

#### Softmax активация

In [6]:
class Softmax:
    def forward(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def backward(self, grad_output):
        # Градиенты внешнего слоя
        s = self.output  # Shape: [batch_size, num_classes]
        batch_jacobian = np.einsum('ij,ik->ijk', s, -s)  # Якобиан softmax

        # Добавляем диагональные элементы
        diag = np.einsum('ij,jk->ij', s, np.eye(s.shape[1]))
        batch_jacobian += diag

        # Применение батчевого Якобиана
        gradients = np.einsum('ijk,ik->ij', batch_jacobian, grad_output)
        return gradients

### 2. Loss-функция

In [7]:
class MSELoss:
    def forward(self, predictions, targets):
        self.predictions = predictions
        self.targets = targets
        return np.mean((predictions - targets) ** 2)

    def backward(self):
        return 2 * (self.predictions - self.targets) / self.targets.shape[0]

### 3. Вспомогательные функции

#### One-hot кодировка

In [8]:
# Для использования MSE нам нужно представить метки в формате one-hot:
def one_hot_encode(labels, num_classes=10):
    one_hot = np.zeros((labels.size, num_classes))
    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

### 4. Загрузка данных

In [9]:
from torchvision import datasets
from torchvision.transforms import ToTensor

# Загружаем данные
train_data = datasets.MNIST(root="/mnist-dataset", train=True, download=True, transform=ToTensor())
test_data = datasets.MNIST(root="/mnist-dataset", train=False, download=True, transform=ToTensor())

# Преобразуем в numpy
x_train = train_data.data.numpy().reshape(-1, 28 * 28) / 255.0
y_train = train_data.targets.numpy()

x_test = test_data.data.numpy().reshape(-1, 28 * 28) / 255.0
y_test = test_data.targets.numpy()

# One-hot кодируем метки
y_train_one_hot = one_hot_encode(y_train)
y_test_one_hot = one_hot_encode(y_test)

### 5. Собираем нейронную сеть

In [10]:
class SimpleNN:
    def __init__(self, input_dim, hidden_dim, output_dim):
        self.linear1 = LinearLayer(input_dim, hidden_dim)
        self.relu = ReLU()
        self.linear2 = LinearLayer(hidden_dim, output_dim)
        self.softmax = Softmax()

    def forward(self, x):
        x = self.linear1.forward(x)
        x = self.relu.forward(x)
        x = self.linear2.forward(x)
        x = self.softmax.forward(x)
        return x

    def backward(self, grad_output, learning_rate=0.01):
        grad_output = self.linear2.backward(grad_output, learning_rate)
        grad_output = self.relu.backward(grad_output)
        grad_output = self.linear1.backward(grad_output, learning_rate)

### 6. Обучение

In [11]:
# Гиперпараметры
input_dim = 28 * 28  # Размерность изображения
hidden_dim = 64
output_dim = 10
learning_rate = 0.01
epochs = 40
batch_size = 64

# Создаем сеть
network = SimpleNN(input_dim, hidden_dim, output_dim)
loss_function = MSELoss()

# Обучение
for epoch in range(epochs):
    for i in range(0, x_train.shape[0], batch_size):
        x_batch = x_train[i:i + batch_size]
        y_batch = y_train_one_hot[i:i + batch_size]

        # Прямой проход
        predictions = network.forward(x_batch)

        # Потери
        loss = loss_function.forward(predictions, y_batch)

        # Обратное распространение
        grad_loss = loss_function.backward()
        network.backward(grad_loss, learning_rate)

    print(f"Epoch {epoch + 1}, Loss: {loss}")

Epoch 1, Loss: 0.09000763726913405
Epoch 2, Loss: 0.0899823103539135
Epoch 3, Loss: 0.08995383318620452
Epoch 4, Loss: 0.08991949471630234
Epoch 5, Loss: 0.08987644099667964
Epoch 6, Loss: 0.08982280584517963
Epoch 7, Loss: 0.08975447388984621
Epoch 8, Loss: 0.08966658412618547
Epoch 9, Loss: 0.08955202838431117
Epoch 10, Loss: 0.089402784757451
Epoch 11, Loss: 0.08920926480428928
Epoch 12, Loss: 0.08896008161715818
Epoch 13, Loss: 0.08864088971050418
Epoch 14, Loss: 0.08823512847838107
Epoch 15, Loss: 0.087724063326288
Epoch 16, Loss: 0.0870867427427631
Epoch 17, Loss: 0.08630160973414094
Epoch 18, Loss: 0.08534751765725235
Epoch 19, Loss: 0.08420374512295485
Epoch 20, Loss: 0.08285000697266102
Epoch 21, Loss: 0.08126759850994773
Epoch 22, Loss: 0.07944580042827362
Epoch 23, Loss: 0.07738543615177519
Epoch 24, Loss: 0.07510364611715578
Epoch 25, Loss: 0.07263703316184013
Epoch 26, Loss: 0.07004028816511013
Epoch 27, Loss: 0.06737743892259292
Epoch 28, Loss: 0.06471269726158212
Epoch 2

### 7. Оценка точности

In [24]:
def accuracy(predictions, labels):
    preds = np.argmax(predictions, axis=1)
    return np.mean(preds == labels)

# Предсказания
predictions = network.forward(x_test)
acc = accuracy(predictions, y_test)
print(f"Accuracy: {acc * 100:.2f}%")

Accuracy: 78.80%


### 8. Тестирование слоев

In [25]:
# Пример теста на LinearLayer
x = np.random.randn(4, 3).astype(np.float32)
torch_x = torch.tensor(x, requires_grad=True)
linear = LinearLayer(3, 2)
torch_linear = torch.nn.Linear(3, 2)
torch_linear.weight.data = torch.tensor(linear.weights.T, dtype=torch.float32)
torch_linear.bias.data = torch.tensor(linear.bias.flatten(), dtype=torch.float32)

# Forward test
output = linear.forward(x)
torch_output = torch_linear(torch_x)
assert np.allclose(output, torch_output.detach().numpy(), atol=1e-5)

# Backward test
grad_output = np.random.randn(*output.shape).astype(np.float32)
torch_output.backward(torch.tensor(grad_output))
grad_input = linear.backward(grad_output)
assert np.allclose(grad_input, torch_x.grad.numpy(), atol=1e-5)