# Введение в нейронные сети

In [None]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Single seed shared by NumPy's global RNG and PyTorch's default generator.
SEED = 42
np.random.seed(SEED)
# The result is bound to `_`, presumably so the notebook cell does not echo
# the value returned by torch.manual_seed.
_ = torch.manual_seed(SEED)

In [None]:
# Hyperparameters shared by the NumPy and the PyTorch implementations below.
EPOCHS = 20       # number of iterations of each training loop
LR = 0.1          # learning rate passed to both models' gradient-descent updates
INPUT_SIZE = 2    # input_size argument of both network constructors
HIDDEN_SIZE = 8   # hidden_size argument of both network constructors
OUTPUT_SIZE = 1   # output_size argument of both network constructors

## Создание и визуализация датасета

In [None]:
# Generate the two-moons dataset. SEED is reused here (instead of a second
# hard-coded 42) so that the data and the split are controlled by one value.
X, Y = make_moons(n_samples=1000, noise=0.125, random_state=SEED)

# Hold out 20% of the samples for testing.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=SEED
)

# Fit the scaler on the training split only, then apply the same transform
# to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Scatter plot of the standardized training points, one colour per class.
for class_label, class_color in ((0, 'red'), (1, 'blue')):
    class_mask = Y_train == class_label
    plt.scatter(X_train_scaled[class_mask, 0], X_train_scaled[class_mask, 1],
                color=class_color, label=str(class_label))
plt.title('Dataset')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()


#### Архитектура сети:
$$
\begin{align*}
z_1 &= X \cdot W_1 + b_1 \\
a_1 &= \text{ReLU}(z_1) \\
z_2 &= a_1 \cdot W_2 + b_2 \\
\hat{y} = a_2 &= \sigma(z_2)
\end{align*}
$$

#### Функции активации:
$$
\begin{align*}
\text{ReLU}(x) &= \max(0, x) \\
\sigma(x) &= \frac{1}{1 + e^{-x}}
\end{align*}
$$

#### Функция потерь (Binary Cross-Entropy):
$$
L = -\frac{1}{m} \sum_{i=1}^{m} \left[ y_i \cdot \log(\hat{y}_i) + (1 - y_i) \cdot \log(1 - \hat{y}_i) \right]
$$

In [None]:
class Linear_NN_numpy:
    """Two-layer fully connected network implemented with NumPy.

    Architecture: linear -> ReLU -> linear -> sigmoid. Training uses plain
    gradient descent; ``backward`` computes the gradients of the mean binary
    cross-entropy loss and updates the parameters in place.

    ``forward`` caches the intermediate activations (``z1``, ``a1``, ``z2``,
    ``a2``) that ``backward`` needs. ``predict`` deliberately does NOT touch
    that cache, so it is safe to call between ``forward`` and ``backward``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Draw the initial weights from NumPy's global RNG; biases start at zero."""
        # Kaiming (He) initialization for the ReLU layer: std = sqrt(2 / fan_in).
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
        self.b1 = np.zeros((1, hidden_size))

        # Xavier-style initialization for the sigmoid layer: std = sqrt(1 / fan_in).
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(1.0 / hidden_size)
        self.b2 = np.zeros((1, output_size))

        self.lr = learning_rate

    def relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Element-wise ReLU derivative: 1.0 where x > 0, otherwise 0.0."""
        return (x > 0).astype(float)

    def sigmoid(self, x):
        """Element-wise logistic function; the input is clipped to bound exp()."""
        return 1 / (1 + np.exp(-np.clip(x, -666, 666)))

    def binary_cross_entropy(self, y_true, y_pred):
        """Return the mean binary cross-entropy.

        ``y_true`` is reshaped to a column vector; ``y_pred`` is clipped away
        from 0 and 1 so that log() stays finite.
        """
        y_true = y_true.reshape(-1, 1)
        epsilon = 1e-12
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        loss = -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

    def _forward_pass(self, X):
        """Compute (z1, a1, z2, a2) for X without modifying any instance state."""
        z1 = np.dot(X, self.W1) + self.b1
        a1 = self.relu(z1)
        z2 = np.dot(a1, self.W2) + self.b2
        a2 = self.sigmoid(z2)
        return z1, a1, z2, a2

    def forward(self, X):
        """Run the network on X, cache the intermediates for ``backward``, return a2."""
        self.z1, self.a1, self.z2, self.a2 = self._forward_pass(X)
        return self.a2

    def backward(self, X, y_true, y_pred):
        """Do one gradient-descent step using the activations cached by ``forward``.

        ``X`` and ``y_pred`` must be the input and output of the most recent
        ``forward`` call, because ``a1`` and ``z1`` are read from the cache.
        """
        m = X.shape[0]
        y_true = y_true.reshape(-1, 1)

        # Output-layer gradients. For a sigmoid output with BCE loss the
        # gradient w.r.t. z2 simplifies to (prediction - target).
        dz2 = y_pred - y_true
        dW2 = (1/m) * np.dot(self.a1.T, dz2)
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)

        # Hidden-layer gradients (chain rule through W2 and the ReLU).
        dz1 = np.dot(dz2, self.W2.T) * self.relu_derivative(self.z1)
        dW1 = (1/m) * np.dot(X.T, dz1)
        db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)

        # Parameter update. dz1 above was computed with the pre-update W2.
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def predict(self, X, threshold=0.5):
        """Return integer class labels: 1 where the output exceeds ``threshold``.

        Uses a side-effect-free forward pass, so the activations cached by
        ``forward`` for a pending ``backward`` call are left untouched.
        """
        *_, y_pred = self._forward_pass(X)
        return (y_pred > threshold).astype(int)

In [None]:
# Build the NumPy network from the notebook-level hyperparameters.
model_NN_numpy = Linear_NN_numpy(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    output_size=OUTPUT_SIZE,
    learning_rate=LR,
)

# One empty list per tracked quantity; the training loop appends one value
# per epoch to each of them.
learning_history_numpy = {
    metric_name: []
    for metric_name in ('loss', 'accuracy_train', 'accuracy_test', 'W1_norm', 'W2_norm')
}

In [None]:
for epoch in range(EPOCHS):
    # Forward pass over the whole training set (full-batch gradient descent).
    Y_pred = model_NN_numpy.forward(X_train_scaled)

    # Loss of the predictions made before this epoch's weight update.
    loss = model_NN_numpy.binary_cross_entropy(Y_train, Y_pred)

    # Backward pass: computes the gradients and updates the weights in place.
    model_NN_numpy.backward(X_train_scaled, Y_train, Y_pred)

    # Quality metrics, measured with the freshly updated weights.
    Y_pred_train = model_NN_numpy.predict(X_train_scaled)
    accuracy_train = (Y_pred_train.ravel() == Y_train).mean()

    Y_pred_test = model_NN_numpy.predict(X_test_scaled)
    accuracy_test = (Y_pred_test.ravel() == Y_test).mean()

    # Record this epoch in the history.
    epoch_record = {
        'loss': loss,
        'accuracy_train': accuracy_train,
        'accuracy_test': accuracy_test,
        'W1_norm': np.linalg.norm(model_NN_numpy.W1),
        'W2_norm': np.linalg.norm(model_NN_numpy.W2),
    }
    for metric_name, metric_value in epoch_record.items():
        learning_history_numpy[metric_name].append(metric_value)

    # Progress report every 10th epoch.
    if not epoch % 10:
        print(f'Epoch {epoch:4d}, Loss: {loss:.4f}, Train Acc: {accuracy_train:.4f}, Test Acc: {accuracy_test:.4f}')

#### Визуализация процесса обучения НС

In [None]:
plt.figure(figsize=(15, 10))

# Panel 1 of the 2x2 grid: training loss per epoch.
loss_ax = plt.subplot(2, 2, 1)
loss_ax.plot(learning_history_numpy['loss'], 'b-', linewidth=2)
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True, alpha=0.3)
loss_ax.legend(['Loss'])

# Panel 2: train and test accuracy per epoch.
acc_ax = plt.subplot(2, 2, 2)
acc_ax.plot(learning_history_numpy['accuracy_train'], 'g-', linewidth=2, label='Train')
acc_ax.plot(learning_history_numpy['accuracy_test'], 'r-', linewidth=2, label='Test')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Panel 3: norms of the two weight matrices per epoch.
weights_ax = plt.subplot(2, 2, 3)
weights_ax.plot(learning_history_numpy['W1_norm'], 'purple', linewidth=2, label='||W1||')
weights_ax.plot(learning_history_numpy['W2_norm'], 'orange', linewidth=2, label='||W2||')
weights_ax.set_title('Weights')
weights_ax.set_xlabel('Epoch')
weights_ax.set_ylabel('Weights')
weights_ax.legend()
weights_ax.grid(True, alpha=0.3)

In [None]:
# Decision-boundary visualization
def plot_decision_boundary(model, X, y):
    """Plot the class regions predicted by ``model`` with the samples on top.

    ``model.predict`` is evaluated on a regular grid (step 0.02) that covers
    the range of the two columns of ``X`` plus a margin of 0.5 on every side.
    The points of ``X`` are drawn over the filled contour, coloured by ``y``.
    """
    step = 0.02
    margin = 0.5
    first, second = X[:, 0], X[:, 1]
    x_min, x_max = first.min() - margin, first.max() + margin
    y_min, y_max = second.min() - margin, second.max() + margin
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict a label for every grid node, then fold the flat result back
    # into the shape of the grid.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    regions = model.predict(grid_points).reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, regions, alpha=0.8, cmap=plt.cm.RdBu)
    samples = plt.scatter(first, second, c=y, cmap=plt.cm.RdBu, edgecolors='black', s=30)
    plt.colorbar(samples)
    plt.show()

plot_decision_boundary(model_NN_numpy, X_test_scaled, Y_test)

## Создание НС с использованием PyTorch

In [None]:
class Linear_NN_torch(nn.Module):
    """PyTorch network: Linear -> ReLU -> Linear -> Sigmoid.

    The layers live in ``self.network`` (an ``nn.Sequential``); both linear
    layers are re-initialized explicitly right after construction.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights for the ReLU layer, Xavier-normal for the output layer; zero biases."""
        hidden_layer = self.network[0]
        output_layer = self.network[2]

        nn.init.kaiming_normal_(hidden_layer.weight, nonlinearity='relu')
        nn.init.constant_(hidden_layer.bias, 0)

        nn.init.xavier_normal_(output_layer.weight)
        nn.init.constant_(output_layer.bias, 0)

    def forward(self, x):
        """Return the sigmoid output of the network for input ``x``."""
        return self.network(x)

In [None]:
# Convert the NumPy arrays to float32 torch tensors. torch.tensor with an
# explicit dtype replaces the legacy torch.FloatTensor constructor; the
# resulting tensors have the same dtype and values.
X_train_torch = torch.tensor(X_train_scaled, dtype=torch.float32)
# Targets are reshaped to column vectors so they line up with the model output.
Y_train_torch = torch.tensor(Y_train, dtype=torch.float32).reshape(-1, 1)
X_test_torch = torch.tensor(X_test_scaled, dtype=torch.float32)
Y_test_torch = torch.tensor(Y_test, dtype=torch.float32).reshape(-1, 1)

In [None]:
# Build the PyTorch network from the notebook-level hyperparameters.
model_nn_torch = Linear_NN_torch(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_SIZE,
    output_size=OUTPUT_SIZE,
)

# One empty list per tracked quantity; the training loop appends one value
# per epoch to each of them.
learning_history_torch = {
    metric_name: []
    for metric_name in ('loss', 'accuracy_train', 'accuracy_test', 'W1_norm', 'W2_norm')
}

# Loss function and optimizer: binary cross-entropy on probabilities, plain SGD.
criterion = nn.BCELoss()
optimizer = optim.SGD(model_nn_torch.parameters(), lr=LR)

In [None]:
for epoch in range(EPOCHS):
    # Forward pass over the whole training set.
    y_pred = model_nn_torch(X_train_torch)

    # Loss of the predictions made before this epoch's weight update.
    loss = criterion(y_pred, Y_train_torch)

    # Backward pass.
    optimizer.zero_grad()  # reset gradients: torch accumulates them across backward() calls by default
    loss.backward()        # compute gradients
    optimizer.step()       # update weights

    # Autograd is not needed for metrics (nor for validation or inference).
    with torch.no_grad():
        # Train accuracy is evaluated with the updated weights, i.e. with the
        # same parameters as the test accuracy below and at the same point
        # in the epoch as in the NumPy training loop of this notebook.
        train_outputs = model_nn_torch(X_train_torch)
        train_pred = (train_outputs > 0.5).float()
        train_accuracy = (train_pred == Y_train_torch).float().mean()

        # Test accuracy.
        test_outputs = model_nn_torch(X_test_torch)
        test_pred = (test_outputs > 0.5).float()
        test_accuracy = (test_pred == Y_test_torch).float().mean()

        # Norms of the two weight matrices.
        W1_norm = torch.norm(model_nn_torch.network[0].weight).item()
        W2_norm = torch.norm(model_nn_torch.network[2].weight).item()

    # Record this epoch in the history.
    learning_history_torch['loss'].append(loss.item())
    learning_history_torch['accuracy_train'].append(train_accuracy.item())
    learning_history_torch['accuracy_test'].append(test_accuracy.item())
    learning_history_torch['W1_norm'].append(W1_norm)
    learning_history_torch['W2_norm'].append(W2_norm)

    # Progress report every 10th epoch.
    if epoch % 10 == 0:
        print(f'Epoch {epoch:3d}, Loss: {loss.item():.4f}, Train Acc: {train_accuracy.item():.4f}, Test Acc: {test_accuracy.item():.4f}')

#### Визуализация процесса обучения НС

In [None]:
plt.figure(figsize=(15, 10))

# Panel 1 of the 2x2 grid: training loss per epoch.
loss_ax = plt.subplot(2, 2, 1)
loss_ax.plot(learning_history_torch['loss'], 'b-', linewidth=2)
loss_ax.set_title('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.grid(True, alpha=0.3)
loss_ax.legend(['Loss'])

# Panel 2: train and test accuracy per epoch.
acc_ax = plt.subplot(2, 2, 2)
acc_ax.plot(learning_history_torch['accuracy_train'], 'g-', linewidth=2, label='Train')
acc_ax.plot(learning_history_torch['accuracy_test'], 'r-', linewidth=2, label='Test')
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Panel 3: norms of the two weight matrices per epoch.
weights_ax = plt.subplot(2, 2, 3)
weights_ax.plot(learning_history_torch['W1_norm'], 'purple', linewidth=2, label='||W1||')
weights_ax.plot(learning_history_torch['W2_norm'], 'orange', linewidth=2, label='||W2||')
weights_ax.set_title('Weights')
weights_ax.set_xlabel('Epoch')
weights_ax.set_ylabel('Weights')
weights_ax.legend()
weights_ax.grid(True, alpha=0.3)

In [None]:
def plot_decision_boundary(model, X, y):
    """Plot the class regions predicted by ``model`` with the samples on top.

    Because this definition replaces the earlier function of the same name
    once the cell runs, it accepts both kinds of model used in this notebook:

    * a ``torch.nn.Module`` that returns probabilities (thresholded at 0.5
      here, evaluated without autograd), and
    * any other object exposing ``predict(X)`` that returns class labels.

    The model is evaluated on a regular grid (step 0.02) covering the range
    of the two columns of ``X`` plus a margin of 0.5 on every side.
    """
    h = 0.02
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    grid_points = np.c_[xx.ravel(), yy.ravel()]

    if isinstance(model, nn.Module):
        # Torch path: float32 tensor in, probabilities out, no gradient tracking.
        with torch.no_grad():
            X_grid = torch.tensor(grid_points, dtype=torch.float32)
            Z = model(X_grid)
            Z = (Z > 0.5).float().numpy()
    else:
        # Non-torch path: the model thresholds internally and returns labels.
        Z = np.asarray(model.predict(grid_points))

    # Fold the flat predictions back into the shape of the grid.
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.RdBu)
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdBu, edgecolors='black', s=30)
    plt.colorbar(scatter)
    plt.show()

plot_decision_boundary(model_nn_torch, X_test_scaled, Y_test)