# Лабораторная работа: Реализация простой нейронной сети

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons

# --- Функции активации ---
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) applied element-wise.

    The input is clipped to [-500, 500] first so that ``np.exp`` cannot
    overflow for large-magnitude values.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(z):
    """Return the derivative of the sigmoid at ``z``: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

def relu(z):
    """Return the element-wise maximum of 0 and ``z`` (rectified linear unit)."""
    floor = 0
    return np.maximum(floor, z)

def relu_derivative(z):
    """Return 1.0 where ``z`` is strictly positive and 0.0 elsewhere."""
    is_positive = z > 0
    return np.where(is_positive, 1.0, 0.0)

def tanh(z):
    """Return the hyperbolic tangent of ``z``, computed with ``np.tanh``."""
    squashed = np.tanh(z)
    return squashed

def tanh_derivative(z):
    """Return the derivative of tanh at ``z``: 1 - tanh(z)**2."""
    squashed = np.tanh(z)
    return 1 - squashed**2

# Registry: activation name -> (activation function, its derivative).
ACTIVATIONS = dict(
    sigmoid=(sigmoid, sigmoid_derivative),
    relu=(relu, relu_derivative),
    tanh=(tanh, tanh_derivative),
)

# --- Класс NeuralNetwork ---
class NeuralNetwork:
    """Fully connected feed-forward network trained by mini-batch gradient descent.

    Samples are stored column-wise: inputs are ``(n_features, m)`` arrays and
    labels are ``(1, m)`` arrays. The loss is binary cross-entropy evaluated
    on the activations of the last layer.

    NOTE(review): the loss takes ``log(A)`` and ``log(1 - A)`` of the output
    activations, so the last activation presumably has to stay within [0, 1]
    (e.g. sigmoid) -- confirm before using another output activation.

    NOTE: ``__init__`` and ``fit`` reseed NumPy's *global* random generator
    (seeds 42 and 1). This makes runs reproducible, but it also affects any
    other code that draws from ``np.random`` afterwards.
    """

    def __init__(self, layer_dims, activation_funcs, learning_rate=0.01):
        """Create the weights, biases and activation table for every layer.

        Args:
            layer_dims: Layer widths, input layer first, output layer last.
            activation_funcs: One activation name (a key of ``ACTIVATIONS``)
                per non-input layer.
            learning_rate: Step size used by the gradient-descent update.

        Raises:
            ValueError: If fewer than two layers are given, if the number of
                activation names does not match the number of non-input
                layers, or if an activation name is unknown.
        """
        if len(layer_dims) < 2:
            raise ValueError("Сеть должна иметь как минимум входной и выходной слои.")
        if len(activation_funcs) != len(layer_dims) - 1:
            raise ValueError("Количество функций активации должно соответствовать количеству слоев (исключая входной).")
        # Reject unknown names before touching the global RNG or allocating
        # anything, so a bad configuration fails without side effects.
        for activation_name in activation_funcs:
            if activation_name not in ACTIVATIONS:
                raise ValueError(f"Неизвестная функция активации: {activation_name}")

        self.num_layers = len(layer_dims)
        self.layer_dims = layer_dims
        self.learning_rate = learning_rate
        self.parameters = {}
        self.activation_funcs = {}

        # NOTE: reseeds the global NumPy RNG so that initial weights are
        # reproducible across runs.
        np.random.seed(42)
        for layer in range(1, self.num_layers):
            fan_in = layer_dims[layer - 1]
            fan_out = layer_dims[layer]
            activation_name = activation_funcs[layer - 1]
            if activation_name == 'relu':
                # He initialisation: normal samples scaled by sqrt(2 / fan_in).
                weights = np.random.randn(fan_out, fan_in) * np.sqrt(2. / fan_in)
            else:
                # Glorot/Xavier uniform initialisation.
                limit = np.sqrt(6. / (fan_in + fan_out))
                weights = np.random.uniform(-limit, limit, (fan_out, fan_in))
            self.parameters[f'W{layer}'] = weights
            self.parameters[f'b{layer}'] = np.zeros((fan_out, 1))
            self.activation_funcs[layer] = ACTIVATIONS[activation_name]

    def _forward(self, X):
        """Run a forward pass.

        Returns:
            A tuple ``(A_last, cache)`` where ``A_last`` is the output of the
            final layer and ``cache`` maps ``"A0"`` to the input and
            ``"Z{l}"`` / ``"A{l}"`` to the pre-activation and activation of
            layer ``l``.
        """
        cache = {"A0": X}
        A = X
        for layer in range(1, self.num_layers):
            activation_func, _ = self.activation_funcs[layer]
            Z = np.dot(self.parameters[f'W{layer}'], A) + self.parameters[f'b{layer}']
            A = activation_func(Z)
            cache[f'Z{layer}'] = Z
            cache[f'A{layer}'] = A
        return A, cache

    def _compute_cost(self, A_last, Y):
        """Return the mean binary cross-entropy between ``A_last`` and ``Y``.

        A small epsilon is added inside both logarithms to avoid ``log(0)``.
        """
        m = Y.shape[1]
        epsilon = 1e-8
        log_likelihood = Y * np.log(A_last + epsilon) + (1 - Y) * np.log(1 - A_last + epsilon)
        cost = -(1.0 / m) * np.sum(log_likelihood)
        return np.squeeze(cost)

    def _backward(self, A_last, Y, cache):
        """Back-propagate the cross-entropy loss through all layers.

        Args:
            A_last: Output activations returned by ``_forward``.
            Y: Labels with one column per sample.
            cache: The cache returned by ``_forward`` for the same batch.

        Returns:
            Dict with ``"dW{l}"`` and ``"db{l}"`` for every layer ``l``,
            averaged over the batch.
        """
        grads = {}
        m = Y.shape[1]
        epsilon = 1e-8
        # Derivative of the cross-entropy loss w.r.t. the output activations;
        # epsilon mirrors the one used in _compute_cost.
        dA = -(Y / (A_last + epsilon) - (1 - Y) / (1 - A_last + epsilon))
        # Walk from the last layer back to the first, reusing one code path.
        for layer in range(self.num_layers - 1, 0, -1):
            _, activation_derivative = self.activation_funcs[layer]
            dZ = dA * activation_derivative(cache[f'Z{layer}'])
            grads[f'dW{layer}'] = (1.0 / m) * np.dot(dZ, cache[f'A{layer - 1}'].T)
            grads[f'db{layer}'] = (1.0 / m) * np.sum(dZ, axis=1, keepdims=True)
            # Gradient w.r.t. the previous layer's activations.
            dA = np.dot(self.parameters[f'W{layer}'].T, dZ)
        return grads

    def _update_parameters(self, grads):
        """Apply one gradient-descent step to every weight matrix and bias."""
        for layer in range(1, self.num_layers):
            for prefix in ('W', 'b'):
                key = f'{prefix}{layer}'
                self.parameters[key] = self.parameters[key] - self.learning_rate * grads[f'd{key}']

    def fit(self, X_train, Y_train, epochs, batch_size, print_cost_every=100):
        """Train the network with shuffled mini-batches.

        Args:
            X_train: Inputs with one column per sample.
            Y_train: Labels with one column per sample.
            epochs: Number of passes over the training data.
            batch_size: Number of samples per mini-batch; must be positive.
                The last batch of an epoch may be smaller.
            print_cost_every: Print the cost every this many epochs (and on
                the last epoch); a value <= 0 disables printing.

        Returns:
            List with the sample-weighted mean cost of each epoch.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            # Previously 0 raised ZeroDivisionError and negative values
            # silently skipped training while reporting a cost of 0.
            raise ValueError("Размер мини-батча должен быть положительным.")

        costs = []
        m = X_train.shape[1]
        # NOTE: reseeds the global NumPy RNG so the shuffling order is
        # reproducible across calls.
        np.random.seed(1)
        for epoch in range(epochs):
            epoch_cost = 0.
            permutation = np.random.permutation(m)
            shuffled_X = X_train[:, permutation]
            shuffled_Y = Y_train[:, permutation]
            # Slicing past the end is clamped, so the final batch simply
            # holds the remaining samples.
            for start in range(0, m, batch_size):
                stop = start + batch_size
                mini_batch_X = shuffled_X[:, start:stop]
                mini_batch_Y = shuffled_Y[:, start:stop]
                A_last, cache = self._forward(mini_batch_X)
                batch_cost = self._compute_cost(A_last, mini_batch_Y)
                # Weight by batch length so a short last batch does not skew
                # the epoch average.
                epoch_cost += batch_cost * mini_batch_X.shape[1]
                grads = self._backward(A_last, mini_batch_Y, cache)
                self._update_parameters(grads)
            epoch_cost /= m
            costs.append(epoch_cost)
            if print_cost_every > 0 and (epoch % print_cost_every == 0 or epoch == epochs - 1):
                print(f"Эпоха {epoch}: стоимость = {epoch_cost:.6f}")
        return costs

    def predict(self, X):
        """Return 0/1 integer predictions: 1 where the output exceeds 0.5."""
        A_last, _ = self._forward(X)
        return (A_last > 0.5).astype(int)

# --- Данные и обучение ---
# Generate the two-moons dataset, then put samples in columns and labels in
# a single row, which is the layout NeuralNetwork works with.
X, Y = make_moons(n_samples=500, noise=0.2, random_state=42)
X_train = X.T
Y_train = Y.reshape(1, -1)

# Input width comes from the data; two hidden ReLU layers and one sigmoid output.
layer_dims = [X_train.shape[0], 5, 3, 1]
activation_funcs = ['relu', 'relu', 'sigmoid']
nn = NeuralNetwork(layer_dims, activation_funcs, learning_rate=0.1)
costs = nn.fit(
    X_train,
    Y_train,
    epochs=2000,
    batch_size=64,
    print_cost_every=500,
)

# --- График функции потерь ---
# Plot the per-epoch cost values returned by nn.fit.
plt.figure(figsize=(10, 6))
plt.plot(costs)
plt.title("Функция потерь во время обучения")
plt.xlabel("Эпохи")
plt.ylabel("Стоимость")
plt.grid(True)
plt.show()

# --- Граница решений ---
def plot_decision_boundary(model, X, y):
    """Plot the model's predicted regions with the data points drawn on top.

    Args:
        model: Object whose ``predict`` accepts a ``(2, n_points)`` array.
        X: Array whose rows 0 and 1 hold the two plotted features.
        y: Labels used to colour the scatter points (flattened with ravel).
    """
    margin = 0.5
    step = 0.01
    first_feature = X[0, :]
    second_feature = X[1, :]
    x_low, x_high = first_feature.min() - margin, first_feature.max() + margin
    y_low, y_high = second_feature.min() - margin, second_feature.max() + margin

    # Dense grid covering the data range plus the margin on every side.
    xx, yy = np.meshgrid(np.arange(x_low, x_high, step), np.arange(y_low, y_high, step))
    grid_points = np.c_[xx.ravel(), yy.ravel()].T
    regions = model.predict(grid_points).reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, regions, cmap=plt.cm.Spectral, alpha=0.8)
    plt.scatter(first_feature, second_feature, c=y.ravel(), cmap=plt.cm.Spectral, edgecolors='k')
    plt.title("Граница решений нейронной сети")
    plt.xlabel("Признак 1")
    plt.ylabel("Признак 2")
    plt.show()

plot_decision_boundary(nn, X_train, Y_train)

# Training accuracy: percentage of predictions equal to the true labels.
preds = nn.predict(X_train)
matches = preds == Y_train
accuracy = np.mean(matches) * 100
print(f"Точность на обучающем наборе: {accuracy:.2f}%")
