In [18]:
import numpy as np
import math

# ---------- Функции активации ----------
def ReLU(x):
    return np.maximum(0, x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x: np.array):
    x_max = np.max(x, axis=1, keepdims=True)
    x_stable = x - x_max
    # Дополнительно можно "подрезать" слишком большие значения:
    x_stable = np.clip(x_stable, -15, 15)
    
    exps = np.exp(x_stable)
    sums = np.sum(exps, axis=1, keepdims=True)
    # Также, если sums -> Inf
    sums = np.clip(sums, 1e-9, None)
    
    return exps / sums


# ---------- Функции потерь ----------
def categorical_cross_entropy_loss(y_pred: np.array, y_true: np.array):
    """
    y_pred: вероятности (после softmax), shape=(batch_size, n_classes)
    y_true: one-hot метки, shape=(batch_size, n_classes)

    Возвращает (loss_value, grad),
      где grad = dL/dy_pred той же формы, что y_pred.
    """
    eps = 1e-7
    # 1) Считаем кросс-энтропию
    N = y_pred.shape[0]
    loss_value = - np.mean(np.sum(y_true * np.log(y_pred + eps), axis=1))

    # 2) Градиент по y_pred
    #    dL/dy_pred = -1/N * (y_true / y_pred)
    grad = - (y_true / (y_pred + eps)) / N
    return loss_value, grad


def mse_loss(y_pred, y_true):
    N = y_pred.shape[0]
    loss_value = np.mean((y_pred - y_true)**2)
    grad = 2.0 * (y_pred - y_true) / N  # dL/dy_pred
    return loss_value, grad

In [2]:
# -- Производные активаций --
def d_relu(z):
    return (z > 0).astype(float)

def d_sigmoid(z):
    sig = 1 / (1 + np.exp(-z))
    return sig * (1 - sig)

In [3]:
class Linear:
    def __init__(self, input_size, output_size, activation_func=None, activation_deriv=None):
        self.n_in = input_size
        self.n_out = output_size

        # Инициализация весов
        self.weights = np.random.randn(self.n_in, self.n_out) * 0.01
        self.bias = np.zeros((1, self.n_out))  # иногда лучше инициализировать нулями

        self.activation_func = activation_func if activation_func else lambda x: x
        self.activation_deriv = activation_deriv

        # Для backward сохраняем промежуточное
        self.input = None
        self.z = None
        self.output = None

    def forward(self, x: np.array):
        """
        x: (batch_size, n_in)
        """
        self.input = x
        # Линейная часть
        self.z = np.dot(x, self.weights) + self.bias
        # Активация
        self.output = self.activation_func(self.z)
        return self.output

    def backward(self, d_out: np.array, learning_rate: float):
        """
        d_out = dL/d(output этого слоя).

        Возвращаем dL/d(input этого слоя).
        """
        # dZ = d_out * activation'(z) (покомпонентное)
        if self.activation_deriv is not None:
            dZ = d_out * self.activation_deriv(self.z)
        else:
            dZ = d_out

        # dW = X^T * dZ
        dW = np.dot(self.input.T, dZ)
        # db = сумма dZ по batch
        db = np.sum(dZ, axis=0, keepdims=True)
        # dX = dZ * W^T
        dX = np.dot(dZ, self.weights.T)

        # Обновляем параметры
        self.weights -= learning_rate * dW
        self.bias -= learning_rate * db

        return dX

In [4]:
class NeuralNetwork:
    def __init__(self, layers=None, loss_func=None, lr=0.01):
        self.layers = layers if layers else []
        self.loss_func = loss_func  # должна возвращать (loss_value, dL/dy_pred)
        self.lr = lr

    def add_layer(self, layer: Linear):
        self.layers.append(layer)

    def forward(self, x: np.array):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, x: np.array, y_true: np.array):
        """
        1) Forward
        2) Считаем loss и dOut
        3) backward (reverse layers)
        """
        # forward
        y_pred = self.forward(x)
        # loss + dL/dy_pred
        loss_value, d_out = self.loss_func(y_pred, y_true)

        # идём с конца
        for layer in reversed(self.layers):
            d_out = layer.backward(d_out, self.lr)

        return loss_value

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def one_hot_encode(y, num_classes):
    """
    Превращает массив целых меток, например [0,1,2,1,0],
    в one-hot, например [[1,0,0],[0,1,0],[0,0,1],...].
    """
    y_encoded = np.zeros((y.shape[0], num_classes))
    y_encoded[np.arange(y.shape[0]), y] = 1
    return y_encoded

def prepare_iris_dataset(test_size=0.2, random_state=42):
    """
    Возвращает (X_train, y_train), (X_test, y_test) в удобном формате.
    - X_train, X_test: numpy-массивы признаков формы (N, 4)
    - y_train, y_test: numpy-массивы one-hot меток формы (N, 3)
    """
    # 1) Загружаем
    iris = load_iris()
    X = iris.data  # shape = (150, 4) - 4 признака
    y = iris.target  # shape = (150,) - метки 0..2
    
    # 2) Делим на обучающую и тестовую выборку
    X_train, X_test, y_train_int, y_test_int = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=True
    )
    
    # 3) Переводим метки в one-hot
    y_train = one_hot_encode(y_train_int, num_classes=3)
    y_test = one_hot_encode(y_test_int, num_classes=3)
    
    # 4) (Опционально) нормализуем / стандартизируем признаки
    # Например, можно сделать простую стандартизацию:
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0) + 1e-9
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std
    
    return (X_train, y_train), (X_test, y_test)

if __name__ == "__main__":
    (X_train, y_train), (X_test, y_test) = prepare_iris_dataset()
    
    print("Train shapes:", X_train.shape, y_train.shape)  # (120, 4), (120, 3)
    print("Test shapes:", X_test.shape, y_test.shape)     # (30, 4), (30, 3)
    
    # Посмотрим часть данных
    print("\nSample X_train[0]:", X_train[0])
    print("Sample y_train[0]:", y_train[0])


Train shapes: (120, 4) (120, 3)
Test shapes: (30, 4) (30, 3)

Sample X_train[0]: [-1.47393679  1.20365798 -1.56253475 -1.31260281]
Sample y_train[0]: [1. 0. 0.]


In [21]:
layers = [
    Linear(4, 5, ReLU, d_relu),
    Linear(5, 3, softmax),
]
model = NeuralNetwork(layers=layers,loss_func=categorical_cross_entropy_loss, lr=0.01)

n_epoch = 450
for n in range(n_epoch):
    loss_val = model.backward(X_train, y_train)
    if n % 10 == 0:
        print(f'epoch {n}: loss = {loss_val:.4f}')

preds = model.forward(X_test)
preds_labels = np.argmax(preds, axis=1)
true_labels = np.argmax(y_test, axis=1)

accuracy = np.mean(preds_labels == true_labels)
print(f'Accuracy {accuracy * 100:.2f}')

print('---Первые 10 предсказаний и тестов---')
print(f'Pred: {preds[:10]}')
print(f'Test: {y_test[:10]}')

epoch 0: loss = 1.0986
epoch 10: loss = 1.0985
epoch 20: loss = 1.0984
epoch 30: loss = 1.0982
epoch 40: loss = 1.0979
epoch 50: loss = 1.0975
epoch 60: loss = 1.0970
epoch 70: loss = 1.0960
epoch 80: loss = 1.0946
epoch 90: loss = 1.0922
epoch 100: loss = 1.0886
epoch 110: loss = 1.0828
epoch 120: loss = 1.0738
epoch 130: loss = 1.0601
epoch 140: loss = 1.0396
epoch 150: loss = 1.0104
epoch 160: loss = 0.9707
epoch 170: loss = 0.9199
epoch 180: loss = 0.8593
epoch 190: loss = 0.7921
epoch 200: loss = 0.7225
epoch 210: loss = 0.6551
epoch 220: loss = 0.5939
epoch 230: loss = 0.5417
epoch 240: loss = 0.5001
epoch 250: loss = 0.4689
epoch 260: loss = 0.4471
epoch 270: loss = 0.4325
epoch 280: loss = 0.4226
epoch 290: loss = 0.4152
epoch 300: loss = 0.4090
epoch 310: loss = 0.4036
epoch 320: loss = 0.3987
epoch 330: loss = 0.3945
epoch 340: loss = 0.3905
epoch 350: loss = 0.3866
epoch 360: loss = 0.3827
epoch 370: loss = 0.3786
epoch 380: loss = 0.3744
epoch 390: loss = 0.3699
epoch 400: 