# Подготовка данных

### Загружаем датасет

In [1]:
!pip install idx2numpy --break-system-packages

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import idx2numpy
import numpy as np

In [3]:
# images = idx2numpy.convert_from_file('samples/t10k-images.idx3-ubyte')
# labels = idx2numpy.convert_from_file('samples/t10k-labels.idx1-ubyte')
images = idx2numpy.convert_from_file('samples/train-images.idx3-ubyte')
labels = idx2numpy.convert_from_file('samples/train-labels.idx1-ubyte')

In [4]:
print(f"Изображения: {images.shape}")
print(f"Метки: {labels.shape}")
print(f"Уникальные метки: { [ int(n) for n in set(labels) ] }")

Изображения: (60000, 28, 28)
Метки: (60000,)
Уникальные метки: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


### Преобразуем изображения
> Они уже в виде матриц unsigned byte (uint8), нормализуем для простоты (`[0..255]` -> `[0..1]`)

In [5]:
images = images / 255

In [6]:
# Преобразуем в двумерный массив (сейчас трёхмерный)
X = images.reshape(len(images), -1)
print(X.shape)

y = labels

(60000, 784)


### Формируем датасет

In [7]:
import pandas as pd

In [8]:
data = pd.DataFrame(X, columns=[f"px-{str(n)}" for n, _ in enumerate(X[0])])

signals_y = []
for n in y:
    arr = np.zeros(10)
    arr[n] = 1
    signals_y.append(arr)

data['target'] = signals_y
data['num'] = y
data

Unnamed: 0,px-0,px-1,px-2,px-3,px-4,px-5,px-6,px-7,px-8,px-9,...,px-776,px-777,px-778,px-779,px-780,px-781,px-782,px-783,target,num
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...",8
59996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
59997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",5
59998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",6


In [9]:
print('Число изображений на цифру:')
for n in sorted(data['num'].unique()):
    n_df = data[ data['num'] == n]
    print(f'"{int(n)}": {len(n_df)}')

Число изображений на цифру:
"0": 5923
"1": 6742
"2": 5958
"3": 6131
"4": 5842
"5": 5421
"6": 5918
"7": 6265
"8": 5851
"9": 5949


In [10]:
validate_part = 0.15
test_part = 0.15

def split_df(df : pd.DataFrame, validate_part : float, test_part : float) -> (pd.DataFrame, pd.DataFrame):
    df_ = df.head(int(len(df) * (test_part + validate_part)))
    df_3 = df_.head( int(len(df) * test_part) )
    df_2 = df_.tail( len(df_) - len(df_3) )
    df_1 = df.tail( len(df) - len(df_2) - len(df_3) )
    return (df_1, df_2, df_3)

def get_learning_and_test(data:pd.DataFrame, validate_part, test_part) -> (pd.DataFrame, pd.DataFrame):
    data.sample(frac=1)
    df_train, df_validate, df_test = split_df(data, validate_part, test_part)
    
    df_train.sample(frac=1)
    df_test.sample(frac=1)
    df_validate.sample(frac=1)
    return df_train, df_validate, df_test

df_train, df_validate, df_test = get_learning_and_test(data, validate_part, test_part)

In [11]:
X_train = df_train.drop(['target', 'num'], axis=1).to_numpy()
y_train = df_train['target']

X_validate = df_validate.drop(['target', 'num'], axis=1).to_numpy()
y_validate = df_validate['target']

X_test = df_test.drop(['target', 'num'], axis=1).to_numpy()
y_test = df_test['target']
y_int_test = df_test['num']

In [12]:
print('Число изображений на цифру (train):')
for n in sorted(df_train['num'].unique()):
    n_df = df_train[ df_train['num'] == n]
    print(f'"{int(n)}": {len(n_df)}')

print()
print('Число изображений на цифру (validate):')
for n in sorted(df_validate['num'].unique()):
    n_df = df_validate[ df_validate['num'] == n]
    print(f'"{int(n)}": {len(n_df)}')

print()
print('Число изображений на цифру (test):')
for n in sorted(df_test['num'].unique()):
    n_df = df_test[ df_test['num'] == n]
    print(f'"{int(n)}": {len(n_df)}')

Число изображений на цифру (train):
"0": 4140
"1": 4691
"2": 4218
"3": 4248
"4": 4095
"5": 3834
"6": 4147
"7": 4365
"8": 4119
"9": 4143

Число изображений на цифру (validate):
"0": 892
"1": 1028
"2": 851
"3": 961
"4": 848
"5": 813
"6": 863
"7": 939
"8": 886
"9": 919

Число изображений на цифру (test):
"0": 891
"1": 1023
"2": 889
"3": 922
"4": 899
"5": 774
"6": 908
"7": 961
"8": 846
"9": 887


In [13]:
df_test

Unnamed: 0,px-0,px-1,px-2,px-3,px-4,px-5,px-6,px-7,px-8,px-9,...,px-776,px-777,px-778,px-779,px-780,px-781,px-782,px-783,target,num
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",4
8996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",4
8997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2
8998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",9


In [14]:
df_validate

Unnamed: 0,px-0,px-1,px-2,px-3,px-4,px-5,px-6,px-7,px-8,px-9,...,px-776,px-777,px-778,px-779,px-780,px-781,px-782,px-783,target,num
9000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",6
9001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",7
9002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",9
9003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
9004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2
17996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",6
17997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
17998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",6


In [15]:
df_train

Unnamed: 0,px-0,px-1,px-2,px-3,px-4,px-5,px-6,px-7,px-8,px-9,...,px-776,px-777,px-778,px-779,px-780,px-781,px-782,px-783,target,num
18000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...",4
18001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
18002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",7
18003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2
18004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...",8
59996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",3
59997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...",5
59998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...",6


# ИНС
-- Идея: имитировать нейрон мозга с помощью математических объектов.  
Нейрон получает на вход (*синапсы*) какие-то сигналы, и на основе них выдаёт (*аксон*) сигнал

---

> **Тензор** -- упорядоченное конечное множество чисел (n-мерные списки, n - количество индексов)
> * Список -- тензор с 1 индексом
> * Матрица -- тензор с 2 индексами
> * ...

---

> **Нейрон** -- состоит из 3 объектов:
> * $w$ -- вектор весов (*weights*)
> * $b$ -- смещение (*bias*)
> * $f$ -- функция активации

---

> **Функция активации** -- нормализует сигнал активации нейрона
> 
> Аргумент функции активации: `[матрица весов] x [вектор входного сигнала] + [смещение]`  
> Результат: число в диапазоне `[0..1]`
> $$ y^a = f\left(\sum_{j=0}^{J-1}{w^j x_j^a + b}\right)$$
> * $a$ -- номер пакета
> * $J$ -- число входов нейрона (синапсов)
> * $x_j^a$ -- входной сигнал пакета $a$
> * $y^a$ -- выходной сигнал пакета $a$
>
> Или, если записывать компактно (в виде матриц):
> $$y^a = f( W x^a + b )$$
> * $W$ -- матрица весов
> * $b$ -- вектор смещения
> * $x^a$ -- вектор входного сигнала
> * $y^a$ -- вектор выходного сигнала
> 
> (в коде будем делать именно так)

> Один из вариантов функции активации -- **сигмоида**
> $$\sigma(x)=\frac{1}{1+e^{-x}}$$

---

> **Обучение нейрона** -- подбор коэффициентов и смещения для *каждого* нейрона, чтобы для определённых пакетов выдавались определённые сигналы

> **Общий подход к обучению:**
> 1. Прямой проход (прогоняем сигнал от входа к выходу)
> 2. Считаем ошибку
> 3. Обратный проход (корректируем веса)

> **Дельта-правло** -- один из способов обучения модели.
> 1. Считается ошибка  
>    `err = true - pred`
>    
> 2. Производная сигмоиды  
>    `sigmoid'(x) = sigmoid(x)*( 1 - sigmoid(x) )`
>
> 4. Общая корректировка  
>    `delta = err * sigmoid'(x)`
>
> 5. Считается изменение весов  
>    `delta_w = learninig_rate * delta * inputs`  
>    `w_new = w + delta_w`  
>    
> 6. Считается изменение смещения  
>    `new_b = b + learning_rate * delta`

---

> **Слой ИНС** -- набор не связанных сигналами (ни напрямую ни опосредованно) нейронов.  
> Обычно у всех нейронов в слое одинаковая *функция активации*.  
> Число *синапсов* для нейронов одного слоя совпадает.  
> В нейронах слоя столько же синапсов, сколько нейронов в предыдущем слое. Каждый нейрон посылает свой сигнал на каждый вход.


### Вспомогательные функции

In [16]:
from math import exp

def sigmoid(x):
    if isinstance(x, np.ndarray): exp_ = np.exp
    else: exp_ = exp
    return 1.0 / (1 + exp_(-x))

In [17]:
class Neuron:
    n_synapses: int       # число синапсов (входов)
    weights: list[float]  # веса
    bias: float           # смещение
    # activation_f -- функция активации (должна принимать массив W*x + b и возвращать массив выходного сигнала)
    def __init__(self, n_synapses:int, activation_f=sigmoid, random_state=None):
        if random_state is not None: np.random.seed(random_state)
        self.n_synapses = n_synapses
        self.weights = np.random.randn(n_synapses) * 0.01
        self.bias = 0.0
        self.activation_f = activation_f

    def activate(self, signal:list[float], verbose=False):
        if (len(signal) != self.n_synapses): 
            raise Exception(f'Синапсов {self.n_synapses}, а сигнал рассчитан на {len(signal)}')
        x = np.dot(self.weights, signal) + self.bias
        if verbose: return self.activation_f( x ), x
        return self.activation_f( x )

In [18]:
class Layer:
    n_neurons : int        # число нейронов в слое
    neurons : list[Neuron] # список нейронов слоя
    # activation_f         # функция активации нейронов слоя 
    def __init__(self, n_neurons:int, n_inputs:int, activation_f=sigmoid, random_state=None):
        self.n_neurons = n_neurons
        self.neurons = [ Neuron(n_inputs, activation_f, random_state) for _ in range(n_neurons) ]
        self.activation_f = activation_f

    # Прямой проход
    def forward(self, signal:list[float], verbose=False):
        act = []
        pre_act = []
        for neuron in self.neurons:
            a, p = neuron.activate(signal, True)
            act.append(a)
            pre_act.append(p)
        if verbose: return (np.array(act), np.array(pre_act))
        return np.array(act)


    # Матрица весов (делаем свойством а не методом для красоты)
    def set_weights(self, new_W):
        for i,neuron in enumerate(self.neurons): 
            neuron.weights = new_W[i] 
        
    weights = property(
        fget = lambda self: np.array( [neuron.weights for neuron in self.neurons] ),
        fset = set_weights
    )

    # Массив смещений слоя
    def set_biases(self, new_b):
        for i,neuron in enumerate(self.neurons): 
            neuron.bias = new_b[i] 

    biases = property(
        fget = lambda self: np.array( [neuron.bias for neuron in self.neurons] ),
        fset = set_biases
    )

In [19]:
def calculate_delta(y_true, y_pred):
    err = y_true - y_pred

    d_sigmoid = y_pred * (1-y_pred)
    delta = err * d_sigmoid
    
    return delta, err

In [20]:
from copy import deepcopy

In [21]:
class SingleLayerNetwork:
    layer : Layer
    def __init__(self, n_inputs:int, n_outputs:int, learning_rate=0.1, random_state=None):
        # Слой который принимает n_inputs сигналов 
        # и имеет n_outputs нейронов (столько же сигналов на выходе)
        self.layer = Layer(n_outputs, n_inputs, random_state=random_state)
        self.learning_rate = learning_rate

    def predict_y(self, x):
        pred = self._forward(x)
        return np.argmax(pred)

    def predict(self, X):
        return np.array([self.predict_y(x) for x in X])

    def _forward(self, x):
        self.last_X = x
        return self.layer.forward(x)

    def _backward(self, delta):
        for i, neuron in enumerate(self.layer.neurons):
            gradient = delta[i] * self.last_X # - self.l2_lambda*neuron.weights
            
            neuron.weights += self.learning_rate * gradient
            neuron.bias += self.learning_rate * delta[i]

    # Одна эпоха обучения
    def _train_epoch(self, X, y):
        loss = 0
        for x, y_true in zip(X, y):
            # Прямой проход
            y_pred = self._forward(x)

            # Считаем общую корректировку и ошибку
            delta, err = calculate_delta(y_true, y_pred)

            # Ошибка (MSE)
            loss += np.mean( err**2 )
            
            # Обратный проход
            self._backward(delta)
        return loss / len(X)

    # Полное обучение
    def fit(self, X, y, X_validate, y_validate, epochs:int):
        loss = 0
        best_accuracy = -1
        best_layer = None
        best_epoch = -1
        for epoch in range(epochs):
            ep_loss = self._train_epoch(X, y)
            loss += ep_loss

            y_pred = self.predict(X)
            y_true = [ np.argmax(y_i) for y_i in y ]
            TPred = len( [ 0 for true,pred in zip(y_true, y_pred) if true==pred ] )
            accuracy_train = TPred / len(y)

            y_pred = self.predict(X_validate)
            y_true = [ np.argmax(y_i) for y_i in y_validate ]
            TPred = len( [ 0 for true,pred in zip(y_true, y_pred) if true==pred ] )
            accuracy_valid = TPred / len(y_validate)

            if (accuracy_valid > best_accuracy):
                best_accuracy = accuracy_valid
                best_layer = deepcopy(self.layer)
                best_epoch = epoch
            
            print(f'Epoch {epoch:2}, Loss: {ep_loss:.6f},', end=' ') 
            print(f'Accuracy (train): {accuracy_train:.2%},', end=' ')
            print(f'Accuracy (validate): {accuracy_valid:.2%}')
        self.layer = best_layer
        print(f'\n{50*'='}\nНаибольшая точность: {best_accuracy:2%}, достигнута в {best_epoch} эпохе')
        return loss

In [30]:
network = SingleLayerNetwork(784, 10)
network.fit(X_train, y_train, X_validate, y_validate, 20)

Epoch  0, Loss: 0.021056, Accuracy (train): 89.06%, Accuracy (validate): 88.01%
Epoch  1, Loss: 0.017296, Accuracy (train): 90.14%, Accuracy (validate): 89.13%
Epoch  2, Loss: 0.016480, Accuracy (train): 90.65%, Accuracy (validate): 89.41%
Epoch  3, Loss: 0.016020, Accuracy (train): 91.00%, Accuracy (validate): 89.83%
Epoch  4, Loss: 0.015703, Accuracy (train): 91.25%, Accuracy (validate): 90.08%
Epoch  5, Loss: 0.015464, Accuracy (train): 91.43%, Accuracy (validate): 90.17%
Epoch  6, Loss: 0.015272, Accuracy (train): 91.49%, Accuracy (validate): 90.18%
Epoch  7, Loss: 0.015113, Accuracy (train): 91.55%, Accuracy (validate): 90.17%
Epoch  8, Loss: 0.014977, Accuracy (train): 91.62%, Accuracy (validate): 90.23%
Epoch  9, Loss: 0.014859, Accuracy (train): 91.69%, Accuracy (validate): 90.26%
Epoch 10, Loss: 0.014756, Accuracy (train): 91.72%, Accuracy (validate): 90.30%
Epoch 11, Loss: 0.014665, Accuracy (train): 91.77%, Accuracy (validate): 90.31%
Epoch 12, Loss: 0.014582, Accuracy (trai

np.float64(0.30653628980753805)

In [23]:
y_pred = network.predict(X_test)

In [24]:
TPred = len( [ 0 for true,pred in zip(y_int_test, y_pred) if true==pred ] )

accuracy = TPred / len(y_pred)
accuracy

0.9091111111111111

pd.DataFrame({ 'true':y_int_test, 'predict':y_pred})

In [25]:
class MultiLayerNetwork:
    layers : list[Layer]
    def __init__(self, layer_sizes : list[int], activate_funcs, learning_rate=0.1, random_state=None):
        if not isinstance(activate_funcs, list): activate_funcs = [activate_funcs]*(len(layer_sizes)-1)
        self.layers = []
        for i,_ in enumerate(layer_sizes):
            if (i==0): continue
            self.layers.append( Layer(layer_sizes[i], layer_sizes[i-1], activate_funcs[i-1], random_state=random_state) )
        self.learning_rate = learning_rate

    def predict_y(self, x):
        # Тут жёстко прописан softmax
        pred = self._forward(x)
        return np.argmax(pred)

    def predict(self, X):
        return np.array([self.predict_y(x) for x in X])

    def _forward(self, x, verbose=False):
        self.last_X = x
        # Сохраняем информацию при проходах чтобы не считать потом снова
        info = {
            'input_signals'  : [x], # Входы слоёв
            'pure_signals'   : [ ], # Значения до активации
            'output_signals' : [ ], # После активации
        }
        prev_x = x
        for layer in self.layers:
            new_x, pre_act = layer.forward(prev_x, verbose=True)
            
            info['input_signals' ].append(prev_x.copy() )
            info[ 'pure_signals' ].append(pre_act.copy())
            info['output_signals'].append( new_x.copy() )
            
            prev_x = new_x

        if verbose: return prev_x, info
        return prev_x

    def _backward(self, last_delta, info):

        weight_grads = []
        bias_grads   = []
        delta = last_delta

        # Итерируем по слоям в обратном порядке
        for idx in reversed( range( len(self.layers) ) ):
            layer = self.layers[idx]
            
            # Вход для первого слоя -- вход модели
            if idx == 0: cur_input = self.last_X
            # Для остальных это выход предыдущего
            else: cur_input = info['output_signals'][idx-1]

            # Градиенты
            # delta * cur_input.T как матрица
            weight_grad = np.outer(delta, cur_input)
            weight_grads.append(weight_grad)
            
            bias_grad = delta
            bias_grads.append(bias_grad)

            # Распространение ошибки назад по слоям
            if idx > 0:
                # Веса слоя
                W = layer.weights

                # Для распространения ошибки на предыдущие слои
                delta_W = np.dot(W.T, delta)

                # Сигнал до активации предыдущего слоя
                pre_act = info['pure_signals'][idx-1]

                # Производная функции активации предыдущего слоя
                act_f = self.layers[idx - 1].activation_f
                d_act_f = lambda x: ( act_f(x+1e-6) - act_f(x-1e-6) ) / 2e-6

                delta = delta_W * d_act_f( info['pure_signals'][idx-1] )

        # Разворачиваем (мы шли в обратном порядке)
        weight_grads.reverse()
        bias_grads.reverse()
        
        # Обновляем веса всех слоев
        for idx,layer in enumerate(self.layers):
            layer.weights += self.learning_rate * weight_grads[idx]
            layer.biases  += self.learning_rate * bias_grads[idx]

    # Вычисление дельты
    def _calculate_delta(self, y_true, y_pred, info):
        err = y_true - y_pred

        act_f = self.layers[-1].activation_f
        d_act_f = lambda x: ( act_f(x+1e-6) - act_f(x-1e-6) ) / 2e-6
        
        delta = err * d_act_f(info['pure_signals'][-1])
        
        return delta, err

    # Одна эпоха обучения
    def _train_epoch(self, X, y):
        loss = 0
        for x, y_true in zip(X, y):
            # Прямой проход
            y_pred, info = self._forward(x, True)

            # Считаем общую корректировку и ошибку
            delta, err = self._calculate_delta(y_true, y_pred, info)

            # Ошибка (MSE)
            loss += np.mean( err**2 )
            
            # Обратный проход
            self._backward(delta, info)
        return loss / len(X)

    # Полное обучение
    def fit(self, X, y, X_validate, y_validate, epochs:int):
        loss = 0
        best_accuracy = -1
        best_layer = None
        best_epoch = -1
        for epoch in range(epochs):
            ep_loss = self._train_epoch(X, y)
            loss += ep_loss

            y_pred = self.predict(X)
            y_true = [ np.argmax(y_i) for y_i in y ]
            TPred = len( [ 0 for true,pred in zip(y_true, y_pred) if true==pred ] )
            accuracy_train = TPred / len(y)

            y_pred = self.predict(X_validate)
            y_true = [ np.argmax(y_i) for y_i in y_validate ]
            TPred = len( [ 0 for true,pred in zip(y_true, y_pred) if true==pred ] )
            accuracy_valid = TPred / len(y_validate)

            if (accuracy_valid > best_accuracy):
                best_accuracy = accuracy_valid
                best_layer = deepcopy(self.layers)
                best_epoch = epoch
            
            print(f'Epoch {epoch:2}, Loss: {ep_loss:.6f},', end=' ') 
            print(f'Accuracy (train): {accuracy_train:.2%},', end=' ')
            print(f'Accuracy (validate): {accuracy_valid:.2%}')
        self.layers = best_layer
        print(f'\n{50*'='}\nНаибольшая точность: {best_accuracy:2%}, достигнута в {best_epoch} эпохе')
        return loss

In [26]:
def ReLU(x):
    return np.maximum(0, x)

In [27]:
network = MultiLayerNetwork([784, 25, 10], [ReLU, sigmoid], learning_rate=0.1)
network.fit(X_train, y_train, X_validate, y_validate, 20)

Epoch  0, Loss: 0.017901, Accuracy (train): 91.20%, Accuracy (validate): 90.17%
Epoch  1, Loss: 0.011034, Accuracy (train): 93.50%, Accuracy (validate): 92.49%
Epoch  2, Loss: 0.009753, Accuracy (train): 93.95%, Accuracy (validate): 92.67%
Epoch  3, Loss: 0.009081, Accuracy (train): 95.18%, Accuracy (validate): 93.50%
Epoch  4, Loss: 0.008657, Accuracy (train): 94.91%, Accuracy (validate): 93.44%
Epoch  5, Loss: 0.008321, Accuracy (train): 95.79%, Accuracy (validate): 94.41%
Epoch  6, Loss: 0.008088, Accuracy (train): 95.57%, Accuracy (validate): 93.94%
Epoch  7, Loss: 0.008044, Accuracy (train): 95.75%, Accuracy (validate): 94.24%
Epoch  8, Loss: 0.007788, Accuracy (train): 95.92%, Accuracy (validate): 94.56%
Epoch  9, Loss: 0.007704, Accuracy (train): 96.19%, Accuracy (validate): 94.74%
Epoch 10, Loss: 0.007791, Accuracy (train): 96.35%, Accuracy (validate): 94.70%
Epoch 11, Loss: 0.007511, Accuracy (train): 95.97%, Accuracy (validate): 94.33%
Epoch 12, Loss: 0.007538, Accuracy (trai

np.float64(0.16993979388072536)

In [28]:
y_pred = network.predict(X_test)

In [29]:
TPred = len( [ 0 for true,pred in zip(y_int_test, y_pred) if true==pred ] )

accuracy = TPred / len(y_pred)
accuracy

0.9506666666666667

## Вывод
> Обе модели показали высокую точность.  
> Однослойная ИНС достигла максимальной точности `0.909` на тестовых данных за 19 эпох.  
> Двуслойная ИНС (со скрытым слоем) достигла максимальной точности `0.954` за 19 эпох.  
>
> Такое поведение ожидаемо -- дополнительные слои позволяют лучше справляться со сложными задачами.  

