In [15]:
# импортируем нужные библиотеки
import numpy as np
from numba import jit
from tensorflow.keras.datasets import mnist
from tqdm import tqdm_notebook
from tqdm.notebook import tqdm

In [62]:
# Load MNIST (70,000 images in total; each is flattened to 784 features below).
# Optional subsample sizes for quick experiments; None keeps the full split.
n_train_samples = None
n_test_samples = None

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Slice images and labels with the same bound so they can never get out of sync
X_train, y_train = X_train[:n_train_samples], y_train[:n_train_samples]
X_test, y_test = X_test[:n_test_samples], y_test[:n_test_samples]

# Flatten every image to one row (no hard-coded sample count) and
# divide by 255 to rescale the pixel values
X_train = X_train.reshape(len(X_train), -1) / 255
X_test = X_test.reshape(len(X_test), -1) / 255

# One-hot encoding of the training labels: one row per sample, one column per class
y_train_onehot = np.zeros((y_train.size, y_train.max() + 1))
y_train_onehot[np.arange(y_train.size), y_train] = 1

In [63]:
# Определение параметров модели
# Network dimensions are derived from the loaded data
n_inputs = X_train.shape[-1]           # features per sample (input layer width)
n_outputs = np.unique(y_train).size    # number of distinct labels (output layer width)
# Hidden layer width; alternative rule of thumb:
# np.ceil((n_inputs + n_outputs) / 2).astype(int)
n_hidden = 522

# Optimisation settings
learning_rate = 0.1   # SGD step size
n_epochs = 101        # passes over the training set
batch_size = 64       # samples per mini-batch

In [52]:
# Определение функций активации
@jit(nopython=True) #, fastmath=True
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), compiled with Numba."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# def softmax(x):
#     exp_x = np.exp(x)
#     return exp_x / np.sum(exp_x, axis=1, keepdims=True)

@jit(nopython=True) #, fastmath=True
def make_dot(x,y):
    """Numba-compiled wrapper around np.dot(x, y)."""
    product = np.dot(x, y)
    return product

In [53]:

@jit(cache=True) #, fastmath=True
def make_loss(y_pred, X_train, y_train):
    """Average negative log-probability assigned to the true class.

    Args:
        y_pred: 2-D array indexed as y_pred[sample, class].
        X_train: used only for its length (the number of samples).
        y_train: class index of each sample.

    Returns:
        Sum of -log(y_pred[i, y_train[i]]) over all samples, divided by
        the number of samples.
    """
    n_samples = len(X_train)
    total = 0.0
    for row in range(n_samples):
        total -= np.log(y_pred[row, y_train[row]])
    return total / n_samples

In [54]:

@jit(cache=True)#(nopython=True)#(fastmath=True)
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    The maximum of each row is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps np.exp from overflowing
    (which would produce inf / inf = NaN) when scores are large.

    Args:
        x: 2-D array, one row of scores per sample.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    n_rows = x.shape[0]
    # Explicit loop instead of np.max(x, axis=1) so the function does not
    # rely on axis support for max inside Numba-compiled code.
    row_max = np.empty(n_rows, dtype=x.dtype)
    for i in range(n_rows):
        row_max[i] = np.max(x[i])
    exp_x = np.exp(x - row_max[..., np.newaxis])
    return exp_x / np.sum(exp_x, axis=1)[..., np.newaxis]

In [59]:

@jit(nopython=True)#(cache=True)#(fastmath=True)
def make_svd(hidden_weights,k):
    """Rebuild hidden_weights from only the first k components of its SVD.

    Args:
        hidden_weights: 2-D weight matrix to approximate.
        k: number of singular values/vectors to keep.

    Returns:
        Matrix of the same shape as hidden_weights, equal to
        U[:, :k] @ diag(s[:k]) @ V[:k, :].
    """
    left, singular_values, right = np.linalg.svd(hidden_weights, full_matrices=False)
    # Truncate each factor to its first k components
    left_k = left[:, :k]
    sigma_k = np.diag(singular_values[:k])
    right_k = right[:k, :]
    return np.dot(left_k, np.dot(sigma_k, right_k))

In [49]:
# Инициализация весов
# Seed NumPy's global generator so the initial weights (and any later use of
# np.random, such as shuffling during training) are reproducible.
SEED = 42
np.random.seed(SEED)

# Input -> hidden layer: weights drawn uniformly from [-0.5, 0.5), zero biases
weights_input_hidden = np.random.uniform(-0.5, 0.5, size=(n_inputs, n_hidden))
bias_hidden = np.zeros(n_hidden)

# Hidden -> output layer: same initialisation scheme
weights_hidden_output = np.random.uniform(-0.5, 0.5, size=(n_hidden, n_outputs))
bias_output = np.zeros(n_outputs)

In [64]:
%%time
# Обучение модели с помощью SGD
def _forward_pass(X, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output):
    """Propagate X through the sigmoid hidden layer and the linear output layer.

    Returns:
        (hidden_outputs, output_inputs): hidden activations and the
        pre-softmax output scores.
    """
    hidden_outputs = sigmoid(make_dot(X, weights_input_hidden) + bias_hidden)
    output_inputs = make_dot(hidden_outputs, weights_hidden_output) + bias_output
    return hidden_outputs, output_inputs


def _mean_cross_entropy(X, y, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output):
    """Mean negative log-probability the network assigns to the labels y."""
    _, output_inputs = _forward_pass(
        X, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output
    )
    y_prob = softmax(output_inputs)
    return np.mean(-np.log(y_prob[np.arange(len(X)), y]))


def train_and_predict(X_train, y_train, X_test, y_test, n_epochs, batch_size, learning_rate, weights_input_hidden=weights_input_hidden, bias_hidden=bias_hidden, weights_hidden_output=weights_hidden_output,bias_output=bias_output,k = 100):
    """Train a one-hidden-layer network with mini-batch SGD and classify X_test.

    During training, the forward pass of every mini-batch uses a rank-k SVD
    approximation of the input->hidden weights; the gradient step is applied
    to the full (uncompressed) weight matrix.

    NOTE(review): the per-epoch losses, the final predictions and the returned
    weights all use the uncompressed matrix, so the reported accuracy does not
    measure the compressed network itself - confirm this is intended.

    Args:
        X_train: 2-D array of training samples, one row per sample.
        y_train: integer class index for every training sample.
        X_test: 2-D array of test samples.
        y_test: integer class index for every test sample.
        n_epochs: number of passes over the training set.
        batch_size: number of samples per mini-batch.
        learning_rate: SGD step size.
        weights_input_hidden, bias_hidden, weights_hidden_output, bias_output:
            initial parameters. They are copied, so neither the caller's
            arrays nor the default arrays are modified.
        k: number of singular components kept when compressing the hidden
            weights. If k is not smaller than min(weights_input_hidden.shape)
            the truncation is a no-op and the SVD is skipped.

    Returns:
        (weights_input_hidden, bias_hidden, weights_hidden_output, bias_output,
        y_pred, accuracy): the trained parameters, the predicted class of
        every test sample and the fraction of correct test predictions.
    """
    # Work on private float64 copies: the defaults are module-level arrays
    # captured at definition time, and updating them in place would make
    # every later call start from already-trained weights.
    weights_input_hidden = np.array(weights_input_hidden, dtype=np.float64)
    bias_hidden = np.array(bias_hidden, dtype=np.float64)
    weights_hidden_output = np.array(weights_hidden_output, dtype=np.float64)
    bias_output = np.array(bias_output, dtype=np.float64)

    n_samples = len(X_train)
    # Class count comes from the output layer itself rather than a notebook global
    one_hot = np.eye(weights_hidden_output.shape[1])
    full_rank = min(weights_input_hidden.shape)

    for epoch in tqdm(range(n_epochs)):
        # Shuffle by index so the training set is not copied every epoch
        indices = np.random.permutation(n_samples)

        for start in range(0, n_samples, batch_size):
            batch_idx = indices[start:start + batch_size]
            X_batch = X_train[batch_idx]
            y_batch = y_train[batch_idx]

            # Rank-k approximation of the hidden weights, computed once per batch
            if k < full_rank:
                hidden_weights_compressed = make_svd(weights_input_hidden, k)
            else:
                hidden_weights_compressed = weights_input_hidden

            # Forward pass (hidden layer uses the compressed weights)
            hidden_outputs, output_inputs = _forward_pass(
                X_batch, hidden_weights_compressed, bias_hidden,
                weights_hidden_output, bias_output,
            )
            y_pred = softmax(output_inputs)

            # Backward pass: gradient of the mean cross-entropy w.r.t. the output
            # scores, then back through the sigmoid. grad_hidden must be computed
            # before weights_hidden_output is updated.
            grad_output = (y_pred - one_hot[y_batch]) / len(X_batch)
            grad_hidden = make_dot(grad_output, weights_hidden_output.T) * hidden_outputs * (1 - hidden_outputs)

            # SGD update of weights and biases
            weights_hidden_output -= learning_rate * make_dot(hidden_outputs.T, grad_output)
            bias_output -= learning_rate * np.sum(grad_output, axis=0)

            weights_input_hidden -= learning_rate * make_dot(X_batch.T, grad_hidden)
            bias_hidden -= learning_rate * np.sum(grad_hidden, axis=0)

        # The full-dataset losses are only reported every 50th epoch,
        # so they are only computed then.
        if epoch % 50 == 0:
            train_loss = _mean_cross_entropy(
                X_train, y_train, weights_input_hidden, bias_hidden,
                weights_hidden_output, bias_output,
            )
            test_loss = _mean_cross_entropy(
                X_test, y_test, weights_input_hidden, bias_hidden,
                weights_hidden_output, bias_output,
            )
            print(f"Epoch {epoch}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}")

    # Predicted class = index of the largest output score (softmax is monotonic,
    # so it is not needed for the argmax)
    _, output_inputs = _forward_pass(
        X_test, weights_input_hidden, bias_hidden, weights_hidden_output, bias_output
    )
    y_pred = np.argmax(output_inputs, axis=1)

    # Fraction of test samples classified correctly
    accuracy = np.mean(y_pred == y_test)

    return weights_input_hidden, bias_hidden, weights_hidden_output, bias_output, y_pred, accuracy

Wall time: 0 ns


In [65]:
# Full training run: 801 epochs, mini-batches of 64, SVD truncated to 400 components
(
    weights_input_hidden,
    bias_hidden,
    weights_hidden_output,
    bias_output,
    y_pred,
    accuracy,
) = train_and_predict(
    X_train,
    y_train,
    X_test,
    y_test,
    n_epochs=801,
    batch_size=64,
    learning_rate=0.1,
    k=400,
)

print(f"Accuracy: {accuracy}")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/801 [00:00<?, ?it/s]

Epoch 0, Train Loss: 0.1128, Test Loss: 0.1465
Epoch 50, Train Loss: 0.0173, Test Loss: 0.0861
Epoch 100, Train Loss: 0.0086, Test Loss: 0.0832
Epoch 150, Train Loss: 0.0059, Test Loss: 0.0827
Epoch 200, Train Loss: 0.0045, Test Loss: 0.0826
Epoch 250, Train Loss: 0.0036, Test Loss: 0.0824
Epoch 300, Train Loss: 0.0031, Test Loss: 0.0831
Epoch 350, Train Loss: 0.0027, Test Loss: 0.0832
Epoch 400, Train Loss: 0.0025, Test Loss: 0.0840
Epoch 450, Train Loss: 0.0022, Test Loss: 0.0843
Epoch 500, Train Loss: 0.0020, Test Loss: 0.0844
Epoch 550, Train Loss: 0.0018, Test Loss: 0.0846
Epoch 600, Train Loss: 0.0016, Test Loss: 0.0850
Epoch 650, Train Loss: 0.0015, Test Loss: 0.0849
Epoch 700, Train Loss: 0.0014, Test Loss: 0.0853
Epoch 750, Train Loss: 0.0013, Test Loss: 0.0854
Epoch 800, Train Loss: 0.0013, Test Loss: 0.0859
Accuracy: 0.9797


In [None]:
# no SVD n_epochs = 501 Accuracy: 0.828
# k = 50 n_epochs = 500 Accuracy: 0.85
# k = 100  n_epochs = 1501 n_hidden = 522 Accuracy: 0.866
# k = 250 n_epochs = 500 Accuracy: 0.85
# 100/100 [03:09<00:00, 11.01it/s] y_train = y_train[:1200] n_epochs = 100 k=100 Accuracy: 0.817
# 100/100 [03:09<00:00, 11.01it/s] y_train = y_train[:1200] n_epochs = 100 k=100 Accuracy: 0.833
# 100/100 [03:24<00:00, 11.01it/s] y_train = y_train[:1200] n_epochs = 100 k=300 Accuracy: 0.8791
# 100/100 [43:04<00:00, 11.01it/s] y_train = y_train[:16_000] n_epochs = 100 k=300 Accuracy: 0.94075
# 300/300 [01:03:04<00:00, 11.01it/s] y_train = y_train[:16_000] n_epochs = 300 k=300 Accuracy:  0.94175
# 500/500 [39:28<00:00, 11.01it/s] y_train = y_train[:6_000] n_epochs = 500 k=400 Accuracy: 0.9455
# 800/800 [39:28<00:00, 11.01it/s] y_train = y_train[:6_000] n_epochs = 800 k=400 Accuracy: 0.945
# 500/500 [05:33:28<00:00, 11.01it/s] y_train = y_train[:12_000] n_epochs = 500 k=400 Accuracy: 0.938
# 800/800 [10:17:16<00:00, 46.01it/s] y_train = y_train[:] n_epochs = 800 k=400 Accuracy: 0.9797

In [6]:
# Classify the test samples with the current weights
hidden_inputs = X_test @ weights_input_hidden + bias_hidden
hidden_outputs = sigmoid(hidden_inputs)

output_inputs = hidden_outputs @ weights_hidden_output + bias_output
y_pred = output_inputs.argmax(axis=1)

# Share of test samples whose predicted class matches the label
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy}")

Accuracy: 0.3458333333333333


In [None]:
no SVD
# Определение параметров модели
n_inputs = X_train.shape[1]   # Количество входных нейронов
n_outputs = len(np.unique(y_train))  # Количество выходных нейронов
n_hidden = 522 # np.ceil((n_inputs + n_outputs) / 2).astype(int)  # Количество нейронов в скрытом слое
learning_rate = 0.1 # Скорость обучения
n_epochs = 1501 # количество эпох
batch_size = 32 # количество образцов данных
# Epoch 0, Train Loss: 0.9151, Test Loss: 1.0985
# Epoch 50, Train Loss: 0.0337, Test Loss: 0.4167
# Epoch 100, Train Loss: 0.0149, Test Loss: 0.4140
# Epoch 150, Train Loss: 0.0093, Test Loss: 0.4194
# Epoch 200, Train Loss: 0.0068, Test Loss: 0.4231
# Epoch 250, Train Loss: 0.0053, Test Loss: 0.4270
# Epoch 300, Train Loss: 0.0043, Test Loss: 0.4316
# Epoch 350, Train Loss: 0.0036, Test Loss: 0.4348
# Epoch 400, Train Loss: 0.0031, Test Loss: 0.4377
# Epoch 450, Train Loss: 0.0027, Test Loss: 0.4411
# Epoch 500, Train Loss: 0.0024, Test Loss: 0.4437
# Epoch 550, Train Loss: 0.0022, Test Loss: 0.4460
# Epoch 600, Train Loss: 0.0020, Test Loss: 0.4486
# Epoch 650, Train Loss: 0.0018, Test Loss: 0.4506
# Epoch 700, Train Loss: 0.0017, Test Loss: 0.4527
# Epoch 750, Train Loss: 0.0016, Test Loss: 0.4546
# Epoch 800, Train Loss: 0.0014, Test Loss: 0.4564
# Epoch 850, Train Loss: 0.0014, Test Loss: 0.4582
# Epoch 900, Train Loss: 0.0013, Test Loss: 0.4598
# Epoch 950, Train Loss: 0.0012, Test Loss: 0.4613
# Epoch 1000, Train Loss: 0.0011, Test Loss: 0.4629
# Epoch 1050, Train Loss: 0.0011, Test Loss: 0.4642
# Epoch 1100, Train Loss: 0.0010, Test Loss: 0.4657
# Epoch 1150, Train Loss: 0.0010, Test Loss: 0.4670
# Epoch 1200, Train Loss: 0.0009, Test Loss: 0.4683
# Epoch 1250, Train Loss: 0.0009, Test Loss: 0.4695
# Epoch 1300, Train Loss: 0.0008, Test Loss: 0.4707
# Epoch 1350, Train Loss: 0.0008, Test Loss: 0.4719
# Epoch 1400, Train Loss: 0.0008, Test Loss: 0.4730
# Epoch 1450, Train Loss: 0.0007, Test Loss: 0.4741
# Epoch 1500, Train Loss: 0.0007, Test Loss: 0.4751
Wall time: 4min 42s

Accuracy: 0.84

In [None]:
SVD On
k = 100  # Количество компонент для сжатия
# Определение параметров модели
n_inputs = X_train.shape[1]   # Количество входных нейронов
n_outputs = len(np.unique(y_train))  # Количество выходных нейронов
n_hidden = 522 # np.ceil((n_inputs + n_outputs) / 2).astype(int)  # Количество нейронов в скрытом слое
learning_rate = 0.1 # Скорость обучения
n_epochs = 1501 # количество эпох
batch_size = 32 # количество образцов данных
# Epoch 0, Train Loss: 1.3839, Test Loss: 1.6510
# Epoch 50, Train Loss: 0.3226, Test Loss: 0.7049
# Epoch 100, Train Loss: 0.2353, Test Loss: 0.6711
# Epoch 150, Train Loss: 0.2131, Test Loss: 0.6851
# Epoch 200, Train Loss: 0.1888, Test Loss: 0.6803
# Epoch 250, Train Loss: 0.1727, Test Loss: 0.6811
# Epoch 300, Train Loss: 0.1571, Test Loss: 0.6768
# Epoch 350, Train Loss: 0.1501, Test Loss: 0.6812
# Epoch 400, Train Loss: 0.1416, Test Loss: 0.6817
# Epoch 450, Train Loss: 0.1347, Test Loss: 0.6821
# Epoch 500, Train Loss: 0.1288, Test Loss: 0.6831
# Epoch 550, Train Loss: 0.1237, Test Loss: 0.6840
# Epoch 600, Train Loss: 0.1188, Test Loss: 0.6846
# Epoch 650, Train Loss: 0.1142, Test Loss: 0.6848
# Epoch 700, Train Loss: 0.1103, Test Loss: 0.6851
# Epoch 750, Train Loss: 0.1073, Test Loss: 0.6868
# Epoch 800, Train Loss: 0.1050, Test Loss: 0.6888
# Epoch 850, Train Loss: 0.1026, Test Loss: 0.6901
# Epoch 900, Train Loss: 0.0997, Test Loss: 0.6905
# Epoch 950, Train Loss: 0.0974, Test Loss: 0.6917
# Epoch 1000, Train Loss: 0.0954, Test Loss: 0.6928
# Epoch 1050, Train Loss: 0.0934, Test Loss: 0.6936
# Epoch 1100, Train Loss: 0.0917, Test Loss: 0.6948
# Epoch 1150, Train Loss: 0.0899, Test Loss: 0.6958
# Epoch 1200, Train Loss: 0.0881, Test Loss: 0.6963
# Epoch 1250, Train Loss: 0.0868, Test Loss: 0.6973
# Epoch 1300, Train Loss: 0.0854, Test Loss: 0.6983
# Epoch 1350, Train Loss: 0.0839, Test Loss: 0.6990
# Epoch 1400, Train Loss: 0.0829, Test Loss: 0.7002
# Epoch 1450, Train Loss: 0.0815, Test Loss: 0.7008
# Epoch 1500, Train Loss: 0.0807, Test Loss: 0.7021
Wall time: 57min 34s
Accuracy: 0.866

In [None]:
k = 250

# Epoch 0, Train Loss: 1.0804, Test Loss: 1.3565
# Epoch 50, Train Loss: 0.0981, Test Loss: 0.5274
# Epoch 100, Train Loss: 0.0671, Test Loss: 0.5420
# Epoch 150, Train Loss: 0.0480, Test Loss: 0.5320
# Epoch 200, Train Loss: 0.0401, Test Loss: 0.5353
# Epoch 250, Train Loss: 0.0348, Test Loss: 0.5368
# Epoch 300, Train Loss: 0.0303, Test Loss: 0.5361
# Epoch 350, Train Loss: 0.0275, Test Loss: 0.5384
# Epoch 400, Train Loss: 0.0255, Test Loss: 0.5405
# Epoch 450, Train Loss: 0.0236, Test Loss: 0.5421
# Epoch 500, Train Loss: 0.0219, Test Loss: 0.5425
# Wall time: 18min 48s

Accuracy: 0.85

In [None]:
k = 50  # Количество компонент для сжатия

Epoch 0, Train Loss: 1.3601, Test Loss: 1.6002
Epoch 50, Train Loss: 0.7355, Test Loss: 1.2752
Epoch 100, Train Loss: 0.4583, Test Loss: 1.0451
Epoch 150, Train Loss: 0.3726, Test Loss: 0.9839
Epoch 200, Train Loss: 0.3426, Test Loss: 0.9751
Epoch 250, Train Loss: 0.2963, Test Loss: 0.9399
Epoch 300, Train Loss: 0.2588, Test Loss: 0.9129
Epoch 350, Train Loss: 0.1831, Test Loss: 0.8367
Epoch 400, Train Loss: 0.1484, Test Loss: 0.8013
Epoch 450, Train Loss: 0.1218, Test Loss: 0.7711
Epoch 500, Train Loss: 0.1040, Test Loss: 0.7506
Wall time: 18min 57s

Accuracy: 0.828

In [None]:
  k = 500  # Количество компонент для сжатия
y_train = y_train[:5500]
y_test = y_test[:1000]
Epoch 0, Train Loss: 0.9050, Test Loss: 1.1975
Epoch 50, Train Loss: 0.0332, Test Loss: 0.3967
Epoch 100, Train Loss: 0.0152, Test Loss: 0.3999
Epoch 150, Train Loss: 0.0095, Test Loss: 0.4015
Epoch 200, Train Loss: 0.0069, Test Loss: 0.4068
Epoch 250, Train Loss: 0.0054, Test Loss: 0.4102
Epoch 300, Train Loss: 0.0044, Test Loss: 0.4133
Epoch 350, Train Loss: 0.0038, Test Loss: 0.4170
Epoch 400, Train Loss: 0.0032, Test Loss: 0.4196
Epoch 450, Train Loss: 0.0028, Test Loss: 0.4227
Epoch 500, Train Loss: 0.0025, Test Loss: 0.4248
CPU times: user 2h 26min 39s, sys: 33min 17s, total: 2h 59min 56s
Wall time: 1h 43min 21s
Accuracy: 0.894

In [None]:
y_train = y_train[:1500]
y_test = y_test[:500]
# Преобразование данных в нужный формат
X_train = X_train.reshape(60_000, 784)[:1500]
X_test = X_test.reshape(10_000, 784)[:500]
n_hidden = 522 # np.ceil((n_inputs + n_outputs) / 2).astype(int)  # Количество нейронов в скрытом слое
learning_rate = 0.1 # Скорость обучения
n_epochs = 501 # количество эпох
batch_size = 32 # количество образцов данных
k = 250  # Количество компонент для сжатия
Epoch 0, Train Loss: 1.0307, Test Loss: 1.1796
Epoch 50, Train Loss: 0.0986, Test Loss: 0.4407
Epoch 100, Train Loss: 0.0535, Test Loss: 0.4412
Epoch 150, Train Loss: 0.0382, Test Loss: 0.4425
Epoch 200, Train Loss: 0.0314, Test Loss: 0.4499
Epoch 250, Train Loss: 0.0259, Test Loss: 0.4532
Epoch 300, Train Loss: 0.0236, Test Loss: 0.4608
Epoch 350, Train Loss: 0.0207, Test Loss: 0.4633
Epoch 400, Train Loss: 0.0190, Test Loss: 0.4681
Epoch 450, Train Loss: 0.0178, Test Loss: 0.4714
Epoch 500, Train Loss: 0.0163, Test Loss: 0.4744
CPU times: user 4h 14min 39s, sys: 1h 34min 55s, total: 5h 49min 34s
Wall time: 1h 29min 56s
Accuracy: 0.886

In [None]:
 k = 250  # Количество компонент для сжатия
y_train = y_train[:2500]
y_test = y_test[:1000]
# Преобразование данных в нужный формат
X_train = X_train.reshape(60_000, 784)[:2500]
X_test = X_test.reshape(10_000, 784)[:1000]
Epoch 0, Train Loss: 1.6045, Test Loss: 1.9372
Epoch 50, Train Loss: 0.0835, Test Loss: 0.4695
Epoch 100, Train Loss: 0.0468, Test Loss: 0.4685
Epoch 150, Train Loss: 0.0370, Test Loss: 0.4827
Epoch 200, Train Loss: 0.0265, Test Loss: 0.4807
Epoch 250, Train Loss: 0.0233, Test Loss: 0.4863
Epoch 300, Train Loss: 0.0207, Test Loss: 0.4917
Epoch 350, Train Loss: 0.0182, Test Loss: 0.4947
Epoch 400, Train Loss: 0.0166, Test Loss: 0.4968
Epoch 450, Train Loss: 0.0151, Test Loss: 0.5000
Epoch 500, Train Loss: 0.0143, Test Loss: 0.5036
CPU times: user 3h 48min 47s, sys: 56min 33s, total: 4h 45min 20s
Wall time: 2h 44min 52s
Accuracy: 0.871

In [None]:
y_train = y_train[:2500]
y_test = y_test[:500]
# Преобразование данных в нужный формат
X_train = X_train.reshape(60_000, 784)[:2500]
X_test = X_test.reshape(10_000, 784)[:500]
k = 550  # Количество компонент для сжатия

Epoch 0, Train Loss: 0.9871, Test Loss: 1.2458
Epoch 50, Train Loss: 0.0352, Test Loss: 0.3426
Epoch 100, Train Loss: 0.0124, Test Loss: 0.3376
Epoch 150, Train Loss: 0.0078, Test Loss: 0.3440
Epoch 200, Train Loss: 0.0056, Test Loss: 0.3446
Epoch 250, Train Loss: 0.0043, Test Loss: 0.3512
Epoch 300, Train Loss: 0.0035, Test Loss: 0.3525
Epoch 350, Train Loss: 0.0029, Test Loss: 0.3538
Epoch 400, Train Loss: 0.0025, Test Loss: 0.3561
Epoch 450, Train Loss: 0.0022, Test Loss: 0.3572
Epoch 500, Train Loss: 0.0019, Test Loss: 0.3612
CPU times: user 7h 26min 40s, sys: 2h 38min 41s, total: 10h 5min 21s
Wall time: 2h 35min 33s


Accuracy: 0.896