<a href="https://colab.research.google.com/github/Kabaaaan/-Introduction-to-AI/blob/main/Intro_to_AI_task4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [None]:
# Fetch MNIST from OpenML as plain NumPy arrays (as_frame=False), not a DataFrame.
print("Загрузка MNIST датасета...")
mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')

# Pixels become float32 divided by 255 (assumes raw values lie in 0..255);
# labels are cast to int32 class ids.
X = mnist.data.astype(np.float32) / 255.0
y = mnist.target.astype(np.int32)

# Hold out 10,000 samples for testing; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=10000, random_state=42
)

# Pair each sample with its label. Indexing with [None, ...] adds a leading
# axis, so every sample is a one-row matrix that can be concatenated into a batch.
train_dataset = [(pixels[None, ...], label) for pixels, label in zip(X_train, y_train)]
test_dataset = [(pixels[None, ...], label) for pixels, label in zip(X_test, y_test)]

print(f"Размер обучающей выборки: {len(train_dataset)}")
print(f"Размер тестовой выборки: {len(test_dataset)}")

In [3]:
# Network architecture
INPUT_DIM = 784  # features per sample (images are reshaped to 28x28 when plotted)
OUT_DIM = 10  # number of output classes
H_DIM = 32  # width of each of the two hidden layers

# Training hyperparameters (fixed settings; these are not learned by the model)
ALPHA = 0.001  # step size applied to the gradients in the weight updates
NUM_EPOCHS = 20  # number of passes over the training set
BATCH_SIZE = 100  # samples per gradient step

In [4]:
# def softmax(t):
#     out = np.exp(t)
#     return out / np.sum(out)
#
# def cross_entropy(z, y):
#     return -np.log(z[0, y])
#
# def to_one_hot(y, num_classes):
#     y_full = np.zeros((1, num_classes))
#     y_full[0, y] = 1
#     return y_full

def he_init(w):
    """Scale a standard-normal weight matrix for He (Kaiming) initialization.

    The scale is sqrt(2 / fan_in), where fan_in is the number of inputs feeding
    the layer. Weights are used as ``x @ W``, so fan_in is the number of rows
    of ``w``. Taking it from the matrix itself (instead of a global constant)
    gives every layer the scale that matches its own input width.

    Args:
        w: Array of shape (fan_in, fan_out) drawn from a standard normal.

    Returns:
        Array of the same shape as ``w``, multiplied by sqrt(2 / fan_in).
    """
    fan_in = w.shape[0]
    return w * np.sqrt(2.0 / fan_in)

def relu(t):
    """Apply the rectified linear unit element-wise: entries below 0 become 0."""
    rectified = np.maximum(t, 0)
    return rectified

def relu_deriv(t):
    """Return the ReLU slope for each entry of ``t`` as a float array.

    The result is 1.0 where ``t >= 0`` and 0.0 elsewhere; note that an input of
    exactly 0 is given slope 1.0.
    """
    active = 0 <= t
    return active.astype(float)

def softmax_batch(t):
    """Compute a numerically stable softmax over each row of ``t``.

    The row maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``np.exp`` from overflowing to ``inf``
    (large positive logits) or underflowing every entry to 0 (large negative
    logits), both of which would otherwise produce NaN rows.

    Args:
        t: 2-D array of logits, one sample per row.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    shifted = t - np.max(t, axis=1, keepdims=True)
    out = np.exp(shifted)
    return out / np.sum(out, axis=1, keepdims=True)

def cross_entropy_batch(z, y):
    """Per-sample cross-entropy loss for a batch of predicted distributions.

    Args:
        z: 2-D array of class probabilities, one sample per row.
        y: 1-D sequence of integer class labels, one per row of ``z``.

    Returns:
        1-D array with ``-log(z[j, y[j]])`` for every sample ``j``. The
        probability is floored at a tiny epsilon so that a predicted
        probability of exactly 0 yields a large finite loss instead of ``inf``.

    Raises:
        ValueError: If ``y`` is not one-dimensional.
    """
    labels = np.asarray(y, dtype=np.intp)
    if labels.ndim != 1:
        raise ValueError("y must be a 1-D sequence of class labels")

    eps = 1e-12  # floor that keeps log() finite; far below any meaningful probability
    # Pick the probability of the true class in each row in one vectorized step.
    picked = z[np.arange(len(labels)), labels]
    return -np.log(np.maximum(picked, eps))

def to_one_hot_batch(y, num_classes):
    """Convert integer class labels into one-hot rows.

    Args:
        y: 1-D sequence of integer class labels.
        num_classes: Number of columns in the result.

    Returns:
        Float array of shape (len(y), num_classes) with a single 1 per row, in
        the column given by the corresponding label.

    Raises:
        ValueError: If ``y`` is not one-dimensional.
    """
    labels = np.asarray(y, dtype=np.intp)
    if labels.ndim != 1:
        raise ValueError("y must be a 1-D sequence of class labels")

    y_full = np.zeros((len(labels), num_classes))
    # Set one cell per row in a single indexed assignment instead of a Python loop.
    y_full[np.arange(len(labels)), labels] = 1
    return y_full

In [None]:
# Инициализация весов
W1 = he_init(np.random.randn(INPUT_DIM, H_DIM))
b1 = np.zeros((1, H_DIM))

W2 = he_init(np.random.randn(H_DIM, H_DIM))
b2 = np.zeros((1, H_DIM))

W3 = he_init(np.random.randn(H_DIM, OUT_DIM))
b3 = np.zeros((1, OUT_DIM))

loss_arr = []

for ep in range(NUM_EPOCHS):
    random.shuffle(train_dataset)
    total_loss = 0
    batch_count = 0

    for i in range(len(train_dataset) // BATCH_SIZE):
        batch_x, batch_y = zip(*train_dataset[i*BATCH_SIZE : i*BATCH_SIZE+BATCH_SIZE])
        x = np.concatenate(batch_x, axis=0)
        y = np.array(batch_y)

        # Прямое распространение
        t1 = x @ W1 + b1 # матричное умножение
        h1 = relu(t1)
        t2 = h1 @ W2 + b2
        h2 = relu(t2)
        t3 = h2 @ W3 + b3
        z = softmax_batch(t3)
        E = np.sum(cross_entropy_batch(z, y))

        # Обратное распространение
        y_full = to_one_hot_batch(y, OUT_DIM)

        dE_dt3 = z - y_full
        dE_dW3 = h2.T @ dE_dt3
        dE_db3 = np.sum(dE_dt3, axis=0, keepdims=True)

        dE_dh2 = dE_dt3 @ W3.T
        dE_dt2 = dE_dh2 * relu_deriv(t2)
        dE_dW2 = h1.T @ dE_dt2
        dE_db2 = np.sum(dE_dt2, axis=0, keepdims=True)

        dE_dh1 = dE_dt2 @ W2.T
        dE_dt1 = dE_dh1 * relu_deriv(t1)
        dE_dW1 = x.T @ dE_dt1
        dE_db1 = np.sum(dE_dt1, axis=0, keepdims=True)

        # Обновление параметров
        W1 = W1 - ALPHA * dE_dW1
        b1 = b1 - ALPHA * dE_db1
        W2 = W2 - ALPHA * dE_dW2
        b2 = b2 - ALPHA * dE_db2
        W3 = W3 - ALPHA * dE_dW3
        b3 = b3 - ALPHA * dE_db3

        total_loss += E
        batch_count += 1

    avg_loss = total_loss / batch_count if batch_count > 0 else 0
    loss_arr.append(avg_loss)
    print(f"Эпоха {ep+1}/{NUM_EPOCHS}, Средний loss: {avg_loss:.4f}")

In [None]:
def predict(x):
    """Run the network's forward pass and return class probabilities.

    Uses the module-level parameters W1/b1, W2/b2 and W3/b3: two ReLU hidden
    layers followed by a linear layer and a row-wise softmax.

    Args:
        x: 2-D array of inputs, one sample per row.

    Returns:
        Array of softmax probabilities, one row per input row.
    """
    activation = x
    # Both hidden layers share the same affine + ReLU form.
    for weights, bias in ((W1, b1), (W2, b2)):
        activation = relu(activation @ weights + bias)
    logits = activation @ W3 + b3
    return softmax_batch(logits)

def calc_accuracy(dataset):
    """Return the fraction of samples in ``dataset`` that are classified correctly.

    All samples are stacked into one matrix and pushed through ``predict`` in a
    single call rather than one forward pass per sample.

    Args:
        dataset: Sequence of ``(x, label)`` pairs, where ``x`` is one sample
            (a single row of features) and ``label`` its integer class.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 for an empty dataset (instead of
        raising ZeroDivisionError).
    """
    total = len(dataset)
    if total == 0:
        return 0.0

    inputs, labels = zip(*dataset)
    probabilities = predict(np.vstack(inputs))
    predicted = np.argmax(probabilities, axis=1)
    correct = int(np.sum(predicted == np.asarray(labels)))
    return correct / total

# Training accuracy is estimated on the first 1000 entries of train_dataset
# only; test accuracy uses the whole test set.
train_accuracy = calc_accuracy(train_dataset[:1000])
test_accuracy = calc_accuracy(test_dataset)

print(f"Точность на обучающей выборке: {train_accuracy:.4f}")
print(f"Точность на тестовой выборке: {test_accuracy:.4f}")

# Loss curve: one point per epoch from loss_arr.
plt.plot(loss_arr)
plt.gca().set(title='Loss во время обучения', xlabel='Эпоха', ylabel='Loss')
plt.tight_layout()
plt.show()

In [None]:
def plot_predictions(dataset, num_examples=12):
    """Show the first samples of ``dataset`` with their true and predicted labels.

    The grid is sized from the number of samples actually shown (4 columns,
    as many rows as needed), so any ``num_examples`` works; the default of 12
    gives the 3x4 grid at 12x9 inches. Titles of misclassified samples are red.

    Args:
        dataset: Sequence of ``(x, label)`` pairs; ``x`` is one sample that can
            be reshaped to 28x28 for display.
        num_examples: Maximum number of samples to show. It is capped at
            ``len(dataset)``; nothing is drawn when the result is 0 or less.
    """
    n_shown = min(num_examples, len(dataset))
    if n_shown <= 0:
        return

    n_cols = 4
    n_rows = -(-n_shown // n_cols)  # ceiling division
    # squeeze=False keeps `axes` 2-D even for a single row, so ravel() always works.
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False
    )
    axes = axes.ravel()

    for i in range(n_shown):
        x, true_label = dataset[i]
        z = predict(x)
        pred_label = np.argmax(z)
        confidence = z[0, pred_label]

        axes[i].imshow(x.reshape(28, 28), cmap='gray')
        axes[i].set_title(f'True: {true_label}, Pred: {pred_label}\nConf: {confidence:.3f}')

        if pred_label != true_label:
            axes[i].title.set_color('red')

        axes[i].axis('off')

    # Hide the unused cells of the last row so no empty frames are drawn.
    for unused_ax in axes[n_shown:]:
        unused_ax.axis('off')

    plt.tight_layout()
    plt.show()

# Show predictions for the first test samples, using the function's default count of 12.
plot_predictions(test_dataset)

In [None]:
def plot_confusion_matrix(dataset, title, max_samples=1000):
    """Plot the confusion matrix of the model's predictions on ``dataset``.

    Args:
        dataset: Sequence of ``(x, label)`` pairs; ``x`` is one sample accepted
            by ``predict`` and ``label`` its integer class.
        title: Title drawn above the matrix.
        max_samples: Only the first ``max_samples`` entries are evaluated
            (default 1000). Pass ``None`` to use the whole dataset.
    """
    samples = dataset if max_samples is None else dataset[:max_samples]

    y_true = []
    y_pred = []
    for x, label in samples:
        y_pred.append(np.argmax(predict(x)))
        y_true.append(label)

    # Fix the label set so the matrix always has one row/column per class,
    # even if some class does not occur in the evaluated subset.
    class_labels = np.arange(OUT_DIM)
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

    fig, ax = plt.subplots(figsize=(10, 8))
    disp.plot(ax=ax, cmap='Blues')
    # Set the title on the matrix axes explicitly rather than on the "current" axes.
    ax.set_title(title)
    plt.show()

# Note: plot_confusion_matrix evaluates only the first 1000 samples of the dataset it is given.
plot_confusion_matrix(test_dataset, 'Матрица ошибок на тестовой выборке')