In [22]:
# Load the MNIST dataset
import numpy as np
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    """Fetch one MNIST split and return its samples as plain Python lists.

    The dataset is stored under ``./data`` and downloaded when missing. Every
    image is converted to a NumPy array and flattened to one dimension by the
    dataset transform.

    Args:
        is_train: True to load the training split, False for the test split.

    Returns:
        A ``(mnist_data, mnist_labels)`` tuple of two lists of equal length:
        the flattened images and their labels, in dataset order.
    """
    def _to_flat_array(picture):
        # Convert whatever the dataset yields into a 1-D NumPy array.
        return np.array(picture).flatten()

    dataset = MNIST(
        root='./data',
        transform=_to_flat_array,
        download=True,
        train=is_train,
    )

    # Materialize the dataset once, then split it into two parallel lists.
    samples = [(image, label) for image, label in dataset]
    mnist_data = [image for image, _ in samples]
    mnist_labels = [label for _, label in samples]
    return mnist_data, mnist_labels

# Load both splits; the keyword makes the meaning of the boolean explicit.
train_X, train_Y = download_mnist(is_train=True)
test_X, test_Y = download_mnist(is_train=False)

In [23]:
# Normalize the data

# The images hold grayscale intensities in [0, 255]; rescale them to [0, 1]
# to make training the network easier.
# NOTE: this overwrites train_X / test_X, so re-running the cell rescales again.
train_X, test_X = (np.array(split) / 255.0 for split in (train_X, test_X))

# One-hot encode
# Example of a one-hot encoded label: 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
def one_hot_encode(labels, num_classes):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: Integer class indices used to select rows of the identity matrix.
        num_classes: Total number of classes, i.e. the length of each row.

    Returns:
        A float array in which each label is replaced by the matching row of
        the ``num_classes`` x ``num_classes`` identity matrix.
    """
    identity_rows = np.eye(num_classes)
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    encoded = identity_rows[labels]
    return encoded

# Replace the integer labels of both splits with their one-hot encodings.
train_Y = one_hot_encode(labels=np.array(train_Y), num_classes=10)
test_Y = one_hot_encode(labels=np.array(test_Y), num_classes=10)


In [24]:
# Fix the seed so the initial weights (and therefore the test results and
# model performance obtained earlier) can be reproduced.
np.random.seed(0)
# Weights start as small random values; biases start at zero.
W = 0.01 * np.random.randn(784, 10)
b = np.zeros(10)

# softmax turns raw scores into probabilities between 0 and 1
def softmax(z):
    """Row-wise softmax of a 2-D score array.

    Args:
        z: Array of raw scores with one sample per row and one class per
            column (the sums below run along axis 1).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtract each row's OWN maximum before exponentiating. Softmax is
    # invariant to a per-row shift, so the result is unchanged mathematically,
    # but this guarantees the largest exponent in every row is exp(0) = 1.
    # Shifting by the global maximum instead lets rows that sit far below it
    # underflow to all zeros, which produces 0 / 0 = NaN.
    shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(shifted)
    # keepdims=True keeps the row axis so the division broadcasts per row.
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

# computes the score for every class (z = X * W + b)
def forward_propagation(X):
    """Run the linear layer followed by softmax.

    Uses the module-level weights ``W`` and bias ``b``.

    Args:
        X: Input batch, one sample per row.

    Returns:
        The result of ``softmax`` applied to the class scores ``X @ W + b``.
    """
    weighted = np.dot(X, W)  # linear part of the score
    scores = weighted + b    # bias is broadcast over the rows
    return softmax(scores)

# computes the error with cross-entropy loss (how far the predicted
# probabilities are from the true ones)
def cross_entropy_loss(y_pred, y_true):
    """Mean cross-entropy between predicted probabilities and target rows.

    Args:
        y_pred: Predicted probabilities, same shape as ``y_true``.
        y_true: Target array; its first dimension is taken as the batch size.

    Returns:
        The summed cross-entropy divided by the number of rows in ``y_true``.
    """
    batch_size = y_true.shape[0]
    # 1e-15 keeps the argument of log strictly positive: log(0) is undefined
    # and would otherwise produce numerical errors.
    log_probs = np.log(y_pred + 1e-15)
    weighted_total = np.sum(y_true * log_probs)
    return -weighted_total / batch_size

# updates the weights and the bias using the gradient of the error
def gradient_descent(X, y_true, y_pred, learning_rate):
    """Apply one gradient-descent step to the module-level ``W`` and ``b``.

    Both parameters are updated in place; nothing is returned.

    Args:
        X: Input batch, one sample per row.
        y_true: Target rows for the batch; its first dimension is the batch size.
        y_pred: Predicted probabilities for the batch.
        learning_rate: Step size that scales both updates.
    """
    global W, b
    batch_size = y_true.shape[0]  # total number of true labels in the batch
    # Error derivative with respect to the scores, averaged over the batch:
    # this is the direction in which the parameters must move.
    grad_scores = (y_pred - y_true) / batch_size
    grad_W = np.dot(X.T, grad_scores)
    grad_b = np.sum(grad_scores, axis=0)
    W -= learning_rate * grad_W  # weight update
    b -= learning_rate * grad_b  # bias update

In [31]:
def predict(X):
    """Return the predicted class index for every row of ``X``.

    Args:
        X: Input batch, one sample per row.

    Returns:
        For each row, the index of the largest value returned by
        ``forward_propagation``.
    """
    probabilities = forward_propagation(X)
    # The predicted class is the one with the highest probability.
    winners = np.argmax(probabilities, axis=1)
    return winners

# Accuracy of the model on the test split before the training cell runs.
# NOTE(review): the recorded output (92.23%) and the jump in execution counts
# suggest this cell was last run on already-trained weights; confirm with
# Restart Kernel -> Run All.
initial_predictions = predict(test_X)
initial_accuracy = 100 * np.mean(np.argmax(test_Y, axis=1) == initial_predictions)
print(f'Initial accuracy: {initial_accuracy:.2f}%')


Initial accuracy: 92.23%


In [32]:
epochs = 300  # this number can be changed between 50 and 500
learning_rate = 0.01

# Train the model with mini-batch gradient descent.
for epoch in range(epochs):
    # Walk over the training data in consecutive batches of 100 samples.
    for i in range(0, len(train_X), 100):
        X_batch, y_batch = train_X[i:i + 100], train_Y[i:i + 100]

        # Forward pass: predicted class probabilities for this batch.
        y_pred = forward_propagation(X_batch)

        # Cross-entropy error of the batch (computed but not used further here).
        loss = cross_entropy_loss(y_pred, y_batch)

        # Update the weights and the bias from this batch's gradient.
        gradient_descent(X_batch, y_batch, y_pred, learning_rate)


In [33]:
# Accuracy of the trained model on the test split, as a percentage.
final_predictions = predict(test_X)
final_accuracy = 100 * np.mean(np.argmax(test_Y, axis=1) == final_predictions)
print(f'Final accuracy after training: {final_accuracy:.2f}%')


Final accuracy after training: 92.56%
