In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

## Przygotujmy dane

In [None]:
digits = load_digits()

# Normalisation: scikit-learn documents the digit pixel values as integers in
# 0..16, so dividing by 16 rescales every feature into [0, 1].
X = digits.data / 16.0
y = digits.target

# NOTE: despite its name this is NOT a one-hot encoding; it is the integer
# class labels viewed as a column of shape (n_samples, 1).
y_one_hot = y[:, np.newaxis]

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_one_hot,
    test_size=0.2,
    random_state=42,
)

## Wizualizacja danych

In [None]:
# Preview the first ten samples in a 2x5 grid, each titled with its label.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 5))
axes = axes.ravel()  # flatten the 2x5 grid so it can be iterated linearly

# zip stops after the ten available axes, so only the first ten images are drawn.
for ax, image, label in zip(axes, digits.images, digits.target):
    ax.imshow(image, cmap='gray')
    ax.set_title(f"Label: {label}")
    ax.axis('off')

plt.tight_layout()
plt.show()

## Dataloader

In [None]:
def create_batches(X, y, batch_size):
    """Yield shuffled ``(X_batch, y_batch)`` mini-batches covering the data once.

    A single random ordering is drawn from NumPy's global random state and
    applied to both arrays, so every sample stays paired with its label.
    The inputs themselves are not modified.

    Args:
        X: Array whose first axis indexes samples.
        y: Array indexed along its first axis in the same order as ``X``.
        batch_size: Maximum number of samples per batch; the final batch is
            smaller when the sample count is not a multiple of it.

    Yields:
        Tuples ``(X_batch, y_batch)`` of consecutive slices of the shuffled data.
    """
    n_samples = X.shape[0]

    # One permutation shared by both arrays keeps features and labels aligned.
    order = np.arange(n_samples)
    np.random.shuffle(order)
    X_shuffled = X[order]
    y_shuffled = y[order]

    starts = range(0, n_samples, batch_size)
    yield from (
        (X_shuffled[s:s + batch_size], y_shuffled[s:s + batch_size])
        for s in starts
    )

## Funkcja straty

In [None]:
def cross_entropy_loss(y_true, y_pred):
    """Mean softmax cross-entropy between integer labels and raw logits.

    Args:
        y_true: Integer class indices, one per sample. Any shape that flattens
            to ``(batch_size,)`` is accepted, e.g. a ``(batch_size, 1)`` column.
        y_pred: Array of shape ``(batch_size, n_classes)`` holding unnormalised
            scores (logits); softmax is applied inside this function.

    Returns:
        The negative log-likelihood of the true classes, averaged over the batch.
    """
    y_pred = np.asarray(y_pred)
    # Flatten the labels: indexing with a (batch_size, 1) column would broadcast
    # against arange(batch_size) and select a (batch_size, batch_size) matrix,
    # silently producing a wrong average.
    labels = np.asarray(y_true).reshape(-1)
    batch_size = y_pred.shape[0]

    # Numerically stable log-softmax: subtracting the row maximum leaves the
    # result unchanged mathematically but keeps exp() from overflowing.
    shifted = y_pred - np.max(y_pred, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Negative log-likelihood of the correct class, averaged over the batch.
    return -np.mean(log_probs[np.arange(batch_size), labels])


def cross_entropy_derivative(y_true, y_pred):
    """Gradient of the mean softmax cross-entropy with respect to the logits.

    Args:
        y_true: Integer class indices, one per sample. Any shape that flattens
            to ``(batch_size,)`` is accepted, e.g. a ``(batch_size, 1)`` column.
        y_pred: Array of shape ``(batch_size, n_classes)`` holding unnormalised
            scores (logits). It is not modified.

    Returns:
        A new array with the same shape as ``y_pred`` equal to
        ``(softmax(y_pred) - one_hot(y_true)) / batch_size``. The division by
        the batch size is already included, so callers must not average again.
    """
    y_pred = np.asarray(y_pred)
    # Flatten the labels so each row gets exactly one "-1"; a (batch_size, 1)
    # column would broadcast and subtract from the wrong entries.
    labels = np.asarray(y_true).reshape(-1)
    batch_size = y_pred.shape[0]

    # Stable softmax: shifting by the row maximum prevents exp() overflow
    # (which would otherwise yield inf / inf = NaN) without changing the result.
    exp_logits = np.exp(y_pred - np.max(y_pred, axis=1, keepdims=True))
    derivative = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # softmax - one_hot, then average over the batch.
    derivative[np.arange(batch_size), labels] -= 1
    derivative /= batch_size

    return derivative

## Budujemy sieć neuronową

### Zaczynamy od hiperparametrów

In [None]:
# Layer sizes: number of input features, hidden units and output classes.
input_dim, hidden_dim, output_dim = 64, 64, 10

# Optimisation settings consumed by the training loop.
epochs, batch_size = 150, 64
learning_rate = 0.19

### Wagi sieci

In [None]:
# Seed NumPy's global random state so the initial weights (and the mini-batch
# shuffling, which also draws from np.random) are reproducible on
# Restart & Run All. The value matches the random_state of the train/test split.
np.random.seed(42)

# Input -> hidden layer: Gaussian weights scaled by 0.5, zero biases.
W1 = np.random.randn(input_dim, hidden_dim) * 0.5
b1 = np.zeros((1, hidden_dim))

# Hidden -> output layer: same initialisation scheme.
W2 = np.random.randn(hidden_dim, output_dim) * 0.5
b2 = np.zeros((1, output_dim))

## Pętla trenująca

In [None]:
loss_history = []
n_samples = X_train.shape[0]
# Mini-batches per epoch, counting the final partial batch (ceiling division).
# Kept as a name in case other cells reference it.
n_batches = -(-n_samples // batch_size)

for epoch in range(epochs):
    # Accumulate the SUM of per-sample losses so the epoch average stays exact
    # even though the last batch is smaller than the others.
    epoch_loss = 0.0
    for X_batch, y_batch in create_batches(X_train, y_train, batch_size):
        y_batch = y_batch.reshape(-1)  # labels arrive as a column; flatten to 1-D

        # Forward pass
        Z1 = np.dot(X_batch, W1) + b1
        A1 = np.tanh(Z1)  # hidden activation
        Z2 = np.dot(A1, W2) + b2  # logits; softmax is applied inside the loss

        # Loss value (batch mean), re-weighted by the batch size
        loss = cross_entropy_loss(y_batch, Z2)
        epoch_loss += loss * X_batch.shape[0]

        # Backpropagation. cross_entropy_derivative already divides by the
        # batch size, so the parameter gradients must not be divided again.
        dZ2 = cross_entropy_derivative(y_batch, Z2)  # gradient w.r.t. the logits
        dW2 = np.dot(A1.T, dZ2)
        db2 = np.sum(dZ2, axis=0)

        dA1 = np.dot(dZ2, W2.T)
        dZ1 = dA1 * (1 - A1 ** 2)  # tanh'(z) = 1 - tanh(z)^2, reusing A1
        dW1 = np.dot(X_batch.T, dZ1)
        db1 = np.sum(dZ1, axis=0)

        # Parameter update (SGD)
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1

    mean_epoch_loss = epoch_loss / n_samples
    loss_history.append(mean_epoch_loss)
    if epoch == 0 or (epoch+1) % 10 == 0:
        print(f"Epoch {epoch}, Loss: {mean_epoch_loss:.4f}")

In [None]:
# Plot the mean training loss recorded after each epoch.
# The x-axis is derived from loss_history itself, so the cell still works when
# training was interrupted or `epochs` was changed after the loop ran.
plt.plot(range(1, len(loss_history) + 1), loss_history)
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Value")
plt.show()  # render the figure without echoing the last call's Text repr

## Ewaluacja

In [None]:
# Forward pass over the held-out test split with the trained parameters.
Z1_test = np.dot(X_test, W1) + b1
A1_test = np.tanh(Z1_test)
A2_test = np.dot(A1_test, W2) + b2  # raw logits; no softmax is applied here

# y_test is a column of shape (n, 1). Used as a fancy index it would broadcast
# against arange(n) and select an (n, n) matrix, giving a wrong loss, so it is
# flattened once and reused for both metrics.
y_test_flat = y_test.reshape(-1)

test_loss = cross_entropy_loss(y_test_flat, A2_test)
predictions = np.argmax(A2_test, axis=1)  # most likely class per sample
accuracy = np.mean(predictions == y_test_flat)

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")