In [3]:
import numpy as np
from matplotlib import pyplot as plt
import torch
from torchvision import datasets, transforms

transform = transforms.ToTensor()

train_dataset = datasets.FashionMNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.FashionMNIST(root="./data", train=False, transform=transform, download=True)

x_train = train_dataset.data.float() / 255  #κανονικοποιούμε τις τιμές των pixel
y_train = train_dataset.targets

x_test = test_dataset.data.float() / 255
y_test = test_dataset.targets


#W_1 = np.random.rand(784,128) - 0.5
b_1 = np.random.rand(256, 1) - 0.5 

#W_2 = np.random.rand(128, 128) - 0.5 
b_2 = np.random.rand(128, 1) - 0.5

#W_3 = np.random.rand(128, 10) - 0.5
b_3 = np.random.rand(10, 1) - 0.5 #ορίζουμε τυχαία τους πίνακς των biases και weights με τις απαιτούμενες διαστάσεις

W_1 = np.random.randn(784, 256) * np.sqrt(2.0 / 784)
W_2 = np.random.randn(256, 128) * np.sqrt(2.0 / 128)
W_3 = np.random.randn(128, 10) * np.sqrt(2.0 / 128)

A_layers = []
for i in range(len(x_train)):
   A_layers.append(x_train[i].reshape(784, 1))

def ReLU(x):
  return np.maximum(0, x)

def softmax(Z):
  Z = Z - np.max(Z)  
  exp_values = np.exp(Z)
  sum_exp_values = np.sum(exp_values)     #απλά εφαρμόζουμε τον τύπο της softmax και κάνουμε τις τιμές των 10 νευρώνων μία κατανομή διακριτής τυχαίας μεταβλητής
  A_out_prob = exp_values / sum_exp_values
  return A_out_prob


def forwardProp(A , W_1 , b_1, W_2, b_2, W_3 , b_3): #με A να είναι ένας 784x1 πινακας η ίσοδος layer 0 στο νευρωνικό δύκτιο , W να έιναι ένας
                                          #784x10 πίνακας με τα βάρη των ακμών και b να είναι ένας 10x1 πίνακας τα biases όλων των νέων κορυφών
  A_1 = np.dot(W_1.T , A)
  Z_1 = A_1 + b_1
  A_1 = ReLU(Z_1)
  A_2 = np.dot(W_2.T, A_1)
  Z_2 = A_2 + b_2
  A_2 = ReLU(Z_2)
  A_3 = np.dot(W_3.T , A_2)
  Z_3 = A_3 + b_3
  A_3 = softmax(Z_3)

  return A_1, Z_1, A_2 , Z_2 , A_3 , Z_3

def d_ReLU(x):
  return x > 0


def backProp(A, A_1, A_2, A_3, Z_1, Z_2, Z_3, W_1, W_2 ,W_3 , mean):
  vector = np.zeros((10, 1))
  vector[mean, 0] = 1

  dZ3 = (A_3 - vector)
  dW3 = np.dot(A_2, dZ3.T)
  db3 = dZ3

  dA2 = np.dot(W_3, dZ3)
  dZ2 = dA2 * d_ReLU(Z_2)
  dW2 = np.dot(A_1, dZ2.T)
  db2 = dZ2

  dA1 = np.dot(W_2, dZ2)
  dZ1 = dA1 * d_ReLU(Z_1)
  dW1 = np.dot(A, dZ1.T)
  db1 = dZ1

  return dW1, db1, dW2, db2, dW3 , db3

    


def gradientDescent(W_1, b_1, W_2, b_2, W_3, b_3, learning_rate, epochs):
    accuracy_list = []
    
    for epoch in range(epochs):
      correct_predictions = 0
      total_samples = len(x_train)
      for i in range(total_samples):
          A = A_layers[i]
          A_1, Z_1, A_2 , Z_2, A_3, Z_3 = forwardProp(A, W_1, b_1, W_2, b_2, W_3, b_3)

          predicted_label = np.argmax(A_3)
          if predicted_label == y_train[i]:
              correct_predictions += 1

          dW_1, db_1, dW_2, db_2 , dW_3, db_3 = backProp(A, A_1, A_2,A_3, Z_1, Z_2,Z_3,  W_1, W_2, W_3 ,y_train[i])

          W_1 = W_1 - learning_rate * dW_1
          b_1 = b_1 - learning_rate * db_1
          W_2 = W_2 - learning_rate * dW_2
          b_2 = b_2 - learning_rate * db_2
          W_3 = W_3 - learning_rate * dW_3
          b_3 = b_3 - learning_rate * db_3
        
      accuracy = (correct_predictions / total_samples) * 100
      accuracy_list.append(accuracy)
      print(f"Epoch {epoch+1}, Accuracy: {accuracy:.2f}%")

      if (epoch + 1) % 5 == 0:
        learning_rate /= 100
        print(f"Learning rate decayed to {learning_rate}")

    return accuracy_list[-1], W_1, b_1, W_2, b_2 , W_3, b_3

In [4]:
accuracy, c_W_1, c_b_1, c_W_2, c_b_2 , c_W_3 , c_b_3 = gradientDescent(W_1, b_1, W_2, b_2, W_3, b_3, 0.01, 14)

  A_1 = np.dot(W_1.T , A)
  dW1 = np.dot(A, dZ1.T)


Epoch 1, Accuracy: 81.38%
Epoch 2, Accuracy: 85.63%
Epoch 3, Accuracy: 86.87%
Epoch 4, Accuracy: 87.71%
Epoch 5, Accuracy: 88.22%
Learning rate decayed to 0.0001
Epoch 6, Accuracy: 89.91%
Epoch 7, Accuracy: 90.67%
Epoch 8, Accuracy: 90.79%
Epoch 9, Accuracy: 90.98%
Epoch 10, Accuracy: 91.07%
Learning rate decayed to 1e-06
Epoch 11, Accuracy: 91.13%
Epoch 12, Accuracy: 91.14%
Epoch 13, Accuracy: 91.14%
Epoch 14, Accuracy: 91.14%


In [None]:
indexes = [61, 115, 124, 149, 151, 217, 241, 247, 266, 321]
for i in indexes:
    input_data = x_test[i].reshape(784, 1)
    A_1, Z_1, A_2 , Z_2, A_3, A_3 = forwardProp(input_data, c_W_1, c_b_1 , c_W_2, c_b_2,c_W_3, c_b_3 )
    predicted_label = np.argmax(A_3)
    print("Label:" , y_test[i])
    print("Prediction:", predicted_label)
    plt.imshow(x_test[i], cmap='gray')
    plt.title(f"MNIST Digit: {y_test[i]}") 
    plt.show()