In [79]:
import torch
import numpy as np

from torch import Tensor
from typing import Tuple
from torchvision.datasets import MNIST

In [80]:
def get_MNIST(path: str = "./data", train: bool = True, pin_memory: bool = True):
  mnist_all = MNIST(path, download=True, train=train)
  mnist_bits = []
  mnist_target = []
  for image, label in mnist_all:
    #force from numpy
    tensor = torch.from_numpy(np.array(image))
    mnist_bits.append(tensor)
    mnist_target.append(label)

  mnist_bits = to_tensor(mnist_bits).float()#60000 x 28 x 28
  mnist_bits = mnist_bits.flatten(start_dim=1)#60000 x 784
  mnist_bits /= mnist_bits.max()#norm
  mnist_target = to_tensor(mnist_target)#60000
  if train:
    mnist_target = to_one_hot(mnist_target)#6000 x 10
  if pin_memory:
    return mnist_bits.pin_memory(), mnist_target.pin_memory()
  return mnist_bits, mnist_target

In [81]:
def use_CPU_GPU():
  #Avem GPU?
  if torch.cuda.is_available():
      return torch.device('cuda')
  return torch.device('cpu')

In [82]:
def sigmoid(z):
    return z.sigmoid()

def softmax(z):
    return z.softmax(dim=1)

In [83]:
def to_tensor(x):
    if isinstance(x, (tuple, list)):
        if isinstance(x[0], Tensor):
            return torch.stack(x)
        return torch.tensor(x)

def to_one_hot(x):
    return torch.eye(x.max() + 1)[x]

In [84]:
def forwardProp(x, weight, bias):
    return torch.add(torch.mm(x, weight), bias)

def backProp(x, y, y_h, y_o, w2):
    Error_L2 = y_o - y #batch_size, 10
    del_W2 = torch.mm(y_h.T, Error_L2) #100, batch_size, 10
    del_B2 = Error_L2.mean(dim=0) #batch_size, 10

    Error_L1 = y_h * (1 - y_h) * (torch.mm(w2, Error_L2.T)).T #batch_size, 100
    del_W1 = torch.mm(x.T, Error_L1) #784, batch_size, 100
    del_B1 = Error_L1.mean(dim=0) #batch_size, 100

    return del_W1, del_B1, del_W2, del_B2

In [85]:
def batch_training(x, y, w_ih, b_ih, w_ho, b_ho, mu):
  #Input-Hidden
  y_h = sigmoid(forwardProp(x, w_ih, b_ih))
  #Hidden-Output
  y_o = softmax(forwardProp(y_h, w_ho, b_ho))
  #Loss
  Loss = torch.nn.functional.cross_entropy(y_o, y)
  #BackProp
  del_W1, del_B1, del_W2, del_B2 = backProp(x, y, y_h, y_o, w_ho)
  #New Values
  w_ih -= mu * del_W1
  b_ih -= mu * del_B1
  w_ho -= mu * del_W2
  b_ho -= mu * del_B2

  return w_ih, b_ih, w_ho, b_ho, Loss

def epoch_training(data, labels, w_ih, b_ih, w_ho, b_ho, mu, batch_size):
  non_blocking = w_ih.device.type == 'cuda'
  epoch_loss = 0

  for i in range(0, data.shape[0], batch_size):
    x = data[i: i + batch_size].to(w_ih.device, non_blocking=non_blocking)
    y = labels[i: i + batch_size].to(w_ih.device, non_blocking=non_blocking)
    w_ih, b_ih, w_ho, b_ho, batch_loss = batch_training(x, y, w_ih, b_ih, w_ho, b_ho, mu)
    epoch_loss += batch_loss

  return w_ih, b_ih, w_ho, b_ho, epoch_loss / batch_size

In [86]:
def test(bits, targets, w_ih, b_ih, w_ho, b_ho, batch_size):
  Total_corect = 0
  Total = bits.shape[0]
  non_blocking = w_ih.device.type == 'cuda'

  for i in range(0, Total, batch_size):
    x = bits[i: i + batch_size].to(w_ih.device, non_blocking=non_blocking)
    y = targets[i: i + batch_size].to(w_ih.device, non_blocking=non_blocking)
    result = softmax(forwardProp(sigmoid(forwardProp(x, w_ih, b_ih)), w_ho, b_ho))

    resulted_max_value, resulted_max_value_indices = torch.max(result, dim=1)
    bool_mask = resulted_max_value_indices == y
    correct_predictions = bool_mask.sum().item()
    Total_corect += correct_predictions

  return Total_corect / Total

In [93]:
def train(epochs, device: torch.device = use_CPU_GPU()):

  pin_memory = device.type == 'cuda'
  w_ih = torch.normal(0, 1 / np.sqrt(784) , (784, 100), device=device)
  b_ih = torch.zeros((1, 100), device=device)
  w_ho = torch.normal(0, 1 / np.sqrt(784), (100, 10), device=device)
  b_ho = torch.zeros((1, 10), device=device)
  mu = 0.01
  batch_size = 60
  test_batch_size = 500
  bits_train, targets_train = get_MNIST(train=True, pin_memory=pin_memory)
  bits_test, targets_test = get_MNIST(train=False, pin_memory=pin_memory)
  total_loss = 0
  for epoch in range(epochs):
    print(epoch)
    epoch_loss = 0
    w_ih, b_ih, w_ho, b_ho, epoch_loss = epoch_training(bits_train, targets_train, w_ih, b_ih, w_ho, b_ho, mu, batch_size)
    total_loss += epoch_loss
    accuracy = test(bits_test, targets_test, w_ih, b_ih, w_ho, b_ho, test_batch_size)
  print(" Folosind: "+ str(device) + " Precizia = " + str(accuracy) + " Loss-epoci = " + str(epoch_loss) + " Loss-Total = " + str(total_loss))

In [94]:
train(50, torch.device('cpu'))
train(50)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
 Folosind: cpu Precizia = 0.9799 Loss-epoci = tensor(24.4111) Loss-Total = tensor(1235.9165)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
 Folosind: cpu Precizia = 0.9793 Loss-epoci = tensor(24.4105) Loss-Total = tensor(1235.9904)
