In [None]:
import pickle

import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

In [None]:
# Training configuration
input_size = 28 * 28   # number of values in one flattened image (784)
num_epochs = 10        # passes over the training set
batch_size = 100       # samples per mini-batch

# Per-image preprocessing: convert to a tensor, then normalise the single
# channel with the given mean / std.
# NOTE(review): the commonly quoted MNIST std is 0.3081 - confirm that 0.3015
# is intentional before changing it (evaluation must use the same values).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3015,)),
])

In [None]:
# MNIST training split (images and labels), downloaded into ./data when it is
# not already there; `transform` is applied to every image as it is read.
train_dataset = dsets.MNIST('./data', train=True, download=True, transform=transform)

# Mini-batch iterator over the training split; shuffle=True reshuffles the
# samples at every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
)



In [None]:
def tanh(t):
    """Element-wise hyperbolic tangent of a tensor.

    The textbook form (e^t - e^-t) / (e^t + e^-t) overflows once |t| is large
    enough for exp() to return inf, which turns the quotient into inf/inf = NaN.
    torch.tanh computes the same function but saturates cleanly at -1 / +1.

    Args:
        t: torch.Tensor of any shape.

    Returns:
        Tensor with the same shape as ``t`` and values in [-1, 1].
    """
    return torch.tanh(t)

def tanhPrime(t):
    """Derivative of tanh, expressed through the activation value.

    Since d/dz tanh(z) = 1 - tanh(z)^2, the argument must be the *output* of
    tanh (the activation), not the pre-activation z.

    Args:
        t: tanh activation value(s).

    Returns:
        1 - t^2, element-wise.
    """
    squared = t * t
    return 1 - squared

def softmax(x, dim=-1):
    """Numerically stable softmax along one dimension.

    The maximum along ``dim`` is subtracted before exponentiating so that the
    largest exponent is exp(0) = 1 and exp() cannot overflow. Using
    ``keepdim=True`` keeps the reduced axis for broadcasting, so the function
    works for inputs of any rank (a single score vector, a batch of rows, ...)
    instead of only 2-D matrices.

    Args:
        x: torch.Tensor of scores.
        dim: dimension to normalise over. The default (-1, the last axis) is
            the class axis of a (batch, classes) matrix.

    Returns:
        Tensor with the same shape as ``x``; entries along ``dim`` are
        non-negative and sum to 1.
    """
    # Shift by the per-slice maximum: leaves the result unchanged mathematically
    # but keeps every exponent <= 0.
    shifted = x - x.max(dim=dim, keepdim=True).values
    exps = torch.exp(shifted)
    return exps / exps.sum(dim=dim, keepdim=True)

In [None]:
class Neural_Network:
    """Two-layer fully connected classifier trained with hand-written backprop.

    Architecture: input -> linear(W1, b1) -> tanh -> linear(W2, b2) -> softmax.
    No autograd is used; ``forward`` caches the intermediate activations that
    ``backward`` needs, so ``backward`` must be called after ``forward`` on the
    same batch.
    """

    def __init__(self, input_size=784, output_size=10, hidden_size=100):
        """Create the network and randomly initialise its weights.

        Args:
            input_size: number of features per (flattened) sample.
            output_size: number of classes.
            hidden_size: width of the hidden layer.
        """
        # parameters
        self.inputSize = input_size
        self.outputSize = output_size
        self.hiddenSize = hidden_size

        # weights
        # W1 is drawn from a standard normal and then divided by its own
        # (Frobenius) norm, so the whole matrix has unit norm.
        self.W1 = torch.randn(self.inputSize, self.hiddenSize)
        self.W1 = self.W1 / torch.norm(self.W1)
        self.b1 = torch.zeros(self.hiddenSize)

        # W2 keeps its raw standard-normal scale.
        self.W2 = torch.randn(self.hiddenSize, self.outputSize)
        self.b2 = torch.zeros(self.outputSize)

    def forward(self, X):
        """Run a batch through the network.

        Stores ``z1`` (hidden pre-activation), ``h`` (hidden activation) and
        ``z2`` (output scores) on the instance for use by ``backward``.

        Args:
            X: tensor of shape (batch, input_size).

        Returns:
            Tensor of shape (batch, output_size) with class probabilities.
        """
        self.z1 = torch.matmul(X, self.W1) + self.b1
        self.h = tanh(self.z1)
        self.z2 = torch.matmul(self.h, self.W2) + self.b2
        return softmax(self.z2)

    def CE_loss(self, y_pred, y):
        """Mean cross-entropy between predicted probabilities and one-hot targets.

        Args:
            y_pred: tensor (batch, classes) of probabilities.
            y: tensor (batch, classes) of one-hot targets.

        Returns:
            0-d tensor: the summed cross-entropy divided by the number of rows
            in this batch.
        """
        # A probability that underflowed to exactly 0 would give log(0) = -inf,
        # and 0 * -inf = NaN for the non-target classes. Clamping to the
        # smallest positive normal float keeps the log finite.
        eps = torch.finfo(y_pred.dtype).tiny
        log_probs = torch.log(y_pred.clamp_min(eps))
        # Average over the rows actually present, not a global batch-size
        # constant, so short (e.g. final) batches are handled correctly.
        return -torch.sum(y * log_probs) / y.shape[0]

    def backward(self, X, y, y_pred, lr):
        """One step of gradient descent on the mean cross-entropy loss.

        Uses ``self.h`` cached by the preceding ``forward`` call and updates
        W1, b1, W2 and b2 in place.

        Args:
            X: tensor (batch, input_size), the batch passed to ``forward``.
            y: tensor (batch, output_size) of one-hot targets.
            y_pred: tensor (batch, output_size) returned by ``forward``.
            lr: learning rate.
        """
        n_samples = X.shape[0]

        # Gradient of softmax + cross-entropy w.r.t. the output scores.
        dl_dz2 = (y_pred - y) / n_samples

        # Back-propagate through the second linear layer and the tanh.
        # dl_dh must be computed before W2 is modified below.
        dl_dh = torch.matmul(dl_dz2, torch.t(self.W2))
        dl_dz1 = dl_dh * tanhPrime(self.h)

        # Bias gradients are the per-sample gradients summed over the batch.
        self.W1 -= lr * torch.matmul(torch.t(X), dl_dz1)
        self.b1 -= lr * dl_dz1.sum(dim=0)
        self.W2 -= lr * torch.matmul(torch.t(self.h), dl_dz2)
        self.b2 -= lr * dl_dz2.sum(dim=0)

    def train(self, loader=None, epochs=None):
        """Train the network and plot the training accuracy of every epoch.

        Args:
            loader: iterable of (images, labels) batches that supports
                ``len()``. Defaults to the notebook-level ``train_loader``.
            epochs: number of passes over ``loader``. Defaults to the
                notebook-level ``num_epochs``.

        Returns:
            List with one training-accuracy value (float in [0, 1]) per epoch.
        """
        if loader is None:
            loader = train_loader
        if epochs is None:
            epochs = num_epochs

        train_acc = []
        iters_per_epoch = len(loader)

        for epoch in range(epochs):
            # Step schedule: large steps for the first four epochs, then a
            # much smaller rate.
            lr = 0.1 if epoch < 4 else 0.001

            # Counters restart every epoch so each plotted point is the
            # accuracy of that epoch alone, not a running total.
            epoch_correct = 0
            epoch_total = 0

            for i, (images, labels) in enumerate(loader):
                n_batch = labels.size(0)
                images = images.view(-1, self.inputSize)
                one_hot = torch.zeros(n_batch, self.outputSize)
                one_hot[torch.arange(n_batch), labels] = 1

                # forward
                y_pred = self.forward(images)
                predicted = y_pred.argmax(dim=1)

                epoch_correct += (predicted == labels).sum().item()
                epoch_total += n_batch

                # loss
                loss = self.CE_loss(y_pred, one_hot)

                # backward
                self.backward(images, one_hot, y_pred, lr)

                if (i + 1) % 200 == 0:
                    print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' % (epoch + 1, epochs, i + 1, iters_per_epoch, loss.item()))

            train_acc.append(epoch_correct / epoch_total if epoch_total else 0.0)

        epochs_lst = list(range(1, epochs + 1))
        # The label gives plt.legend() an entry to show.
        plt.plot(epochs_lst, train_acc, color='purple', label='Train accuracy')
        plt.title("Accuracy Value per Epochs")
        plt.xlabel("Epoch")
        plt.ylabel("Accuracy")
        plt.legend()
        plt.show()

        return train_acc

In [None]:
# Seed PyTorch's global RNG so the random weight initialisation and the
# DataLoader shuffling are repeatable across "Restart Kernel -> Run All".
torch.manual_seed(0)

nn = Neural_Network()
nn.train()

# Persist the trained network object with pickle (requires `import pickle` in
# the imports cell). The pickle stores the instance's attributes, i.e. the
# weights plus the activations cached by the last forward pass; loading it
# later needs the Neural_Network class to be defined.
with open("train_hw1_206238891.pkl", "wb") as f:
    pickle.dump(nn, f)