In [9]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
import random
import torchvision.datasets as dsets
import torchvision.transforms as transforms #should be used to convert images to pytorch tensors

In [11]:
device = 'cuda' if torch.cuda.is_available else 'cpu'
if device == 'cuda':
  torch.cuda.torch.manual_seed(777) #random data sample is the same

In [12]:
training_epoch = 10
batch_size = 500   #2^10
learning_rate = 1e-3
dropout = 0.3 #70% of neurons activated

In [13]:
mnist_train = dsets.MNIST(root='/content/drive/MyDrive/CSCE464/datasets/MNIST',
                          train= True,
                          transform= transforms.ToTensor(),
                          download= True)

mnist_test = dsets.MNIST(root='/content/drive/MyDrive/CSCE464/datasets/MNIST',
                          train= False,
                          transform= transforms.ToTensor(),
                          download= True)

In [14]:
data_loader = torch.utils.data.DataLoader(dataset= mnist_train,
                                          batch_size= batch_size,
                                          shuffle = True,
                                          drop_last = True)

In [15]:
#vanishing gradient if too many hidden layers
linear1 = torch.nn.Linear(784, 1024, bias=True)
linear2 = torch.nn.Linear(1024, 512, bias=True)
linear3 = torch.nn.Linear(512, 256, bias=True)
linear4 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=dropout)

In [23]:
#to avoid vanishing gradient
#weight initilization using xavier method (most popular, default in TensorFlow)
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
torch.nn.init.xavier_uniform_(linear4.weight)

  torch.nn.init.xavier_uniform(linear1.weight)


Parameter containing:
tensor([[ 0.1363, -0.0804, -0.0481,  ..., -0.0397, -0.1019, -0.1015],
        [ 0.1239, -0.0917,  0.0446,  ..., -0.0031, -0.1315, -0.0405],
        [ 0.0171,  0.1284, -0.0509,  ..., -0.0544,  0.1269,  0.0408],
        ...,
        [-0.0802,  0.0961, -0.1462,  ...,  0.1226,  0.0849, -0.1112],
        [-0.0352, -0.0583, -0.0303,  ..., -0.1065, -0.0780,  0.1341],
        [-0.0064, -0.0639, -0.0482,  ...,  0.0978, -0.1122,  0.1485]],
       requires_grad=True)

In [24]:
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4).to(device)

In [25]:
loss = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [26]:
total_batch = len(data_loader)

for epoch in range(training_epoch):
  avg_cost = 0

  for X, Y in data_loader:
    X = X.view(-1, 28*28).to(device)
    Y = Y.to(device)

    pred = model(X).to(device)
    cost = loss(pred, Y).to(device)
    correct_pred = torch.torch.argmax(pred, axis=1) == Y
    accuracy = correct_pred.float().mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    avg_cost += cost / total_batch

  print("Epoch {:2d} / {} Cost: {:.5f} Training ACC: {:.2f}%".format(epoch+1, 10, avg_cost, accuracy * 100))

Epoch  1 / 10 Cost: 0.38758 Training ACC: 94.60%
Epoch  2 / 10 Cost: 0.12941 Training ACC: 96.40%
Epoch  3 / 10 Cost: 0.09261 Training ACC: 98.40%
Epoch  4 / 10 Cost: 0.07250 Training ACC: 97.40%
Epoch  5 / 10 Cost: 0.05732 Training ACC: 98.40%
Epoch  6 / 10 Cost: 0.04736 Training ACC: 98.20%
Epoch  7 / 10 Cost: 0.04025 Training ACC: 98.40%
Epoch  8 / 10 Cost: 0.03511 Training ACC: 99.00%
Epoch  9 / 10 Cost: 0.03325 Training ACC: 99.20%
Epoch 10 / 10 Cost: 0.02981 Training ACC: 99.00%


In [27]:
#test the model
with torch.no_grad():
  x_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
  y_test = mnist_test.test_labels.to(device)

  pred = model(x_test)
  correct_prediction = torch.torch.argmax(pred, axis=1) == y_test
  accuracy = correct_prediction.float().mean()
  print("Testing Accuracy {:.2f}".format(accuracy.item()*100))

Testing Accuracy 97.56


