In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [2]:

device = 'cuda'
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

In [3]:
learning_rate = 0.001
epochs = 15
batch_size = 100

In [4]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True
                         )
mnist_test = dsets.MNIST(root='MNIST_data/',
                          train=False,
                          transform=transforms.ToTensor(),
                          download=True
                         )

In [5]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train, 
                                          batch_size=batch_size, 
                                          shuffle=True, 
                                          drop_last=True)

In [6]:
# nn Layers
linear1 = torch.nn.Linear(784, 256, bias=True)#.to(device)
linear2 = torch.nn.Linear(256, 256, bias=True)#.to(device)
linear3 = torch.nn.Linear(256, 10, bias=True)#.to(device)
relu = torch.nn.ReLU()

In [7]:
torch.nn.init.normal_(linear1.weight)
torch.nn.init.normal_(linear2.weight)
torch.nn.init.normal_(linear3.weight)


Parameter containing:
tensor([[-0.1373,  0.2451, -0.6227,  ..., -1.2575, -0.5513,  0.1391],
        [ 1.3016, -0.3155, -0.3628,  ..., -0.6300, -1.6651, -0.5991],
        [-0.2805, -1.9425, -0.1479,  ..., -0.8257,  0.0412, -0.2893],
        ...,
        [ 0.6031,  0.4906,  1.4963,  ..., -0.8270,  0.2543,  0.3207],
        [-0.9555, -2.7106, -0.0405,  ..., -0.5436,  0.2581, -0.1293],
        [ 1.2371,  0.8544, -0.7735,  ...,  1.6935,  0.6544,  1.1300]],
       requires_grad=True)

In [8]:
model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3).to(device)

In [9]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
total_batch = len(data_loader)
for epoch in range(epochs):
    avg_cost = 0
    
    for X, y in data_loader:
        
        X = X.view(-1, 28 * 28).to(device)
        y = y.to(device)
        
        hypothesis = model(X)
        cost = criterion(hypothesis, y)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        
        avg_cost += cost/total_batch
    print('Epoch : {:4d}/{} Cost :  {}'.format(epoch+1, epochs, cost.item()))
    

Epoch :    1/15 Cost :  50.7212028503418
Epoch :    2/15 Cost :  30.852436065673828
Epoch :    3/15 Cost :  32.253761291503906
Epoch :    4/15 Cost :  27.745861053466797
Epoch :    5/15 Cost :  6.265905857086182
Epoch :    6/15 Cost :  22.083864212036133
Epoch :    7/15 Cost :  2.9932949542999268
Epoch :    8/15 Cost :  0.45243287086486816
Epoch :    9/15 Cost :  1.5649091005325317
Epoch :   10/15 Cost :  0.8011517524719238
Epoch :   11/15 Cost :  2.101971387863159
Epoch :   12/15 Cost :  0.0
Epoch :   13/15 Cost :  0.0
Epoch :   14/15 Cost :  0.4060244858264923
Epoch :   15/15 Cost :  3.177647113800049


In [11]:
import matplotlib.pyplot as plt
# test the model using test sets
with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28*28).float().to(device)
    y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == y_test
    acc = correct_prediction.float().mean()
    print('Acc {:.6f}'.format(acc.item()))
    
    # Get one and predict
    r = random.randint(0, len(mnist_test) -1 )
    X_single_data = mnist_test.test_data[r:r+1].view(-1, 28*28).float().to(device)
    y_single_data = mnist_test.test_labels[r:r+1].to(device)
    
    print('Label {}'.format(y_single_data.item()))
    single_prediction = model(X_single_data)
    print('Prediction : {}'.format(torch.argmax(single_prediction, 1).item()))
    
    plt.imshow(mnist_test.test_data[r:r+1].view(28,28), cmap='Greys')
    plt.show()




Acc 0.945800
Label 1
Prediction : 1


<Figure size 640x480 with 1 Axes>