In [7]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'

mnist_train = dsets.MNIST(root='../Data/MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../Data/MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)


batch_size = 100
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid, linear3)

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 10                                      
for epoch in range(1, nb_epochs+1):
    avg_cost = 0
    total_batch = len(data_loader)

    for X, Y in data_loader:
        optimizer.zero_grad()
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)
        
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    if epoch % 5 == 0:
        print(f'epochs: {epoch}/{nb_epochs}, cost: {avg_cost}')                                          

epochs: 5/10, cost: 0.3936338424682617
epochs: 10/10, cost: 0.2932482063770294


In [8]:
import random

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print(f'Label: {Y_single_data.item()}')
    single_prediction = model(X_single_data)
    print(f'Prediction: {torch.argmax(single_prediction, 1).item()}')

Accuracy: 0.9146000146865845
Label: 4
Prediction: 4


In [5]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'

mnist_train = dsets.MNIST(root='../Data/MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root='../Data/MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)


batch_size = 100
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()

model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3)

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

nb_epochs = 10                                    
for epoch in range(1, nb_epochs+1):
    avg_cost = 0
    total_batch = len(data_loader)

    for X, Y in data_loader:
        optimizer.zero_grad()
        X = X.view(-1, 28 * 28)
        Y = Y

        hypothesis = model(X)
        cost = criterion(hypothesis, Y)

        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    if epoch % 5 == 0:
        print(f'epochs: {epoch}/{nb_epochs}, cost: {avg_cost}')   

epochs: 5/10, cost: 0.10082405060529709
epochs: 10/10, cost: 0.04618535563349724


In [6]:
import random

with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.test_labels[r:r + 1].to(device)

    print(f'Label: {Y_single_data.item()}')
    single_prediction = model(X_single_data)
    print(f'Prediction: {torch.argmax(single_prediction, 1).item()}')

Accuracy: 0.9722999930381775
Label: 5
Prediction: 5
