In [8]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import time

In [2]:
transform = transforms.Compose([transforms.ToTensor()])
batch_size = 128


def binarize_dataset(dataset, threshold=0.5):
    """Build an in-memory TensorDataset of thresholded images and their labels.

    Args:
        dataset: Iterable of ``(image, label)`` pairs, where ``image`` is a
            tensor and ``label`` is an integer class index.
        threshold: Pixels strictly greater than this value become 1; every
            other pixel (including one exactly equal to it) becomes 0.

    Returns:
        ``torch.utils.data.TensorDataset`` holding the stacked binarized
        images and a 1-D tensor of labels.
    """
    images, labels = [], []
    for image, label in dataset:
        # A single comparison yields a strict {0, 1} image; the result keeps
        # the dtype of the incoming image tensor.
        images.append((image > threshold).to(image.dtype))
        labels.append(label)
    return torch.utils.data.TensorDataset(torch.stack(images), torch.tensor(labels))


# Training split: binarize once up front, then shuffle every epoch.
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
bin_train_data = binarize_dataset(trainset)
trainloader = torch.utils.data.DataLoader(bin_train_data, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
bin_test_data = binarize_dataset(testset)
testloader = torch.utils.data.DataLoader(bin_test_data, batch_size=batch_size, shuffle=False, num_workers=2)

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output.

    The output layer has no activation, so ``forward`` returns raw scores
    (logits), one per class.

    Args:
        in_features: Number of features per input sample (default 784).
        hidden1: Width of the first hidden layer (default 120).
        hidden2: Width of the second hidden layer (default 84).
        num_classes: Number of output scores per sample (default 10).
    """

    def __init__(self, in_features=784, hidden1=120, hidden2=84, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return class scores for ``x``.

        ``x`` may already have ``in_features`` as its last dimension, or be a
        batch with extra trailing dimensions (e.g. image-shaped), in which
        case everything after the batch dimension is flattened first.
        """
        # Only flatten inputs that fc1 could not consume as-is, so any input
        # whose last dimension already matches keeps its original behavior.
        if x.dim() > 2 and x.size(-1) != self.fc1.in_features:
            x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# Build the classifier, then move its parameters to the selected device.
net = Net()
net = net.to(device)

In [10]:
import torch.optim as optim

# Cross-entropy over the network's raw output scores, minimized by SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
)

In [13]:
num_epochs = 20  # number of full passes over the training loader

# Make the training mode explicit so this cell behaves the same no matter
# which mode an earlier cell left the model in.
net.train()
for epoch in range(num_epochs):
    loss_sum = 0.0   # sum of (batch mean loss * batch size) for this epoch
    n_seen = 0       # number of samples processed this epoch
    for inputs, labels in trainloader:
        # Flatten every sample to a 784-element vector and move the batch
        # to the device the model lives on.
        inputs = inputs.view(-1, 784).to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion reports a per-batch mean; weight it by the batch size
        # so a smaller final batch does not skew the epoch average.
        n_batch = labels.size(0)
        loss_sum += loss.item() * n_batch
        n_seen += n_batch

    epoch_loss = loss_sum / n_seen if n_seen else float("nan")
    print(f"epoch {epoch+1} loss: {epoch_loss}")

print('Finished Training')

epoch 1 loss: 0.22598979193200944
epoch 2 loss: 0.21947288278069324
epoch 3 loss: 0.21352804809618098
epoch 4 loss: 0.20755838868079154
epoch 5 loss: 0.20171031174756315
epoch 6 loss: 0.19621131415051946
epoch 7 loss: 0.19138674682645657
epoch 8 loss: 0.18620620234243906
epoch 9 loss: 0.18166352230221477
epoch 10 loss: 0.17720582459304632
epoch 11 loss: 0.1726247578986418
epoch 12 loss: 0.16860156223527403
epoch 13 loss: 0.16462217311837526
epoch 14 loss: 0.16076839813736202
epoch 15 loss: 0.15712316079275696
epoch 16 loss: 0.15350701506616973
epoch 17 loss: 0.1499631660563478
epoch 18 loss: 0.14682745856485133
epoch 19 loss: 0.14361624503885506
epoch 20 loss: 0.14049927107910357
Finished Training


In [14]:
correct = 0
total = 0

# Evaluate in inference mode, then put the model back in whatever mode it was
# in so running this cell does not change later training behavior.
was_training = net.training
net.eval()
with torch.no_grad():  # no gradients are needed for scoring
    for inputs, labels in testloader:
        # Flatten every sample to a 784-element vector and move the batch
        # to the device the model lives on.
        inputs = inputs.view(-1, 784).to(device)
        labels = labels.to(device)

        # The predicted class is the index of the largest output score.
        predicted = net(inputs).argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
net.train(was_training)

# Report the number of images actually evaluated rather than a fixed count.
print(f'Accuracy of the network on the {total} test images: {correct / total}')

Accuracy of the network on the 10000 test images: 0.9554
