In [None]:
!wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
!wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
!wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
!wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz

!gunzip t10k-images-idx3-ubyte.gz
!gunzip t10k-labels-idx1-ubyte.gz
!gunzip train-images-idx3-ubyte.gz
!gunzip train-labels-idx1-ubyte.gz

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

t = transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0), std=(1))]
                       )

dl_train = torch.utils.data.DataLoader( torchvision.datasets.MNIST('/data/mnist', download=True, train=True, transform=t), 
                batch_size=64, drop_last=True, shuffle=True)
dl_test = torch.utils.data.DataLoader( torchvision.datasets.MNIST('/data/mnist', download=True, train=False, transform=t), 
                batch_size=64, drop_last=True, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /data/mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /data/mnist/MNIST/raw/train-images-idx3-ubyte.gz to /data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /data/mnist/MNIST/raw/train-labels-idx1-ubyte.gz to /data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /data/mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to /data/mnist/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /data/mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to /data/mnist/MNIST/raw
Processing...
Done!





  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [2]:
print(len(dl_train))
print(len(dl_test))

937
156


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.softmax(x)

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
net.to(device)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [5]:
optimizer = optim.SGD(net.parameters(), lr=0.2, momentum=0.01)
criterion = nn.CrossEntropyLoss()

for x in range(10):
  for i, (input,target) in enumerate(dl_train):
    input, target = input.to(device), target.to(device)
    output = net(input)
   # print(target)

    optimizer.zero_grad()
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
  print(loss)



tensor(1.5092, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.5330, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4931, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.5306, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4840, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4639, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4614, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4649, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4768, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.4784, device='cuda:0', grad_fn=<NllLossBackward>)


In [6]:
correct = 0
total = 0
with torch.no_grad():
  for i, (input,target) in enumerate(dl_test):
    input, target = input.to(device), target.to(device)
    output = net(input)
    _, pred = torch.max(output.data,1)
    total += target.size(0)
    correct += (pred == target).sum().item()
print(correct/total)



0.9825721153846154
