In [5]:
! pip install ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting ipywidgets
  Downloading ipywidgets-7.7.0-py2.py3-none-any.whl (123 kB)
[K     |████████████████████████████████| 123 kB 1.5 MB/s eta 0:00:01
[?25hCollecting jupyterlab-widgets>=1.0.0
  Downloading jupyterlab_widgets-1.1.0-py3-none-any.whl (245 kB)
[K     |████████████████████████████████| 245 kB 9.4 MB/s eta 0:00:01
Collecting widgetsnbextension~=3.6.0
  Downloading widgetsnbextension-3.6.0-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 8.1 MB/s eta 0:00:01
Installing collected packages: widgetsnbextension, jupyterlab-widgets, ipywidgets
Successfully installed ipywidgets-7.7.0 jupyterlab-widgets-1.1.0 widgetsnbextension-3.6.0


# MNIST

In [14]:
## import torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as dset
import torch.optim as optim

import torchvision
from torchvision import datasets, transforms

torch.manual_seed(1)

image_x = 28
image_y = 28
image_channel = 1
output_channel = 10

class MNIST_NN(nn.Module):
    def __init__(self, image_x, image_y, image_channel, output_channel):
        super(MNIST_NN, self).__init__()
        
        self.conv_1 = nn.Conv2d(1, 32, kernel_size=(5,5))
        self.conv_2 = nn.Conv2d(32, 64, kernel_size=(5,5))
        
        self.linear_1 = nn.Linear(1024, 256)
        self.linear_2 = nn.Linear(256, output_channel)
        
    def forward(self, image):
        out = F.max_pool2d(F.relu(self.conv_1(image)), kernel_size=(2,2))
        out = F.max_pool2d(F.relu(self.conv_2(out)), kernel_size=(2,2))
        
        out = self.linear_1(torch.flatten(out, 1))
        out = self.linear_2(out)
        
        return out
    
# GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('GPU State:', device)

# image preprocessing
# Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] 
# to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
# if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) 
# or if the numpy.ndarray has dtype = np.uint8
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5,), (0.5,)),]
)
train_data = datasets.MNIST(root='MNIST', download=True, train=True, transform=transform)
test_data = datasets.MNIST(root='MNIST', download=True, train=False, transform=transform)
trainLoader = dset.DataLoader(train_data, batch_size=64, shuffle=True)
testLoader = dset.DataLoader(test_data, batch_size=64, shuffle=False)


model = MNIST_NN(image_x, image_y, image_channel, output_channel).to(device)
print(model)

print(next(model.parameters()).is_cuda)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 3
for epochs in range(3):  # again, normally you would NOT do 300 epochs, it is toy data
    for i, (data) in enumerate(trainLoader):
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        image = data[0].to(device)
        labels = data[1].to(device)
        
        # Step 3. Run our forward pass.
        res = model(image)
        
        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(res, labels)
        
        if (i + 1) % 100 == 0:
            with torch.no_grad():
                correct_count = 0
                for _, (data) in enumerate(testLoader):
                    image = data[0].to(device)
                    labels = data[1].to(device)
                    output = model(image)
                    predict_label = torch.argmax(nn.Softmax(dim=1)(output), dim=1, keepdim=False)
                    correct_count += (predict_label == labels).float().sum()
                acc = correct_count / test_data.targets.size()[0]
            print('Epoch [{}/{}], Step [{}], Loss: {:.4f}, Acc : {:.4f}'.format(epochs + 1, num_epochs, i + 1, loss.item(), acc.item()))
#             loss_rec.append(loss.item())
#             acc_rec.append(acc.item())
#             print({
#             'epoch': epochs,
#             'model_state_dict': model.state_dict(),
# #             'loss': loss,
#             })
    loss.backward()
    if (i + 1) % 10 == 0:
        total_norm = 0.0
        for p in model.parameters():
            param_norm = p.grad.detach().data.norm(2)
            total_norm += param_norm.item() ** 2
        total_norm = total_norm ** 0.5
        print(total_norm)
    optimizer.step()

# with torch.no_grad():
#     for i, (image, tags) in enumerate(testLoader):
#         bow_vec = make_bow_vector(instance, word_to_ix)
#         probs = model(bow_vec)
       
#         print(nn.Softmax(dim=1)(probs))

GPU State: cuda:0
MNIST_NN(
  (conv_1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv_2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (linear_1): Linear(in_features=1024, out_features=256, bias=True)
  (linear_2): Linear(in_features=256, out_features=10, bias=True)
)
True
Epoch [1/3], Step [100], Loss: 2.3001, Acc : 0.1028
Epoch [1/3], Step [200], Loss: 2.2988, Acc : 0.1028
Epoch [1/3], Step [300], Loss: 2.3269, Acc : 0.1028
Epoch [1/3], Step [400], Loss: 2.3084, Acc : 0.1028
Epoch [1/3], Step [500], Loss: 2.3023, Acc : 0.1028
Epoch [1/3], Step [600], Loss: 2.3225, Acc : 0.1028
Epoch [1/3], Step [700], Loss: 2.3059, Acc : 0.1028
Epoch [1/3], Step [800], Loss: 2.3133, Acc : 0.1028
Epoch [1/3], Step [900], Loss: 2.3001, Acc : 0.1028
Epoch [2/3], Step [100], Loss: 2.4579, Acc : 0.1028
Epoch [2/3], Step [200], Loss: 2.3253, Acc : 0.1028
Epoch [2/3], Step [300], Loss: 2.2060, Acc : 0.1028
Epoch [2/3], Step [400], Loss: 2.4023, Acc : 0.1028
Epoch [2/3], Step [500], Loss: 2