In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot
import MNISTtools
from torchvision.transforms import ToTensor
import torchvision.datasets as datasets

from PIL import Image
import matplotlib.pyplot as plt
import nntools as nt
%matplotlib notebook

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# MNIST train / test splits: stored under ./data, downloaded if missing,
# with every image converted to a tensor by ToTensor().
mnist_kwargs = dict(root='data', transform=ToTensor(), download=True)
train = datasets.MNIST(train=True, **mnist_kwargs)
test = datasets.MNIST(train=False, **mnist_kwargs)

print(train)


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()


In [5]:
class NNRegressor(nt.NeuralNetwork):
    """Network base class whose training criterion is a summed cross-entropy loss."""

    def __init__(self):
        super().__init__()
        # reduction='sum': the loss is the sum over the batch, not the mean
        self.cel = nn.CrossEntropyLoss(reduction='sum')

    def criterion(self, y, d):
        """Return the cross-entropy between the network outputs ``y`` and the targets ``d``."""
        loss = self.cel(y, d)
        return loss

# LeNet-style convolutional classifier built on top of NNRegressor
class LeNet(NNRegressor):
    """Two conv -> tanh -> average-pool stages followed by three linear layers.

    ``forward`` returns the raw outputs of the last linear layer (10 values
    per sample); no activation is applied to them.
    """

    def __init__(self):
        super().__init__()

        # C1: 1 input channel -> 6 feature maps, 5x5 kernels (default stride 1, padding 0)
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)

        # C3: 6 -> 16 feature maps, 5x5 kernels (default stride 1, padding 0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)

        # C5: first fully connected layer, fed with the flattened feature maps.
        # For a 28x28 input the maps are 16 x 4 x 4 = 256 values per sample.
        self.L1 = nn.Linear(16 * 4 * 4, 120)

        # F6: 120 -> 84 linear units
        self.L2 = nn.Linear(120, 84)

        # F7: 84 -> 10 linear units (one output per class)
        self.L3 = nn.Linear(84, 10)

        # 2x2 average pooling with stride 2, shared by both convolution stages
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

        # tanh is applied after every layer except the final one (F7)
        self.act = nn.Tanh()

    def forward(self, x):
        """Run a batch ``x`` through the network and return the 10 outputs per sample."""
        # Stage 1: convolution -> tanh -> pooling
        x = self.pool(self.act(self.conv1(x)))
        # Stage 2: convolution -> tanh -> pooling
        x = self.pool(self.act(self.conv2(x)))

        # Flatten everything except the batch dimension before the linear layers
        x = x.view(x.size(0), -1)

        x = self.act(self.L1(x))
        x = self.act(self.L2(x))
        # No activation on the last layer
        return self.L3(x)

    def num_flat_features(self, x):
        """Return the number of values per sample: the product of all dimensions of ``x`` but the first."""
        per_sample_shape = x.shape[1:]
        return np.prod(per_sample_shape)

# Build the network
model = LeNet()

# Prefer the GPU when one is available, otherwise stay on the CPU
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(device)




cuda


In [6]:
# Display a 3x3 grid of randomly drawn training digits, each titled with its label
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, rows * cols + 1):
    # Draw one random index into the training set
    sample_idx = torch.randint(len(train), size=(1,)).item()
    img, label = train[sample_idx]
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(label)
    ax.axis("off")
    # squeeze() drops the size-1 dimensions so imshow receives a 2-D image
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

<IPython.core.display.Javascript object>

In [7]:
class DenoisingStatsManager(nt.StatsManager):
    """Stats manager that reports only the loss.

    Construction, ``init`` and ``accumulate`` are inherited unchanged from
    ``nt.StatsManager``; only ``summarize`` is specialised.
    """

    def summarize(self):
        """Return the parent's summary value wrapped in a dict under the key ``'loss'``."""
        return {'loss': super().summarize()}

In [8]:
# Learning rate used by the optimizer
lr = 1e-3

# Pick the device BEFORE moving the model, and fall back to the CPU when no GPU
# is available (a hard-coded "cuda" device breaks later .to(device) calls on
# CPU-only machines).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# nn.Module.to() moves the parameters in place, so `net` and `model` are the same object
net = model.to(device)

In [9]:

# Adam optimizer over the network parameters, using the learning rate chosen above
adam = torch.optim.Adam(net.parameters(), lr=lr)

# Collects the per-epoch loss statistics
stats_manager = DenoisingStatsManager()

# NOTE(review): the run output reports "Start/Continue training from epoch 20",
# so an existing checkpoint in this directory is presumably resumed - confirm in nntools.
exp1 = nt.Experiment(
    model, train, test, adam, stats_manager,
    batch_size=128,
    output_dir="../checkpoints/denoisingMNIST",
    perform_validation_during_training=True,
)

In [10]:
def plot(exp, fig, image, visu_rate=2):
    """Redraw ``fig`` with the prediction for ``image`` and the training-loss curve.

    Nothing is drawn unless ``exp.epoch`` is a multiple of ``visu_rate``.

    Args:
        exp: experiment object exposing ``epoch``, ``net`` and ``history``.
        fig: matplotlib figure to draw on; it is cleared and refilled on each call.
        image: image tensor; ``image[None]`` is fed to the network and
            ``image[0]`` is displayed (assumes a (1, H, W) tensor - TODO confirm).
        visu_rate: refresh the figure every ``visu_rate`` epochs.
    """
    if exp.epoch % visu_rate != 0:
        return

    # Feed the sample on the device the network actually lives on, instead of
    # forcing 'cpu' (which fails with a device mismatch when the net is on GPU).
    first_param = next(exp.net.parameters(), None)
    net_device = first_param.device if first_param is not None else image.device
    with torch.no_grad():
        out = exp.net(image[None].to(net_device))[0]

    # Reuse the caller's figure: clear it and lay out two stacked axes.
    # (The previous plt.subplots(211) created a brand-new 211-row figure per call.)
    fig.clf()
    ax_image, ax_loss = fig.subplots(2, 1)

    ax_image.imshow(image[0].cpu().numpy())
    # Show the predicted class (index of the largest output), not the raw output vector
    ax_image.set_title('prediction:{}'.format(out.argmax().item()))

    # history[k][0] is taken as the training statistics of epoch k
    # (presumably a (train, validation) pair per epoch - verify against nntools)
    ax_loss.plot([exp.history[k][0]['loss'] for k in range(exp.epoch)], label='training loss')
    ax_loss.legend()

    fig.tight_layout()
    fig.canvas.draw()


In [11]:
# Figure handed to the plotting callback during training
fig = plt.figure(figsize=(5, 5))


def show_progress(exp):
    """Plot callback for the experiment: visualise test sample 73 on ``fig``."""
    return plot(exp, fig=fig, image=test[73][0])


exp1.run(num_epochs=20, plot=show_progress)

<IPython.core.display.Javascript object>

Start/Continue training from epoch 20


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Finish training for 20 epochs


  plt.tight_layout()


In [12]:
# Take the network held by the experiment and make sure it sits on the selected device
model = exp1.net
model = model.to(device)


In [19]:
# Single-image inference on test sample 1223.
# unsqueeze(0) adds the batch dimension expected by the network.
x = test[1223][0].unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    # Call the module itself rather than .forward() so registered hooks are run
    y = model(x)
print(y)

# Show the image that was classified (first sample, first channel), back on the CPU
plt.figure()
plt.imshow(x[0][0].to('cpu').numpy())
plt.show()

tensor([[15.0212, -2.9681,  4.9707, -3.6241, -7.2277, -0.0527,  7.9133, -3.7509,
         -2.5494, -1.6183]], device='cuda:0')


<IPython.core.display.Javascript object>

In [14]:
# Shape of the batched input fed to the network
x.size()

torch.Size([1, 1, 28, 28])

tensor([[-4.6872, -3.8588, -6.8235,  2.2907,  0.9949, -1.7230, -7.4524,  0.6664,
         -0.9812, 15.7640]], device='cuda:0')

<IPython.core.display.Javascript object>