In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import sklearn.metrics as metrics

Convolutional NNs exploit spatial invariance to learn useful representations of the data with fewer parameters than fully connected networks.

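- The network should respond similarly to the same patch, regardless of where it appears in the image. This principle is called translation invariance (strictly speaking, a convolution layer is translation equivariant: shifting the input shifts the feature map by the same amount).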

- The earliest layers of the network should focus on local regions

- As we proceed, deeper layers should be able to capture longer-range features of the image


In [2]:
BATCH_SIZE = 32

# ToTensor is the only transform applied to the images.
transform = transforms.Compose([transforms.ToTensor()])

# Training split: fetched into ./data when missing, served in shuffled batches.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4
)

# Test split: same transform and batch size, iterated without shuffling.
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4
)

In [3]:
# Dataset size, then the first (image, label) sample, then the image tensor alone.
print(len(trainset))
first_sample = trainset[0]
print(first_sample)
first_image, first_label = first_sample
print(first_image)

60000
(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 

In [4]:
# Class label of the first training sample (the recorded output shows 5).
print(first_label)

5


In [5]:
# Shape of a single image tensor; the recorded output is torch.Size([1, 28, 28]).
first_image.shape

torch.Size([1, 28, 28])

In [6]:
# Pull just the first batch from the loader to inspect the batched tensor shapes.
image, label = next(iter(trainloader))
print(f'image.shape = {image.shape}, which is batch_size, input_channels, height, width')
print(f'label.shape = {label.shape}')

image.shape = torch.Size([32, 1, 28, 28])
label.shape = torch.Size([32])


### output of a conv2d layer
Output = floor((Input - Kernel_size + 2 * Padding) / Stride) + 1

stride=1 by default, padding=0 by default.

In [7]:
def get_cnn_output_shape(input_size, kernel_size, padding, stride):
    """Return the output size of a convolution along one spatial dimension.

    Computes ``floor((input_size - kernel_size + 2 * padding) / stride) + 1``.
    Floor division is used so that the printed value is the real (integer)
    output size even when the stride does not divide the numerator evenly.

    Args:
        input_size: Size of the input along the dimension (height or width).
        kernel_size: Size of the kernel along the same dimension.
        padding: Padding added to each side of the input.
        stride: Step of the kernel; must be non-zero.

    Returns:
        The output size as an ``int``. The value is also printed.

    Raises:
        ZeroDivisionError: If ``stride`` is 0.
    """
    output = (input_size - kernel_size + 2 * padding) // stride + 1
    print(f'output size: {output}')
    return int(output)




Input Image:      [BATCHSIZE, IN_CHANNELS, HEIGHT, WIDTH]
                         ↓

Conv1 Filters:    32 filters of size [3, 3]
                         
                         ↓
Output Features:  [batch_size, 32, 26, 26]

In [8]:
# Kernel size used for the convolution examples below.
KERNEL_SIZE = 3

# Image geometry is read off the first training sample rather than hard-coded.
IN_CHANNELS, HEIGHT, WIDTH = first_image.shape
print(IN_CHANNELS, HEIGHT, WIDTH)
print(BATCH_SIZE)

1 28 28
32


In [9]:
# Show the dataset summary (number of datapoints, root location, split, transform).
trainset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

## practicalities of CNN


- use conv layer followed by pool layer or 2 conv layers followed by pool
- use filters (kernels) of odd sizes (e.g. 3x3 or 5x5)
- start with smaller filters closer to the input layers to capture local features and optionally increase their size as you go deeper in the NN
- the number of channels should be small in the first layers and increase as the NN gets deeper. Low-level features (like edges) are few and simple, whereas deeper layers combine them into many more complex patterns.
- using padding is recommended, to maintain the size of the image and to keep the information at the border of the image.
- for pooling use 2x2 or 3x3 kernels
- the out_channels in the first conv layer corresponds to how many filters (feature maps) you want to apply 




In [76]:
# Flattening one MNIST-sized batch of a single image gives 1 * 1 * 28 * 28 = 784
# values, i.e. the input width a fully connected layer would need.
torch.zeros(1, 1, 28, 28).flatten().shape

torch.Size([784])

In [17]:

class MNIST_CNN(nn.Module):
    """Single-convolution image classifier.

    Pipeline: Conv2d -> ReLU -> flatten -> Linear -> ReLU -> Linear -> softmax.

    The size of the first linear layer is derived from ``height`` only, so the
    input is assumed to be square (height == width) with a square kernel.

    Args:
        n_classes: Number of output classes.
        height: Height (and width) of the input images.
        in_channels: Number of channels of the input images.
        out_channels: Number of filters of the convolution layer.
        kernel_size: Kernel size of the convolution layer.
        padding: Padding of the convolution layer.
        stride: Stride of the convolution layer.
        n_hidden: Width of the hidden fully connected layer.
    """

    def __init__(self, n_classes, height, in_channels, out_channels, kernel_size, padding, stride, n_hidden):
        super(MNIST_CNN, self).__init__()

        # padding and stride must reach the layer itself: they are used below to
        # size linear1, so leaving the layer at its defaults (stride=1, padding=0)
        # would produce a shape mismatch for any other setting.
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                               kernel_size=kernel_size, stride=stride, padding=padding)

        self.output_size_conv1 = get_cnn_output_shape(input_size=height, kernel_size=kernel_size, padding=padding, stride=stride)

        # The conv output is (out_channels, size, size) per sample once flattened.
        self.linear1 = nn.Linear(self.output_size_conv1 * self.output_size_conv1 * out_channels, n_hidden)
        self.output_linear = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return class probabilities of shape (batch, n_classes).

        NOTE: the output is already softmax-normalised. nn.CrossEntropyLoss
        applies log-softmax itself, so it should be fed raw logits rather than
        this output.
        """
        x = self.conv1(x)
        x = F.relu(x)

        # Keep the batch dimension, merge channels and spatial dimensions.
        x = x.flatten(start_dim=1)

        x = self.linear1(x)
        x = F.relu(x)

        logits = self.output_linear(x)
        out = F.softmax(logits, dim=1)
        return out

In [85]:
class MNIST_CNN_2(nn.Module):
    """Four-convolution image classifier that returns raw logits.

    Pipeline: 4 x (Conv2d -> ReLU) -> flatten -> Linear -> ReLU -> Linear.

    The flattened feature size is derived from ``height`` only, so the input is
    assumed to be square (height == width) with a square kernel.

    Args:
        n_classes: Number of output classes.
        height: Height (and width) of the input images.
        in_channels: Number of channels of the input images.
        out_channels: Number of filters of every convolution layer.
        kernel_size: Kernel size of every convolution layer.
        n_hidden: Width of the hidden fully connected layer.
        padding: Padding of every convolution layer.
        stride: Stride of every convolution layer.

    Raises:
        ValueError: If the spatial size shrinks to zero or below after the
            convolution stack.
    """

    N_CONV_LAYERS = 4

    def __init__(self, n_classes, height, in_channels, out_channels, kernel_size, n_hidden, padding, stride):
        super(MNIST_CNN_2, self).__init__()

        conv_layers = []
        channels = in_channels
        output_dim_conv = height
        for _ in range(self.N_CONV_LAYERS):
            # stride and padding are passed to the layer so that the real output
            # size agrees with the size computed for the first linear layer.
            conv_layers.append(nn.Conv2d(in_channels=channels, out_channels=out_channels,
                                         kernel_size=kernel_size, stride=stride, padding=padding))
            conv_layers.append(nn.ReLU())
            channels = out_channels
            # Standard convolution output-size formula, applied layer by layer.
            output_dim_conv = (output_dim_conv + 2 * padding - kernel_size) // stride + 1
        self.cnn = nn.Sequential(*conv_layers)

        if output_dim_conv <= 0:
            raise ValueError(
                f'input of size {height} is too small for {self.N_CONV_LAYERS} convolutions '
                f'with kernel_size={kernel_size}, padding={padding}, stride={stride}'
            )

        # 28x28 input with kernel 3, padding 0, stride 1 gives 26 -> 24 -> 22 -> 20.
        self.flattened_size = out_channels * output_dim_conv * output_dim_conv

        self.mlp = nn.Sequential(
            nn.Linear(self.flattened_size, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_classes)
        )

    def forward(self, x):
        """Return unnormalised class scores (logits) of shape (batch, n_classes)."""
        x = self.cnn(x)
        # Keep the batch dimension, merge channels and spatial dimensions.
        x = x.flatten(start_dim=1)
        x = self.mlp(x)
        return x


In [86]:
# Select the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# NOTE(review): this line unconditionally replaces the detected device with the
# plain string "cpu", so the GPU is never used. Presumably a temporary override;
# confirm and remove it if GPU training is intended.
device="cpu"
device

'cpu'

In [87]:
# Image geometry comes from the first training sample; everything else is a
# tunable hyper-parameter of the network.
IN_CHANNELS, HEIGHT, WIDTH = first_image.shape
OUT_CHANNELS = 3
print(IN_CHANNELS, HEIGHT, WIDTH)
print(BATCH_SIZE)
KERNEL_SIZE, STRIDE, PADDING = 3, 1, 0
N_CLASSES, N_HIDDEN = 10, 16

# Build the four-convolution classifier and place it on the selected device.
CNN_model = MNIST_CNN_2(
    n_classes=N_CLASSES,
    height=HEIGHT,
    in_channels=IN_CHANNELS,
    out_channels=OUT_CHANNELS,
    kernel_size=KERNEL_SIZE,
    padding=PADDING,
    stride=STRIDE,
    n_hidden=N_HIDDEN,
).to(device)

1 28 28
32


In [88]:
# Adam over all model parameters, and a cross-entropy loss applied to the
# model's raw logits.
optimizer = torch.optim.Adam(params=CNN_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [89]:
EPOCHS=2
for epoch in range(EPOCHS):
    # Make sure layers with train/eval-specific behaviour are in training mode.
    CNN_model.train()

    train_running_loss = 0.0
    n_correct = 0
    n_samples = 0
    n_batches = 0

    ## training step
    for images, labels in trainloader:

        images = images.to(device)
        labels = labels.to(device)

        ## forward + loss + backprop
        optimizer.zero_grad()
        logits = CNN_model(images)
        loss = criterion(logits, labels)
        loss.backward()

        ## update model params
        optimizer.step()

        train_running_loss += loss.item()
        # Count correct predictions so that a smaller final batch is weighted
        # properly (a mean of per-batch means would over-weight it).
        n_correct += (logits.argmax(dim=1) == labels).sum().item()
        n_samples += labels.size(0)
        n_batches += 1

    # Average over the real number of batches/samples. Dividing by the last
    # enumerate index under-counts by one and fails when there is one batch.
    train_loss = train_running_loss / max(n_batches, 1)
    train_acc = n_correct / max(n_samples, 1)
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \
          %(epoch, train_loss, train_acc))

KeyboardInterrupt: 

In [None]:
#conda install anaconda::networkx
#conda install conda-forge::tensorboardx

In [187]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils

import time
from datetime import datetime

import networkx as nx
import numpy as np
import torch
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

import torch_geometric.transforms as T

from tensorboardX import SummaryWriter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt


In [188]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch_geometric.nn as pyg_nn
import torch_geometric.utils as pyg_utils

import time
from datetime import datetime

import networkx as nx
import numpy as np
import torch
import torch.optim as optim

from torch_geometric.datasets import TUDataset
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

import torch_geometric.transforms as T

from tensorboardX import SummaryWriter
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
