In [1]:
import numpy as np 
import matplotlib.pyplot as plt 

import torch 
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

In [2]:
# Image processing with PyTorch notebook

# Per-channel (R, G, B) mean and standard deviation handed to transforms.Normalize
# in the transform pipelines below.
# Presumably computed from the CIFAR-10 training images - TODO confirm.
mean = [0.49159095, 0.48234567, 0.4467194 ]
std = [0.23834856, 0.23494388, 0.25271425]

In [3]:
# Training-time preprocessing: resize to 32, random horizontal flip (augmentation),
# conversion to a tensor, then per-channel normalisation with `mean` / `std`.
train_transform = transforms.Compose(
    [
        transforms.Resize(size=32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [4]:
# Test-time preprocessing: same as the training pipeline but without the random
# flip, so evaluation is deterministic.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=32),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)


In [None]:
# CIFAR-10 training split (train=True). download=True lets torchvision fetch the
# archive into `root`; every sample is passed through train_transform.
trainset = torchvision.datasets.CIFAR10(root='datasets/cifar10/train',
                                       train=True,
                                       download=True,
                                       transform=train_transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to datasets/cifar10/train/cifar-10-python.tar.gz


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=170498071.0), HTML(value='')))

In [None]:
# CIFAR-10 test split (train=False), preprocessed with test_transform.
# NOTE(review): `root` is the same 'datasets/cifar10/train' directory used for the
# training set - presumably intentional so the downloaded archive is reused; confirm.
testset = torchvision.datasets.CIFAR10(root='datasets/cifar10/train',
                                       train=False,
                                       download=True,
                                       transform=test_transform)

In [None]:
# Mini-batches of 16 training samples, reshuffled (shuffle=True) and loaded with
# 2 worker processes.
trainloader = torch.utils.data.DataLoader(trainset,
                                         batch_size=16,
                                         shuffle=True,
                                         num_workers=2)

In [None]:
# Mini-batches of 16 test samples in fixed order (shuffle=False), loaded with
# 2 worker processes.
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=16,
                                         shuffle=False,
                                         num_workers=2)

In [None]:
# Class names as exposed by the dataset object; a label value is an index
# into this list (see get_text_label).
class_names = trainset.classes

print(class_names)

In [None]:
# Pull a single mini-batch from the training loader to inspect it.
# Use the built-in next(): iterators follow the __next__ protocol, and DataLoader
# iterators in current PyTorch releases have no .next() method.
image_batch, labels = next(iter(trainloader))

In [None]:
# The batch holds 16 images (batch_size=16); each image is multi-channel (RGB)
# with 32px height and 32px width.
# The 16 labels correspond one-to-one to the 16 images.

image_batch.shape, labels.shape

In [None]:
# Labels of the sampled batch; each value is an index into trainset.classes.
labels

In [None]:
def get_text_label(index):
    """Return the class name stored at position `index` of `trainset.classes`."""
    label_names = trainset.classes
    return label_names[index]

In [None]:
# Quick check of the helper: class name for label index 0.
get_text_label(0)

In [None]:
# Number of input channels (RGB image => 3 channels).
in_size = 3 # 3 channels

# --- Convolutional part: 2 convolutional layers ---

# The first convolutional layer generates 16 feature maps.
hid1_size = 16 

# The second convolutional layer generates 32 feature maps.
hid2_size = 32


# --- Fully connected part: 2 linear layers ---

# Spatial size of the feature maps for a 32 x 32 input:
# Layer 1: 32 x 32 => 28 x 28 => pooling => 14 x 14
# Layer 2: 14 x 14 => 10 x 10 => pooling => 5 x 5
# Flattened input of the first linear layer: 32 x 5 x 5 = 800

# Output size of the first linear layer.
out1_size = 400

# Output size of the second linear layer: one value per class (the 10 classes
# in trainset.classes).
out2_size = 10

# Side length of the square convolution kernel (5 x 5).
k_conv_size = 5

In [None]:
class ConvNet(nn.Module):
    """Small CNN classifier: two convolutional blocks and a two-layer linear head.

    Each convolutional block is Conv2d -> BatchNorm2d -> Sigmoid -> MaxPool2d(2).
    The flattened feature maps then go through Linear -> Sigmoid -> Dropout ->
    Linear, and ``forward`` returns log-probabilities (``log_softmax``), which is
    the input ``nn.NLLLoss`` expects.

    Every size argument defaults to ``None``, meaning "use the notebook-level
    constant" (``in_size``, ``hid1_size``, ``hid2_size``, ``out1_size``,
    ``out2_size``, ``k_conv_size``), so ``ConvNet()`` builds the same network
    as before.

    Args:
        in_channels: number of channels of the input image.
        conv1_channels: feature maps produced by the first convolutional block.
        conv2_channels: feature maps produced by the second convolutional block.
        fc_hidden: output size of the first linear layer.
        num_classes: output size of the second linear layer (number of classes).
        kernel_size: side length of the square convolution kernel.
        image_size: height/width of the square input image (32 matches the
            ``Resize(32)`` transforms of this notebook).
        dropout: probability used by the dropout layer.

    Raises:
        ValueError: if ``image_size`` is too small for two conv + pool stages
            with the given ``kernel_size``.
    """

    def __init__(self, in_channels=None, conv1_channels=None, conv2_channels=None,
                 fc_hidden=None, num_classes=None, kernel_size=None,
                 image_size=32, dropout=0.5):
        super().__init__()

        # Fall back to the notebook-level hyperparameters for anything not given.
        in_channels = in_size if in_channels is None else in_channels
        conv1_channels = hid1_size if conv1_channels is None else conv1_channels
        conv2_channels = hid2_size if conv2_channels is None else conv2_channels
        fc_hidden = out1_size if fc_hidden is None else fc_hidden
        num_classes = out2_size if num_classes is None else num_classes
        kernel_size = k_conv_size if kernel_size is None else kernel_size

        # Spatial side length after both conv + pool stages. It must be derived
        # from the image and kernel size: it equals the kernel size (5) only by
        # coincidence for a 32 x 32 input with a 5 x 5 kernel.
        feature_side = self._conv_pool_side(
            self._conv_pool_side(image_size, kernel_size), kernel_size)
        if feature_side < 1:
            raise ValueError(
                'image_size={} is too small for two conv/pool stages with '
                'kernel_size={}'.format(image_size, kernel_size))

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, conv1_channels, kernel_size),   # 32 x 32 => 28 x 28
            nn.BatchNorm2d(conv1_channels),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2))                           # => 14 x 14

        self.layer2 = nn.Sequential(
            nn.Conv2d(conv1_channels, conv2_channels, kernel_size),  # 14 x 14 => 10 x 10
            nn.BatchNorm2d(conv2_channels),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2))                             # => 5 x 5

        self.layer3 = nn.Sequential(
            # flattened feature maps: channels x side x side (32 x 5 x 5 = 800 by default)
            nn.Linear(conv2_channels * feature_side * feature_side, fc_hidden),
            nn.Sigmoid(),
            # Randomly zero activations during training so that other neurons have
            # to learn significant features; mitigates overfitting. Dropout does
            # not change the tensor size.
            nn.Dropout(dropout),
            nn.Linear(fc_hidden, num_classes))

    @staticmethod
    def _conv_pool_side(side, kernel):
        """Side length after an unpadded stride-1 convolution and 2 x 2 max pooling."""
        return (side - kernel + 1) // 2

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)

        # flatten everything except the batch dimension
        out = out.reshape(out.size(0), -1)

        out = self.layer3(out)

        return F.log_softmax(out, dim=-1)
        

In [None]:
# Build the network from the hyperparameters defined in the cells above.
model = ConvNet()

In [None]:
# Select the first CUDA GPU when one is available, otherwise the CPU.
# NOTE(review): none of the cells visible here move the model or the batches to
# `device`, so everything appears to run on the CPU - confirm whether that is intended.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(device)

In [None]:
# Step size used by the optimizer.
learning_rate = 0.001 

# Negative log-likelihood loss: matches the log_softmax output of ConvNet.forward.
criterion = nn.NLLLoss()

# Adam updates all trainable parameters of the model.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Number of mini-batches per epoch (length of the training loader).
total_step = len(trainloader)
# Number of full passes over the training set.
num_epochs = 20 
# Loss values recorded by the training loop, plotted afterwards.
loss_values = list()

In [None]:
# Put the model in training mode explicitly. A later cell calls model.eval(), so
# re-running this cell afterwards would otherwise train with dropout disabled and
# BatchNorm using its running statistics.
model.train()

for epoch in range(num_epochs):

    # each iteration processes one mini-batch of images and their labels
    for i, (images, labels) in enumerate(trainloader):

        # forward pass: log-probabilities and NLL loss for the batch
        outputs = model(images)
        loss = criterion(outputs, labels)

        # backward pass: clear stale gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # report and record the loss of every 2000th batch of the epoch
        if (i + 1) % 2000 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss {:.4f}'
                .format(epoch + 1, num_epochs, i+1, total_step, loss.item()))

            loss_values.append(loss.item())

print('Finished Training')

In [None]:
# One x position per recorded loss value. Deriving the axis from loss_values
# (rather than from num_epochs) keeps x and y the same length however many
# losses the training loop logged, so plt.plot cannot fail on a shape mismatch.
x = range(1, len(loss_values) + 1)

plt.figure(figsize=(12, 10))

plt.plot(x, loss_values)
plt.xlabel('Step')
plt.ylabel('Loss')

In [None]:
# Switch the model to evaluation mode (this changes how the Dropout and
# BatchNorm2d layers behave) before measuring accuracy.
model.eval()

In [None]:
# Make sure the model is in evaluation mode even if this cell is run on its own.
model.eval()

correct = 0   # number of correctly classified test images
total = 0     # number of test images seen

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in testloader:

        outputs = model(images)

        # index of the largest log-probability = predicted class of each image
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a hard-coded count.
print('Accuracy of the model on {} test images: {}%'
         .format(total, 100 * correct / total))

In [None]:
# Item 23 of the test set: the transformed image tensor and its label index.
sample_images, sample_labels = testset[23]

In [None]:
# Presumably (3, 32, 32), i.e. channels first - the layout reshaped to
# (-1, 3, 32, 32) further below.
sample_images.shape

In [None]:
# Move the channel axis last: (C, H, W) -> (H, W, C), the layout used for
# displaying the image with plt.imshow.
sample_image = np.transpose(sample_images, (1, 2, 0))
sample_image.shape

In [None]:
# Value range of the normalised image: m is the minimum, M the maximum.
m, M = sample_image.min(), sample_image.max()

In [None]:
# Undo transforms.Normalize(mean, std) so the pixel values return to [0, 1] for
# display: x * std + mean per channel (channels are the last axis here).
# The previous scaling (1 / (|min| * max)) * x + 0.5 only lands in [0, 1] when
# |min| == max == 2. The value is recomputed from `sample_images`, so re-running
# this cell always gives the same image.
sample_image = np.clip(
    sample_images.numpy().transpose(1, 2, 0) * np.array(std) + np.array(mean),
    0.0, 1.0)

In [None]:
# Display the sample image in a 6 x 6 inch figure.
plt.figure(figsize=(6,6))
plt.imshow(sample_image)

In [None]:
# Fetch the same test item again (transformed image tensor and label index).
test_img, test_label = testset[23]

# Add a leading batch dimension: the model is fed a batch of shape (N, 3, 32, 32),
# here with N = 1.
test_img = test_img.reshape(-1, 3, 32, 32)

In [None]:
# Inference only: disable autograd so no computation graph is built for the
# forward pass.
with torch.no_grad():
    out_predicted = model(test_img)

# Index of the largest log-probability along the class dimension; for the
# single-image batch this is a one-element tensor, so predicted.item() works.
predicted = out_predicted.argmax(dim=1)

In [None]:
# Ground-truth class name of the sample.
print('Actual label: ', get_text_label(test_label))

In [None]:
# Class name predicted by the model (predicted holds a single class index).
print('Predicted label: ', get_text_label(predicted.item()))