In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
#Setup Torch CUDA torch device
device = torch.device('cuda:0')

## Dataset

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),  # Transform to tensor
    transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
    #transforms.Normalize((0.5,), (0.5,))  # Min-max scaling to [-1, 1]
])

data_dir = os.path.join('fruits')
print('Data stored in %s' % data_dir)

trainset = ImageFolder("./fruits/Training",transform=transform)
testset = ImageFolder("./fruits/Test",transform=transform)

Data stored in fruits


In [4]:
def generate_labels():
    trainset_labels = []
    testset_labels = []
    for i in trainset.imgs:
        trainset_labels.append(i[1])
    
    for j in testset.imgs:
        testset_labels.append(j[1])
    
    return (trainset_labels, testset_labels)

In [5]:
# Total classes
classes_idx_dict = trainset.class_to_idx # {'Class Name': idx }
classes = len(trainset.classes)
len_trainset = len(trainset)
len_testset = len(testset)

train_labels, test_labels = generate_labels()
print(f'Trainset has total of {classes} classes')

Trainset has total of 103 classes


In [6]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=60, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=60, shuffle=False)

image_shape = iter(trainloader).next()[0].shape
_, CHANNELS, HEIGHT, WIDTH = iter(trainloader).next()[0].shape
print(f'Image: batch size={image_shape[0]}, channels={image_shape[1]}, image height={image_shape[2]}, image width={image_shape[3]}')

Image: batch size=60, channels=3, image height=100, image width=100


In [7]:
class CCNet(nn.Module):
    def __init__(self):
        """
        Args:
          n_channels (int): Number of channels in the first convolutional layer. The number of channels in the
                             following layers are the multipliers of n_channels.
        """
        super(CCNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5), 
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=3)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(128, 1024),
            nn.ReLU(),
            nn.Dropout2d(p=0.8),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout2d(p=0.8),
            nn.Linear(256, classes),
        )


    def forward(self, x, verbose=False):
        """You can (optionally) print the shapes of the intermediate variables with verbose=True."""
        x = self.conv1(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x

In [10]:
# Let's test the shapes of the tensors
net = CCNet()
net.to(device)

with torch.no_grad():
    dataiter = iter(trainloader)
    images, labels = dataiter.next()
    images = images.to(device)
    print('Shape of the input tensor:', images.shape)

    y = net(images, verbose=True)
    print(y.shape)
    assert y.shape == torch.Size([60, classes]), f'Bad shape of y: y.shape={y.shape}'

print('The shapes seem to be ok.')

Shape of the input tensor: torch.Size([60, 3, 100, 100])
torch.Size([60, 103])
The shapes seem to be ok.


In [134]:
def compute_accuracy(net, testloader):
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [135]:
initial_learning_rate = 0.001
final_learning_rate = 0.00001
learning_rate = initial_learning_rate

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

In [136]:
n_epochs=75
net.train()
for epoch in range(n_epochs):
    running_loss = 0.0
    print_every = 200  # mini-batches
    for i, (inputs, labels) in enumerate(trainloader, 0):
        # Transfer to GPU
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i % print_every) == (print_every-1):
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss/print_every))
            running_loss = 0.0

    # Print accuracy after every epoch
    accuracy = compute_accuracy(net, testloader)
    print(f'Accuracy of the network on the {len_testset} test images: {100 * accuracy}%')

print('Finished Training')

[1,   200] loss: 4.283
[1,   400] loss: 3.528
[1,   600] loss: 2.849
[1,   800] loss: 2.326
Accuracy of the network on the 17845 test images: 53.28103110114878%
[2,   200] loss: 0.752
[2,   400] loss: 0.273
[2,   600] loss: 0.151
[2,   800] loss: 0.068
Accuracy of the network on the 17845 test images: 91.3981507425049%
[3,   200] loss: 0.075
[3,   400] loss: 0.082
[3,   600] loss: 0.047
[3,   800] loss: 0.027
Accuracy of the network on the 17845 test images: 90.92743065284394%
[4,   200] loss: 0.077
[4,   400] loss: 0.065
[4,   600] loss: 0.035
[4,   800] loss: 0.019
Accuracy of the network on the 17845 test images: 95.50574390585598%
[5,   200] loss: 0.057
[5,   400] loss: 0.062
[5,   600] loss: 0.050
[5,   800] loss: 0.052
Accuracy of the network on the 17845 test images: 93.65648641075933%
[6,   200] loss: 0.006
[6,   400] loss: 0.003
[6,   600] loss: 0.002
[6,   800] loss: 0.002
Accuracy of the network on the 17845 test images: 95.78033062482488%
[7,   200] loss: 0.001
[7,   400] l

[51,   200] loss: 0.125
[51,   400] loss: 0.055
[51,   600] loss: 0.073
[51,   800] loss: 0.011
Accuracy of the network on the 17845 test images: 96.14457831325302%
[52,   200] loss: 0.000
[52,   400] loss: 0.000
[52,   600] loss: 0.000
[52,   800] loss: 0.000
Accuracy of the network on the 17845 test images: 96.81143177360605%
[53,   200] loss: 0.000
[53,   400] loss: 0.000
[53,   600] loss: 0.000
[53,   800] loss: 0.000
Accuracy of the network on the 17845 test images: 96.89548893247408%
[54,   200] loss: 0.000
[54,   400] loss: 0.000
[54,   600] loss: 0.000
[54,   800] loss: 0.000
Accuracy of the network on the 17845 test images: 96.87307369010928%
[55,   200] loss: 0.000
[55,   400] loss: 0.070
[55,   600] loss: 0.188
[55,   800] loss: 0.045
Accuracy of the network on the 17845 test images: 93.0456710563183%
[56,   200] loss: 0.031
[56,   400] loss: 0.006
[56,   600] loss: 0.008
[56,   800] loss: 0.009
Accuracy of the network on the 17845 test images: 93.44354160829363%
[57,   200]

In [137]:
accuracy = compute_accuracy(net, testloader)
print('Accuracy of the network on the test images: %.3f' % accuracy)

Accuracy of the network on the test images: 0.963


In [139]:
filename = 'cnn.pth'

try:
    do_save = input('Do you want to save the model (type yes to confirm)? ').lower()
    if do_save == 'yes':
        torch.save(net.state_dict(), filename)
        print('Model saved to %s' % filename)
    else:
        print('Model not saved')
except:
    raise Exception('The notebook should be run or validated with skip_training=True.')


Do you want to save the model (type yes to confirm)? yes
Model saved to cnn.pth


In [None]:
net = CCNet()
net.load_state_dict(torch.load(filename, map_location=lambda storage, loc: storage))
net.to(device)
print(f'Model loaded from {filename}')