Build CNN:
- import libraries
- Load and normalize data
- OHEC data
- Split data into training and test
- Build CNN class with layers
- Evaluate model
- Tune hyper params
- Improve model and see what can be done better


The basic steps to build an image classification model using a neural network are:

- Flatten the input image dimensions to 1D (width pixels x height pixels)
- Normalize the image pixel values (divide by 255)
- One-Hot Encode the categorical column
- Build a model architecture (Sequential) with Dense layers
- Train the model and make predictions

In [None]:
import pickle as pkl
import numpy as np
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
class ConfigureDataset(Dataset):
    def __init__(self, X_Train, Y_Train, transform=None):
        self.X_Train = X_Train
        self.Y_Train = Y_Train
        self.transform = transform

    def __len__(self):
        return len(self.X_Train)

    def __getitem__(self, idx):
        x = self.X_Train[idx]
        y = self.Y_Train[idx]

        if self.transform:
            x = self.transform(x)

        return x, y

In [None]:
# converts labels from an array of 36 to a value between 0 nd 259
def convert_label(label):
    numl = label[:10]
    letter = label[10:]
    return 26 * np.where(numl == 1)[0][0] + np.where(letter == 1)[0][0]


In [None]:
file = open('../data/images_l.pkl', 'rb')
X_data = pkl.load(file)
file.close()

file = open('../data/labels_l.pkl', 'rb')
Y_data = pkl.load(file)
file.close()

Y_data_2 = []
print(Y_data[:10])
for idx in range(len(Y_data)):
    Y_data_2.append(convert_label(Y_data[idx]))
print(Y_data_2[:10])

X_train = X_data[:25000]
Y_train = Y_data_2[:25000]

X_test = X_data[25000:]
Y_test = Y_data_2[25000:]

In [None]:
print(type(X_train[0][0][0]))

In [None]:
train_set = ConfigureDataset(X_train, Y_train, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(12.2904, 48.2189)]))
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

In [205]:
test_set = ConfigureDataset(X_test, Y_test, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize(12.2904, 48.2189)]))
test_loader = DataLoader(test_set, batch_size=64, shuffle=True)

In [None]:
def get_mean_and_std(dataloader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _ in dataloader:
        # Mean over batch, height and width, but not over the channels
        channels_sum += torch.mean(data, dim=[0,2,3])
        channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
        num_batches += 1

    mean = channels_sum / num_batches

    # std = sqrt(E[X^2] - (E[X])^2)
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [None]:
print(next(iter(train_loader))[0][0].mean())

In [None]:
print(get_mean_and_std(train_loader))
print(get_mean_and_std(test_loader))

In [None]:
import matplotlib.pyplot as plt
import torchvision
# functions to show an image
def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(train_loader)
images, labels = next(dataiter)
# print(images)
# print(labels)
# show images
imshow(torchvision.utils.make_grid(images[:4]))
# print labels
print(' '.join('%5s' % convert_label(labels[j]) for j in range(8)))

In [201]:
# Define a convolutional neural network

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv_layer_1 = nn.Sequential(
            nn.Conv2d(1, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )

        self.conv_layer_2 = nn.Sequential(
            nn.Conv2d(64, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )

        self.conv_layer_3 = nn.Sequential(
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2,2)
        )

        self.full_layer_1 = nn.Sequential(
            nn.Linear(128 * 5 * 5, 1024),
            nn.ReLU()
        )

        self.output_layer = nn.Sequential(
            nn.Linear(1024, 260),
        )

    def forward(self, x):
        x = self.conv_layer_1(x)
        x = self.conv_layer_2(x)
        x = self.conv_layer_3(x)
        x = x.view(-1, 128 * 5 * 5)
        x = self.full_layer_1(x)
        x = self.output_layer(x)

        return x

net = Net().to(device)

In [202]:
# Define a loss function and optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [None]:
# test one set of images
# images and lables from image print cell
# zero the parameter gradients
optimizer.zero_grad()

# forward + backward + optimize
outputs = net(images.float())
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()

_, predicted = torch.max(outputs.data, 1)


In [None]:
print(loss)
print(labels)
print(predicted)
print((predicted == labels).sum().item())

In [204]:
# Train the network

for epoch in range(10):  # loop over the dataset multiple times
    net = net.float()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 50 == 49:    # print every 50 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0

print('Finished Training')

[1,    50] loss: 5.146
[1,   100] loss: 4.955
[1,   150] loss: 4.772
[1,   200] loss: 4.658
[1,   250] loss: 4.581
[1,   300] loss: 4.507
[1,   350] loss: 4.454
[2,    50] loss: 4.294
[2,   100] loss: 4.257
[2,   150] loss: 4.225
[2,   200] loss: 4.166
[2,   250] loss: 4.134
[2,   300] loss: 4.085
[2,   350] loss: 4.058
[3,    50] loss: 3.956
[3,   100] loss: 3.908
[3,   150] loss: 3.919
[3,   200] loss: 3.879
[3,   250] loss: 3.834
[3,   300] loss: 3.815
[3,   350] loss: 3.814
[4,    50] loss: 3.717
[4,   100] loss: 3.681
[4,   150] loss: 3.677
[4,   200] loss: 3.647
[4,   250] loss: 3.601
[4,   300] loss: 3.581
[4,   350] loss: 3.552
[5,    50] loss: 3.453
[5,   100] loss: 3.423
[5,   150] loss: 3.392
[5,   200] loss: 3.341
[5,   250] loss: 3.331
[5,   300] loss: 3.324
[5,   350] loss: 3.328
[6,    50] loss: 3.216
[6,   100] loss: 3.170
[6,   150] loss: 3.159
[6,   200] loss: 3.146
[6,   250] loss: 3.130
[6,   300] loss: 3.102
[6,   350] loss: 3.085
[7,    50] loss: 2.978
[7,   100] 

In [None]:
print(list(net.parameters()))


In [None]:
testiter = iter(test_loader)
images, labels = next(testiter)
outputs = net(images.float())
print(outputs.data)
_, predicted = torch.max(outputs.data, 1)
print(predicted)
print(labels)

In [206]:
# Let's see how the network performs on the whole test dataset

correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = net(images.float())
        # _, predicted = torch.topk(outputs, 2)
        _, predicted = torch.max(outputs.data, 1)

        total += len(labels)
        correct += (predicted == labels).sum().item()


In [207]:
print(outputs.shape)
print(labels.shape)
print(total)
print(correct)

print(torch.max(outputs.data, 1))
print(labels)

print('Accuracy of the network on the 5000 test images: %d %%' % (
    100 * correct / total))

torch.Size([8, 260])
torch.Size([8])
5000
1192
torch.return_types.max(
values=tensor([ 6.1377,  7.0807, 10.1748,  7.5296, 12.3916,  7.0211,  7.5582,  7.1933]),
indices=tensor([191, 183, 243, 204,  38, 162, 135, 233]))
tensor([ 72, 113,  64,   5, 118,  14,  78, 150])
Accuracy of the network on the 5000 test images: 23 %
