Build CNN:
- import libraries
- Load and normalize data
- One-hot encode the data
- Split data into training and test
- Build CNN class with layers
- Evaluate model
- Tune hyperparameters
- Improve model and see what can be done better


The basic steps to build an image classification model using a neural network are:

- Flatten the input image dimensions to 1D (width pixels x height pixels)
- Normalize the image pixel values (divide by 255)
- One-Hot Encode the categorical column
- Build a model architecture (Sequential) with Dense layers
- Train the model and make predictions

In [2]:
import pickle as pkl
import numpy as np
import argparse
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from torch.utils.data import Dataset
from torch.utils.data import DataLoader


In [3]:
class ConfigureDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        X_Train: indexable container of samples; its length is the dataset size.
        Y_Train: indexable container of labels, indexed like ``X_Train``.
        transform: optional callable applied to the sample (never to the
            label) every time an item is fetched.
    """

    def __init__(self, X_Train, Y_Train, transform=None):
        self.X_Train, self.Y_Train = X_Train, Y_Train
        self.transform = transform

    def __len__(self):
        # The sample container alone defines how many items exist.
        return len(self.X_Train)

    def __getitem__(self, idx):
        sample, target = self.X_Train[idx], self.Y_Train[idx]

        # Without a (truthy) transform the raw sample is handed back as-is.
        if not self.transform:
            return sample, target

        return self.transform(sample), target

In [4]:
def get_mean_and_std(dataloader):
    """Compute the per-channel mean and standard deviation over a whole loader.

    Args:
        dataloader: iterable yielding ``(data, target)`` pairs. ``data`` must
            be a tensor with at least four dimensions; dimensions 0, 2 and 3
            are reduced and dimension 1 (the channel axis) is kept.

    Returns:
        ``(mean, std)`` -- two 1-D tensors with one entry per channel.

    Raises:
        ValueError: if the loader yields no values at all.
    """
    channels_sum, channels_squared_sum, num_values = 0, 0, 0
    for data, _ in dataloader:
        # Accumulate raw sums (not per-batch means) so that a shorter final
        # batch is weighted by the number of values it really contains.
        channels_sum += torch.sum(data, dim=[0, 2, 3])
        channels_squared_sum += torch.sum(data ** 2, dim=[0, 2, 3])
        # Number of values that went into each channel's sum for this batch.
        num_values += data.shape[0] * data.shape[2] * data.shape[3]

    if num_values == 0:
        raise ValueError("get_mean_and_std() received an empty dataloader")

    mean = channels_sum / num_values

    # var = E[X^2] - (E[X])^2; rounding can push it a hair below zero for
    # (near-)constant channels, which would turn the square root into NaN.
    variance = channels_squared_sum / num_values - mean ** 2
    std = torch.clamp(variance, min=0) ** 0.5

    return mean, std



In [15]:
# Converts a label from an array of 36 entries to a single value between 0 and 259.
def encode_label(label):
    """Collapse a 36-entry label array into one integer class index.

    The first ten entries form the number part and the remaining entries the
    letter part. In each part the position of the first entry equal to 1 is
    looked up, and the two positions are combined as
    ``26 * number_position + letter_position``.

    Raises:
        IndexError: if either part contains no entry equal to 1.
    """
    number_hits = np.nonzero(label[:10] == 1)[0]
    letter_hits = np.nonzero(label[10:] == 1)[0]
    return 26 * number_hits[0] + letter_hits[0]

def decode_label(label):
    """Expand an integer class index back into a 36-entry float64 array.

    ``label // 26`` selects the slot set to 1 among the first ten entries and
    ``label % 26`` selects the slot set to 1 among the entries from index 10
    onwards; every other entry stays 0.
    """
    decoded = np.zeros(36, dtype=np.float64)
    decoded[label // 26] = 1.
    decoded[10 + label % 26] = 1.
    return decoded




In [6]:
# Load the pickled images and labels. The `with` blocks guarantee the files
# are closed even if unpickling fails.
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that come from a trusted source.
with open('../data/images_l.pkl', 'rb') as file:
    X_data = pkl.load(file)

with open('../data/labels_l.pkl', 'rb') as file:
    Y_data = pkl.load(file)

# Turn every 36-entry label array into a single integer class index.
print(Y_data[:10])
Y_data_2 = [encode_label(label) for label in Y_data]
print(Y_data_2[:10])

# The first TRAIN_SIZE samples are used for training, the rest for testing.
TRAIN_SIZE = 25000

X_train = X_data[:TRAIN_SIZE]
Y_train = Y_data_2[:TRAIN_SIZE]

X_test = X_data[TRAIN_SIZE:]
Y_test = Y_data_2[TRAIN_SIZE:]

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0.

In [17]:
# Round-trip test for the decoder: decoding an encoded label must reproduce
# the original 36-entry label array.
for i in range(10):
    decoded = decode_label(Y_data_2[i])
    print(decoded)
    print(Y_data[i])
    # Actually compare the two instead of relying on eyeballing the output.
    assert np.array_equal(decoded, Y_data[i]), \
        'decode_label mismatch at index %d' % i

9
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
8
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
4
[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 1. 0. 0. 0

In [None]:
# Training pipeline without a normalization step: tensor conversion followed
# by a random horizontal flip.
# (Disabled step kept for reference: transforms.Normalize(8.4319, 33.8480))
train_set = ConfigureDataset(
    X_train,
    Y_train,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(p=0.5),
    ]),
)
train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)


In [None]:
# Compute the dataset statistics and unwrap each one-element tensor into a
# plain Python number (``.item()`` only works for a single channel).
mean, std = (stat.item() for stat in get_mean_and_std(train_loader))

In [None]:
# Display the computed mean and standard deviation.
print(mean, std)

In [None]:
# Final data pipelines. Both splits are converted to tensors and normalized
# with `mean` / `std`; only the training split gets the random flip.
# (Earlier hard-coded variant: transforms.Normalize(8.4319, 33.8480))
train_set = ConfigureDataset(
    X_train,
    Y_train,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
        transforms.RandomHorizontalFlip(p=0.5),
    ]),
)
train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)

test_set = ConfigureDataset(
    X_test,
    Y_test,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=True)

In [None]:
# Sanity check: mean value of the first image of one training batch
# ([0] selects the inputs of the batch, the second [0] the first sample).
print(next(iter(train_loader))[0][0].mean())

In [None]:
import matplotlib.pyplot as plt
import torchvision
# Helper for displaying an image tensor
def imshow(img):
    """Display a tensor image with matplotlib.

    The tensor is converted to a NumPy array and its axes are reordered from
    (0, 1, 2) to (1, 2, 0) before being handed to ``plt.imshow``.
    """
    reordered = img.numpy().transpose((1, 2, 0))
    plt.imshow(reordered)
    plt.show()

# Pull one batch from the (shuffled) training loader.
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Tile the first four images of the batch into a grid and display it.
imshow(
    torchvision.utils.make_grid(images[:4])
)

# Print the four matching labels, each right-aligned in a 5-character field.
print(' '.join(str(labels[j]).rjust(5) for j in range(4)))

In [None]:
# Define a convolutional neural network

class Net(nn.Module):
    """Convolutional classifier: three conv blocks plus two linear layers.

    Every conv block is Conv2d -> BatchNorm2d -> ReLU -> 2x2 max-pool. The
    classifier head expects the conv stack to end in a 256 x 5 x 5 feature
    map, which is what a 56 x 56 input produces
    (56 -> 52 -> 26 -> 24 -> 12 -> 10 -> 5).

    Args:
        num_classes: number of output logits (default 260).
        in_channels: number of channels of the input images (default 1).
    """

    def __init__(self, num_classes=260, in_channels=1):
        super().__init__()

        self.conv_layer_1 = nn.Sequential(
            nn.Conv2d(in_channels, 128, 5),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        self.conv_layer_2 = nn.Sequential(
            nn.Conv2d(128, 256, 3),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        self.conv_layer_3 = nn.Sequential(
            nn.Conv2d(256, 256, 3),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        self.full_layer_1 = nn.Sequential(
            nn.Linear(256 * 5 * 5, 1024),
            nn.ReLU()
        )

        # Kept as a Sequential so parameter names (e.g. in a saved
        # state_dict) stay the same.
        self.output_layer = nn.Sequential(
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits, one row per input sample."""
        x = self.conv_layer_1(x)
        x = self.conv_layer_2(x)
        x = self.conv_layer_3(x)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 256 * 5 * 5) this can never silently regroup samples when
        # the feature map has an unexpected size -- the linear layer raises
        # a shape error instead.
        x = torch.flatten(x, 1)
        x = self.full_layer_1(x)
        x = self.output_layer(x)

        return x


In [None]:
# Smoke test on one batch of images.
# `images` and `labels` come from the image-preview cell.
# Only a forward pass and the loss are computed on a fresh, untrained
# network -- no backward pass and no optimizer step.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
net = Net().to(device)
criterion = nn.CrossEntropyLoss()

# The batch must live on the same device as the model, otherwise the forward
# pass fails whenever CUDA is available.
batch_inputs = images.float().to(device)
batch_targets = labels.to(device)

# forward pass + loss
outputs = net(batch_inputs)
loss = criterion(outputs, batch_targets)

# Index of the largest logit per sample = predicted class.
_, predicted = torch.max(outputs.detach(), 1)

print(loss)
print(labels)
print(predicted)
print((predicted == batch_targets).sum().item())

In [None]:
# Train the network
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
# .float() is loop-invariant, so it is applied once when the model is built.
net = Net().to(device).float()
criterion = nn.CrossEntropyLoss()
# Other settings that were tried: lr=0.01 with momentum=0.9
optimizer = optim.Adam(net.parameters(), lr=3e-4, weight_decay=1e-5)

net.train()  # BatchNorm must use batch statistics while training
for epoch in range(20):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # Move the batch to the model's device; without this the forward
        # pass fails whenever CUDA is available.
        inputs = inputs.float().to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 25 == 24:    # print every 25 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 25))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Round 2: train the same network (same optimizer state) for 10 more epochs
# to see how much that improves the model. The first round already ran 20
# epochs, so these are reported as epochs 21-30.
FIRST_ROUND_EPOCHS = 20

net = net.float()
net.train()  # make sure BatchNorm is back in training mode
for epoch in range(10):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        # Move the batch to the model's device; without this the forward
        # pass fails whenever CUDA is available.
        inputs = inputs.float().to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 25 == 24:    # print every 25 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + FIRST_ROUND_EPOCHS + 1, i + 1, running_loss / 25))
            running_loss = 0.0

print('Finished Training')

In [None]:
# Dump every parameter tensor of the network (very verbose output).
print(list(net.parameters()))


In [None]:
# Predict a single batch from the test loader and show it next to the labels.
testiter = iter(test_loader)
images, labels = next(testiter)

# Inference: switch BatchNorm to its running statistics (and stop it from
# updating them on test data), and skip gradient tracking.
net.eval()
with torch.no_grad():
    # The batch has to be on the same device as the model.
    outputs = net(images.float().to(device))

print(outputs)
# Index of the largest logit per sample = predicted class.
_, predicted = torch.max(outputs, 1)
print(predicted)
print(labels)

In [None]:
# Let's see how the network performs on the whole test dataset

correct = 0
total = 0
# Evaluation mode: BatchNorm uses its running statistics instead of the
# statistics of each test batch, and does not update them.
net.eval()
with torch.no_grad():
    for images, labels in test_loader:
        # The batch has to be on the same device as the model.
        images = images.float().to(device)
        labels = labels.to(device)

        outputs = net(images)
        # Top-1 prediction; torch.topk(outputs, 2) would give the top two.
        _, predicted = torch.max(outputs, 1)

        total += len(labels)
        correct += (predicted == labels).sum().item()


In [None]:
# `outputs` and `labels` still hold the last batch that was evaluated.
print(outputs.shape)
print(labels.shape)
print(total)
print(correct)

print(torch.max(outputs.data, 1))
print(labels)

# Report the number of images that were actually evaluated rather than a
# hard-coded count, and avoid dividing by zero when nothing was evaluated.
accuracy = 100 * correct / total if total else 0
print('Accuracy of the network on the %d test images: %d %%' % (
    total, accuracy))

