In [18]:
# CONVOLUTIONAL NN
# The difference between a plain NN and a CNN is that a CNN uses convolutions to locate features of an image,
# and it stacks several of these convolutions, which lets it condense, for example, an image.
# A convolution is done by taking a small square window on the image, moving it around and saving each piece.
# Then the machine takes each of these small features and uses them to create a new condensed version of the
# image built from the pieces (features). Then follows a process of pooling, where the maximum value inside
# each window is chosen and replaces that part of the image.
# So in summary a CNN drastically simplifies the image and looks for features in it.
# Simple: it reduces your image to simple building blocks and then finds patterns of these blocks, depending
# on how many layers you have.

import os
import cv2
import numpy as np
from tqdm import tqdm

# Set to True to regenerate training_data.npy from the raw images (slow); leave False to reuse the saved file.
REBUILD_DATA = False

# We generally don't need a class for this, but in our case (image processing) there are quite a few steps
# that we have to repeat, so it is convenient to group them as methods...
# An important note here is that for image classification many tricks can be used to make our dataset
# more rounded and to raise the accuracy of generalization. These tricks involve: cropping images, resizing
# them and adding white space, or even rotating them and using the rotated versions as new images, which
# can increase the amount of our data by 4x (4 ways of rotating an image) or more...
class DogsVSCats():
    """Builds the cats-vs-dogs training set from two image directories.

    Every readable image below ``CATS`` and ``DOGS`` is loaded as grayscale,
    resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired with a one-hot label.
    The shuffled samples are written to ``training_data.npy``.

    Attributes:
        training_data: list of ``[image, one_hot_label]`` pairs from the last build.
        catcount: number of cat images accepted in the last build.
        dogcount: number of dog images accepted in the last build.
        skipped: number of files that could not be read or resized.
    """

    IMG_SIZE = 50 # 50x50 pixels
    CATS = "Kaggle/PetImages/Cat"
    DOGS = "Kaggle/PetImages/Dog"
    LABELS = {CATS: 0, DOGS: 1}

    def __init__(self):
        self._reset_state()

    def _reset_state(self):
        """Clear the collected samples and all counters."""
        # Kept per instance: a class-level list would be shared by every instance and keep growing.
        self.training_data = []
        # A balanced amount of data per class is important for training, so we count
        # how many samples of each class were actually accepted.
        self.catcount = 0
        self.dogcount = 0
        self.skipped = 0

    def make_training_data(self):
        """Load, resize and label all images, then shuffle and save them.

        Files that OpenCV cannot decode or resize are skipped and counted in
        ``self.skipped`` instead of aborting the whole run. Calling this method
        again starts from scratch rather than appending to the previous result.
        """
        self._reset_state()
        # One row per class; row k is the one-hot vector of class index k.
        one_hot = np.eye(len(self.LABELS))

        # iterate through our dictionary of classes
        for label in self.LABELS:
            print(label)
            # iterate through the images in the class directory
            for f in tqdm(os.listdir(label)):
                # The directory name doubles as the dictionary key, so joining it with the
                # file name gives the path of the image.
                path = os.path.join(label, f)
                img = cv2.imread(path, cv2.IMREAD_GRAYSCALE) # converting to GRAYSCALE is not a necessity
                if img is None:
                    # imread signals an unreadable / non-image file by returning None
                    self.skipped += 1
                    continue
                try:
                    img = cv2.resize(img, (self.IMG_SIZE, self.IMG_SIZE))
                except cv2.error:
                    # decoded but not resizable: skip this file, keep going
                    self.skipped += 1
                    continue

                self.training_data.append([np.array(img), one_hot[self.LABELS[label]].copy()])

                if label == self.CATS:
                    self.catcount += 1
                elif label == self.DOGS:
                    self.dogcount += 1

        np.random.shuffle(self.training_data)

        # Each sample pairs a 2-D image with a 1-D label, so the data is ragged. Fill an
        # explicit object array: recent NumPy versions refuse to build one implicitly.
        dataset = np.empty((len(self.training_data), 2), dtype=object)
        for row, (img, target) in enumerate(self.training_data):
            dataset[row, 0] = img
            dataset[row, 1] = target
        np.save("training_data.npy", dataset)

        print("Cats:", self.catcount)
        print("Dogs:", self.dogcount)
        print("Skipped:", self.skipped)

# Rebuild the dataset when explicitly requested (takes a long time), or when there is no saved copy yet,
# so that a fresh run does not fail on the load below.
if REBUILD_DATA or not os.path.exists("training_data.npy"):
    dogsvcats = DogsVSCats()
    dogsvcats.make_training_data()

# Load the saved training data so we don't have to create it again, for speed's sake.
# allow_pickle=True is needed because the file stores Python objects (image/label pairs) rather than one
# plain numeric array. Only load files you created yourself: unpickling untrusted data can run arbitrary code.
training_data = np.load("training_data.npy", allow_pickle=True)
print(len(training_data))


24946


In the next cell we build the convolutional network. For this we have to import PyTorch's libraries: the general `torch` library for tensors, the `nn` module for the convolutional functionality, and the functional part of the `nn` module, which we import as `F` so we can access it through that name.

We start by defining a `Net` class, which describes our NN. We define an initializer method `__init__(self)`, which first calls the initializer of its parent class `nn.Module`.
Let's start the init by defining 3 convolutional layers (`conv1, conv2, conv3`), for which we use `nn.Conv2d`.
We call the 2D convolution with 3 parameters `in_channels, out_channels, kernel_size`, where:
- `in_channels`: number of channels in the input image
- `out_channels`: number of channels produced by the convolution
- `kernel_size`: size of the convolving kernel - int(5) = 5x5; tuple(5, 3) = 5x3 kernel

Next we use a max pooling function, which is a *sample-based discretization process* whose objective is to down-sample an input (in our case an image). This reduces its dimensionality by making assumptions about the features contained in the sub-regions of the sample being pooled.

**Max pooling example:**

<img src="https://computersciencewiki.org/images/8/8a/MaxpoolSample2.png">







In our case we use `nn.MaxPool2d((2, 2))`, i.e. a 2x2 pooling window, after each of the three convolutional layers.

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN: three conv + max-pool stages followed by two linear layers.

    ``forward`` takes a batch shaped (N, 1, H, W) and returns one row of two
    softmax probabilities per sample. ``fc1`` accepts 512 features, which is
    what the three stages produce for 50x50 inputs (128 channels of 2x2).
    """

    def __init__(self):
        super().__init__()
        # 5x5 convolutions that widen the channel count: 1 -> 32 -> 64 -> 128
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)

        # one 2x2 max-pool per convolution
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2))
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 2))

        # classifier head: 512 flattened features -> 512 hidden units -> 2 classes
        self.fc1 = nn.Linear(in_features=512, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Run the batch ``x`` through the network and return class probabilities."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # keep the batch dimension, flatten everything else for the linear layers
        features = torch.flatten(x, start_dim=1)
        hidden = F.relu(self.fc1(features))
        logits = self.fc2(hidden)
        return F.softmax(logits, dim=1)
        
net = Net()
# Uncomment the next line to push one random 1x1x50x50 sample through the network, e.g. to check
# the flattened size that the fc1 layer has to accept:
#net.forward(torch.randn(1, 1, 50, 50))
print("Finished running")

Finished running


In [20]:
import torch.optim as optim

optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()

# Stack the samples into single NumPy arrays before handing them to torch: building a tensor directly
# from a Python list of arrays converts it element by element and is very slow.
images = np.array([sample[0] for sample in training_data])
labels = np.array([sample[1] for sample in training_data])

X = torch.tensor(images, dtype=torch.float32).view(-1, 50, 50)
X = X/255.0 # scale pixel values from 0..255 to 0..1
y = torch.tensor(labels, dtype=torch.float32)

# fraction of the data held back for testing
VAL_PCT = 0.1
val_size = int(len(X)*VAL_PCT)
print(val_size)

2494


In [21]:
# Hold out the last val_size samples for testing. We slice from an explicit index because
# X[:-val_size] would be empty when val_size is 0 (-0 is just 0).
split_at = len(X) - val_size

train_X = X[:split_at]
train_y = y[:split_at]

test_X = X[split_at:]
test_y = y[split_at:]

print(len(train_X))
print(len(test_X))

22452
2494


In [22]:
BATCH_SIZE = 100
EPOCHS = 1

n_train = len(train_X)
for epoch in range(EPOCHS):
    # walk over the training set in consecutive slices of BATCH_SIZE samples
    for start in tqdm(range(0, n_train, BATCH_SIZE)):
        stop = start + BATCH_SIZE
        # add the single channel dimension the convolution layers expect
        batch_X = train_X[start:stop].view(-1, 1, 50, 50)
        batch_y = train_y[start:stop]

        # gradients accumulate by default, so clear them before every step
        net.zero_grad()
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()

# loss of the very last batch
print(loss)

100%|██████████| 225/225 [01:26<00:00,  2.59it/s]
tensor(0.2171, grad_fn=<MseLossBackward>)


In [23]:
correct = 0
total = 0
# no gradients are needed while we only measure accuracy
with torch.no_grad():
    for idx in tqdm(range(len(test_X))):
        # feed one image at a time as a batch of size 1 and take its single output row
        sample = test_X[idx].view(-1, 1, 50, 50)
        net_out = net(sample)[0]
        predicted_class = torch.argmax(net_out)
        real_class = torch.argmax(test_y[idx])
        correct += int(predicted_class == real_class)
        total += 1

print("Accuarcy:", round(correct/total,3))

100%|██████████| 2494/2494 [00:13<00:00, 182.10it/s]
Accuarcy:0.626
