# Part 5: CNN intro

In [2]:
'''
____________Author_____________
Nazanin Mohammadrezaei

__________ Resource ___________
Sentdex YouTube channel

___________ Playlist __________
Deep Learning and Neural Networks using PyTorch

___________ Video ____________
Part 5-6 : Convnet intro our Training Convnet

'''



import os
import numpy as np
from tqdm import tqdm
import cv2 



# make a dataset containing each image (of both dogs and cats) besides its class number
REBUILD_DATA = True

class DogVsCat():
    
    IMG_SIZE = 50
    Cat = "PetImages/Cat"
    Dog = "PetImages/Dog"
    LABELS = {Cat:0, Dog:1}
    training_data = []
    catcount = 0
    dogcount = 0
    
    def make_training_data(self):
        for label in self.LABELS:
            print(label)
            for file in tqdm(os.listdir(label)):
                try:
                    # reading the data
                    path = os.path.join(label,file)
                    img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
                    img = cv2.resize(img,(self.IMG_SIZE, self.IMG_SIZE))
                    
                    # add each data and its class to training_data
                    # use hot vectors to demonstrate classes: class0: [1,0] and class1:[0,1]
                    self.training_data.append([np.array(img,dtype='object'),np.eye(2)[self.LABELS[label]]])

                    # count the number of cats and dogs
                    if label == self.Cat:
                        self.catcount += 1
                    elif label == self.Dog:
                        self.dogcount += 1
                        
                except Exception as e:
                    pass

        # shuffle and save the training set
        np.random.shuffle(self.training_data)
        np.save("training_data.npy", self.training_data)
        print('Cats:',self.catcount)
        print('Dogs:',self.dogcount)

if REBUILD_DATA:
    dogVscat = DogVsCat()
    dogVscat.make_training_data()


  0%|▎                                                                             | 50/12501 [00:00<00:25, 494.70it/s]

PetImages/Cat


100%|████████████████████████████████████████████████████████████████████████████| 12501/12501 [02:48<00:00, 74.29it/s]
  0%|                                                                               | 10/12501 [00:00<02:06, 98.94it/s]

PetImages/Dog


100%|████████████████████████████████████████████████████████████████████████████| 12501/12501 [02:05<00:00, 99.42it/s]
  return array(a, dtype, copy=False, order=order, subok=True)


Cats: 12476
Dogs: 12470


# Part 6: Training a CNN

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# the neural network
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # three convolution layers
        self.conv1 = nn.Conv2d(1,32,5)
        self.conv2 = nn.Conv2d(32,64,5)
        self.conv3 = nn.Conv2d(64,128,5)
        
        # generate a random image, and guide through the network to figure out the size of first fully connected layer 
        # this line gives us 512 as the size of fc1
        x = torch.randn(50,50).view(-1,1,50,50)
        self._to_linear = None
        self.convs(x)
        
        # two fully connected layers
        self.fc1 = nn.Linear(self._to_linear,512)
        self.fc2 = nn.Linear(512,2)
        
    def convs(self,x):
        # max pooling
        x = F.max_pool2d(F.relu(self.conv1(x)),2,2)
        x = F.max_pool2d(F.relu(self.conv2(x)),2,2)
        x = F.max_pool2d(F.relu(self.conv3(x)),2,2)
        
        # calculate the size of fc1
        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x
    
    def forward(self,x):
        x = self.convs(x)
        x = x.view(-1,self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
    
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)


In [4]:
import torch.optim as optim

# load the dataset which contains each image besides its class number
training_data = np.load('training_data.npy',allow_pickle=True)
print('the shape of training data: ',training_data.shape,'\n')
print('first unit of data looks like:','\n',training_data[0])


# set a learning rate and a loss function (mean squared error)
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()


# divide your dataset into train and test set
X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
X = X/255.0
y = torch.Tensor([i[1] for i in training_data])


VAL_PCT = 0.1     # reserve 10% of our data for the test set
val_size = int(len(X)*VAL_PCT)

train_X = X[:-val_size]
train_y = y[:-val_size]

test_X = X[-val_size:]
test_y = y[-val_size:]

print('trainset size: ',len(train_X),'\n', 'testset size: ',len(test_X),'\n')


# train the network on the trainset
BATCH_SIZE = 100
EPOCHS = 2

for epoch in range(EPOCHS):
    for i in tqdm(range(0, len(train_X), BATCH_SIZE)): # from 0, to the len of x, stepping BATCH_SIZE at a time
        batch_X = train_X[i:i+BATCH_SIZE].view(-1, 1, 50, 50)
        batch_y = train_y[i:i+BATCH_SIZE]
        net.zero_grad()     # make the gradients zero
        outputs = net(batch_X)
        loss = loss_function(outputs, batch_y)
        loss.backward()
        optimizer.step()    # Does the update
    print(f"Epoch: {epoch}. Loss: {loss}")
print('\n')    


# evaluate the accuracy of your network using your testset
correct = 0
total = 0
with torch.no_grad():
    for i in tqdm(range(len(test_X))):
        real_class = torch.argmax(test_y[i])
        net_out = net(test_X[i].view(-1, 1, 50, 50))[0]  # returns a list, 
        predicted_class = torch.argmax(net_out)

        if predicted_class == real_class:
            correct += 1
        total += 1
print("Accuracy: ", round(correct/total, 3))

the shape of training data:  (24946, 2) 

first unit of data looks like: 
 [array([[255, 255, 253, ..., 254, 255, 255],
       [255, 253, 246, ..., 248, 253, 255],
       [254, 246, 221, ..., 229, 248, 255],
       ...,
       [253, 245, 211, ..., 143, 227, 255],
       [253, 243, 212, ..., 150, 232, 255],
       [253, 246, 224, ..., 185, 238, 254]], dtype=object)
 array([1., 0.])]


  X = torch.Tensor([i[0] for i in training_data]).view(-1,50,50)
  0%|                                                                                          | 0/225 [00:00<?, ?it/s]

trainset size:  22452 
 testset size:  2494 



100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [02:40<00:00,  1.40it/s]
  0%|                                                                                          | 0/225 [00:00<?, ?it/s]

Epoch: 0. Loss: 0.23205877840518951


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [02:43<00:00,  1.38it/s]
  0%|                                                                                          | 0/225 [00:00<?, ?it/s]

Epoch: 1. Loss: 0.23176461458206177


100%|████████████████████████████████████████████████████████████████████████████████| 225/225 [03:01<00:00,  1.24it/s]
  0%|                                                                                         | 0/2494 [00:00<?, ?it/s]

Epoch: 2. Loss: 0.18345031142234802




100%|██████████████████████████████████████████████████████████████████████████████| 2494/2494 [05:00<00:00,  8.30it/s]


Accuracy:  0.735
