# Building a CNN for object detection 

In [1]:
# IMPORTS
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import S40dataset
import matplotlib.pyplot as plt
import time


In [2]:

# HYPERPARAMETERS
batch_size = 4
epochs = 1
lr = 0.00001


In [3]:

transform = transforms.Compose([# compose transforms 
    transforms.Resize((64, 64)),# resize imgs
    transforms.ToTensor()# convert PIL images into Torch Tensors
])


# MAKE DATASET
dataset = S40dataset.S40dataset(transform=transform)# make the dataset and pass it our transforms

# SPLIT DATASET
train_len = int( 0.8 * len(dataset))
val_len = int( 0.1 * len(dataset))
test_len = len(dataset) - train_len - val_len
train_data, val_data, test_data = random_split(dataset, [train_len, val_len, test_len])# split the dataset into train, test, validation

# MAKE DATALOADERS
train_loader = DataLoader(train_data,# make the training dataloader
                          batch_size = batch_size,
                          shuffle=True)
test_loader = DataLoader(test_data,
                       batch_size=batch_size)

# SHOW A RANDOM EXAMPLE FROM THE DATASET
for i in test_loader:
    b = i
    
    break
import numpy as np
idx = np.random.randint(len(test_loader))
S40dataset.show(b)

['images/applauding_001.jpg', 'images/applauding_002.jpg', 'images/applauding_003.jpg', 'images/applauding_004.jpg', 'images/applauding_005.jpg', 'images/applauding_006.jpg', 'images/applauding_007.jpg', 'images/applauding_008.jpg', 'images/applauding_009.jpg', 'images/applauding_010.jpg', 'images/applauding_011.jpg', 'images/applauding_012.jpg', 'images/applauding_013.jpg', 'images/applauding_014.jpg', 'images/applauding_015.jpg', 'images/applauding_016.jpg', 'images/applauding_017.jpg', 'images/applauding_018.jpg', 'images/applauding_019.jpg', 'images/applauding_020.jpg', 'images/applauding_021.jpg', 'images/applauding_022.jpg', 'images/applauding_023.jpg', 'images/applauding_024.jpg', 'images/applauding_025.jpg', 'images/applauding_026.jpg', 'images/applauding_027.jpg', 'images/applauding_028.jpg', 'images/applauding_029.jpg', 'images/applauding_030.jpg', 'images/applauding_031.jpg', 'images/applauding_032.jpg', 'images/applauding_033.jpg', 'images/applauding_034.jpg', 'images/appla

In [10]:

# SET NUMBER OF FILTERS TO USE IN EACH CONV LAYER
channels1 = 128
channels2 = 64
channels3 = 64
channels4 = 64

# CREATE MODEL
class DetectCNN(torch.nn.Module):# create class
    def __init__(self):# initialise
        super().__init__()         # initialise parent class
        self.conv_layers = torch.nn.Sequential(        # make sequential model for conv layers
            torch.nn.Conv2d(3, channels1, 7),            # make layers
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(channels1),
            torch.nn.Dropout(0.2),
            torch.nn.Conv2d(channels1, channels2, 7),            # make layers
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(channels2),
            torch.nn.Dropout(0.2),
            torch.nn.Conv2d(channels2, channels3, 7),            # make layers
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(channels3),
            torch.nn.Dropout(0.2),
        )
        self.fc_layers = torch.nn.Sequential(# make sequential model for fully connected (linear) layers
            torch.nn.Linear(64*46*46, 4),# make layers
            torch.nn.Sigmoid()
        )
    
    def forward(self, x):# define forward pass
        x = self.conv_layers(x)# forward pass through conv layers
        print(x.shape)
        x = x.view(-1, 64*46*46)# flatten conv output to pass to fully connected layers
        x = self.fc_layers(x)# pass through fc layers
        return x# return output
        
cnn = DetectCNN()# create instance of model
criterion = torch.nn.MSELoss()# define loss function
optimiser = torch.optim.Adam(cnn.parameters(), lr=lr, weight_decay=1)# define optimiser

# TRAIN
def train(model):

    train_losses = []
    for epoch in range(epochs):
        for batch_idx, batch in enumerate(train_loader):
            x, bndbox = batch    # unpack batch
            pred_bndbox = model(x)# forward pass
            #print('label:', bndbox, 'prediction:', pred_bndbox)
            loss = criterion(pred_bndbox, bndbox)     # compute loss for this batch
            optimiser.zero_grad()# zero gradients of optimiser
            loss.backward()     # backward pass (find rate of change of loss with respect to model parameters)
            optimiser.step()# take optimisation step
            print('Epoch:', epoch, 'Batch:', batch_idx, 'Loss:', loss.item())
            train_losses.append(loss.item())    #append loss for this batch to list of all losses
            if batch_idx == 200:    # tell your model to stop at some batch_idx if you want
                #break
                pass
            
    return train_losses

train_losses = train(cnn)
torch.save(cnn.state_dict(), str(time.time()))# save model

plt.plot(train_losses)
plt.show()
           


images/waving_hands_125.jpg
images/washing_dishes_172.jpg
images/gardening_057.jpg
images/pushing_a_cart_080.jpg
torch.Size([4, 64, 46, 46])


KeyboardInterrupt: 

In [None]:

def test(model):
    # set in evaluation mode
    for idx, batch in enumerate(test_loader):
        print(type(batch))
        # unpack batch
        # forward pass
        S40dataset.show(batch, pred_bndbox=pred_bndbox)
        if idx == 10:
            break


test(cnn)