# Convolutional Neural Network

### Libraries

In [None]:
# import libraries

import os
import cv2
import torch

import numpy as np
import matplotlib.pyplot as plt

from google.colab import drive
from xml.etree import ElementTree as ET
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [None]:
# major variables: dataset folders under the Colab runtime's /content/data
# (photos_dir is the folder passed to load_data later in the notebook)

photos_dir = '/content/data/photos'
renders_dir = '/content/data/renders'

### Datasets

In [None]:
# mount drive on colab notebook
# (the data archive is read from /content/drive/MyDrive in the next cell)

drive.mount('/content/drive')

In [None]:
# unzip data files
# (`!` runs a shell command: extracts the archive from Drive into /content/data)

!unzip "/content/drive/MyDrive/02 - tagged1.zip" -d "/content/data"

In [None]:
def parse_xml(xml_file):
    '''
    Collect the bounding boxes stored in an annotation xml file.

    Every <object> element directly under the root contributes one
    [xmin, ymin, xmax, ymax] list of ints read from its <bndbox> child.
    Returns the list of those boxes, in document order.
    '''
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotation = ET.parse(xml_file).getroot()
    return [
        [int(box.find(tag).text) for tag in corner_tags]
        for box in (item.find('bndbox') for item in annotation.findall('object'))
    ]

In [1]:
def load_data(data_dir):
    '''
    Returns the image paths found under data_dir and the label of each image

    The label of an image is the integer name of the folder that contains it
    (e.g. <data_dir>/3/photo.jpg -> 3). Only '.jpg' files produce an entry, so
    other files stored next to the images never shift the labels out of step
    with the paths.

    Args:
        data_dir: root folder, walked recursively.

    Returns:
        image_paths: numpy array with the path of every '.jpg' file.
        num_legos: int64 tensor holding one label per entry of image_paths.

    Raises:
        ValueError: if a folder that holds '.jpg' files has a non-integer name.
    '''
    image_paths = []
    num_legos = []
    for subdir, dirs, files in os.walk(data_dir):
        # sorting in place makes os.walk visit folders in a reproducible order
        dirs.sort()
        jpg_files = sorted(file for file in files if file.endswith('.jpg'))
        if not jpg_files:
            # folders without images (e.g. the root) need no numeric name
            continue
        n = int(os.path.basename(subdir))
        for file in jpg_files:
            image_paths.append(os.path.join(subdir, file))
            num_legos.append(n)
    image_paths = np.asarray(image_paths)
    # build the integer tensor directly instead of going through float32
    num_legos = torch.tensor(num_legos, dtype=torch.int64)
    return image_paths, num_legos

In [None]:
# load data
# (photo paths plus, per photo, the lego count taken from its folder name)

image_paths, num_legos = load_data(photos_dir)

In [None]:
# classes distribution

# A sequence passed as `bins` is read as bin *edges*, so the edges must run one
# past the largest count; otherwise the top class is dropped from the plot and
# the one below it is merged into its neighbour.
plt.hist(
    num_legos.numpy(),
    bins=range(1, int(num_legos.max()) + 2),
    align='left',
    rwidth=0.8,
)
plt.xlabel('Number of Legos')
plt.ylabel('Frequency')
plt.title('Number of Legos Distribution')
plt.show()

In [None]:
# TODO: change split strategy

# each image independently draws 0 (train), 1 (valid) or 2 (test)
# with probabilities 0.8 / 0.1 / 0.1
split = np.random.choice([0, 1, 2], size=len(image_paths), p=[0.8, 0.1, 0.1])

# positions of the images assigned to each partition
train_indexes, valid_indexes, test_indexes = (
    np.where(split == part)[0] for part in (0, 1, 2)
)

In [None]:
class LegosDataset(Dataset):
    '''
    Dataset class for the legos dataset

    Each item is an (image, label) pair: the image is read from disk with
    OpenCV and converted from BGR to RGB, the label is the lego count shifted
    down by one (num_legos - 1).

    Args:
        images_filenames: indexable collection of image paths.
        num_legos: lego count per image; must support `- 1` and indexing
            (the notebook passes an int64 tensor).
        transform: optional callable applied to the RGB image.
    '''
    def __init__(self, images_filenames, num_legos, transform=None):
        self.images_filenames = images_filenames
        self.transform = transform
        self.labels = num_legos - 1

    def __len__(self):
        return len(self.images_filenames)

    def __getitem__(self, idx):
        '''
        Returns the (image, label) pair stored at position idx

        Raises:
            OSError: if the image file is missing or cannot be decoded.
        '''
        image_filename = self.images_filenames[idx]
        label = self.labels[idx]
        image = cv2.imread(image_filename)
        if image is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor
            raise OSError(f'Could not read image: {image_filename}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
# train, valid and test datasets

batch_size = 32
num_workers = 2

# the same preprocessing pipeline is shared by the three partitions
# NOTE(review): an int `size` is documented to rescale only the shorter edge -
# confirm every photo has the same aspect ratio, or batching may fail
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(size=224),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = LegosDataset(
    image_paths[train_indexes],
    num_legos[train_indexes],
    transform=transform,
)
valid_dataset = LegosDataset(
    image_paths[valid_indexes],
    num_legos[valid_indexes],
    transform=transform,
)
test_dataset = LegosDataset(
    image_paths[test_indexes],
    num_legos[test_indexes],
    transform=transform,
)

# only the training loader shuffles its samples
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)