# **Data Processing (Splitting and Data Augmentation)**





This notebook takes the raw data, splits it into a training set, a validation set and a testing set, and augments the training data.


In [None]:
# To run this notebook, the raw data files must first be made available in the Colab workspace.
# There are several ways to do this. Here, Google Drive is mounted into the workspace, which means the data folder 'path' used below is not universal: each user must adapt it to their own Drive.
# Alternatively, the raw data can be uploaded to the workspace manually.

In [None]:
# Google Drive is mounted so that the raw data stored there can be read from the workspace.

from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point, force_remount=True)

Mounted at /content/drive


In [None]:
# Root folder of the raw data. PedestrianDataset reads the images from its "PNGImages"
# sub-folder and the instance masks from its "PedMasks" sub-folder.
# Replace the path with the correct path for each individual (the path has been set up to work for all three members of the team)
DATA_FOLDER = "/content/drive/MyDrive/TAED2-PedestrianDetection/Datasets/PennFudan/Raw data"

In [None]:
%%shell

# Stop at the first failing command instead of copying files from a broken checkout.
set -e

# TorchVision repo is downloaded to use some reference files.
# Only the v0.15.1 tag is needed, so a shallow clone of that tag replaces the full-history
# clone, and the clone is skipped when 'vision' already exists so the cell can be re-run.
if [ ! -d vision ]; then
  git clone --depth 1 --branch v0.15.1 https://github.com/pytorch/vision.git
fi
cd vision
# Also pins the version when 'vision' was already present from an earlier run.
git checkout --quiet v0.15.1

# Copy the detection reference helpers next to the notebook so they can be imported.
for helper in utils transforms coco_eval engine coco_utils; do
  cp "references/detection/${helper}.py" ../
done

Cloning into 'vision'...
remote: Enumerating objects: 402326, done.[K
remote: Counting objects: 100% (52553/52553), done.[K
remote: Compressing objects: 100% (2687/2687), done.[K
remote: Total 402326 (delta 49855), reused 52310 (delta 49724), pack-reused 349773[K
Receiving objects: 100% (402326/402326), 793.50 MiB | 31.27 MiB/s, done.
Resolving deltas: 100% (372391/372391), done.
Note: switching to 'v0.15.1'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at 42759b1cc8 Version number bump for vision (#7419)




## Data Augmentation

In [None]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T
import random

def get_transform(train, transform_value = 1):
    """Build the transform pipeline applied to every (image, target) pair.

    The pipeline always converts the PIL image to a tensor and casts it to float.
    When ``train`` is truthy, exactly one augmentation is appended, selected by
    ``transform_value``:

    * ``1``: horizontal flip with probability 1,
    * ``2``: ``RandomShortestSize(120, 800)``,
    * any other value: photometric distortion with probability 1.

    Args:
        train: whether an augmentation must be added to the basic transforms.
        transform_value: selector of the augmentation (ignored when ``train`` is falsy).

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # Basic conversions shared by every dataset (tensor conversion and float casting)
    pipeline = [T.PILToTensor(), T.ConvertImageDtype(torch.float)]

    if not train:
        return T.Compose(pipeline)

    # Augmentation of the raw data, one kind per value
    if transform_value == 1:
        augmentation = T.RandomHorizontalFlip(p=1)
    elif transform_value == 2:
        augmentation = T.RandomShortestSize(120,800)
    else:
        augmentation = T.RandomPhotometricDistort(p = 1)
    pipeline.append(augmentation)

    return T.Compose(pipeline)

In [None]:
import os
import numpy as np

import torch
import torch.utils.data

from torchvision import transforms
from torchvision import utils as tutils


import skimage.transform as sktf
import skimage.io as skio

from PIL import Image

# Seeds Python's built-in `random` module only; NumPy and PyTorch keep their own generators.
random.seed(356)

class PedestrianDataset(torch.utils.data.Dataset):
    """Pedestrian dataset yielding ``(image, target)`` pairs.

    ``root`` must contain a ``PNGImages`` folder with the pictures and a ``PedMasks``
    folder with the instance masks. Both listings are sorted and paired by position,
    so the i-th mask is assumed to belong to the i-th image.
    """

    def __init__(self, root, transforms=None):
        """Index the image and mask files found under ``root``.

        Args:
            root: folder holding the ``PNGImages`` and ``PedMasks`` sub-folders.
            transforms: optional callable invoked as ``transforms(img, target)`` and
                expected to return the transformed ``(img, target)`` pair.
        """
        self.root = root
        self.transforms = transforms
        # The images are loaded and sorted to make sure they match with the appropriate mask
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    @staticmethod
    def _boxes_from_masks(masks):
        """Return the ``[xmin, ymin, xmax, ymax]`` box of every binary mask as an (N, 4) float32 tensor."""
        boxes = []
        for instance_mask in masks:
            rows, cols = np.where(instance_mask)
            boxes.append([np.min(cols), np.min(rows), np.max(cols), np.max(rows)])

        if not boxes:
            # Without instances a plain conversion would be 1-D and break the column
            # indexing used for the area, so the (0, 4) shape is built explicitly.
            return torch.zeros((0, 4), dtype=torch.float32)
        return torch.as_tensor(boxes, dtype=torch.float32)

    def __getitem__(self, idx):
        """Load sample ``idx`` and build its detection/segmentation target.

        Returns:
            ``(img, target)`` where ``target`` is a dict with the keys ``boxes``,
            ``labels``, ``masks``, ``image_id``, ``area`` and ``iscrowd``. When
            ``self.transforms`` is set, both elements are the output of that callable.
        """
        # The images and masks are loaded
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        # The mask is not converted to RGB given that each color corresponds to an object, the background being 0
        mask = Image.open(mask_path)

        mask = np.array(mask)
        # The objects are coded with different colors
        obj_ids = np.unique(mask)
        # The first id is not necessary as it is the background
        obj_ids = obj_ids[1:]

        # The color coded mask is separated into one binary mask per object
        masks = mask == obj_ids[:, None, None]

        # We get each mask's bounding box coordinates
        num_objs = len(obj_ids)
        boxes = self._boxes_from_masks(masks)
        # There is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # We assume that all instances are not crowded
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images found in the ``PNGImages`` folder."""
        return len(self.imgs)

In [None]:
# Both generators are seeded: Python's `random` (as before) and PyTorch's, because
# random.seed alone does not affect torch's random number generator.
# NOTE(review): torchvision's reference detection transforms appear to draw from the torch RNG - confirm.
random.seed(356)
torch.manual_seed(356)
dataset = PedestrianDataset(DATA_FOLDER, get_transform(train=True, transform_value=2))
(img,target) = dataset[89]

# We check the size of the tensors
print(img.shape)
print(target["masks"].shape)
print(target["labels"].shape)
print(target["boxes"].shape)

torch.Size([3, 120, 205])
torch.Size([3, 120, 205])
torch.Size([3])
torch.Size([3, 4])




In [None]:
# Class names indexed by label id: draw_segmentation_map uses COCO_NAMES[label] as the caption
# of each box. Index 0 is the background and index 1 is 'person'.
# NOTE(review): the 'N/A' entries look like placeholders for unused ids - confirm.
COCO_NAMES = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# Colour palette with one RGB triplet per entry of COCO_NAMES. It is drawn from a dedicated
# seeded generator so that the palette is identical on every run: the random.seed call below
# only seeds Python's `random` module, not NumPy.
COLORS = np.random.default_rng(356).uniform(0, 255, size=(len(COCO_NAMES), 3)).astype(int)
import cv2
import random
# Seeds the choice of palette entry made for each object in draw_segmentation_map.
random.seed(356)

def draw_segmentation_map(image, target, score_thres=0.8):
    """Render an image with its masks highlighted and its boxes and labels drawn.

    Pixels that are not covered by any accepted mask are darkened; every accepted
    object gets a bounding box and its class name in a randomly picked palette colour.

    Args:
        image: image tensor laid out channels-first (it is transposed to
            height x width x channels); values are presumably in [0, 1] because
            they are scaled by 255 for drawing - confirm against the caller.
        target: dict with the tensors ``masks``, ``boxes`` and ``labels`` and,
            optionally, ``scores``. Without ``scores`` every object scores 1.
        score_thres: objects whose score is not strictly above this value are ignored.

    Returns:
        The annotated image as a NumPy array (height x width x channels), divided by 255.
    """
    # It's converted to arrays
    _image = np.copy(image.cpu().detach().numpy().transpose(1,2,0)*255)
    _masks = np.copy(target['masks'].cpu().detach().numpy().astype(np.float32))
    _boxes = np.copy(target['boxes'].cpu().detach().numpy().astype(int))
    _labels = np.copy(target['labels'].cpu().detach().numpy().astype(int))
    if "scores" in target:
        _scores = np.copy(target["scores"].cpu().detach().numpy())
    else:
        _scores = np.ones(len(_masks),dtype=np.float32)

    label_names = [COCO_NAMES[i] for i in _labels]

    # The mask is added only if the score surpasses the threshold. The accumulator takes
    # its size from the image, so a target without any mask is handled as well.
    m = np.zeros(_image.shape[:2], dtype=np.float32)
    for i in range(len(_masks)):
        if _scores[i] > score_thres:
            m = m + _masks[i]

    # Make sure the m has the correct size (no dimension at 1)
    m = m.squeeze()

    # The pixels outside of the accepted masks are darkened
    background = m < 0.5
    _image[background] = 0.3*_image[background]

    # OpenCV draws in place and needs a C-contiguous array; the transpose above does not
    # produce one. This replaces the former RGB -> BGR -> RGB round trip, which left the
    # pixel values unchanged and only served as a contiguous copy.
    _image = np.ascontiguousarray(_image)

    for i in range(len(_masks)):
        if _scores[i] > score_thres:
            # Apply a random color to each object
            color = COLORS[random.randrange(0, len(COLORS))].tolist()

            # Plain Python ints are accepted as point coordinates by every OpenCV build
            xmin, ymin, xmax, ymax = (int(value) for value in _boxes[i][:4])

            # The bounding boxes are drawn around the objects
            cv2.rectangle(_image, (xmin, ymin), (xmax, ymax), color=color, thickness=2)
            # We add the class label above the objects
            cv2.putText(_image , label_names[i], (xmin, ymin-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, color,
                        thickness=1, lineType=cv2.LINE_AA)

    return _image/255

In [None]:
import plotly.express as px
# Visual check of the sample loaded above: masks, boxes and labels drawn over the image.
px.imshow(draw_segmentation_map(img, target))

### Data Splitting

In [None]:
# The entire dataset is wrapped in the PedestrianDataset class with only the basic transformations
# (tensor conversion and float casting): train = False means no augmentation is added.

dataset_whole = PedestrianDataset(DATA_FOLDER, get_transform(train = False, transform_value = 0))

In [None]:
# The un-augmented dataset is split once, so that the validation and testing data never go
# through the augmentation transforms. The generator is seeded to make the split reproducible.

split_generator = torch.Generator().manual_seed(42)
training_dataset0, validation_dataset, testing_dataset = torch.utils.data.dataset.random_split(
    dataset_whole,
    [0.41, 0.295, 0.295],
    generator=split_generator,
)

In [None]:
# Additional training datasets are created, one per augmentation. Re-using the same fractions
# and the same generator seed is meant to select the same images as in training_dataset0.
# Only the training part of each split is used; `val` and `test` are leftovers of the unpacking.

def _split_augmented(transform_value):
    """Split a copy of the dataset whose samples go through the given augmentation."""
    augmented_dataset = PedestrianDataset(DATA_FOLDER, get_transform(train = 1, transform_value = transform_value))
    return torch.utils.data.dataset.random_split(
        augmented_dataset,
        [0.41, 0.295, 0.295],
        generator = torch.Generator().manual_seed(42),
    )

training_dataset1, val, test = _split_augmented(1)
training_dataset2, val, test = _split_augmented(2)
training_dataset3, val, test = _split_augmented(3)

In [None]:
from torch.utils.data import ConcatDataset

# A bigger training dataset is created by combining all four smaller training datasets:
# the un-augmented one (training_dataset0) followed by the three augmented ones.

training_dataset = ConcatDataset([training_dataset0, training_dataset1, training_dataset2, training_dataset3])

In [None]:
# Sizes of the final datasets, printed in the order training / validation / testing.
for split in (training_dataset, validation_dataset, testing_dataset):
    print(len(split))

# With 280 / 50 / 50 samples the distribution is roughly 73.7 % in training, 13.2 % in validation and 13.2 % in testing.

280
50
50


In [None]:
# Using this code we save each dataset to a file.
# The pickles hold the dataset objects (file lists, split indices and transforms), not the
# pixel data: PedestrianDataset only opens the images when a sample is requested.

import os
import pickle
import shutil

import sys
# Folder of the shared Drive where the processed datasets are stored (the same for the three members of the group)
PROCESSED_FOLDER = "/content/drive/MyDrive/TAED2-PedestrianDetection/Datasets/PennFudan/Processed data"
# Kept from the original cell: this only extends the module search path and has no
# influence on where files are written, which is why the explicit copy below is needed.
sys.path.append(PROCESSED_FOLDER)

os.makedirs(PROCESSED_FOLDER, exist_ok=True)

for file_name, split in (
    ('training_dataset.pkl', training_dataset),
    ('validation_dataset.pkl', validation_dataset),
    ('testing_dataset.pkl', testing_dataset),
):
    # The local copy is kept in the working directory, where the loading cell reads it
    with open(file_name, 'wb') as file:
        pickle.dump(split, file)
    # The file is also copied to the Drive folder so that it outlives the Colab session
    shutil.copy(file_name, os.path.join(PROCESSED_FOLDER, file_name))


In [None]:
# Using this code we load each dataset from their respective file.
# Unpickling needs the PedestrianDataset class and the imported `transforms` helper module.
# pickle.load can run arbitrary code: only load files written by this notebook.
# The `with` statement closes each file on exit, so no explicit close() is required.

with open('training_dataset.pkl', 'rb') as file:
    training_dataset_loaded = pickle.load(file)
with open('validation_dataset.pkl', 'rb') as file:
    validation_dataset_loaded = pickle.load(file)
with open('testing_dataset.pkl', 'rb') as file:
    testing_dataset_loaded = pickle.load(file)


In [None]:
# Sanity check of the reloaded training dataset: one sample is fetched and displayed

img, target = training_dataset_loaded[89]
px.imshow(draw_segmentation_map(img, target))

In [None]:
# Sizes of the reloaded datasets, printed in the order training / validation / testing
for loaded_split in (training_dataset_loaded, validation_dataset_loaded, testing_dataset_loaded):
    print(len(loaded_split))

280
50
50
