# Private dataset

Follow this notebook to prepare the data we collected (the dataset is currently not public).

**Description:**
- This dataset contains both RGB and thermal images of people, captured from drone platforms in different places, at different heights and at different times of day.

**Labels:**
- object_category x_left y_top width_scaled height_scaled
- bounding boxes in the annotations are in xywhn (YOLO) format

**Table of contents:**

0. Init - imports and data download
1. Dataset split to train, val and test
2. Dataset visualization

## 0. Init - imports and data download

Dataset directory:
```
data
└───source
    └───private-dataset
        ├───RGB
        │   ├───annotations
        │   └───images
        └───Termo
            ├───annotations
            └───images
```

In [None]:
# Uncomment the two lines below to auto-reload imported packages (useful while modifying them)
# %load_ext autoreload
# %autoreload 2

# Imports
import os
import cv2
import random
import numpy as np
import shutil
import pybboxes as pbx
import albumentations as A
from pathlib import Path

from prj_utils.consts import ROOT_DIR
from data_processing.image_processing import plot_xywhn_annotated_image_from_file, get_brightness_stats, copy_annotated_images, get_number_of_objects_stats

# Consts
# Source locations of the raw images and their annotation files, all built
# relative to the project's ROOT_DIR.
RGB_DIR = f'{ROOT_DIR}/data/source/private-dataset/RGB/images'
RGB_ANNO_DIR = f'{ROOT_DIR}/data/source/private-dataset/RGB/annotations'
# NOTE(review): the directory tree in this notebook's intro names the thermal
# folder `Termo`, while the two paths below use `Thermal` - confirm which one
# matches the data on disk.
THERM_DIR = f'{ROOT_DIR}/data/source/private-dataset/Thermal/images'
THERM_ANNO_DIR = f'{ROOT_DIR}/data/source/private-dataset/Thermal/annotations'
# Output root for the processed RGB data; `images` and `labels` sub-folders
# are created under it further down in this notebook.
RGB_PROCESSED_DIR = f'{ROOT_DIR}/data/processed/private-dataset/RGB'


# Column names of one annotation row.
# NOTE(review): `x_left` / `y_top` suggest a top-left anchored box, whereas the
# crop step below passes the boxes to albumentations with format='yolo' -
# confirm which convention the annotation files actually follow.
ANNOTATION_HEADER = ['object_category', 'x_left', 'y_top', 'width_scaled', 'height_scaled']
# Class id kept as a string; presumably the single class of this dataset - TODO confirm.
CLASS = '0'

In [None]:
def read_bboxes(path) -> list[list[float]]:
    """Read bounding-box annotations from a whitespace-separated text file.

    Every non-blank line is expected to hold a class id followed by the box
    values. The class id is moved to the END of the returned row
    (``[value_1, ..., value_n, class_id]``); this is the row layout that
    ``crop_to_thermo`` hands to its albumentations transform.

    Args:
        path: Path of the annotation file to read.

    Returns:
        One list per annotated object: the box values as floats, followed by
        the class id as an int. An empty file yields an empty list.

    Raises:
        ValueError: If a field of a non-blank line is not a valid number.
    """
    labels = []
    with open(path, 'r') as file:
        for line in file:
            # split() with no argument tolerates repeated spaces, tabs and the
            # trailing newline, none of which split(' ') handles.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. an extra newline at EOF) carry no annotation.
                continue
            class_id, *box_values = fields
            labels.append([float(value) for value in box_values] + [int(class_id)])
    return labels

def save_labels(output_labels_filepath: str, yolo_bboxes: list[float]):
    """Write one annotation line per bounding box to a text file.

    Each entry of ``yolo_bboxes`` is itself a sequence; its last element is
    discarded and the remaining values are written space-separated after a
    fixed class id of ``0``. The target file is overwritten.

    NOTE(review): the ``list[float]`` hint is narrower than what the body
    consumes (a sequence of per-box sequences), and the class id is always
    written as ``0`` regardless of the discarded last element.

    Args:
        output_labels_filepath: Path of the label file to (over)write.
        yolo_bboxes: Per-box sequences whose last element is dropped.
    """
    with open(output_labels_filepath, 'w') as out:
        out.writelines(
            '0 ' + ' '.join([str(value) for value in bbox][:-1]) + '\n'
            for bbox in yolo_bboxes
        )

def crop_to_thermo(input_rgb_filepath, input_annotation_filepath, output_rgb_filepath, output_annotation_filepath,
                   crop_width: int = 1200, crop_height: int = 900):
    """Center-crop an RGB image together with its boxes and save both.

    The image is read with OpenCV, its annotations with ``read_bboxes``; both
    go through an albumentations ``CenterCrop`` configured for ``yolo`` boxes,
    and the results are written to the given output paths.
    NOTE(review): judging by the name, the crop is presumably meant to match
    the thermal camera's field of view - confirm.

    Args:
        input_rgb_filepath: Path of the source RGB image.
        input_annotation_filepath: Path of the source annotation file.
        output_rgb_filepath: Where the cropped image is written.
        output_annotation_filepath: Where the transformed labels are written.
        crop_width: Width of the center crop in pixels (default 1200).
        crop_height: Height of the center crop in pixels (default 900).

    Raises:
        FileNotFoundError: If the input image cannot be read.
        OSError: If the cropped image cannot be written.
    """
    rgb_image = cv2.imread(input_rgb_filepath)
    if rgb_image is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # rather than raising; fail here with a clear message instead of
        # deep inside the transform.
        raise FileNotFoundError(f'Could not read image: {input_rgb_filepath}')
    yolo_bboxes = read_bboxes(input_annotation_filepath)

    to_thermo_crop_transform = A.Compose([
        A.CenterCrop(width=crop_width, height=crop_height)
    ], bbox_params=A.BboxParams(format='yolo'))

    transformed = to_thermo_crop_transform(image=rgb_image, bboxes=yolo_bboxes)
    transformed_yolo_bboxes = transformed['bboxes']
    transformed_image = transformed['image']

    # cv2.imwrite reports failure through its return value; without this check
    # a label file could be written for an image that was never saved.
    if not cv2.imwrite(output_rgb_filepath, transformed_image):
        raise OSError(f'Could not write image: {output_rgb_filepath}')
    save_labels(output_annotation_filepath, transformed_yolo_bboxes)

In [None]:
# Make sure the output folders for cropped images and their labels exist.
Path(f'{RGB_PROCESSED_DIR}/images').mkdir(parents=True, exist_ok=True)
Path(f'{RGB_PROCESSED_DIR}/labels').mkdir(parents=True, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the processing
# (and the sequence of plots) reproducible between runs.
for rgb_file in sorted(os.listdir(RGB_DIR)):
    rgb_filename = Path(rgb_file).stem
    rgb_filepath = os.path.join(RGB_DIR, rgb_file)

    # The thermal image and the annotation share the RGB image's base name.
    thermo_file = f'{rgb_filename}.jpeg'
    thermo_filepath = os.path.join(THERM_DIR, thermo_file)

    annotation_file = f'{rgb_filename}.txt'
    annotation_filepath = os.path.join(RGB_ANNO_DIR, annotation_file)

    # Skip sub-directories / stray entries and images without an annotation
    # file instead of aborting the whole run on the first such entry.
    if not os.path.isfile(rgb_filepath) or not os.path.isfile(annotation_filepath):
        print(f'Skipping {rgb_file}: not a regular file or annotation missing ({annotation_filepath})')
        continue

    output_rgb_filepath = f'{RGB_PROCESSED_DIR}/images/{rgb_file}'
    output_annotation_filepath = f'{RGB_PROCESSED_DIR}/labels/{annotation_file}'

    crop_to_thermo(rgb_filepath, annotation_filepath, output_rgb_filepath, output_annotation_filepath)

    # Plot the cropped RGB image and the thermal image with the SAME cropped
    # labels, so the two views can be compared visually.
    plot_xywhn_annotated_image_from_file(output_rgb_filepath, output_annotation_filepath)
    plot_xywhn_annotated_image_from_file(thermo_filepath, output_annotation_filepath)
