In [1]:
import json
import os
import shutil
import time
from pprint import pprint

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from IPython.display import clear_output
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torchvision import transforms

%matplotlib inline

# Observe data

The data consists of the following:
- _train/images/_ - directory with train images
- _test/images/_ - directory with test images
- _iwildcam2022_train_annotations.json_ - file with train images metadata
- _iwildcam2022_test_information.json_ - file with test images metadata
- _iwildcam2022_mdv4_detections.json_ - file with detections data

In [4]:
# Root directory of the dataset, relative to the notebook's working directory.
DATA_PATH = '../../data'

In [5]:
# Print the directory layout under DATA_PATH (directories only); the exit status is discarded.
_ = os.system(f'tree -d {DATA_PATH}')

../../data
├── instance_masks
│   └── instance_masks
├── metadata
│   └── metadata
├── test
│   └── images
└── train
    └── images

8 directories


In [6]:
# Directories holding the image files.
TRAIN_IMAGES_PATH = f'{DATA_PATH}/train/images'
TEST_IMAGES_PATH = f'{DATA_PATH}/test/images'

# JSON files with per-image metadata for the train and test sets.
TRAIN_INFO_PATH = f'{DATA_PATH}/metadata/metadata/iwildcam2022_train_annotations.json'
TEST_INFO_PATH = f'{DATA_PATH}/metadata/metadata/iwildcam2022_test_information.json'

# JSON file with the detections data.
DETECTIONS_INFO_PATH = f'{DATA_PATH}/metadata/metadata/iwildcam2022_mdv4_detections.json'

## Count number of images

In [7]:
# Count the directory entries in pure Python instead of shelling out to
# `ls | wc -l`: no shell quoting issues, portable, and the number is printed
# by the same `print` call as its label. Dot-files are skipped to match what
# a plain `ls` lists.
num_train_files = sum(1 for name in os.listdir(TRAIN_IMAGES_PATH) if not name.startswith('.'))
print('Number of train images:', num_train_files)

Number of train images: 197977


In [8]:
# Count the directory entries in pure Python instead of shelling out to
# `ls | wc -l`: no shell quoting issues, portable, and the number is printed
# by the same `print` call as its label. Dot-files are skipped to match what
# a plain `ls` lists.
num_test_files = sum(1 for name in os.listdir(TEST_IMAGES_PATH) if not name.startswith('.'))
print('Number of test images:', num_test_files)

Number of test images: 60029


## Load metadata

In [9]:
# Parse the train annotations JSON into memory.
with open(TRAIN_INFO_PATH) as file:
    train_info = json.load(file)

# Per-image metadata records (a list; one record is shown below).
train_images_info = train_info['images']

print('Example of train image metadata:\n')
pprint(train_images_info[0])

Example of train image metadata:

{'datetime': '2013-06-05 05:44:19.000',
 'file_name': '8b02698a-21bc-11ea-a13a-137349068a90.jpg',
 'height': 1080,
 'id': '8b02698a-21bc-11ea-a13a-137349068a90',
 'location': 3,
 'seq_frame_num': 0,
 'seq_id': '30048d32-7d42-11eb-8fb5-0242ac1c0002',
 'seq_num_frames': 6,
 'sub_location': 0,
 'width': 1920}


In [10]:
# Parse the test information JSON into memory.
with open(TEST_INFO_PATH) as file:
    test_info = json.load(file)

# Per-image metadata records (a list; one record is shown below).
test_images_info = test_info['images']

print('Example of test image metadata:\n')
pprint(test_images_info[0])

Example of test image metadata:

{'datetime': '2013-06-09 16:01:38.000',
 'file_name': '8b31d3be-21bc-11ea-a13a-137349068a90.jpg',
 'height': 1024,
 'id': '8b31d3be-21bc-11ea-a13a-137349068a90',
 'location': 20,
 'seq_frame_num': 0,
 'seq_id': 'a91ebc18-0cd3-11eb-bed1-0242ac1c0002',
 'seq_num_frames': 10,
 'width': 1280}


In [11]:
# Parse the detections JSON into memory.
with open(DETECTIONS_INFO_PATH) as file:
    detections_info = json.load(file)

# Per-image detection records (a list; one record is shown below).
image_detections_info = detections_info['images']

print('Example of image detections data:\n')
pprint(image_detections_info[0])

Example of image detections data:

{'detections': [{'bbox': [0.534, 0.464, 0.091, 0.249],
                 'category': '1',
                 'conf': 0.999}],
 'file': 'test/87aaf7d4-21bc-11ea-a13a-137349068a90.jpg',
 'max_detection_conf': 0.999}


## Classes of objects

In [12]:
# Mapping from the category id used in detection records to its name.
detection_categories = detections_info['detection_categories']
pprint(detection_categories)

{'1': 'animal', '2': 'person', '3': 'vehicle'}


In [13]:
# Category ids as printed from `detection_categories` above (string keys, not ints).
ANIMAL_CATEGORY = '1'
PERSON_CATEGORY = '2'
VEHICLE_CATEGORY = '3'

# Data preparation

## 0. Synchronize train images info

During experiments we lost 3422 training images because of a careless command, so we need to synchronize the image metadata with the actual content of the _train/images_ directory.

In [14]:
# 197977 is the hard-coded file count printed for the train image directory above;
# the difference to the number of metadata records is the number of missing files.
num_lost_images = len(train_images_info) - 197977
print(f'Number of lost images = {num_lost_images}')

Number of lost images = 3422


In [15]:
IMAGE_INFO = dict[str, str | int]


def sync_images_info(images_info: list[IMAGE_INFO], directory: str) -> list[IMAGE_INFO]:
    result = []
    for image_info in images_info:
        file_path = f'{directory}/{image_info["file_name"]}'    
        if os.path.isfile(file_path):
            result.append(image_info)
    return result

In [16]:
# Drop metadata records whose image file is missing from the train directory.
# This rebinds `train_images_info`; re-running the cell gives the same result.
# NOTE(review): the saved output of this cell reads "Final number of training images",
# which no longer matches the message printed here — re-run the cell to refresh it.
train_images_info = sync_images_info(train_images_info, TRAIN_IMAGES_PATH)
print(f'Number of training images = {len(train_images_info)}')

Final number of training images = 197977


## 1. Reduce dataset

To make the training process feasible, we need to reduce the dataset.

### 1.1. Leave images of one size

In [17]:
from collections import defaultdict


# Maps (width, height) -> number of images with that resolution,
# counted over the train and test metadata together.
sizes = defaultdict(int)

for image_info in train_images_info:
    width, height = image_info['width'], image_info['height']
    sizes[(width, height)] += 1

for image_info in test_images_info:
    width, height = image_info['width'], image_info['height']
    sizes[(width, height)] += 1

# Bare expression: displayed as the cell output.
sizes

defaultdict(int,
            {(1920, 1080): 87237,
             (2048, 1536): 109371,
             (1280, 1024): 56578,
             (1280, 720): 3484,
             (2592, 1944): 1131,
             (2592, 2000): 40,
             (2825, 1810): 1,
             (2941, 1849): 2,
             (2833, 1873): 1,
             (2895, 1826): 2,
             (2778, 1818): 1,
             (2918, 1826): 3,
             (2794, 1795): 1,
             (2902, 1873): 1,
             (2949, 1865): 1,
             (2902, 1810): 1,
             (2778, 1857): 1,
             (2856, 1810): 1,
             (2879, 1826): 4,
             (2786, 1795): 1,
             (1795, 1222): 2,
             (1891, 1222): 1,
             (2926, 1841): 1,
             (2887, 1857): 3,
             (2933, 1795): 1,
             (2833, 1841): 2,
             (2926, 1865): 2,
             (2732, 1841): 1,
             (2833, 1849): 2,
             (2871, 1849): 1,
             (2848, 1849): 2,
             (2864, 1834): 2,
    

A large share of the images (87,237 across train and test) have a size of 1920 x 1080. Let us keep only those and exclude all other sizes from consideration.

In [18]:
def move_images(
    images_info: list[IMAGE_INFO],
    leave_size: tuple[int, int],
    from_dir: str,
    to_dir: str
) -> list[IMAGE_INFO]:
    """Keep images of ``leave_size`` and move all other image files to ``to_dir``.

    Args:
        images_info: metadata records; each is read for 'width', 'height'
            and 'file_name'.
        leave_size: ``(width, height)`` of the images to keep.
        from_dir: directory currently containing the image files.
        to_dir: directory that receives the files of every other size; it is
            created if it does not exist.

    Returns:
        A new list with the records whose size equals ``leave_size``, in their
        original order. Records of other sizes are dropped whether or not
        their file was found in ``from_dir``.
    """
    # Without an existing target directory a move would rename each file onto
    # the same path, so the files would overwrite one another.
    os.makedirs(to_dir, exist_ok=True)

    result = []
    for image_info in images_info:
        if image_info['width'] == leave_size[0] and image_info['height'] == leave_size[1]:
            result.append(image_info)
            continue
        file_name = image_info['file_name']
        file_path = f'{from_dir}/{file_name}'
        if os.path.isfile(file_path):
            # shutil.move instead of a shell `mv`: no quoting problems with
            # unusual file names, and failures raise instead of being ignored.
            # An explicit destination file path keeps `mv`'s overwrite behaviour.
            shutil.move(file_path, os.path.join(to_dir, os.path.basename(file_name)))
    return result

In [19]:
# Directories that receive the images whose size is not kept.
TRAIN_IGNORED_IMAGES_PATH = f'{DATA_PATH}/train/ignored_images'
TEST_IGNORED_IMAGES_PATH = f'{DATA_PATH}/test/ignored_images'


# exist_ok=True makes the cell re-runnable without a separate (racy) existence check.
os.makedirs(TRAIN_IGNORED_IMAGES_PATH, exist_ok=True)
os.makedirs(TEST_IGNORED_IMAGES_PATH, exist_ok=True)

In [20]:
# Keep only the 1920 x 1080 images; files of any other size are moved to the
# ignored_images directories. Both metadata lists are rebound to the filtered result.
train_images_info = move_images(train_images_info, (1920, 1080), TRAIN_IMAGES_PATH, TRAIN_IGNORED_IMAGES_PATH)
test_images_info = move_images(test_images_info, (1920, 1080), TEST_IMAGES_PATH, TEST_IGNORED_IMAGES_PATH)

In [21]:
# Number of metadata records left after the size filter.
print('Number of train images:', len(train_images_info))
print('Number of test images:', len(test_images_info))

Number of train images: 65036
Number of test images: 22201


### 1.2 Reduce dataset by some percent

The dataset is still large, so we reduce its size further.

In [22]:
# Directories that receive the images dropped by the dataset reduction.
DISCARDED_TRAIN_IMAGES_PATH = f'{DATA_PATH}/train/discarded_images'
DISCARDED_TEST_IMAGES_PATH = f'{DATA_PATH}/test/discarded_images'


# exist_ok=True makes the cell re-runnable without a separate (racy) existence check.
os.makedirs(DISCARDED_TRAIN_IMAGES_PATH, exist_ok=True)
os.makedirs(DISCARDED_TEST_IMAGES_PATH, exist_ok=True)

In [23]:
from math import floor


def reduce_dataset(
    images_info: list[IMAGE_INFO],
    from_dir: str,
    to_dir: str,
    reduced_size: float,
) -> list[IMAGE_INFO]:
    """Keep the leading ``reduced_size`` fraction of the images and move the rest to ``to_dir``.

    The selection is positional, not random: the first
    ``floor(reduced_size * len(images_info))`` records are kept.

    Args:
        images_info: metadata records; each is read for 'file_name'.
        from_dir: directory currently containing the image files.
        to_dir: directory that receives the files of the dropped records; it
            is created if it does not exist.
        reduced_size: fraction of records to keep, between 0 and 1 inclusive.

    Returns:
        A new list with the kept records, in their original order.

    Raises:
        ValueError: if ``reduced_size`` is outside ``[0, 1]``.
    """
    if not 0 <= reduced_size <= 1:
        raise ValueError(f'reduced_size must be within [0, 1], got {reduced_size}')

    num_reduced = floor(reduced_size * len(images_info))

    # Without an existing target directory a move would rename each file onto
    # the same path, so the files would overwrite one another.
    os.makedirs(to_dir, exist_ok=True)

    for image_info in images_info[num_reduced:]:
        file_name = image_info['file_name']
        file_path = f'{from_dir}/{file_name}'
        if os.path.isfile(file_path):
            # shutil.move instead of a shell `mv`: no quoting problems with
            # unusual file names, and failures raise instead of being ignored.
            shutil.move(file_path, os.path.join(to_dir, os.path.basename(file_name)))

    return images_info[:num_reduced]

In [25]:
# Fraction of the images kept by `reduce_dataset` (passed as `reduced_size`).
SIZE = 0.2

In [26]:
# Keep the first SIZE fraction of each set; the files of the remaining records
# are moved to the discarded_images directories. Both lists are rebound.
train_images_info = reduce_dataset(train_images_info, TRAIN_IMAGES_PATH, DISCARDED_TRAIN_IMAGES_PATH, SIZE)
test_images_info = reduce_dataset(test_images_info, TEST_IMAGES_PATH, DISCARDED_TEST_IMAGES_PATH, SIZE)

In [27]:
# Number of metadata records left after the reduction.
print('Number of train images:', len(train_images_info))
print('Number of test images:', len(test_images_info))

Number of train images: 13007
Number of test images: 4440


## 2. Prepare validation set

Let us move 25% of the training images into a separate validation set.

In [28]:
# Directory that receives the images selected for validation.
VALIDATION_IMAGES_PATH = f'{DATA_PATH}/validation/images'


# exist_ok=True makes the cell re-runnable without a separate (racy) existence check.
os.makedirs(VALIDATION_IMAGES_PATH, exist_ok=True)

In [29]:
import random
from math import floor


def extract_validation_set(
    images_info: list[IMAGE_INFO],
    from_directory: str,
    to_directory: str,
    size: float,
    seed: int | None = None,
) -> tuple[list[IMAGE_INFO], list[IMAGE_INFO]]:
    """Randomly split ``images_info`` into train and validation parts and move the validation files.

    Args:
        images_info: metadata records; each is read for 'file_name'.
        from_directory: directory currently containing the image files.
        to_directory: directory that receives the validation image files; it
            is created if it does not exist.
        size: fraction of the records to put into the validation part,
            between 0 and 1 inclusive;
            ``floor(len(images_info) * size)`` records are selected.
        seed: optional seed for a private random generator, making the split
            reproducible. With the default ``None`` the module-level
            ``random`` state is used, as before.

    Returns:
        ``(train_images_info, val_images_info)``; both keep the original
        record order. A record is listed in the validation part even when its
        file was not found in ``from_directory``.

    Raises:
        ValueError: if ``size`` is outside ``[0, 1]``.
    """
    if not 0 <= size <= 1:
        raise ValueError(f'size must be within [0, 1], got {size}')

    num_images = floor(len(images_info) * size)
    rng = random if seed is None else random.Random(seed)
    indices = set(rng.sample(range(len(images_info)), k=num_images))

    # Without an existing target directory a move would rename each file onto
    # the same path, so the files would overwrite one another.
    os.makedirs(to_directory, exist_ok=True)

    train_images_info = []
    val_images_info = []

    for ind, image_info in enumerate(images_info):
        if ind in indices:
            val_images_info.append(image_info)
            file_name = image_info['file_name']
            file_path = f'{from_directory}/{file_name}'
            if os.path.isfile(file_path):
                # shutil.move instead of a shell `mv`: no quoting problems with
                # unusual file names, and failures raise instead of being ignored.
                shutil.move(file_path, os.path.join(to_directory, os.path.basename(file_name)))
        else:
            train_images_info.append(image_info)

    return train_images_info, val_images_info

In [30]:
# Hold out 25% of the training records for validation and move their files.
# The selection uses random sampling, so the split differs between runs unless
# the random state is seeded beforehand.
train_images_info, val_images_info = extract_validation_set(train_images_info, TRAIN_IMAGES_PATH, VALIDATION_IMAGES_PATH, 0.25)

In [31]:
# Number of metadata records in each split.
print('Number of train images:', len(train_images_info))
print('Number of validation images:', len(val_images_info))
print('Number of test images:', len(test_images_info))

Number of train images: 9756
Number of validation images: 3251
Number of test images: 4440


## 3. Prepare labels

### 3.0 Preliminary actions

Let us convert detections metadata from list to dict representation

In [32]:
from typing import Any

IMAGE_DETECTIONS = dict[str, Any]


def convert_image_detections_info(image_detections_info: list[dict]) -> dict[str, list[IMAGE_DETECTIONS]]:
    """Index the per-image detection records by image id.

    The image id is the file name found in the record's 'file' entry, without
    its directory part and without its extension (e.g. 'test/abc.jpg' ->
    'abc').

    Args:
        image_detections_info: records with at least the keys 'file' and
            'detections'.

    Returns:
        A dict mapping each image id to that record's 'detections' value
        (stored as-is, not copied). If several records share an id, the last
        one wins.
    """
    result = dict()
    for info in image_detections_info:
        # Take the last path component and strip only the final extension, so
        # paths with zero or several directory levels are handled as well.
        base_name = info['file'].rsplit('/', 1)[-1]
        image_id = base_name.rsplit('.', 1)[0]
        result[image_id] = info['detections']
    return result

In [33]:
# Image id -> list of detection records for that image.
detections = convert_image_detections_info(image_detections_info)

### 3.1 Create files with labels

In [34]:
# Directories that receive the generated label files.
TRAIN_LABELS_PATH = f'{DATA_PATH}/train/labels'
VALIDATION_LABELS_PATH = f'{DATA_PATH}/validation/labels'
TEST_LABELS_PATH = f'{DATA_PATH}/test/labels'


# exist_ok=True makes the cell re-runnable without a separate (racy) existence check.
os.makedirs(TRAIN_LABELS_PATH, exist_ok=True)
os.makedirs(VALIDATION_LABELS_PATH, exist_ok=True)
os.makedirs(TEST_LABELS_PATH, exist_ok=True)

In [35]:
# Box values in the detections metadata are floats (see the example record above).
BBOX = list[float]


def get_bboxes(
    image_id: str,
    detections: dict[str, IMAGE_DETECTIONS],
    min_conf: float = 0.5,
) -> list[BBOX]:
    """Return the boxes of the confident animal detections of one image.

    Args:
        image_id: key into ``detections``.
        detections: mapping from image id to that image's detection records;
            each record is read for 'bbox', 'category' and 'conf'.
        min_conf: a detection is kept only when its 'conf' is strictly
            greater than this threshold.

    Returns:
        The 'bbox' values of the kept detections, unchanged and in their
        original order.

    Raises:
        KeyError: if ``image_id`` has no entry in ``detections``.
    """
    return [
        detection['bbox']
        for detection in detections[image_id]
        if detection['category'] == ANIMAL_CATEGORY and detection['conf'] > min_conf
    ]

In [36]:
def create_label_files(
    images_info: list[IMAGE_INFO],
    detections: dict[str, IMAGE_DETECTIONS],
    directory: str
):
    """Write one ``<image id>.txt`` label file per image into ``directory``.

    Each file gets one line per box returned by ``get_bboxes``: the literal
    class index ``0`` followed by the first four box values, separated by
    spaces. An image without boxes gets an empty file; an existing file of
    the same name is overwritten.

    NOTE(review): the box values are written exactly as stored in the
    detections metadata. If the consumer of these label files expects a
    different box convention (e.g. centre-based coordinates), a conversion is
    needed here — confirm against the training code.
    """
    for info in images_info:
        boxes = get_bboxes(info['id'], detections)
        label_path = f'{directory}/{info["id"]}.txt'
        with open(label_path, 'w') as label_file:
            label_file.writelines(
                f'0 {box[0]} {box[1]} {box[2]} {box[3]}\n' for box in boxes
            )

In [37]:
# Generate the label files for every split from the detections metadata.
create_label_files(train_images_info, detections, TRAIN_LABELS_PATH)
create_label_files(val_images_info, detections, VALIDATION_LABELS_PATH)
create_label_files(test_images_info, detections, TEST_LABELS_PATH)

# Result

In [38]:
# Print the final directory layout under DATA_PATH (directories only); the exit status is discarded.
_ = os.system(f'tree -d {DATA_PATH}')

../../data
├── instance_masks
│   └── instance_masks
├── metadata
│   └── metadata
├── test
│   ├── discarded_images
│   ├── ignored_images
│   ├── images
│   └── labels
├── train
│   ├── discarded_images
│   ├── ignored_images
│   ├── images
│   └── labels
└── validation
    ├── images
    └── labels

17 directories
