In [1]:
import json
import os
import shutil
import time
from pprint import pprint

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from IPython.display import clear_output
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch import optim
from torchvision import transforms

%matplotlib inline

In [5]:
# !git clone https://github.com/ultralytics/yolov5
# !pip install -qr ./yolov5/requirements.txt

In [None]:
from yolov5 import train, val, detect
from yolov5.utils import metrics

The data consists of the following:

- train - directory with train images
- test - directory with test images
- iwildcam2022_train_annotations.json - file with train images metadata
- iwildcam2022_test_information.json - file with test images metadata
- iwildcam2022_mdv4_detections.json - file with detections data


- processed_train.csv - file with train images metadata and labels (generated in the "Load data" section)
- processed_test.csv - file with test images metadata and labels (generated in the "Load data" section)

The data folder is also expected to contain:
- data.yaml - which stores the location of the image folders

Pretrained weights in `.pt` format:
- weights.pt

In [4]:
# Root folder of the raw data, relative to this notebook.
DATA = '../../data'
# Folders the raw train / test images are copied from.
TRAIN_IMAGES_LOCATION = DATA + '/train/images'
TEST_IMAGES_LOCATION = DATA + '/test/images'

# Working folder that receives the copied images and the generated label files.
DEST_DATA_LOCATION = 'dataset'

# Load data

In [5]:
def get_label(filename: str, labels: list[dict]):
    """Return the confident category-'1' boxes of one image as a JSON string.

    Args:
        filename: value compared against each record's ``'file'`` entry.
        labels: detection records; each is a dict with a ``'file'`` key and a
            ``'detections'`` list whose items carry ``'category'``, ``'conf'``
            and ``'bbox'``.

    Returns:
        ``json.dumps`` of the ``bbox`` values whose ``category`` is ``'1'`` and
        whose ``conf`` is above 0.5, taken from the first record whose
        ``'file'`` equals ``filename``. ``None`` when no record matches.
    """
    # The fallback has to be None (not [None]) so the "no match" guard can fire.
    row = next((record for record in labels if record['file'] == filename), None)
    if row is None:
        return None
    # A record with missing or null detections yields an empty box list.
    detections = row.get('detections') or []
    bboxes = [
        detection['bbox']
        for detection in detections
        if detection['category'] == '1' and detection['conf'] > 0.5
    ]
    return json.dumps(bboxes)

To build a PyTorch dataset we extract the file names from the metadata JSON file. Extracting the labels was harder: for every image record we have to find the record with the same file name in a second JSON file (essentially a SQL `JOIN ... ON`), take the fields we need, filter the detections by 'category' and confidence, and serialize the result to a JSON string so it is easy to save and read back. We save the result to a CSV file, so we can always reload it instead of executing everything again. The dataset takes the image paths from our CSV, reads each image, resizes it, converts it to RGB, and converts it to a tensor. For now the dataset consists of individual images (that is why we use the results from a pretrained model, which are given per image, instead of `train_sequence_counts`); later it may change to sequences of images.

In [6]:
def get_data(images_json_path: str, labels_json_path: str, train: bool = True):
    """Build the image metadata table and attach a ``box`` column to it.

    Args:
        images_json_path: JSON file whose ``'images'`` entry lists the image
            metadata records.
        labels_json_path: JSON file whose ``'images'`` entry lists the
            detection records (matched through their ``'file'`` value).
        train: selects the ``'train/'`` or ``'test/'`` prefix that is put in
            front of ``file_name`` before matching.

    Returns:
        A DataFrame with one row per metadata record plus a ``box`` column
        holding the result of ``get_label`` for that image (``None`` when the
        image has no detection record).
    """
    with open(labels_json_path) as f:
        labels = json.load(f)['images']

    with open(images_json_path) as f:
        image_records = json.load(f)['images']

    # Index the detection records once so every image is a dict lookup rather
    # than a scan over all records. setdefault keeps the first record per file,
    # which is the record a first-match scan would have picked.
    labels_by_file = {}
    for record in labels:
        labels_by_file.setdefault(record['file'], record)

    # json_normalize is reached through pandas; the bare name was never imported.
    images = pd.json_normalize(data=image_records,
                   meta=['seq_num_frames', 'location', 'datetime', 'id', 'seq_id', 'width', 'height', 'file_name', 'sub_location', 'seq_frame_num'])

    prefix = 'train/' if train else 'test/'

    def lookup_boxes(row):
        record = labels_by_file.get(prefix + row['file_name'])
        if record is None:
            return None
        # Hand get_label only the matching record; it does the filtering.
        return get_label(filename=record['file'], labels=[record])

    images['box'] = images.apply(lookup_boxes, axis=1)
    return images

In [4]:
# Join the train metadata with the detections and cache the result as a
# tab-separated csv so later cells can reload it instead of recomputing.
train_images_json = DATA + '/metadata/metadata/iwildcam2022_train_annotations.json'
detections_json = DATA + '/metadata/metadata/iwildcam2022_mdv4_detections.json'

df = get_data(train_images_json, detections_json, train=True)
df.to_csv('processed_train.csv', sep='\t')

In [14]:
# Same join for the test metadata; file names are matched with the 'test/' prefix.
test_images_json = DATA + '/metadata/metadata/iwildcam2022_test_information.json'
detections_json = DATA + '/metadata/metadata/iwildcam2022_mdv4_detections.json'

df = get_data(test_images_json, detections_json, train=False)
df.to_csv('processed_test.csv', sep='\t')

In [7]:
# Reload the cached train table; the first csv column holds the saved index.
df = pd.read_csv(
    'processed_train.csv',
    sep='\t',
    index_col=0,
)
df.head(n=2)

Unnamed: 0,seq_num_frames,location,datetime,id,seq_id,...,height,file_name,sub_location,seq_frame_num,box
0,6,3,2013-06-05 05:44:19.000,8b02698a-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8b02698a-21bc-11ea-a13a-137349068a90.jpg,0.0,0,"[[0, 0.091, 0.983, 0.876]]"
1,6,3,2013-06-05 05:44:20.000,8e5b81de-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8e5b81de-21bc-11ea-a13a-137349068a90.jpg,0.0,1,"[[0.193, 0.261, 0.803, 0.714]]"


In [8]:
# Reload the cached test table; the first csv column holds the saved index.
df = pd.read_csv(
    'processed_test.csv',
    sep='\t',
    index_col=0,
)
df.head(n=2)

Unnamed: 0,height,id,seq_id,location,width,...,file_name,seq_frame_num,seq_num_frames,sub_location,box
0,1024,8b31d3be-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8b31d3be-21bc-11ea-a13a-137349068a90.jpg,0,10,,"[[0, 0.55, 0.558, 0.438]]"
1,1024,8cf202be-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8cf202be-21bc-11ea-a13a-137349068a90.jpg,1,10,,"[[0, 0.557, 0.684, 0.431]]"


# Preprocess dataset

### Helper functions

In [9]:
def convert_str_to_list(str_list: str):
    """Convert a serialized list of boxes (as stored in the csv file) to nested lists.

    Args:
        str_list: JSON text of a list of lists of numbers, e.g.
            ``"[[0.1, 0.2, 0.3, 0.4]]"``. Anything that is not a string
            (``None``, or the NaN pandas returns for an empty cell) and blank
            strings are treated as "no boxes".

    Returns:
        A list with one list of floats per non-empty inner list; ``[]`` when
        there are none.

    Raises:
        ValueError: if ``str_list`` is a non-blank string that is not valid JSON.
    """
    # Empty csv cells are read back as NaN (a float), not as a string.
    if not isinstance(str_list, str) or not str_list.strip():
        return []

    # The column was written with json.dumps, so json.loads is its exact inverse.
    parsed = json.loads(str_list)
    if not isinstance(parsed, list):
        return []

    # Keep only non-empty inner lists; every number becomes a float.
    return [
        [float(value) for value in box]
        for box in parsed
        if isinstance(box, list) and box
    ]

#example
convert_str_to_list("[[1, 0.1, 0.1, 0.3, 0.4]]")

[[1.0, 0.1, 0.1, 0.3, 0.4]]

In [10]:
def load_labels(path, image_name, bbox_list):
    """Write the boxes of one image to ``<image stem>.txt`` inside ``path``.

    Args:
        path: folder the label file is written to (with or without a trailing
            separator).
        image_name: image file name; its last extension is replaced by ``.txt``.
        bbox_list: iterable of boxes, each an iterable of numbers.

    Each box becomes one line ``"1 v1 v2 ..."``: the class id ``1`` followed by
    the box values separated by single spaces. An empty ``bbox_list`` produces
    an empty file.
    """
    # Strip only the last extension so dotted names keep their full stem.
    label_file_name = os.path.splitext(image_name)[0] + '.txt'
    # join() works whether or not ``path`` ends with a separator.
    with open(os.path.join(path, label_file_name), 'w') as f:
        # save in format: "class x y w h"
        for bbox in bbox_list:
            f.write(" ".join(["1", *map(str, bbox)]))
            f.write("\n")

In [11]:
# take labels from
def process_label(box_string, file_name, dest_folder):
    """Parse the stored boxes of one image and write its YOLO label file.

    Args:
        box_string: serialized boxes from the ``box`` csv column. Each box is
            ``[x_min, y_min, width, height]`` (top-left corner plus size,
            normalized to the image), as produced by the detections file.
        file_name: image file name the label file is named after.
        dest_folder: folder the label file is written to.

    Raises:
        ValueError: if a box does not have exactly four values.
    """
    boxes = convert_str_to_list(box_string)
    # YOLO label rows hold the box *centre*, while the stored boxes hold the
    # top-left corner, so shift each corner by half the box size.
    yolo_boxes = [
        [round(x_min + width / 2, 6), round(y_min + height / 2, 6), width, height]
        for x_min, y_min, width, height in boxes
    ]
    load_labels(dest_folder, file_name, yolo_boxes)

In [12]:
def load_image(f_name, init_folder, dest_folder):
    """
    Copies image with name f_name from init_folder to dest_folder.

    Args:
        f_name: file name of the image.
        init_folder: folder the image is read from.
        dest_folder: existing folder the image is copied into.

    Returns:
        True if the image file was found and copied, False if it does not
        exist in init_folder.

    Raises:
        OSError: if the file exists but cannot be copied (for example when
            dest_folder is missing).
    """
    source = os.path.join(init_folder, f_name)
    if not os.path.isfile(source):
        return False
    # Copy in-process instead of through a shell `cp`, so names with spaces or
    # shell metacharacters work and a failed copy is reported, not ignored.
    shutil.copy(source, os.path.join(dest_folder, f_name))
    return True

In [13]:
#statistic of common image sizes
df = pd.read_csv('processed_train.csv', sep='\t', index_col=0)

stat_h_w = {
    "width": [],
    "height": [],
    "counts": []
}

hw = df[['height','width']]
for h in np.unique(df['height']):
    for w in np.unique(df['width']):
        stat_h_w["width"].append(w)
        stat_h_w["height"].append(h)
        stat_h_w["counts"].append(((hw['height']==h) & (hw['width']==w)).sum())

stat_h_w = pd.DataFrame(stat_h_w)    
stat_h_w.sort_values(by=['counts'],ascending=False).head(4)

Unnamed: 0,width,height,counts
279,2048,1536,85877
83,1920,1080,68458
39,1280,1024,42987
0,1280,720,2882


In [14]:
# Create the working image folders. exist_ok makes the cell safe to re-run and
# avoids the separate check-then-create step.
for split in ("train", "val", "test"):
    os.makedirs(f"{DEST_DATA_LOCATION}/images/{split}", exist_ok=True)

In [15]:
# Create the working label folders (label files are only written for train and
# val). exist_ok makes the cell safe to re-run.
for split in ("train", "val"):
    os.makedirs(f"{DEST_DATA_LOCATION}/labels/{split}", exist_ok=True)

In [17]:
"""
Dataset_Descriptor stores data about images:

path to datasets:
DATASET_LOCATION
DATASET_INIT_LOCATION
DATASET_DEST_LOCATION

path to each image
self.image_dir
labels for each image
self.labels
number of samples
self.sample_size


"""
class Dataset_Descriptor:
    """Copy a sample of images into the working folder and keep track of it.

    Paths to the datasets (class attributes):
        DATASET_LOCATION       - root of the working dataset folder
        DATASET_INIT_LOCATION  - folder the images are copied from, per split
        DATASET_DEST_LOCATION  - folders images / labels are written to, per split

    Per-instance data (parallel lists, one entry per recorded image):
        self.image_dir   - path to each image in the working folder
        self.labels      - numpy array with the boxes of each image
        self.seq_id      - 'seq_id' column value of each image
        self.image_name  - 'id' column value of each image

    Other instance attributes:
        self.l_type      - split name, one of LOAD_TYPES
        self.image_size  - (height, width) the metadata rows are filtered by
        self.sample_size - number of samples requested
        self.pos         - row position following the last row examined
    """
    DATASET_LOCATION = DEST_DATA_LOCATION
    LOAD_TYPES = ["train","val","test"]
    DATASET_INIT_LOCATION = {
        "train": TRAIN_IMAGES_LOCATION,
        "val": TRAIN_IMAGES_LOCATION,
        "test": TEST_IMAGES_LOCATION
    }
    DATASET_DEST_LOCATION = {
        "images": {
            "train": f"{DEST_DATA_LOCATION}/images/train",
            "val": f"{DEST_DATA_LOCATION}/images/val",
            "test": f"{DEST_DATA_LOCATION}/images/test"
        },
        "labels": {
            "train": f"{DEST_DATA_LOCATION}/labels/train/",
            "val": f"{DEST_DATA_LOCATION}/labels/val/",
            "test": f"{DEST_DATA_LOCATION}/labels/test/"
        }
    }

    def __init__(self, l_type, init_pos, sample_size, height=1080, width=1920):
        """Select rows of the requested image size and copy ``sample_size`` images.

        Args:
            l_type: split name, one of ``LOAD_TYPES``.
            init_pos: row position (in the size-filtered table) to start from.
            sample_size: number of images to copy.
            height, width: image size the metadata rows are filtered by.

        Raises:
            ValueError: if ``l_type`` is not one of ``LOAD_TYPES``.
        """
        if l_type not in Dataset_Descriptor.LOAD_TYPES:
            raise ValueError(
                f"l_type must be one of {Dataset_Descriptor.LOAD_TYPES}, got {l_type!r}"
            )
        # dataset_type [train,test,val]
        self.l_type = l_type
        # parallel lists, one entry per recorded image
        self.image_dir = []
        self.labels = []
        self.seq_id = []
        self.image_name = []

        self.image_size = (height, width)
        self.pos = init_pos
        self.sample_size = sample_size

        # select images with given size
        # load images to working directory and store files locations
        df = self._read_metadata()
        self.pos = self.search_images(df=df, l_type=self.l_type, init_pos=self.pos, set_size=self.sample_size)

    def _read_metadata(self):
        """Read the processed csv of this split, keeping rows of ``self.image_size``."""
        # train and val are both sampled from the train table
        csv_path = 'processed_train.csv' if self.l_type in ["train","val"] else 'processed_test.csv'
        df = pd.read_csv(csv_path, sep='\t', index_col=0)
        height, width = self.image_size
        df = df[(df["height"]==height) & (df["width"]==width)]
        return df.reset_index(drop=True)

    def _remember(self, image_path, row):
        """Append one image to the parallel lists and return its box string."""
        box_string = row['box']
        # An empty csv cell is read back as NaN; treat it as "no boxes".
        if not isinstance(box_string, str):
            box_string = "[]"
        self.image_dir.append(image_path)
        self.labels.append(np.array(convert_str_to_list(box_string)))
        self.seq_id.append(row['seq_id'])
        self.image_name.append(row['id'])
        return box_string

    def load_from_working_dir(self):
        """Rebuild the parallel lists from images already in the working folder.

        NOTE(review): the previous body referenced names that are not defined
        in this scope (``df``, ``init_pos``) and could not run. This version
        lists the working image folder of this split and records every file
        that has a row in the metadata table; confirm this is the intended use.
        """
        df = self._read_metadata()
        rows_by_file = df.drop_duplicates('file_name').set_index('file_name')
        images_path = Dataset_Descriptor.DATASET_DEST_LOCATION['images'][self.l_type]

        # Start from empty lists so repeated calls do not duplicate entries.
        self.image_dir, self.labels, self.seq_id, self.image_name = [], [], [], []
        for img in sorted(os.listdir(images_path)):
            if img in rows_by_file.index:
                self._remember(f"{images_path}/{img}", rows_by_file.loc[img])

    def search_images(self, df, l_type, init_pos, set_size):
        """Copy the next ``set_size`` available images, starting at ``init_pos``.

        Rows are visited by position. A row only counts towards ``set_size``
        when its image file exists in the source folder and was copied. For
        the train and val splits a label file is written for every copied
        image.

        Returns:
            The row position following the last row examined. The walk stops
            early (with a printed message) when the table runs out of rows.
        """
        #get path to init dir
        #get path to working dir: images,labels
        init_path = Dataset_Descriptor.DATASET_INIT_LOCATION[l_type]
        dest_path_i = Dataset_Descriptor.DATASET_DEST_LOCATION["images"][l_type]
        dest_path_l = Dataset_Descriptor.DATASET_DEST_LOCATION["labels"][l_type]

        n_rows = len(df)
        #get first set_size images; never walk past the end of the table
        while set_size > 0 and init_pos < n_rows:
            row = df.iloc[init_pos]
            file_name = row['file_name']
            #load images
            if load_image(f_name=file_name, init_folder=init_path, dest_folder=dest_path_i):
                box_string = self._remember(f"{dest_path_i}/{file_name}", row)

                if l_type in ["train","val"]:
                    process_label(box_string=box_string,
                                  file_name=file_name,
                                  dest_folder=dest_path_l)
                set_size -= 1
            init_pos += 1

        if set_size > 0:
            print(f"Dataset_Descriptor[{l_type}]: table exhausted, {set_size} requested images were not found")
        return init_pos

In [18]:
def clear_work_dir():
    """Empty every existing working image / label folder, keeping the folders.

    Walks all folders listed in ``Dataset_Descriptor.DATASET_DEST_LOCATION``
    and skips those that do not exist. Everything inside a folder is removed,
    hidden files included. Removal happens in-process rather than through a
    shell ``rm``, so an empty folder is not an error.
    """
    for split_folders in Dataset_Descriptor.DATASET_DEST_LOCATION.values():
        for folder in split_folders.values():
            if not os.path.isdir(folder):
                continue
            # Snapshot the entries first so deleting does not disturb the scan.
            with os.scandir(folder) as scan:
                entries = list(scan)
            for entry in entries:
                if entry.is_dir(follow_symlinks=False):
                    shutil.rmtree(entry.path)
                else:
                    os.remove(entry.path)

In [20]:
# Train sample: taken from the start of the size-filtered train table.
data_descriptor_train = Dataset_Descriptor(
    l_type="train",
    init_pos=0,
    sample_size=7000,
)
# Validation sample: continues where the train sample stopped, so the two do not overlap.
data_descriptor_val = Dataset_Descriptor(
    l_type="val",
    init_pos=data_descriptor_train.pos,
    sample_size=1000,
)
# Test sample: taken from the start of the size-filtered test table.
data_descriptor_test = Dataset_Descriptor(
    l_type="test",
    init_pos=0,
    sample_size=2000,
)

# Model

## Train

In [3]:
data_path = 'data.yaml'

# Use the first GPU when one is available and fall back to the CPU otherwise,
# so the cell also runs on a machine without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# NOTE(review): `tensorboard` looks like an extra option that is only passed
# through to the run options; confirm that train.run actually uses it.
train.run(
    data=data_path,
    epochs=1,
    batch_size=15,
    single_cls=True,
    optimizer="SGD",
    noplots=False,
    tensorboard=True,
    device=device
)

[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=, data=data.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=15, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=cuda:0, multi_scale=False, single_cls=True, optimizer=SGD, sync_bn=False, workers=8, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, tensorboard=True
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-8-g350e8eb Python-3.10.6 torch-1.13.0+cu117 CUDA:0 (NVIDIA GeForce GTX 1050, 4040MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj

Namespace(weights='yolov5/yolov5s.pt', cfg='', data='data.yaml', hyp={'lr0': 0.01, 'lrf': 0.01, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1, 'box': 0.05, 'cls': 0.5, 'cls_pw': 1.0, 'obj': 1.0, 'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 4.0, 'fl_gamma': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0, 'flipud': 0.0, 'fliplr': 0.5, 'mosaic': 1.0, 'mixup': 0.0, 'copy_paste': 0.0}, epochs=1, batch_size=15, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket='', cache=None, image_weights=False, device='cuda:0', multi_scale=False, single_cls=True, optimizer='SGD', sync_bn=False, workers=8, project='yolov5/runs/train', name='exp', exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_datase

After that, the model is tuned with a script that tunes the learning rate, optimizer, and momentum.