In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import json
from pandas.io.json import json_normalize

import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [2]:
# Work from the data directory; the file paths used by the later cells are relative to it.
# NOTE(review): the path is relative, so re-running this cell in the same kernel
# would try to enter data/data.
os.chdir('data')
!ls

iwildcam2022_mdv4_detections.json    processed_train.csv
iwildcam2022_test_information.json   train_sequence_counts.csv
iwildcam2022_train_annotations.json


In [11]:
def get_label(filename: str, labels: list[dict]):
    """Return the confident category-'1' bounding boxes of one image as a JSON string.

    Args:
        filename: Value to match against each record's ``'file'`` field.
        labels: Detection records; each is read for ``'file'`` and ``'detections'``,
            and each detection for ``'category'``, ``'conf'`` and ``'bbox'``.

    Returns:
        A JSON-encoded list of ``'bbox'`` values (``'[]'`` when nothing passes the
        filter), or ``None`` when no record in ``labels`` matches ``filename``.
    """
    # The fallback must be None (not [None]) so the guard below can actually fire;
    # otherwise a missing file ends in a TypeError on row['detections'].
    row = next((x for x in labels if x['file'] == filename), None)
    if row is None:
        return None
    # Keep only category '1' detections (presumably "animal" in the detector's
    # category map -- confirm) whose confidence is above 0.5.
    bboxes = [
        det['bbox']
        for det in row['detections']
        if det['category'] == '1' and det['conf'] > 0.5
    ]
    return json.dumps(bboxes)

To build a PyTorch dataset, we extract the filenames from the annotations JSON file. Extracting the labels was harder: for every image record we have to find the record with the same filename in a second JSON file (essentially a SQL `JOIN ... ON`), take the fields we need, filter the detections by `category` and confidence, and convert the result to a JSON string so that it is easy to save and read back. We save the result to a CSV file so that we can reload it instead of recomputing everything. The dataset takes the image paths from that CSV, reads each image, converts it to RGB, resizes it, and converts it to a tensor. For now the dataset consists of individual images (which is why we use the results from a pretrained model, as they are given per image, rather than `train_sequence_counts`); later it may change to sequences of images.

In [12]:
def get_data(images_json_path: str, labels_json_path: str):
    """Load the image records and attach each image's detection boxes.

    Args:
        images_json_path: JSON file whose ``'images'`` entry lists the image
            records (one row per record in the result).
        labels_json_path: JSON file whose ``'images'`` entry lists the detection
            records consumed by ``get_label``.

    Returns:
        A DataFrame of the image records with an extra ``'box'`` column holding
        the value ``get_label`` returns for ``'train/' + file_name``.
    """
    with open(labels_json_path) as f:
        detection_records = json.load(f)['images']

    with open(images_json_path) as f:
        annotations = json.load(f)

    # pd.json_normalize is the supported spelling; importing json_normalize from
    # pandas.io.json is deprecated and emits a FutureWarning.
    images = pd.json_normalize(data=annotations['images'],
                   meta=['seq_num_frames', 'location', 'datetime', 'id', 'seq_id', 'width', 'height', 'file_name', 'sub_location', 'seq_frame_num'])

    # Index the detection records by file once, so the join is linear instead of
    # rescanning the whole list for every image. setdefault keeps the FIRST
    # record per file, matching the first-match semantics of get_label.
    first_by_file = {}
    for record in detection_records:
        first_by_file.setdefault(record['file'], record)

    def _box_for(file_name):
        key = 'train/' + file_name
        record = first_by_file.get(key)
        # Hand get_label only the candidate record (or nothing) to search.
        return get_label(key, [] if record is None else [record])

    images['box'] = images['file_name'].map(_box_for)
    return images



In [13]:
# Join the train image records with the detector output; the result carries a
# JSON-encoded 'box' column per image.
df = get_data('iwildcam2022_train_annotations.json', 'iwildcam2022_mdv4_detections.json')

  images = json_normalize(data=d['images'],


In [14]:
# Display the joined frame.
df

Unnamed: 0,seq_num_frames,location,datetime,id,seq_id,width,height,file_name,sub_location,seq_frame_num,box
0,6,3,2013-06-05 05:44:19.000,8b02698a-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8b02698a-21bc-11ea-a13a-137349068a90.jpg,0.0,0,"[[0, 0.091, 0.983, 0.876]]"
1,6,3,2013-06-05 05:44:20.000,8e5b81de-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8e5b81de-21bc-11ea-a13a-137349068a90.jpg,0.0,1,"[[0.193, 0.261, 0.803, 0.714]]"
2,6,3,2013-06-05 05:44:21.000,8c6be0e4-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8c6be0e4-21bc-11ea-a13a-137349068a90.jpg,0.0,2,"[[0.717, 0.412, 0.282, 0.558]]"
3,6,3,2013-06-05 05:44:22.000,8fdf7998-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8fdf7998-21bc-11ea-a13a-137349068a90.jpg,0.0,3,"[[0.772, 0.474, 0.226, 0.504]]"
4,6,3,2013-06-05 05:44:23.000,96093c50-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,96093c50-21bc-11ea-a13a-137349068a90.jpg,0.0,4,"[[0.862, 0.539, 0.136, 0.43]]"
...,...,...,...,...,...,...,...,...,...,...,...
201394,6,165,2013-06-05 13:08:12.000,91421eee-21bc-11ea-a13a-137349068a90,974a375e-21bc-11ea-a13a-137349068a90,2048,1536,91421eee-21bc-11ea-a13a-137349068a90.jpg,,1,[]
201395,6,165,2013-06-05 13:08:19.000,91140194-21bc-11ea-a13a-137349068a90,974a375e-21bc-11ea-a13a-137349068a90,2048,1536,91140194-21bc-11ea-a13a-137349068a90.jpg,,2,[]
201396,6,165,2013-06-05 13:08:42.000,8f1f5f46-21bc-11ea-a13a-137349068a90,974a375e-21bc-11ea-a13a-137349068a90,2048,1536,8f1f5f46-21bc-11ea-a13a-137349068a90.jpg,,3,[]
201397,6,165,2013-06-05 13:08:48.000,974a365a-21bc-11ea-a13a-137349068a90,974a375e-21bc-11ea-a13a-137349068a90,2048,1536,974a365a-21bc-11ea-a13a-137349068a90.jpg,,4,[]


In [16]:
# Cache the joined frame as a tab-separated file so it can be reloaded without recomputing.
df.to_csv('processed_train.csv', sep='\t')

In [3]:
# Reload the cached frame; the first column of the file is used as the index.
df = pd.read_csv('processed_train.csv', sep='\t', index_col=0)
df.head()

Unnamed: 0,seq_num_frames,location,datetime,id,seq_id,width,height,file_name,sub_location,seq_frame_num,box
0,6,3,2013-06-05 05:44:19.000,8b02698a-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8b02698a-21bc-11ea-a13a-137349068a90.jpg,0.0,0,"[[0, 0.091, 0.983, 0.876]]"
1,6,3,2013-06-05 05:44:20.000,8e5b81de-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8e5b81de-21bc-11ea-a13a-137349068a90.jpg,0.0,1,"[[0.193, 0.261, 0.803, 0.714]]"
2,6,3,2013-06-05 05:44:21.000,8c6be0e4-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8c6be0e4-21bc-11ea-a13a-137349068a90.jpg,0.0,2,"[[0.717, 0.412, 0.282, 0.558]]"
3,6,3,2013-06-05 05:44:22.000,8fdf7998-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,8fdf7998-21bc-11ea-a13a-137349068a90.jpg,0.0,3,"[[0.772, 0.474, 0.226, 0.504]]"
4,6,3,2013-06-05 05:44:23.000,96093c50-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,1920,1080,96093c50-21bc-11ea-a13a-137349068a90.jpg,0.0,4,"[[0.862, 0.539, 0.136, 0.43]]"


0

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset yielding ``(image, boxes)`` pairs from the processed frame.

    Args:
        df: Frame with a ``'file_name'`` column (image file names) and a
            ``'box'`` column (JSON-encoded list of boxes per image).
        transforms: Optional callable applied to the resized RGB PIL image.
        image_dir: Directory the file names are relative to.
        image_size: Target size handed to ``PIL.Image.Image.resize``, i.e.
            ``(width, height)``.
    """

    def __init__(self, df, transforms=None, image_dir='train', image_size=(1920, 1080)):
        self.labels = df['box']
        self.images = df['file_name']
        self.transform = transforms
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _parse_label(raw):
        """Decode one ``'box'`` cell; a missing value (None/NaN) means no boxes."""
        # A None label becomes NaN after the CSV round-trip; NaN is the only
        # float that is not equal to itself.
        if raw is None or (isinstance(raw, float) and raw != raw):
            return []
        return json.loads(raw)

    def __getitem__(self, idx):
        """Return the (optionally transformed) image at ``idx`` and its box list."""
        path = os.path.join(self.image_dir, self.images.iloc[idx])
        # The context manager releases the file handle as soon as the pixel data
        # has been copied into the converted image.
        with Image.open(path) as source:
            image = source.convert('RGB').resize(self.image_size)
        label = self._parse_label(self.labels.iloc[idx])
        if self.transform:
            image = self.transform(image)
        return image, label

BATCH_SIZE = 70
# NOTE(review): IMAGE_SIZE is not referenced by any of the cells shown here.
IMAGE_SIZE = 112

# Convert the PIL image to a tensor, then normalise each channel with the given
# per-channel mean / std (these look like the usual ImageNet statistics -- confirm).
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


def collate_images_and_boxes(batch):
    """Stack the image tensors and keep the per-image box lists as a plain list.

    The default collate function cannot batch box lists of different lengths
    (and would transpose equal-length ones), so the labels are passed through
    untouched: the result is ``(images_tensor, [boxes_0, boxes_1, ...])``.
    """
    images, boxes = zip(*batch)
    return torch.stack(images), list(boxes)


train_data = CustomDataset(df, transforms=train_transform)
train_loader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_images_and_boxes,
)