In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import json
from pandas.io.json import json_normalize

import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

%matplotlib inline
from cv2 import cv2

from IPython.display import clear_output
import time

In [2]:
from torch import optim

In [3]:
#!git clone https://github.com/ultralytics/yolov5
#!pip install -qr ./yolov5/requirements.txt

In [4]:
from yolov5 import train, val, detect
from yolov5.utils import metrics

In [5]:
if False:
    os.chdir('data')
    data_location = 'data'
else:
    os.chdir('/media/danil-pass123/Новый том/DataSet')
    data_location = '/media/danil-pass123/Новый том/DataSet'
!ls

archives			     runs
data_my.yaml			     runs_test
dataset				     runs_train
data.yaml			     seq_dataset
gps_locations.json		     test
iwildcam2022_mdv4_detections.json    train
iwildcam2022_test_information.json   train1
iwildcam2022_train_annotations.json  train_sequence_counts.csv
processed_test.csv		     yolov5s.pt
processed_train.csv


# Load data

In [10]:
#def get_label(filename: str, labels: list[dict]):
def get_label(filename, labels):
    row = next((x for x in labels if filename == x['file']), [None])
    if row is None:
        return row
    detections = row['detections']
    filtered_detections = list(filter(lambda x: x['category'] == '1' and x['conf'] > 0.5, detections))
    bboxes = list(map(lambda x: x['bbox'], filtered_detections))
    return json.dumps(bboxes)

To make a pytorch dataset we extract filenames from json file. Extracting label was a harder, because we need to extract one json and for every record we need to take filename and match it with record's filename from another json(basically it is similar to SQL `JOIN ON`), taking needed fields, filter by 'category' and confidence, and converting it to json string, for easy reading and saving. We save a progress in csv file, so we can always read it, instead of executing everything again. Dataset consist of taking image paths from our csv, reading it, resizing it, converting to rgb, and converting it to tensors. Right now dataset consist of images(that's why we use results from a pretrained model, as they are written for images, instead of `train_sequence_counts`), later it may change to sequences of images.

In [11]:
#def get_data(images_json_path: str, labels_json_path: str, train: bool = True):
def get_data(images_json_path, labels_json_path, train = True):
    d = None
    with open(labels_json_path) as f:
        d = json.load(f)
    labels = d['images']

    with open(images_json_path) as f:
        d = json.load(f)
    images = json_normalize(data=d['images'],
                   meta=['seq_num_frames', 'location', 'datetime', 'id', 'seq_id', 'width', 'height', 'file_name', 'sub_location', 'seq_frame_num'])
    # annotations_df = json_normalize(data=d['annotations'],
    #                              meta=['id', 'image_id', 'category_id'])
    # categories_df = json_normalize(data=d['categories'],
    #                             meta=['id', 'name'])

    # return get_label('train/' + images.iloc[0]['file_name'], labels)
    get_path = lambda x: get_label(filename=(('train/' if train else 'test/') + x['file_name']), labels=labels)
    images['box'] = images.apply(get_path, axis=1)
    return images

In [12]:
#df = get_data('iwildcam2022_train_annotations.json', 'iwildcam2022_mdv4_detections.json', train=True)
#df = get_data('iwildcam2022_test_information.json', 'iwildcam2022_mdv4_detections.json', train=False)

In [13]:
#df.head(5)

In [14]:
#df.to_csv('processed_train.csv', sep='\t')
#df.to_csv('processed_test.csv', sep='\t')

In [15]:
df = pd.read_csv('processed_train.csv', sep='\t', index_col=0)
df.head(5)

Unnamed: 0,seq_num_frames,location,datetime,id,seq_id,...,height,file_name,sub_location,seq_frame_num,box
0,6,3,2013-06-05 05:44:19.000,8b02698a-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8b02698a-21bc-11ea-a13a-137349068a90.jpg,0.0,0,"[[0, 0.091, 0.983, 0.876]]"
1,6,3,2013-06-05 05:44:20.000,8e5b81de-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8e5b81de-21bc-11ea-a13a-137349068a90.jpg,0.0,1,"[[0.193, 0.261, 0.803, 0.714]]"
2,6,3,2013-06-05 05:44:21.000,8c6be0e4-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8c6be0e4-21bc-11ea-a13a-137349068a90.jpg,0.0,2,"[[0.717, 0.412, 0.282, 0.558]]"
3,6,3,2013-06-05 05:44:22.000,8fdf7998-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,8fdf7998-21bc-11ea-a13a-137349068a90.jpg,0.0,3,"[[0.772, 0.474, 0.226, 0.504]]"
4,6,3,2013-06-05 05:44:23.000,96093c50-21bc-11ea-a13a-137349068a90,30048d32-7d42-11eb-8fb5-0242ac1c0002,...,1080,96093c50-21bc-11ea-a13a-137349068a90.jpg,0.0,4,"[[0.862, 0.539, 0.136, 0.43]]"


In [16]:
df = pd.read_csv('processed_test.csv', sep='\t', index_col=0)
df.head(5)

Unnamed: 0,height,id,seq_id,location,width,...,file_name,seq_frame_num,seq_num_frames,sub_location,box
0,1024,8b31d3be-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8b31d3be-21bc-11ea-a13a-137349068a90.jpg,0,10,,"[[0, 0.55, 0.558, 0.438]]"
1,1024,8cf202be-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8cf202be-21bc-11ea-a13a-137349068a90.jpg,1,10,,"[[0, 0.557, 0.684, 0.431]]"
2,1024,8a87e62e-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8a87e62e-21bc-11ea-a13a-137349068a90.jpg,2,10,,"[[0, 0.418, 0.712, 0.581]]"
3,1024,8e6994f4-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,8e6994f4-21bc-11ea-a13a-137349068a90.jpg,3,10,,"[[0, 0.402, 0.673, 0.584]]"
4,1024,948b29e2-21bc-11ea-a13a-137349068a90,a91ebc18-0cd3-11eb-bed1-0242ac1c0002,20,1280,...,948b29e2-21bc-11ea-a13a-137349068a90.jpg,4,10,,"[[0, 0.547, 0.693, 0.439]]"


In [17]:
#def parse_bbox_tuple(labels: tuple[str]):
def parse_bbox_tuple(labels):
    return [json.loads(l) for l in labels]

class CustomDataset(Dataset):
    def __init__(self, df, train: bool = True):
        self.labels = df['box']
        self.images = df['file_name']
        self.train = train

    def __len__(self):
        return self.images.shape[0]

    def __getitem__(self, idx):
        image = ('train/' if self.train else 'test/') + self.images.iloc[idx]
        label = self.labels.iloc[idx]
        return image, label

BATCH_SIZE = 10

# full_hd_df = df[df["width"]==1920]
hd_df = df[(df["height"]==720) & (df["width"]==1280)]

#train_data = CustomDataset(hd_df, train=True)
#train_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

test_data = CustomDataset(hd_df, train=False)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

# Preprocess dataset

In [6]:
def convert_str_to_list(str_list):
    res_list = []
    i = 1
    while i < len(str_list):
        start = 0
        if str_list[i]=='[':
            start = i+1
            while str_list[i]!=']':
                i+=1
            end = i

            if start!=end:
                res_list.append(list( 
                                map(float,
                                str_list[start:end].split(', ')) 
                            ))
        i+=1
    if len(res_list)==0:
        return []
    
    return res_list

#convert_str_to_list(df['box'][100])

In [7]:
def load_labels(path,image_name,bbox_list):
    label_file_name = image_name.split('.')[0] + '.txt'
    with open(path+label_file_name,'w') as f:
        for l in bbox_list:
            line = "1"

            for i in l:
                line = line+" "+ str(i)
            f.write(line)
            f.write("\n")

In [8]:
def process_label(box_string,file_name,dest_folder):
    bbox = convert_str_to_list(box_string)
    load_labels(dest_folder,file_name,bbox)

In [9]:
"""def check_image(path):
    with open(path, 'rb') as f:
        check_chars = f.read()[-2:]
    return  check_chars == b'\xff\xd9'"""

"def check_image(path):\n    with open(path, 'rb') as f:\n        check_chars = f.read()[-2:]\n    return  check_chars == b'ÿÙ'"

In [10]:
def load_image(f_name,init_folder,dest_folder):
    
    if os.path.isfile(f'{init_folder}/{f_name}'):# and check_image(f'./{init_folder}/{f_name}'):
        os.system(f'cp {init_folder}/{f_name} {dest_folder}/{f_name}')
        return True
    return False

In [11]:
"""
df = pd.read_csv('processed_train.csv', sep='\t', index_col=0)

stat_h_w = {"width":[],"height":[],"counts":[]}
hw = df[['height','width']]
for h in np.unique(df['height']):
    for w in np.unique(df['width']):
        stat_h_w["width"].append(w)
        stat_h_w["height"].append(h)
        
        stat_h_w["counts"].append(((hw['height']==h) & (hw['width']==w)).sum())

stat_h_w = pd.DataFrame(stat_h_w)    
stat_h_w.sort_values(by=['counts'],ascending=False)
"""

'\ndf = pd.read_csv(\'processed_train.csv\', sep=\'\t\', index_col=0)\n\nstat_h_w = {"width":[],"height":[],"counts":[]}\nhw = df[[\'height\',\'width\']]\nfor h in np.unique(df[\'height\']):\n    for w in np.unique(df[\'width\']):\n        stat_h_w["width"].append(w)\n        stat_h_w["height"].append(h)\n        \n        stat_h_w["counts"].append(((hw[\'height\']==h) & (hw[\'width\']==w)).sum())\n\nstat_h_w = pd.DataFrame(stat_h_w)    \nstat_h_w.sort_values(by=[\'counts\'],ascending=False)\n'

In [12]:
#Remove images if needed

os.system("rm -r dataset/images/train/*")
os.system("rm -r dataset/images/test/*")
os.system("rm -r dataset/images/val/*")
os.system("rm -r dataset/labels/train/*")
os.system("rm -r dataset/labels/val/*")

0

In [13]:
class Dataset_Descriptor:
    DATASET_LOCATION = "dataset"
    LOAD_TYPES = ["train","val","test"]
    DATASET_INIT_LOCATION = {"train":"train1/train/",
                             "val":"train1/train/",
                             "test":"test/"}
    DATASET_DEST_LOCATION = {"images":
                            {"train":"dataset/images/train/",
                             "val":"dataset/images/val/",
                             "test":"dataset/images/test/"},
                        "labels":
                            {"train":"dataset/labels/train/",
                             "val":"dataset/labels/val/",
                             "test":"dataset/labels/test/"}}
    def __init__(self,l_type,init_pos,sample_size):
        self.l_type = l_type
        self.image_dir = []
        self.labels = []
        self.seq_id = []
        self.image_name = []
        
        self.pos = init_pos
        self.sample_size = sample_size
        
        if self.l_type in ["train","val"]:
            df = pd.read_csv('processed_train.csv', sep='\t', index_col=0)
            df = df[(df["height"]==1536) & (df["width"]==2048)]
            df = df.reset_index(drop=True)
        else:
            df = pd.read_csv('processed_test.csv', sep='\t', index_col=0)
            df = df[(df["height"]==1536) & (df["width"]==2048)]
            df = df.reset_index(drop=True)
        
        self.pos = self.search_images(df = df,l_type = self.l_type,
                  init_pos = self.pos,set_size = self.sample_size)
        
    """def __del__(self):
        os.system("rm -r dataset/images/train/*")
        os.system("rm -r dataset/images/test/*")
        os.system("rm -r dataset/images/val/*")
        os.system("rm -r dataset/labels/train/*")
        os.system("rm -r dataset/labels/val/*")"""
        
    def search_images(self,df,l_type,init_pos,set_size):
    
        init_path = Dataset_Descriptor.DATASET_INIT_LOCATION[l_type]
        dest_path_i = Dataset_Descriptor.DATASET_DEST_LOCATION["images"][l_type]
        dest_path_l = Dataset_Descriptor.DATASET_DEST_LOCATION["labels"][l_type]
        
        while set_size>0:

            res = load_image(f_name = df['file_name'][init_pos],
                       init_folder = init_path,
                       dest_folder = dest_path_i)
            if res:
                self.image_dir.append(f"{dest_path_i}/{df['file_name'][init_pos]}")
                labels_array = convert_str_to_list(df['box'][init_pos])
                labels_array = np.array(labels_array) if len(labels_array)>0 else np.array([-5,-5,-5,-5])
                self.labels.append(labels_array )
                self.seq_id.append(df['seq_id'][init_pos])
                self.image_name.append(df['id'][init_pos])
                
                if l_type in ["train","val"]:
                    process_label(box_string = df['box'][init_pos],
                                  file_name = df['file_name'][init_pos],
                                  dest_folder = dest_path_l)
                set_size-=1
            init_pos += 1
        return init_pos


data_descriptor_train = Dataset_Descriptor(l_type="train",init_pos=0,sample_size=100)
data_descriptor_val = Dataset_Descriptor(l_type="val",init_pos=data_descriptor_train.pos,sample_size=200)
data_descriptor_test = Dataset_Descriptor(l_type="test",init_pos=0,sample_size=100)

# Model

## Train from zero

In [191]:
!ls 

dataset.zip  model-Copy1.ipynb	model.ipynb  Sequence_Dataset  yolov5
data.yaml    model_count.ipynb	runs	     yolov3.cfg


In [None]:
#data_path = '/media/danil-pass123/Новый том/DataSet/data.yaml'
data_path = 'data_my.yaml'

train.run(data = data_path,epochs = 20,batch_size=10,single_cls= True,optimizer="SGD",noplots=False,tensorboard=True,workers=2,device='cpu')

[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=, data=data_my.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=20, batch_size=10, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=cpu, multi_scale=False, single_cls=True, optimizer=SGD, sync_bn=False, workers=2, project=yolov5/runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, tensorboard=True


## Train

In [14]:
!pwd

/media/danil-pass123/Новый том/DataSet


In [15]:
!ls

archives			     processed_train.csv
data_my.yaml			     runs
dataset				     seq_dataset
data.yaml			     test
gps_locations.json		     train
iwildcam2022_mdv4_detections.json    train1
iwildcam2022_test_information.json   train_sequence_counts.csv
iwildcam2022_train_annotations.json  yolov5s.pt
processed_test.csv


In [24]:
#/media/danil-pass123/Новый том/DataSet

#data_path = '/media/danil-pass123/Новый том/DataSet/data.yaml'
data_path = data_location + '/data_my.yaml'

runs_location = data_location + "/runs_train"

#data_path = 'data.yaml'
train.run(data = data_path,epochs = 1,batch_size=1,
          single_cls= True,optimizer="SGD",tensorboard=True,
          workers=1,device='cpu',
          project = runs_location,name = "exec_num")
#noplots=False

[34m[1mtrain: [0mweights=yolov5/yolov5s.pt, cfg=, data=/media/danil-pass123/Новый том/DataSet/data_my.yaml, hyp=yolov5/data/hyps/hyp.scratch-low.yaml, epochs=1, batch_size=1, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=cpu, multi_scale=False, single_cls=True, optimizer=SGD, sync_bn=False, workers=1, project=/media/danil-pass123/Новый том/DataSet/runs_train, name=exec_num, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, tensorboard=True
Из https://github.com/ultralytics/yolov5
   1510111..ff6e6e3  master     -> origin/master
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 18 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v6.2-231-g0039870 Python-3.8.8 torch-1.12.1 C




        0/0         0G          0    0.02504          0          0        640:  


KeyboardInterrupt: 

## Detect

In [14]:
#Remove images if needed
if len(os.listdir("runs"))>0:
    os.system("rm -r runs/*")

### With trained model

In [15]:
test_images = []
test_labels = []
#train test val location descriptor
#data_path = data_location + '/data.yaml'
data_path = data_location + '/data_my.yaml'
#data_path = '/media/danil-pass123/"Новый том"/DataSet/data.yaml'

#weights = "./yolov5/runs/train/exp19/weights/best.pt"
weights = data_location + '/runs_train/exec_num3/weights/best.pt'

classes = [i for i in range(14,24,1)]
classes = ["1"]

#source = '/media/danil-pass123/Новый том/DataSet/dataset/images/test/'
runs_location = data_location + "/runs_test"

for i,img_path,img in zip(range(data_descriptor_test.sample_size),data_descriptor_test.image_dir,data_descriptor_test.image_name):
    clear_output(wait=True)
    print(f"{i}/{data_descriptor_test.sample_size-1}")
    detect.run(imgsz=(1536,2048),weights = weights,source = img_path, 
               project = runs_location,name = f"iter{i}",save_txt=True,conf_thres=0.25,
               classes=classes,data = data_path)
    test_images.append(f'runs/iter{i}/{img}')
    test_labels.append(f'runs/iter{i}/labels/{img}.txt')
    #time.sleep(0.3)

YOLOv5 🚀 v6.2-231-g0039870 Python-3.8.8 torch-1.12.1 CPU

Fusing layers... 


99/99


Model summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
image 1/1 /media/danil-pass123/Новый том/DataSet/dataset/images/test/93376d9e-21bc-11ea-a13a-137349068a90.jpg: 1536x2048 (no detections), 1564.5ms
Speed: 11.1ms pre-process, 1564.5ms inference, 0.6ms NMS per image at shape (1, 3, 1536, 2048)
Results saved to [1m/media/danil-pass123/Новый том/DataSet/runs_test/iter99[0m
0 labels saved to /media/danil-pass123/Новый том/DataSet/runs_test/iter99/labels


### With initial weights

In [31]:
test_images = []
test_labels = []
#train test val location descriptor
data_path = data_location + '/data.yaml'
#data_path = '/media/danil-pass123/"Новый том"/DataSet/data.yaml'

#weights = "./yolov5/runs/train/exp19/weights/best.pt"
weights = 'yolov5s.pt'

classes = [i for i in range(14,24,1)]

#source = '/media/danil-pass123/Новый том/DataSet/dataset/images/test/'
runs_location = data_location + "/runs_test"

for i,img_path,img in zip(range(data_descriptor_test.sample_size),data_descriptor_test.image_dir,data_descriptor_test.image_name):
    clear_output(wait=True)
    print(f"{i}/{data_descriptor_test.sample_size-1}")
    detect.run(imgsz=(1536,2048),weights = weights,source = img_path, 
               project = runs_location,name = f"iter{i}",save_txt=True,conf_thres=0.25,classes=classes)
    test_images.append(f'runs/iter{i}/{img}')
    test_labels.append(f'runs/iter{i}/labels/{img}.txt')
    #time.sleep(0.3)

0/99


YOLOv5 🚀 v6.2-231-g0039870 Python-3.8.8 torch-1.12.1 CPU

Downloading https://github.com/ultralytics/yolov5/releases/download/v6.2/yolov5s.pt to yolov5s.pt...


  0%|          | 0.00/14.1M [00:00<?, ?B/s]


Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients


FileNotFoundError: /home/danil-pass123/Desctop/semester7/PMLDL/Project/dataset/images/test/8bc642a6-21bc-11ea-a13a-137349068a90.jpg does not exist

### Accuracy measurments

In [21]:
"""
def draw_images(pos):
    global test_images
    if pos < len(test_images):
        plt.imshow(cv2.imread(test_images[pos]))
    else:
        return
"""

'\ndef draw_images(pos):\n    global test_images\n    if pos < len(test_images):\n        plt.imshow(cv2.imread(test_images[pos]))\n    else:\n        return\n'

In [22]:
#draw_images(0)

In [16]:
def load_pred_labels(file_name):
    box_pred = []
    conf_pred = []
    
    if os.path.isfile(file_name):
        with open(file_name,'r') as f:
            lns = f.readlines()
        
            for l in lns:
                pred = list(map(float,l.split(' ')))
                box_pred.append(pred[1:])
                conf_pred.append(pred[0])
        return conf_pred,np.array(box_pred)
    else:
        return 0,np.array([[-5,-5,-5,-5]])

def get_pred_labels_generator(files):
    for f in files:
        yield load_pred_labels(f)
        
        
        
def get_pred_labels(files):
    labels = []
    for f in files:
        _,bbox = load_pred_labels(f)
        labels.append(bbox)
    return labels

In [17]:
#for i in range(100):
#    if data_descriptor_test.labels[i].shape[0]==2:
#        print(i)

In [18]:
def iou_metric(y_true,y_pred):
    iou_res = 0
    #y_true = [torch.tensor(y_t) for y_t in y_true]
    y_true = torch.tensor(y_true)

    for y_p in y_pred:

        y_p = torch.tensor(y_p).reshape(1,-1)
        #print(y_true.shape,y_p.shape)
        iou_res += float(metrics.bbox_iou(y_p,y_true,xywh=True).min())
    return iou_res/y_pred.shape[0]

Cartesian product

In [19]:
def mse_box(y_true,y_pred):
    a = np.concatenate([y_pred,np.arange(len(y_pred)).reshape(-1,1)],axis=1)
    b = np.concatenate([y_true,np.arange(len(y_true)).reshape(-1,1)],axis=1)

    c = np.repeat(a,len(b),axis=0)
    d = np.tile(b,(len(a),1))

    res = np.concatenate([c,d],axis=1)
    diff = ((res[:,[0,1]] - res[:,[5,6]])**2).sum(axis=1)**0.5

    selected = [[],[]]
    res_mse = 0
    for i in np.argsort(diff):
        if (res[i][4] not in selected[0]) & (res[i][9]  not in selected[1]):
            selected[0].append(res[i][4])
            selected[1].append(res[i][9])

            point = ((res[i][[0,1]]-res[i][[5,6]])**2).sum()
            box = ((res[i][[2,3]]-res[i][[7,8]])**2).sum()

            res_mse += (point + box)**0.5
        res_mse /= len(selected)
    return res_mse

In [20]:
def num_found_obj(y_true,y_pred):
    mse = np.abs(y_true.shape[0]-y_pred.shape[0])
    return mse

In [21]:
def evaluate(true_list,pred_list):
    
    acc_res = {"iou":[],"mse_box":[],"acc_obj":[],"correctnces":[]}
    for y_true,y_pred in zip(true_list,pred_list):
        #print(y_true.shape,y_pred.shape)
        if (y_true==-5).all() and (y_pred==-5).all():
            acc_res["iou"].append(0)
            acc_res["mse_box"].append(0)
            acc_res["acc_obj"].append(0)
            acc_res["correctnces"].append(1)
            
        elif (y_true==-5).all() and (y_pred!=-5).all():
            acc_res["iou"].append(-1)
            acc_res["mse_box"].append(-1)
            acc_res["acc_obj"].append(-y_pred.shape[0])
            acc_res["correctnces"].append(0)
        elif (y_true!=-5).all() and (y_pred==-5).all():
            acc_res["iou"].append(-1)
            acc_res["mse_box"].append(-1)
            acc_res["acc_obj"].append(-y_true.shape[0])
            acc_res["correctnces"].append(0)
        else:
            acc_res["iou"].append(iou_metric(y_true,y_pred))
            acc_res["mse_box"].append(mse_box(y_true,y_pred))
            acc_res["acc_obj"].append( num_found_obj(y_true,y_pred) )
            acc_res["correctnces"].append(1)
    
    return acc_res
y_true = data_descriptor_test.labels
y_pred = get_pred_labels(test_labels)
acc_res = evaluate(y_true,y_pred)

In [35]:
(np.array(y_pred)==np.array([[-5]])).all()

True

In [22]:
acc_res = pd.DataFrame(acc_res)

In [23]:
acc_res[acc_res["iou"]==acc_res["iou"].max()]

Unnamed: 0,iou,mse_box,acc_obj,correctnces
0,0,0,0,1
1,0,0,0,1
2,0,0,0,1
3,0,0,0,1
4,0,0,0,1
...,...,...,...,...
95,0,0,0,1
96,0,0,0,1
97,0,0,0,1
98,0,0,0,1


In [26]:
acc_res[acc_res["mse_box"]==acc_res[acc_res["mse_box"]>0]["mse_box"].min()]

Unnamed: 0,iou,mse_box,acc_obj,correctnces


In [27]:
acc_res[acc_res["acc_obj"]==0]

Unnamed: 0,iou,mse_box,acc_obj,correctnces
0,0,0,0,1
1,0,0,0,1
2,0,0,0,1
3,0,0,0,1
4,0,0,0,1
...,...,...,...,...
95,0,0,0,1
96,0,0,0,1
97,0,0,0,1
98,0,0,0,1


In [None]:
acc_res#.head(4)

In [27]:
def calculate_num_objects(labels,seq_list,img_name):
    num_obj_seq = {}
    num_obj_img = {}
    
    for l,seq,i_name in zip(labels,seq_list,img_name):
        if seq not in num_obj_seq.keys():
            num_obj_seq[seq] = []
        num_obj_seq[seq].append(len(l))
        num_obj_img[i_name] = len(l)
    return num_obj_seq, num_obj_img
        

num_obj_seq, num_obj_img = calculate_num_objects(get_pred_labels(test_labels),data_descriptor_test.seq_id,data_descriptor_test.image_name)

In [55]:
#get_pred_labels(test_labels)

In [56]:
num_obj_seq

{'386914ce-6fe2-11eb-844f-0242ac1c0002': [1, 2, 1, 3, 1, 2, 3, 1],
 '988ae29e-21bc-11ea-a13a-137349068a90': [1, 1],
 '947b2c2c-21bc-11ea-a13a-137349068a90': [1, 1, 1],
 '386af23a-6fe2-11eb-844f-0242ac1c0002': [2, 1],
 '96760128-21bc-11ea-a13a-137349068a90': [4, 1],
 '9871f374-21bc-11ea-a13a-137349068a90': [1],
 '969ab9b4-21bc-11ea-a13a-137349068a90': [1, 1],
 '3868eee0-6fe2-11eb-844f-0242ac1c0002': [1, 1, 1, 1, 1, 1, 1],
 '38699142-6fe2-11eb-844f-0242ac1c0002': [1, 1, 1, 1, 1, 1, 1],
 '922ca874-21bc-11ea-a13a-137349068a90': [1, 1, 3],
 'a9187f88-0cd3-11eb-bed1-0242ac1c0002': [1, 1, 1, 1, 1, 1],
 '3866fb08-6fe2-11eb-844f-0242ac1c0002': [3, 1, 1, 1, 1, 1, 1, 2, 1],
 '946cf95e-21bc-11ea-a13a-137349068a90': [1, 1, 1, 1, 1, 1, 1, 1],
 '91b21776-21bc-11ea-a13a-137349068a90': [1, 1, 1],
 '94d9c5c0-21bc-11ea-a13a-137349068a90': [1, 1, 1, 1, 1, 1],
 '9788357c-21bc-11ea-a13a-137349068a90': [1],
 '8e8f2e3a-21bc-11ea-a13a-137349068a90': [1, 1, 1],
 '916916a2-21bc-11ea-a13a-137349068a90': [1, 1, 1,

### Save sequence of numbers of animals

In [57]:
seq_dataset = {"seq_id":[],"count":[]}
for k in num_obj_seq.keys():
    seq_dataset["seq_id"].append(k)
    seq_dataset["count"].append(num_obj_seq[k])
seq_dataset = pd.DataFrame(seq_dataset)

In [60]:
img_dataset = {"id":[],"count":[]}
for k in num_obj_img.keys():
    img_dataset["id"].append(k)
    img_dataset["count"].append(num_obj_img[k])
img_dataset = pd.DataFrame(img_dataset)

In [66]:
seq_dataset.to_csv("seq_dataset/seq_data.csv")

In [None]:
img_dataset.to_csv("seq_dataset/img_data.csv")