## Тестирование датасета MOT20_ext

In [5]:
import functools

import cv2
import torch
import matplotlib as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch import Generator
from torch.utils.data import ConcatDataset, DataLoader, Subset, random_split
from tqdm import tqdm

from src.data import MOT20ExtDataset
from src.transforms import get_norm_transform, get_resize_transform
from src.data.preparing.mot import get_dataframe
from src.config import DATA_PATH
from os.path import join

In [1]:
# Build the location from the shared DATA_PATH setting instead of a hard-coded
# relative path, the same way the train/test split cell of this notebook does.
# The trailing '' keeps the trailing path separator of the original literal.
# NOTE(review): assumes DATA_PATH points at the directory that contains
# MOT20_ext (i.e. the former 'data/') — confirm against src.config.
dataset = MOT20ExtDataset(join(DATA_PATH, 'MOT20_ext', 'train', 'MOT20-01', ''))

NameError: name 'MOT20ExtDataset' is not defined

### Полное тестирование 

In [3]:
len(dataset)

4768

In [4]:
img1, img2, label = dataset[4000]

In [6]:
label

0

In [6]:
def display_images(img_tensor, title=''):
    """Show one image tensor with matplotlib.

    Args:
        img_tensor: torch tensor whose axes are reordered with
            ``permute(1, 2, 0)`` before plotting
            (presumably channels-first, i.e. (C, H, W) — confirm with the dataset).
        title: text drawn above the image.

    Pixel values are clipped to the [0, 1] range before being displayed.
    """
    # The notebook binds the bare `matplotlib` package to the name `plt`, and
    # that package has no imshow/title/show. Import the pyplot interface here
    # so the function works regardless of that alias.
    import matplotlib.pyplot as pyplot

    # detach()/cpu() make the conversion safe for tensors that track gradients
    # or live on an accelerator; for plain CPU tensors they are no-ops.
    image = img_tensor.detach().cpu().permute(1, 2, 0).numpy()
    pyplot.imshow(image.clip(0, 1))
    pyplot.title(title)
    pyplot.show()

In [7]:
# Fractions of the dataset assigned to each split; together they must add up to 1.
TEST_PROPORTION = 0.2
# Backward-compatible alias: other cells still refer to the original misspelled name.
TEST_PROPROTION = TEST_PROPORTION
VAL_PROPORTION = 0.1
TRAIN_PROPORTION = 1 - TEST_PROPORTION - VAL_PROPORTION
# Sanity check shown as the cell output: the three fractions should sum to 1.0.
sum([TEST_PROPORTION, VAL_PROPORTION, TRAIN_PROPORTION])

1.0

In [8]:
# Split the dataset by the configured fractions; the fixed seed makes the
# assignment of samples to train/val/test repeatable between runs.
train_set, val_set, test_set = random_split(
    dataset,
    [TRAIN_PROPORTION, VAL_PROPORTION, TEST_PROPROTION],
    generator=Generator().manual_seed(0),
)

In [9]:
len(train_set), len(val_set), len(test_set)

(1669, 239, 476)

In [11]:
# display_images(img1)

### Фильтрация

In [12]:
df = dataset.ground_truth
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


In [13]:
# Number of samples in the dataset (the statement was needlessly repeated three times).
len(dataset)

2384

In [14]:
df = df[df['is_consider'] == 1]
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


In [15]:
test_visibility = 0.95

In [16]:
df = df[df['visibility'] >= test_visibility]
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


### Расчет длины

In [17]:
test_len = 1
type(test_len) == int

True

In [18]:
objects = df[df['id'] == 4]
print(objects['frame'].values)

[21 22 23 24 25 26]


In [19]:
def aggregate(x, y):
    """Fold step that groups ascending numbers into runs of consecutive values.

    Meant to be used with ``functools.reduce`` over an already sorted sequence
    and without an initial value.

    Args:
        x: on the very first call, the first number of the sequence; on every
            later call, the list of runs built so far.
        y: the next number of the sequence.

    Returns:
        The list of runs. When ``x`` is already a list it is extended in place
        and returned.

    A new run is started only when ``y`` is more than 1 above the last value
    of the current run. The first pair now follows the same rule as every
    later pair, so equal neighbouring values always end up in one run.
    """
    if not isinstance(x, list):
        # First reduce step: promote the bare first number to a list of runs.
        x = [[x]]

    if y - x[-1][-1] > 1:
        x.append([y])
    else:
        x[-1].append(y)
    return x

In [20]:
def split_to_continuous_segments(array):
    """Return the numbers of ``array`` grouped into runs of consecutive values.

    Args:
        array: sized collection of numbers, in any order.

    Returns:
        A list of lists. An empty input gives ``[[]]``, a single value gives
        ``[[value]]``; longer inputs are sorted and folded with the
        ``aggregate`` step defined in this notebook.
    """
    if len(array) < 2:
        # Always hand back a fresh list: the previous single-element branch
        # returned the caller's own container (whatever its type), so results
        # were aliased to the input and not consistently lists.
        return [list(array)]
    return functools.reduce(aggregate, sorted(array))

In [21]:
split_to_continuous_segments([2, 23, 4, 6, 5, 1])

[[1, 2], [4, 5, 6], [23]]

In [22]:
def get_possible_tuples_count_segment(distance: int, segment: list[int]) -> int:
    """Count the pairs inside one segment whose members have ``distance`` elements between them.

    Args:
        distance: number of elements skipped between the two members of a pair
            (0 means direct neighbours).
        segment: one run of values.

    Returns:
        ``len(segment) - distance - 1``, clamped at 0 so that a segment that
        is too short yields "no pairs" rather than a negative count.
    """
    return max(0, len(segment) - distance - 1)

In [23]:
def get_neighbours_tuples_count(distance: int, segments: list[list[int]]):
    """Count adjacent segment pairs whose gap is exactly ``distance`` values wide.

    For every two neighbouring segments the gap is measured between the last
    value of the earlier segment and the first value of the later one, as
    ``later[0] - earlier[-1] - 1``.

    Args:
        distance: required number of missing values between the two segments.
        segments: runs of values, in order.

    Returns:
        Number of neighbouring segment pairs whose gap equals ``distance``.
    """
    # Pairing each segment with its successor removes the `prev == None`
    # sentinel check, which raises for array-like segments, and avoids
    # shadowing the builtin `sum`.
    return sum(
        1
        for earlier, later in zip(segments, segments[1:])
        if later[0] - earlier[-1] - 1 == distance
    )

In [24]:
def get_possible_tuples_count(distance: int, segments: list[list[int]]) -> int:
    """Total number of pairs available for ``distance`` over all segments.

    Adds up the per-segment counts (negative values are treated as 0) and the
    count of pairs that bridge two neighbouring segments.
    """
    per_segment = [
        get_possible_tuples_count_segment(distance, segment)
        for segment in segments
    ]
    inside_total = 0
    for count in per_segment:
        inside_total += count if count > 0 else 0

    bridging_total = get_neighbours_tuples_count(distance, segments)
    return inside_total + bridging_total

### Тестирование

In [25]:
def f(d, s):
    """Short alias for get_possible_tuples_count_segment(distance=d, segment=s)."""
    count = get_possible_tuples_count_segment(d, s)
    return count

def F(d, ss):
    """Short alias for get_possible_tuples_count(distance=d, segments=ss)."""
    total = get_possible_tuples_count(d, ss)
    return total

In [26]:
ss = [[1, 2, 3]]

assert(F(0, ss) == 2)
assert(F(1, ss) == 1)
assert(F(2, ss) == 0)

In [27]:
ss = [[1, 2, 3], [5, 6]]

assert(F(0, ss) == 3)
assert(F(1, ss) == 2)

In [28]:
ss = [[1, 2, 3], [5, 6, 7]]

assert(F(0, ss) == 4)
assert(F(1, ss) == 3)

In [29]:
ss = [[1], [5, 6, 7, 8], [10, 11, 12]]

assert (F(0, ss) == 5)
assert(F(1, ss) == 4)

In [30]:
ss = [[2], [5]]

assert (F(0, ss) == 0)
assert (F(1, ss) == 0)
assert (F(2, ss) == 1)
assert (F(3, ss) == 0)

### Индексация

In [31]:
d = {1: 3}
d

{1: 3}

In [32]:
l = [21, 43]
list2 = [1, 2, 3]

In [33]:
l + list2

[21, 43, 1, 2, 3]

In [34]:
from src.data.utils import get_possible_tuples

In [35]:
ss = [[1, 2, 3, 4, 5], [9, 10, 11, 12, 13], [23, 24, 25]]

In [36]:
get_possible_tuples(0, ss)

[(1, 2),
 (2, 3),
 (3, 4),
 (4, 5),
 (9, 10),
 (10, 11),
 (11, 12),
 (12, 13),
 (23, 24),
 (24, 25)]

In [37]:
get_possible_tuples(1, ss)

[(1, 3), (2, 4), (3, 5), (9, 11), (10, 12), (11, 13), (23, 25)]

In [38]:
get_possible_tuples(2, ss)

[(1, 4), (2, 5), (9, 12), (10, 13)]

In [39]:
get_possible_tuples(3, ss)

[(1, 5), (5, 9), (9, 13)]

In [40]:
get_possible_tuples(9, ss)

[(13, 23)]

In [41]:
for k, v in {1: 32, 3: 3}.items():
    print(v)

32
3


In [42]:
(1, *(1, 2))

(1, 1, 2)

## Расчет параметров датасета

### Средние размеры изображения

In [44]:
# One dataset per training sequence, concatenated into a single dataset.
# Paths are built from the shared DATA_PATH setting, as in the train/test
# split cell of this notebook, instead of repeating a hard-coded 'data/' prefix.
# The trailing '' keeps the trailing path separator of the original literals.
# NOTE(review): assumes DATA_PATH is the directory that contains MOT20_ext — confirm.
train_root = join(DATA_PATH, 'MOT20_ext', 'train')
dataset01 = MOT20ExtDataset(join(train_root, 'MOT20-01', ''))
dataset02 = MOT20ExtDataset(join(train_root, 'MOT20-02', ''))
dataset03 = MOT20ExtDataset(join(train_root, 'MOT20-03', ''))
dataset05 = MOT20ExtDataset(join(train_root, 'MOT20-05', ''))
dataset = ConcatDataset([dataset01, dataset02, dataset03, dataset05])

In [45]:
len(dataset)

135721

#### Обработка данных (долгая операция: несколько минут на каждый проход)

In [59]:
# Mean first-dimension size of the two images of every sample.
# Slow (the recorded run took several minutes), but it has to stay executable:
# later cells read `xs`, which is otherwise undefined on a fresh kernel.
xs = [(x[0].shape[0] + x[1].shape[0]) / 2 for x in tqdm(dataset)]

100%|██████████| 135721/135721 [03:48<00:00, 592.85it/s]


In [60]:
# Mean second-dimension size of the two images of every sample.
# Slow (the recorded run took several minutes), but it has to stay executable:
# later cells read `ys`, which is otherwise undefined on a fresh kernel.
ys = [(x[0].shape[1] + x[1].shape[1]) / 2 for x in tqdm(dataset)]

100%|██████████| 135721/135721 [03:26<00:00, 656.41it/s]


In [61]:
len(xs), len(ys)

(135721, 135721)

In [71]:
xxs = torch.tensor(xs)
yys = torch.tensor(ys)

In [72]:
xxs.mean(), yys.mean()

(tensor(139.0535), tensor(61.7174))

### Разделение на обучающую и тестовую выборки

In [34]:
from os import listdir

current_path_ext = join(DATA_PATH, 'MOT20_ext', 'train', 'MOT20-03')
new_path_ext = join(DATA_PATH, 'MOT20_ext', 'test', 'MOT20-03')
det = get_dataframe(current_path_ext, 'det')
gt = get_dataframe(current_path_ext, 'gt')
t = ['340', '477', '663', '314', '294', '468', '527', '601', '133', '645', '250', '33', '700', '97', '348', '223', '214', '65', '193', '265', '417', '225', '603', '573', '436', '355', '584', '608', '41', '440', '18', '55', '339', '443', '57', '667', '407', '95', '61', '661', '283', '560', '511', '673', '614', '450', '83', '25', '325', '222', '318', '311', '353', '291', '172', '89', '288', '447', '290', '484', '284', '321', '178', '11', '192', '237', '144', '677', '586', '195', '626', '46', '660', '224', '152', '524', '638', '590', '191', '548', '414', '730', '375', '249', '332', '431', '592',
              '280', '492', '398', '221', '424', '253', '604', '51', '435', '66', '200', '241', '452', '631', '212', '362', '615', '618', '438', '148', '342', '68', '469', '170', '594', '609', '319', '423', '190', '629', '84', '49', '323', '29', '213', '315', '24', '575', '42', '495', '689', '304', '588', '699', '169', '517', '87', '415', '286', '487', '322', '3', '28', '344', '154', '343', '405', '163', '428', '351', '529', '330', '357', '572', '36', '359', '705', '260', '166', '657', '142', '374', '14', '525', 'gt', '658', '328', '628', '268', '276', '570', '683', '153', '479', '422', '617', '558', '516', '483', '111']
# Drop the non-numeric entries ('gt', 'det') and convert the rest to integer ids.
# Sorting makes the resulting list reproducible: the iteration order of a set
# of strings can change from one interpreter run to the next.
t = set(t) - {'gt', 'det'}
to_extract = sorted(map(int, t))


In [35]:
# len(to_extract)
# len(to_extract) / len(listdir(current_path_ext))
340 in to_extract

True

In [39]:
# Boolean row masks: rows whose id was selected for the test split, and all others.
# Series.isin does the membership test in one vectorised call instead of running
# a Python lambda (with a linear list lookup) for every row; the train mask is
# simply the complement of the test mask.
mask_test = gt['id'].isin(to_extract)
mask_train = ~mask_test

In [40]:
train_gt = gt[mask_train]
train_gt


Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
0,695,1,1127,817,45,62,1,1,1.00000
1,696,1,1124,815,47,64,1,1,1.00000
2,697,1,1122,813,48,66,1,1,1.00000
3,698,1,1120,811,49,68,1,1,1.00000
4,699,1,1118,809,50,70,1,1,1.00000
...,...,...,...,...,...,...,...,...,...
356723,2153,735,267,1,42,16,0,7,0.79070
356724,2154,735,268,1,41,15,0,7,0.78571
356725,2155,735,268,1,41,13,0,7,0.78571
356726,2156,735,268,1,41,12,0,7,0.78571


In [43]:
# These rows are selected from the ground-truth frame `gt`, so they are named
# `test_gt`, mirroring `train_gt`.
test_gt = gt[mask_test]
# NOTE(review): old name kept as an alias in case a later cell still uses it;
# despite the name it holds ground-truth rows, not detections.
test_det = test_gt
test_gt

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
729,617,3,1,853,8,28,1,1,0.965520
730,618,3,1,852,10,29,1,1,0.966670
731,619,3,1,852,12,29,1,1,0.966670
732,620,3,1,851,14,30,1,1,0.967740
733,621,3,1,851,16,30,1,1,0.967740
...,...,...,...,...,...,...,...,...,...
355402,1966,730,574,1,46,32,1,1,0.000000
355403,1967,730,574,1,46,32,1,1,0.000000
355404,1968,730,574,1,46,33,1,1,0.016896
355405,1969,730,574,1,46,34,1,1,0.028571
