## Тестирование датасета MOT20_ext

In [1]:
import functools

import cv2
import torch
import matplotlib as plt
import numpy as np
from sklearn.model_selection import train_test_split
from torch import Generator
from torch.utils.data import ConcatDataset, DataLoader, Subset, random_split
from tqdm import tqdm

from src.data import MOT20ExtDataset
from src.transforms import get_norm_transform, get_resize_transform

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Build the dataset object for the MOT20-01 training sequence directory.
dataset = MOT20ExtDataset('data/MOT20_ext/train/MOT20-01/')

### Полное тестирование 

In [3]:
len(dataset)

4768

In [4]:
# Fetch the item at index 4000; it unpacks into three values
# (presumably a pair of image crops and a label - verify against MOT20ExtDataset).
img1, img2, label = dataset[4000]

In [6]:
label

0

In [6]:
def display_images(img_tensor, title=''):
    """Show a single image tensor with matplotlib.

    Args:
        img_tensor: Tensor whose first axis is moved to the last position
            before plotting (assumed to be channels-first, C x H x W -
            TODO confirm against the dataset's output layout).
        title: Optional title drawn above the image.
    """
    # The notebook binds `plt` to the top-level `matplotlib` package, which
    # does not expose imshow/title/show; use the pyplot interface instead.
    import matplotlib.pyplot as plt

    # detach()/cpu() keep the conversion working for tensors that require
    # gradients or live on an accelerator.
    image = img_tensor.detach().cpu().permute(1, 2, 0).numpy()
    # Clip to [0, 1], the range imshow expects for floating point image data.
    plt.imshow(image.clip(0, 1))
    plt.title(title)
    plt.show()
    plt.pause(0.001)

In [7]:
# Fractions of the dataset assigned to each split; the training share is
# whatever remains after the test and validation shares.
# NOTE(review): "PROPROTION" is a typo for "PROPORTION"; the name is also used
# by the random_split cell, so both places must be renamed together.
TEST_PROPROTION = 0.2
VAL_PROPORTION = 0.1
TRAIN_PROPORTION = 1 - TEST_PROPROTION - VAL_PROPORTION
# Sanity check: the three fractions should add up to 1.
sum([TEST_PROPROTION, VAL_PROPORTION, TRAIN_PROPORTION])

1.0

In [8]:
# Fractional train/val/test split; the generator is seeded with 0 so the
# split is reproducible across runs.
train_set, val_set, test_set = random_split(
    dataset,
    [TRAIN_PROPORTION, VAL_PROPORTION, TEST_PROPROTION],
    generator=Generator().manual_seed(0),
)

In [9]:
len(train_set), len(val_set), len(test_set)

(1669, 239, 476)

In [11]:
# display_images(img1)

### Фильтрация

In [12]:
df = dataset.ground_truth
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


In [13]:
# Number of samples in the dataset. The call used to be repeated three times;
# only the last expression of a cell is displayed, so one call is enough.
len(dataset)

2384

In [14]:
# Keep only the annotation rows whose is_consider flag equals 1.
df = df.loc[df['is_consider'] == 1]
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


In [15]:
test_visibility = 0.95

In [16]:
# Drop rows whose visibility is below the test_visibility threshold.
df = df.loc[df['visibility'] >= test_visibility]
df

Unnamed: 0,frame,id,bb_left,bb_top,bb_width,bb_height,is_consider,class,visibility
391,54,2,455,808,113,271,1,1,1.0
392,55,2,457,808,113,272,1,1,1.0
393,56,2,459,806,113,274,1,1,1.0
394,57,2,461,804,113,276,1,1,1.0
395,58,2,463,802,114,278,1,1,1.0
...,...,...,...,...,...,...,...,...,...
18529,425,61,969,517,61,139,1,1,1.0
18530,426,61,970,518,61,139,1,1,1.0
18531,427,61,972,519,61,139,1,1,1.0
18532,428,61,973,520,61,139,1,1,1.0


### Расчет длины

In [17]:
test_len = 1
# isinstance is the idiomatic type check; unlike an exact type comparison it
# also accepts subclasses of int.
isinstance(test_len, int)

True

In [18]:
# Select the rows of the object with id 4 and print the frame numbers they cover.
objects = df[df['id'] == 4]
print(objects['frame'].values)

[21 22 23 24 25 26]


In [19]:
def aggregate(x, y):
    """Reducer step that groups ascending numbers into runs of consecutive values.

    Meant to be folded over a sorted sequence without an initial value, so on
    the first step ``x`` is a bare number and afterwards it is the list of
    segments built so far.

    The same rule is applied on every step: a gap larger than one starts a new
    segment, anything else (a consecutive or repeated value) joins the current
    segment. Previously the first step split equal values into two segments
    while later steps merged them.

    Args:
        x: The first number of the sequence, or the list of segments
            accumulated so far.
        y: The next number of the sequence.

    Returns:
        The list of segments. When ``x`` is already a list it is extended in
        place and returned.
    """
    if type(x) is not list:
        # First reduction step: wrap the bare number into the accumulator.
        x = [[x]]

    current = x[-1]
    if y - current[-1] > 1:
        x.append([y])
    else:
        current.append(y)
    return x

In [20]:
def split_to_continuous_segments(array):
    """Return the runs of consecutive numbers found in ``array``.

    The values are sorted first; a new segment starts whenever the difference
    to the previous value is greater than one, so repeated values stay in the
    current segment.

    Args:
        array: Sized collection of numbers (``len()`` must be supported).

    Returns:
        A list of segments, each a new list of numbers in ascending order.
        An empty input yields ``[[]]`` (one empty segment), which is kept for
        backward compatibility with existing callers.
    """
    if len(array) == 0:
        return [[]]

    ordered = sorted(array)
    # Always build fresh lists so the result never aliases the input and every
    # segment is a plain list, including the single-element case.
    segments = [[ordered[0]]]
    for value in ordered[1:]:
        if value - segments[-1][-1] > 1:
            segments.append([value])
        else:
            segments[-1].append(value)
    return segments

In [21]:
split_to_continuous_segments([2, 23, 4, 6, 5, 1])

[[1, 2], [4, 5, 6], [23]]

In [22]:
def get_possible_tuples_count_segment(distance: int, segment: list[int]) -> int:
    """Count the index pairs ``(i, i + distance + 1)`` that fit inside ``segment``.

    The result is not clamped: it is negative when the segment has fewer than
    ``distance + 1`` elements.

    Args:
        distance: Number of elements skipped between the two members of a pair.
        segment: One continuous segment.

    Returns:
        ``len(segment) - distance - 1``.
    """
    last_index = len(segment) - 1
    return last_index - distance

In [23]:
def get_neighbours_tuples_count(distance: int, segments: list[list[int]]):
    """Count adjacent segment pairs whose gap is exactly ``distance`` numbers wide.

    For each pair of neighbouring segments the gap is measured between the
    last element of the earlier segment and the first element of the later
    one (``later[0] - earlier[-1] - 1``).

    Args:
        distance: Required number of missing values between two segments.
        segments: Segments in the order they should be compared; every segment
            after the first is indexed with ``[0]`` and every segment before
            the last with ``[-1]``.

    Returns:
        The number of neighbouring segment pairs matching ``distance``.
    """
    count = 0
    previous = None
    for segment in segments:
        # Identity check instead of `== None`: equality would call the
        # segment's own __eq__, which fails for array-like segments.
        if previous is not None and segment[0] - previous[-1] - 1 == distance:
            count += 1
        previous = segment

    return count

In [24]:
def get_possible_tuples_count(distance: int, segments: list[list[int]]) -> int:
    """Total number of tuples available for ``distance`` across all segments.

    Adds up the per-segment counts (negative counts of too-short segments are
    treated as zero) and the count of neighbouring segments whose gap matches
    ``distance``.

    Args:
        distance: Distance passed through to both counting helpers.
        segments: List of continuous segments.

    Returns:
        The combined count.
    """
    within_segments = 0
    for segment in segments:
        candidate = get_possible_tuples_count_segment(distance, segment)
        if candidate > 0:
            within_segments += candidate

    across_segments = get_neighbours_tuples_count(distance, segments)
    return within_segments + across_segments

### Тестирование

In [25]:
def f(d, s):
    """Short alias for get_possible_tuples_count_segment(d, s)."""
    segment_count = get_possible_tuples_count_segment(d, s)
    return segment_count

def F(d, ss):
    """Short alias for get_possible_tuples_count(d, ss)."""
    total_count = get_possible_tuples_count(d, ss)
    return total_count

In [26]:
# One three-element segment: the count drops by one per unit of distance.
ss = [[1, 2, 3]]

assert F(0, ss) == 2
assert F(1, ss) == 1
assert F(2, ss) == 0

In [27]:
# Two segments with one missing number between them: at distance 1 the gap
# between 3 and 5 adds one more tuple.
ss = [[1, 2, 3], [5, 6]]

assert F(0, ss) == 3
assert F(1, ss) == 2

In [28]:
# Two three-element segments with one missing number between them.
ss = [[1, 2, 3], [5, 6, 7]]

assert F(0, ss) == 4
assert F(1, ss) == 3

In [29]:
# A single-element segment contributes nothing by itself; at distance 1 the
# gap between 8 and 10 adds one tuple.
ss = [[1], [5, 6, 7, 8], [10, 11, 12]]

assert F(0, ss) == 5
assert F(1, ss) == 4

In [30]:
# Two single-element segments: only the distance equal to the gap between
# 2 and 5 (two missing numbers) yields a tuple.
ss = [[2], [5]]

assert F(0, ss) == 0
assert F(1, ss) == 0
assert F(2, ss) == 1
assert F(3, ss) == 0

### Индексация

In [31]:
d = {1: 3}
d

{1: 3}

In [32]:
l = [21, 43]
list2 = [1, 2, 3]

In [33]:
l + list2

[21, 43, 1, 2, 3]

In [34]:
from src.data.utils import get_possible_tuples

In [35]:
ss = [[1, 2, 3, 4, 5], [9, 10, 11, 12, 13], [23, 24, 25]]

In [36]:
get_possible_tuples(0, ss)

[(1, 2),
 (2, 3),
 (3, 4),
 (4, 5),
 (9, 10),
 (10, 11),
 (11, 12),
 (12, 13),
 (23, 24),
 (24, 25)]

In [37]:
get_possible_tuples(1, ss)

[(1, 3), (2, 4), (3, 5), (9, 11), (10, 12), (11, 13), (23, 25)]

In [38]:
get_possible_tuples(2, ss)

[(1, 4), (2, 5), (9, 12), (10, 13)]

In [39]:
get_possible_tuples(3, ss)

[(1, 5), (5, 9), (9, 13)]

In [40]:
get_possible_tuples(9, ss)

[(13, 23)]

In [41]:
for k, v in {1: 32, 3: 3}.items():
    print(v)

32
3


In [42]:
(1, *(1, 2))

(1, 1, 2)

## Расчет параметров датасета

### Средние размеры изображения

In [44]:
# One dataset per training sequence directory, then a single concatenated
# dataset spanning all four of them.
dataset01, dataset02, dataset03, dataset05 = (
    MOT20ExtDataset(sequence_dir)
    for sequence_dir in (
        'data/MOT20_ext/train/MOT20-01/',
        'data/MOT20_ext/train/MOT20-02/',
        'data/MOT20_ext/train/MOT20-03/',
        'data/MOT20_ext/train/MOT20-05/',
    )
)
dataset = ConcatDataset([dataset01, dataset02, dataset03, dataset05])

In [45]:
len(dataset)

135721

#### Обработка данных, долгая операция

In [59]:
# Slow size scan (minutes on the full dataset). The cells below read `xs` and
# `ys`, which are undefined on a fresh kernel while this code is commented
# out, so it is restored here. A single pass fills both lists, so every
# sample is loaded only once.
# NOTE(review): the means reported below (~139 and ~62) suggest shape[0] is
# the image height and shape[1] the width - confirm against MOT20ExtDataset.
xs, ys = [], []
for sample in tqdm(dataset):
    first, second = sample[0], sample[1]
    # Average the two images of a sample along each of the first two axes.
    xs.append((first.shape[0] + second.shape[0]) / 2)
    ys.append((first.shape[1] + second.shape[1]) / 2)

100%|██████████| 135721/135721 [03:48<00:00, 592.85it/s]


In [60]:
# ys = [(x[0].shape[1] + x[1].shape[1]) / 2 for x in tqdm(dataset)]

100%|██████████| 135721/135721 [03:26<00:00, 656.41it/s]


In [61]:
len(xs), len(ys)

(135721, 135721)

In [71]:
# Convert the per-sample size lists into tensors.
xxs = torch.tensor(xs)
yys = torch.tensor(ys)

In [72]:
xxs.mean(), yys.mean()

(tensor(139.0535), tensor(61.7174))