In [1]:
import os
import pandas as pd
from tqdm import tqdm
import random
import shutil

In [2]:
# Root of the original MOT20 download; sequences are read from its train/ subfolder.
MOT20_PATH = "datasets/MOT20"
# Root of the converted dataset; images/ and labels/ subfolders are created below it.
MOT20_YOLO_PATH = "datasets/MOT20_YOLO"

In [3]:
# os.listdir returns entries in arbitrary, filesystem-dependent order and may
# include stray files (e.g. .DS_Store). Keep only sequence directories and sort
# them so every later step sees the sequences in the same order on any machine.
train_folders = sorted(
    name
    for name in os.listdir(f"{MOT20_PATH}/train")
    if os.path.isdir(f"{MOT20_PATH}/train/{name}")
)
print(train_folders)

['MOT20-01', 'MOT20-02', 'MOT20-03', 'MOT20-05']


In [4]:
# Output layout: <root>/{images,labels}/{train,val}, listed images first.
directories = [
    f"{MOT20_YOLO_PATH}/{kind}/{split}"
    for kind in ("images", "labels")
    for split in ("train", "val")
]

In [5]:
# Create every output folder (including missing parents). exist_ok=True makes
# the cell safe to re-run and avoids the check-then-create race of testing
# os.path.exists() first.
for output_dir in directories:
    os.makedirs(output_dir, exist_ok=True)

In [6]:
# Column names applied, in file order, to the header-less gt.txt of a sequence.
GT_COLUMNS = [
    "Frame",       # frame number; matched against the numeric image file name
    "ID",          # track identity (not used by this conversion)
    "X",           # left edge of the box (the centre is later derived as X + W / 2)
    "Y",           # top edge of the box (the centre is later derived as Y + H / 2)
    "W",           # box width
    "H",           # box height
    "Confidence",  # NOTE(review): presumably the MOT "consider this entry" flag - confirm
    "Class",       # object class id; only classes 1 and 7 are kept further down
    "Visibility",  # NOTE(review): presumably the visible fraction of the object - confirm
]

In [7]:
def extract_seq_info(directory):
    """Parse the ``key=value`` entries of a ``seqinfo.ini``-style file.

    Args:
        directory: Path of the file to read (despite the name, this is a file
            path, not a folder).

    Returns:
        dict mapping each key to its value, both as strings with surrounding
        whitespace removed. Blank lines, section headers such as
        ``[Sequence]``, comment lines and lines without ``=`` are skipped.
        If a key occurs more than once, the last occurrence wins.
    """
    info = {}
    with open(directory) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith(("[", "#", ";")):
                continue
            # Split on the first "=" only, so values that themselves contain
            # "=" (e.g. URLs) are kept instead of being dropped.
            key, separator, value = line.partition("=")
            if not separator:
                continue
            # Strip both sides so "imWidth = 1920" yields key "imWidth".
            info[key.strip()] = value.strip()
    return info

In [11]:
# Build `data`: image path -> DataFrame of that frame's person boxes, with
# X/Y/W/H expressed as fractions of the image size.
data = {}
for folder in train_folders:
    sequence_dir = f"{MOT20_PATH}/train/{folder}"
    annotations = pd.read_csv(f"{sequence_dir}/gt/gt.txt", sep=",", header=None, names=GT_COLUMNS)

    # Image size comes from seqinfo.ini (imWidth / imHeight).
    seq = extract_seq_info(f"{sequence_dir}/seqinfo.ini")
    image_width = int(seq["imWidth"])
    image_height = int(seq["imHeight"])

    # Normalise with vectorised column arithmetic instead of per-element lambdas.
    annotations["X"] = annotations["X"] / image_width
    annotations["Y"] = annotations["Y"] / image_height
    annotations["W"] = annotations["W"] / image_width
    annotations["H"] = annotations["H"] / image_height

    # Keep only classes 1 and 7, then split the table by frame ONCE. Filtering
    # the whole table again for every image costs (frames x rows) comparisons.
    people = annotations[annotations["Class"].isin([1, 7])]
    boxes_by_frame = {frame_id: group for frame_id, group in people.groupby("Frame")}
    no_boxes = people.iloc[0:0]  # same columns/dtypes, zero rows

    # Only accept "<digits><ext>" files: anything else in img1 (hidden files,
    # thumbnails caches) would otherwise crash int(). Sorting makes the key
    # order - and therefore the seeded shuffle later on - reproducible.
    # NOTE(review): seqinfo.ini is expected to carry an imExt entry; ".jpg" is
    # used when it does not - confirm against the dataset.
    image_ext = seq.get("imExt", ".jpg").lower()
    images = sorted(
        name
        for name in os.listdir(f"{sequence_dir}/img1")
        if name.lower().endswith(image_ext) and name.split(".")[0].isdigit()
    )

    for image in tqdm(images, desc=f"{folder}/img1"):
        frame = int(image.split(".")[0])
        # Frames without any kept annotation map to an empty DataFrame.
        data[f"{sequence_dir}/img1/{image}"] = boxes_by_frame.get(frame, no_boxes)

MOT20-01/img1: 100%|██████████| 429/429 [00:00<00:00, 844.61it/s]
MOT20-02/img1: 100%|██████████| 2782/2782 [00:07<00:00, 353.46it/s]
MOT20-03/img1: 100%|██████████| 2405/2405 [00:06<00:00, 385.35it/s]
MOT20-05/img1: 100%|██████████| 3315/3315 [00:21<00:00, 155.55it/s]


In [12]:
# A seeded shuffle is only reproducible when its input order is. The dict order
# follows os.listdir, which is filesystem-dependent, so sort the paths first.
# NOTE(review): shuffling individual frames puts near-identical neighbouring
# video frames into both train and val; a per-sequence split would avoid that.
keys = sorted(data.keys())
print(f"Before shuffle: {keys[:5]}")
random.Random(42).shuffle(keys)
print(f"After shuffle: {keys[:5]}")

Before shuffle: ['datasets/MOT20/train/MOT20-01/img1/000001.jpg', 'datasets/MOT20/train/MOT20-01/img1/000002.jpg', 'datasets/MOT20/train/MOT20-01/img1/000003.jpg', 'datasets/MOT20/train/MOT20-01/img1/000004.jpg', 'datasets/MOT20/train/MOT20-01/img1/000005.jpg']
After shuffle: ['datasets/MOT20/train/MOT20-02/img1/000396.jpg', 'datasets/MOT20/train/MOT20-03/img1/000298.jpg', 'datasets/MOT20/train/MOT20-03/img1/002080.jpg', 'datasets/MOT20/train/MOT20-03/img1/001702.jpg', 'datasets/MOT20/train/MOT20-02/img1/002040.jpg']


In [13]:
# Sanity check: take the first image of the shuffled list and look up the
# DataFrame of boxes stored for it in `data`.
example_image = keys[0]
df = data[example_image]

In [14]:
# Summary statistics of the example frame's boxes. X/Y/W/H were divided by the
# image size earlier, so they are expressed as fractions of the frame.
df.describe()

Unnamed: 0,Frame,ID,X,Y,W,H,Confidence,Class,Visibility
count,46.0,46.0,46.0,46.0,46.0,46.0,46.0,46.0,46.0
mean,396.0,221.108696,0.438836,0.300483,0.040851,0.163587,0.913043,1.521739,0.491526
std,0.0,49.514186,0.218062,0.199689,0.01775,0.065295,0.284885,1.70931,0.377545
min,396.0,7.0,0.000521,0.075926,0.021875,0.076852,0.0,1.0,0.0
25%,396.0,205.25,0.34401,0.169676,0.028516,0.118981,1.0,1.0,0.096947
50%,396.0,232.5,0.440104,0.239352,0.034115,0.144907,1.0,1.0,0.530845
75%,396.0,251.75,0.570182,0.354167,0.045573,0.187731,1.0,1.0,0.85904
max,396.0,295.0,0.872396,0.856481,0.098958,0.324074,1.0,7.0,1.0


In [15]:
# Fraction of the shuffled images written to the train split; the remainder
# goes to val.
TRAIN_VAL_SPLIT = 0.8

In [16]:
def _yolo_label_text(boxes, class_id=0):
    """Render one frame's boxes as the text of a YOLO label file.

    Args:
        boxes: DataFrame with columns X, Y (top-left corner) and W, H, all
            already expressed as fractions of the image size.
        class_id: Class index written at the start of every line.

    Returns:
        One "class x_center y_center width height" line per box, each ending
        with a newline; an empty string when no box is left.
    """
    # Boxes may extend past the image border, which would give centres or
    # sizes outside [0, 1]; YOLO-style loaders reject such labels. Clip every
    # box to the frame before deriving centre and size.
    x_min = boxes["X"].clip(0.0, 1.0)
    y_min = boxes["Y"].clip(0.0, 1.0)
    x_max = (boxes["X"] + boxes["W"]).clip(0.0, 1.0)
    y_max = (boxes["Y"] + boxes["H"]).clip(0.0, 1.0)

    # Drop boxes that lie entirely outside the frame (zero area after clipping).
    keep = ((x_max - x_min) > 0) & ((y_max - y_min) > 0)

    return "".join(
        f"{class_id} {(x0 + x1) / 2:.6f} {(y0 + y1) / 2:.6f} {x1 - x0:.6f} {y1 - y0:.6f}\n"
        for x0, y0, x1, y1 in zip(x_min[keep], y_min[keep], x_max[keep], y_max[keep])
    )


# The first `train_count` shuffled images go to train, the rest to val.
train_count = int(len(keys) * TRAIN_VAL_SPLIT)
test_count = len(keys) - train_count  # size of the val split

count = 0
for count, key in enumerate(tqdm(keys), start=1):
    # Images are renumbered 000001, 000002, ... in shuffled order so that files
    # from different sequences cannot collide; image and label share the stem.
    stem = f"{count:06}"
    directory = "train" if count <= train_count else "val"

    shutil.copy(key, f"{MOT20_YOLO_PATH}/images/{directory}/{stem}.jpg")

    # A frame without boxes still gets an (empty) label file.
    with open(f"{MOT20_YOLO_PATH}/labels/{directory}/{stem}.txt", "w") as f:
        f.write(_yolo_label_text(data[key]))

100%|██████████| 8931/8931 [00:39<00:00, 228.25it/s]
