In [2]:
import os
extract_folder = 'train'
# Checking the first few lines of an annotation file in the "gt" folder
gt_folder = os.path.join(extract_folder, "MOT20-01", "gt")
gt_files = [f for f in os.listdir(gt_folder) if os.path.isfile(os.path.join(gt_folder, f))]
gt_file_path = os.path.join(gt_folder, gt_files[0])

with open(gt_file_path, 'r') as file:
    gt_annotations = [file.readline().strip() for _ in range(5)]

# Checking the "seqinfo.ini" file
seqinfo_path = os.path.join(extract_folder, "MOT20-01", "seqinfo.ini")

with open(seqinfo_path, 'r') as file:
    seqinfo = [file.readline().strip() for _ in range(10)]

# Listing a few image names from the "img1" folder
img1_folder = os.path.join(extract_folder, "MOT20-01", "img1")
img_files = os.listdir(img1_folder)[:5]

gt_annotations, seqinfo, img_files

(['1,1,199,813,140,268,1,1,0.83643',
  '2,1,201,812,140,268,1,1,0.84015',
  '3,1,203,812,140,268,1,1,0.84015',
  '4,1,206,812,140,268,1,1,0.84015',
  '5,1,208,812,140,268,1,1,0.84015'],
 ['[Sequence]',
  'name=MOT20-01',
  'imDir=img1',
  'frameRate=25',
  'seqLength=429',
  'imWidth=1920',
  'imHeight=1080',
  'imExt=.jpg',
  '',
  ''],
 ['000152.jpg', '000146.jpg', '000191.jpg', '000185.jpg', '000378.jpg'])

In [3]:
import pandas as pd

# Loading the ground truth annotations into a dataframe
column_names = ["frame", "id", "xmin", "ymin", "width", "height", "conf", "class", "visibility"]
df_annotations = pd.read_csv(gt_file_path, header=None, names=column_names)

# Displaying the first few rows of the dataframe
df_annotations.head()

Unnamed: 0,frame,id,xmin,ymin,width,height,conf,class,visibility
0,1,1,199,813,140,268,1,1,0.83643
1,2,1,201,812,140,268,1,1,0.84015
2,3,1,203,812,140,268,1,1,0.84015
3,4,1,206,812,140,268,1,1,0.84015
4,5,1,208,812,140,268,1,1,0.84015


In [4]:
# Convert bounding boxes from MOT format to YOLO format
df_annotations["x_center"] = (df_annotations["xmin"] + df_annotations["width"] / 2) / 1920
df_annotations["y_center"] = (df_annotations["ymin"] + df_annotations["height"] / 2) / 1080
df_annotations["norm_width"] = df_annotations["width"] / 1920
df_annotations["norm_height"] = df_annotations["height"] / 1080

# Extracting the relevant columns for YOLO format
df_yolo = df_annotations[["frame", "class", "x_center", "y_center", "norm_width", "norm_height"]]

# Displaying the first few rows of the YOLO-formatted dataframe
df_yolo.head()

Unnamed: 0,frame,class,x_center,y_center,norm_width,norm_height
0,1,1,0.140104,0.876852,0.072917,0.248148
1,2,1,0.141146,0.875926,0.072917,0.248148
2,3,1,0.142187,0.875926,0.072917,0.248148
3,4,1,0.14375,0.875926,0.072917,0.248148
4,5,1,0.144792,0.875926,0.072917,0.248148


In [5]:
# Creating training and validation splits based on frame numbers
total_frames = df_yolo["frame"].max()
train_frames = int(0.9 * total_frames)

# Splitting the dataframe into training and validation sets
df_train = df_yolo[df_yolo["frame"] <= train_frames]
df_val = df_yolo[df_yolo["frame"] > train_frames]

# Number of unique frames in training and validation sets
num_train_frames = df_train["frame"].nunique()
num_val_frames = df_val["frame"].nunique()

num_train_frames, num_val_frames

(386, 43)

In [6]:
import numpy as np

# Directory paths for saving YOLO-formatted label files
train_labels_dir = os.path.join(extract_folder, "MOT20-01", "labels", "train")
val_labels_dir = os.path.join(extract_folder, "MOT20-01", "labels", "val")

os.makedirs(train_labels_dir, exist_ok=True)
os.makedirs(val_labels_dir, exist_ok=True)

def save_yolo_labels(df, save_dir):
    """Save YOLO formatted labels to the specified directory."""
    for frame_num, group in df.groupby("frame"):
        label_file = os.path.join(save_dir, f"{frame_num:06}.txt")
        yolo_data = group[["class", "x_center", "y_center", "norm_width", "norm_height"]].values
        np.savetxt(label_file, yolo_data, fmt=["%d", "%f", "%f", "%f", "%f"])

# Save label files for training and validation sets
save_yolo_labels(df_train, train_labels_dir)
save_yolo_labels(df_val, val_labels_dir)

# Check the first few generated label files
train_label_files = os.listdir(train_labels_dir)[:5]
val_label_files = os.listdir(val_labels_dir)[:5]

train_label_files, val_label_files

(['000252.txt', '000246.txt', '000291.txt', '000285.txt', '000050.txt'],
 ['000423.txt', '000392.txt', '000393.txt', '000387.txt', '000422.txt'])