In [2]:
import os
import cv2
import csv
import numpy as np
from collections import defaultdict
from scipy.signal import savgol_filter

# Input directories
ANNOTATIONS_DIR = "D:\\ML_PROJECT\\ML_2025_GROUP_XYZ\\VisDrone2019-MOT-train\\annotations"
SEQUENCES_DIR   = "D:\\ML_PROJECT\\ML_2025_GROUP_XYZ\\VisDrone2019-MOT-train\\sequences"

# Output directories
PREPROCESSED_DIR = "preprocessed"
os.makedirs(PREPROCESSED_DIR, exist_ok=True)

# Keep only these categories
VALID_CATEGORIES = {4, 5, 6}

# ID counters
global_track_id_counter = 1
tracklet_id_counter = 1

# Mapping from global track ID to its tracklets
trackid_to_tracklets = defaultdict(list)

# For each sequence
def process_sequence(seq_name):
    global global_track_id_counter, tracklet_id_counter
    
    print(f"\nProcessing sequence: {seq_name}")
    
    # Paths
    image_folder = os.path.join(SEQUENCES_DIR, seq_name)
    annot_path   = os.path.join(ANNOTATIONS_DIR, seq_name + ".txt")
    output_csv   = os.path.join(PREPROCESSED_DIR, f"{seq_name}_processed.csv")

    if not os.path.exists(annot_path):
        print(f"Annotation missing: {seq_name}")
        return
    
    # Read annotations and group by object_id
    annotations = defaultdict(list)
    with open(annot_path, 'r') as f:
        for line in f:
            vals = list(map(int, line.strip().split(',')))
            if len(vals) < 10:
                continue
            frame_id, obj_id, x, y, w, h, conf, cat, vis, _ = vals
            if cat not in VALID_CATEGORIES:
                continue
            cx, cy = x + w // 2, y + h // 2
            annotations[obj_id].append((frame_id, x, y, w, h, cx, cy))

    output_rows = []

    for old_obj_id, data in annotations.items():
        data.sort()  # Sort by frame_id
        global_id = global_track_id_counter
        global_track_id_counter += 1

        # Introduce gaps randomly to simulate broken tracklets
        i = 0
        while i < len(data):
            if np.random.rand() < 0.3 and len(data) - i > 60:  # Random gap
                gap = np.random.randint(15, 60)
                i += gap
                continue

            # Define a chunk
            chunk = []
            while i < len(data):
                if np.random.rand() < 0.2 and len(chunk) > 10:
                    break
                chunk.append(data[i])
                i += 1
            
            if not chunk:
                continue

            # Assign new tracklet ID
            current_tracklet_id = tracklet_id_counter
            tracklet_id_counter += 1
            trackid_to_tracklets[global_id].append(current_tracklet_id)

            for (frame_id, x, y, w, h, cx, cy) in chunk:
                output_rows.append([
                    seq_name, global_id, current_tracklet_id,
                    frame_id, x, y, w, h, cx, cy
                ])
    
    # Save the processed annotations
    with open(output_csv, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow([
            "sequence", "global_track_id", "tracklet_id", "frame_id",
            "x", "y", "w", "h", "cx", "cy"
        ])
        writer.writerows(output_rows)
    
    print(f"Saved: {output_csv} with {len(output_rows)} entries.")

# Process all sequences
if __name__ == "__main__":
    seq_list = sorted(os.listdir(ANNOTATIONS_DIR))
    for filename in seq_list:
        if filename.endswith(".txt"):
            seq_name = filename.replace(".txt", "")
            process_sequence(seq_name)

    # Save trackid to tracklet map
    map_file = os.path.join(PREPROCESSED_DIR, "trackid_to_tracklets.csv")
    with open(map_file, "w", newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["global_track_id", "tracklet_ids"])
        for gid, tlist in trackid_to_tracklets.items():
            writer.writerow([gid, " ".join(map(str, tlist))])
    print(f"\nTracklet mapping saved: {map_file}")



Processing sequence: uav0000013_00000_v
Saved: preprocessed\uav0000013_00000_v_processed.csv with 0 entries.

Processing sequence: uav0000013_01073_v
Saved: preprocessed\uav0000013_01073_v_processed.csv with 0 entries.

Processing sequence: uav0000013_01392_v
Saved: preprocessed\uav0000013_01392_v_processed.csv with 0 entries.

Processing sequence: uav0000020_00406_v
Saved: preprocessed\uav0000020_00406_v_processed.csv with 0 entries.

Processing sequence: uav0000071_03240_v
Saved: preprocessed\uav0000071_03240_v_processed.csv with 1375 entries.

Processing sequence: uav0000072_04488_v
Saved: preprocessed\uav0000072_04488_v_processed.csv with 484 entries.

Processing sequence: uav0000072_05448_v
Saved: preprocessed\uav0000072_05448_v_processed.csv with 1120 entries.

Processing sequence: uav0000072_06432_v
Saved: preprocessed\uav0000072_06432_v_processed.csv with 855 entries.

Processing sequence: uav0000076_00720_v
Saved: preprocessed\uav0000076_00720_v_processed.csv with 2021 entrie

In [3]:
import os
import pandas as pd

preprocessed_dir = "preprocessed"

deleted_files = []

for filename in os.listdir(preprocessed_dir):
    if filename.endswith(".csv"):
        filepath = os.path.join(preprocessed_dir, filename)
        try:
            df = pd.read_csv(filepath)
            if df.empty:
                os.remove(filepath)
                deleted_files.append(filename)
        except Exception as e:
            print(f"Error reading {filename}: {e}")

print(f"Deleted {len(deleted_files)} empty files:")
for file in deleted_files:
    print(f" - {file}")


Deleted 6 empty files:
 - uav0000013_00000_v_processed.csv
 - uav0000013_01073_v_processed.csv
 - uav0000013_01392_v_processed.csv
 - uav0000020_00406_v_processed.csv
 - uav0000084_00000_v_processed.csv
 - uav0000099_02109_v_processed.csv


In [5]:
import os
import pandas as pd

preprocessed_dir = "preprocessed"
tracklet_dir = "tracklets"
os.makedirs(tracklet_dir, exist_ok=True)

for filename in os.listdir(preprocessed_dir):
    if not filename.endswith(".csv"):
        continue

    filepath = os.path.join(preprocessed_dir, filename)
    try:
        df = pd.read_csv(filepath)

        if df.empty:
            print(f"[SKIP] {filename} is empty.")
            continue

        if 'tracklet_id' not in df.columns:
            print(f"[ERROR] {filename} missing 'tracklet_id' column.")
            continue

        video_id = filename.replace("_processed.csv", "")
        unique_tracklets = df['tracklet_id'].unique()
        print(f"🎯 {filename}: {len(df)} entries | {len(unique_tracklets)} tracklets")

        for tid in unique_tracklets:
            tracklet_df = df[df['tracklet_id'] == tid].sort_values(by="frame_id")
            if tracklet_df.empty:
                print(f"[WARN] Empty tracklet {tid} in {video_id}")
                continue

            out_path = os.path.join(tracklet_dir, f"{video_id}_tracklet{tid}.csv")
            tracklet_df.to_csv(out_path, index=False)

    except Exception as e:
        print(f"[FAIL] {filename} -> {e}")

print("✅ All tracklets extracted.")


[ERROR] trackid_to_tracklets.csv missing 'tracklet_id' column.
🎯 uav0000071_03240_v_processed.csv: 1375 entries | 102 tracklets
🎯 uav0000072_04488_v_processed.csv: 484 entries | 38 tracklets
🎯 uav0000072_05448_v_processed.csv: 1120 entries | 85 tracklets
🎯 uav0000072_06432_v_processed.csv: 855 entries | 71 tracklets
🎯 uav0000076_00720_v_processed.csv: 2021 entries | 150 tracklets
🎯 uav0000079_00480_v_processed.csv: 20 entries | 2 tracklets
🎯 uav0000124_00944_v_processed.csv: 19553 entries | 1337 tracklets
🎯 uav0000126_00001_v_processed.csv: 25276 entries | 1772 tracklets
🎯 uav0000138_00000_v_processed.csv: 8866 entries | 631 tracklets
🎯 uav0000140_01590_v_processed.csv: 9150 entries | 687 tracklets
🎯 uav0000143_02250_v_processed.csv: 6108 entries | 440 tracklets
🎯 uav0000145_00000_v_processed.csv: 1588 entries | 110 tracklets
🎯 uav0000150_02310_v_processed.csv: 5268 entries | 373 tracklets
🎯 uav0000218_00001_v_processed.csv: 829 entries | 66 tracklets
🎯 uav0000222_03150_v_processed.csv

In [10]:
import pandas as pd
import os
import random

# Input/output directories
INPUT_DIR = 'preprocessed'  # Replace with your path
OUTPUT_DIR = 'processed_tracklets'  # Replace with your path

# Ensure output directory exists
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Define the range for skipping frames (between 15 to 60)
MIN_SKIP = 15
MAX_SKIP = 60

# Process files in input directory
for filename in os.listdir(INPUT_DIR):
    path = os.path.join(INPUT_DIR, filename)
    
    # Read in the data from CSV
    df = pd.read_csv(path)
    
    # If empty or no 'global_track_id' column, skip file
    if df.empty or 'global_track_id' not in df.columns:
        continue
    
    # Extract the sequence name (from the file name, excluding extension)
    sequence_name = os.path.splitext(filename)[0]
    
    # Iterate over unique global_track_id
    for global_track_id in df['global_track_id'].unique():
        # Get all frames corresponding to this global_track_id
        track_data = df[df['global_track_id'] == global_track_id]
        
        # Sort by frame_id to ensure proper order
        track_data = track_data.sort_values(by='frame_id')
        
        # Create tracklets by splitting the track into smaller segments
        tracklet_id = 1
        start_frame = 0
        while start_frame < len(track_data):
            # Determine the length of the current tracklet by skipping random frames
            skip = random.randint(MIN_SKIP, MAX_SKIP)
            end_frame = min(start_frame + skip, len(track_data))
            
            # Assign tracklet_id for the segment
            track_data.loc[start_frame:end_frame-1, 'tracklet_id'] = tracklet_id
            
            # Move to the next tracklet
            tracklet_id += 1
            start_frame = end_frame
        
        # Save the processed track data with tracklet IDs to a new CSV
        output_path = os.path.join(OUTPUT_DIR, f"{sequence_name}_processed.csv")
        track_data.to_csv(output_path, index=False, mode='a', header=not os.path.exists(output_path))

print("Tracklet generation and saving completed.")


Tracklet generation and saving completed.


In [9]:
print(df.columns)  # Print column names to check for any discrepancies


Index(['global_track_id', 'tracklet_ids'], dtype='object')


In [13]:
import os
import numpy as np
import csv
from collections import defaultdict

# === Paths ===
ROOT_DIR = r"D:\ML_PROJECT\ML_2025_GROUP_XYZ\VisDrone2019-MOT-train"
ANNOT_DIR = os.path.join(ROOT_DIR, "annotations")
OUTPUT_DIR = os.path.join(ROOT_DIR, "processed_annotations")

os.makedirs(OUTPUT_DIR, exist_ok=True)

# === Settings ===
VEHICLE_CATEGORIES = {4, 5, 6}
MIN_TRACKLET_LENGTH = 5

# === Helper Function ===
def process_annotation_file(file_path, output_dir):
    filename = os.path.basename(file_path)
    basename = os.path.splitext(filename)[0]

    object_tracks = defaultdict(list)

    with open(file_path, "r") as f:
        for line in f:
            parts = list(map(int, line.strip().split(',')))
            frame_id, obj_id, x, y, w, h, _, category, _, _ = parts
            if category in VEHICLE_CATEGORIES:
                object_tracks[obj_id].append((frame_id, x, y, w, h, category))

    tracklet_id_counter = 0
    tracklet_annotations = []
    tracklet_mapping = []

    for track_id, entries in object_tracks.items():
        entries = sorted(entries, key=lambda e: e[0])
        frames = [e[0] for e in entries]

        cut_indices = []
        i = 0
        while i < len(frames):
            cut_len = np.random.randint(15, 60)
            cut_indices.append(i)
            i += cut_len
        cut_indices.append(len(frames))

        for i in range(len(cut_indices) - 1):
            start = cut_indices[i]
            end = cut_indices[i + 1]
            if end - start < MIN_TRACKLET_LENGTH:
                continue

            tracklet_id = tracklet_id_counter
            tracklet_id_counter += 1
            tracklet_mapping.append((track_id, tracklet_id))

            for j in range(start, end):
                f_id, x, y, w, h, cat = entries[j]
                tracklet_annotations.append([f_id, tracklet_id, x, y, w, h, 1, cat, 1, 0])

    tracklet_annotations.sort(key=lambda x: x[0])

    filtered_csv = os.path.join(output_dir, f"filtered_{basename}.csv")
    mapping_csv = os.path.join(output_dir, f"mapping_{basename}.csv")

    with open(filtered_csv, "w", newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["frame_id", "tracklet_id", "x", "y", "w", "h", "conf", "category", "visibility", "unused"])
        writer.writerows(tracklet_annotations)

    with open(mapping_csv, "w", newline='') as f_map:
        writer = csv.writer(f_map)
        writer.writerow(["original_track_id", "new_tracklet_id"])
        writer.writerows(tracklet_mapping)

    print(f"[✓] CSVs saved for {filename}")


# === Run All Files ===
all_files = [f for f in os.listdir(ANNOT_DIR) if f.endswith('.txt')]

for fname in all_files:
    full_path = os.path.join(ANNOT_DIR, fname)
    process_annotation_file(full_path, OUTPUT_DIR)

print("\n🔥 All annotation files converted to CSVs. Check 'processed_annotations' folder.")


[✓] CSVs saved for uav0000013_00000_v.txt
[✓] CSVs saved for uav0000013_01073_v.txt
[✓] CSVs saved for uav0000013_01392_v.txt
[✓] CSVs saved for uav0000020_00406_v.txt
[✓] CSVs saved for uav0000071_03240_v.txt
[✓] CSVs saved for uav0000072_04488_v.txt
[✓] CSVs saved for uav0000072_05448_v.txt
[✓] CSVs saved for uav0000072_06432_v.txt
[✓] CSVs saved for uav0000076_00720_v.txt
[✓] CSVs saved for uav0000079_00480_v.txt
[✓] CSVs saved for uav0000084_00000_v.txt
[✓] CSVs saved for uav0000099_02109_v.txt
[✓] CSVs saved for uav0000124_00944_v.txt
[✓] CSVs saved for uav0000126_00001_v.txt
[✓] CSVs saved for uav0000138_00000_v.txt
[✓] CSVs saved for uav0000140_01590_v.txt
[✓] CSVs saved for uav0000143_02250_v.txt
[✓] CSVs saved for uav0000145_00000_v.txt
[✓] CSVs saved for uav0000150_02310_v.txt
[✓] CSVs saved for uav0000218_00001_v.txt
[✓] CSVs saved for uav0000222_03150_v.txt
[✓] CSVs saved for uav0000239_03720_v.txt
[✓] CSVs saved for uav0000239_12336_v.txt
[✓] CSVs saved for uav0000243_0000