In [11]:
import os
import numpy as np
import pandas as pd
from scipy.spatial import KDTree
from collections import defaultdict
from sklearn.preprocessing import StandardScaler

def load_tracklet_data(tracklet_folder):
    """Load per-frame tracklet data."""
    tracklets = {}
    for file in os.listdir(tracklet_folder):
        file_path = os.path.join(tracklet_folder, file)
        if file.endswith(".txt"):
            df = pd.read_csv(file_path, header=None)
            for _, row in df.iterrows():
                tracklet_id = row[0]  # Tracklet ID
                features = row[1:].tolist()  # Other features
                if tracklet_id not in tracklets:
                    tracklets[tracklet_id] = []
                tracklets[tracklet_id].append(features)

    print(f"✅ Loaded {len(tracklets)} tracklets from {tracklet_folder}")
    return tracklets

def load_statistics(stats_folder):
    """Load tracklet statistics (Velocity, Area, Aspect Ratio)."""
    stats = {}
    for file in os.listdir(stats_folder):
        file_path = os.path.join(stats_folder, file)
        if file.endswith(".txt"):
            df = pd.read_csv(file_path, header=None)
            for _, row in df.iterrows():
                tracklet_id = row[0]
                stats[tracklet_id] = row[1:].tolist()  # Avg Velocity, Area, Aspect Ratio

    print(f"✅ Loaded {len(stats)} tracklet statistics from {stats_folder}")
    return stats

def load_visual_features(feature_folder):
    """Load visual features (Color Histogram + SIFT)."""
    visual_features = {}
    for file in os.listdir(feature_folder):
        file_path = os.path.join(feature_folder, file)
        if file.endswith(".txt"):
            df = pd.read_csv(file_path, header=None)
            for _, row in df.iterrows():
                tracklet_id = row[0]
                hist = row[1:513].tolist()  # Color histogram (512 values)
                sift = row[513:].tolist()  # SIFT descriptor (128 values)
                visual_features[tracklet_id] = hist + sift  # Combine all features

    print(f"✅ Loaded {len(visual_features)} visual features from {feature_folder}")
    return visual_features

def construct_feature_matrix(tracklets, stats, visuals):
    """Construct a feature matrix combining all features."""
    feature_matrix = []
    tracklet_ids = []

    for tracklet_id in tracklets.keys():
        position_features = np.mean(tracklets[tracklet_id], axis=0)  # Average frame features
        stat_features = stats.get(tracklet_id, [0, 0, 0])  # Default 0 if missing
        visual_features = visuals.get(tracklet_id, [0] * 640)  # Default 0 if missing

        # Apply different weights to features
        weighted_position = np.array(position_features) * 1.0  # Keep position as is
        weighted_stats = np.array(stat_features) * 5.0  # Increase weight of velocity, area, etc.
        weighted_visuals = np.array(visual_features) * 0.1  # Reduce weight of visual features

        feature_vector = np.concatenate([weighted_position, weighted_stats, weighted_visuals])
        feature_matrix.append(feature_vector)
        tracklet_ids.append(tracklet_id)
    
    # Normalize features to ensure KD-Tree works correctly
    scaler = StandardScaler()
    feature_matrix = scaler.fit_transform(feature_matrix)

    print(f"✅ Constructed feature matrix with {len(feature_matrix)} entries.")
    return np.array(feature_matrix), tracklet_ids

def merge_tracklets_with_kdtree(feature_matrix, tracklet_ids, tracklets):
    """Merge tracklets by always choosing the nearest neighbor."""
    tree = KDTree(feature_matrix)
    merged_tracklets = {}

    for i, tracklet_id in enumerate(tracklet_ids):
        distances, indices = tree.query(feature_matrix[i], k=2)  # Get the closest neighbor
        best_match_idx = indices[1]  # Second closest (first is itself)

        best_match = tracklet_ids[best_match_idx]
        merged_tracklets.setdefault(tracklet_id, set()).add(best_match)

    print(f"✅ Found {len(merged_tracklets)} merging tracklet pairs.")
    return merged_tracklets

def interpolate_missing_info(tracklets, merged_tracklets):
    """Fill missing information using interpolation."""
    filled_tracklets = defaultdict(list)

    for tracklet_id, similar_ids in merged_tracklets.items():
        all_data = []

        for t_id in similar_ids:
            if t_id in tracklets:
                all_data.extend(tracklets[t_id])

        if all_data:
            filled_tracklets[tracklet_id] = np.mean(all_data, axis=0)  # Use mean for missing values

    print(f"✅ Interpolated missing info for {len(filled_tracklets)} tracklets.")
    return filled_tracklets

def iterative_tracklet_merging(tracklet_folder, stats_folder, feature_folder, output_folder):
    """Main function to process and iteratively merge tracklets."""
    os.makedirs(output_folder, exist_ok=True)

    # Load data
    tracklets = load_tracklet_data(tracklet_folder)
    stats = load_statistics(stats_folder)
    visuals = load_visual_features(feature_folder)

    # Construct feature matrix
    feature_matrix, tracklet_ids = construct_feature_matrix(tracklets, stats, visuals)

    # Iteratively merge tracklets in pairs
    iteration = 1
    while len(tracklet_ids) > 1:
        print(f"🔄 Iteration {iteration}: {len(tracklet_ids)} tracklets remaining.")
        merged_tracklets = merge_tracklets_with_kdtree(feature_matrix, tracklet_ids, tracklets)

        if not merged_tracklets:  # If no pairs found, stop merging
            break

        # Merge closest pairs
        merged = set()
        for t1, t2_set in merged_tracklets.items():
            for t2 in t2_set:
                if t1 in tracklets and t2 in tracklets and t1 not in merged and t2 not in merged:
                    tracklets[t1].extend(tracklets[t2])  # Merge tracklet data
                    del tracklets[t2]  # Remove merged tracklet
                    merged.add(t1)
                    merged.add(t2)

        # Rebuild feature matrix for next iteration
        feature_matrix, tracklet_ids = construct_feature_matrix(tracklets, stats, visuals)
        iteration += 1

    # Fill missing info
    filled_tracklets = interpolate_missing_info(tracklets, merged_tracklets)

    # Save output
    output_count = 0
    for tracklet_id, features in filled_tracklets.items():
        output_file = os.path.join(output_folder, f"{tracklet_id}_merged.txt")
        np.savetxt(output_file, [features], delimiter=',', fmt="%.6f")
        output_count += 1

    print(f"✅ Processed and saved {output_count} merged tracklets in: {output_folder}")

# Example Usage
tracklet_folder = r"F:\edited visdrone\tracklets"
stats_folder = r"F:\edited visdrone\processed_tracklets"
feature_folder = r"F:\edited visdrone\sift_and_histogram"
output_folder = r"F:\edited visdrone\merged_tracklets"

iterative_tracklet_merging(tracklet_folder, stats_folder, feature_folder, output_folder)


✅ Loaded 4 tracklets from F:\edited visdrone\tracklets
✅ Loaded 4 tracklet statistics from F:\edited visdrone\processed_tracklets
✅ Loaded 4 visual features from F:\edited visdrone\sift_and_histogram
✅ Constructed feature matrix with 4 entries.
🔄 Iteration 1: 4 tracklets remaining.
✅ Found 4 merging tracklet pairs.
✅ Constructed feature matrix with 2 entries.
🔄 Iteration 2: 2 tracklets remaining.
✅ Found 2 merging tracklet pairs.
✅ Constructed feature matrix with 1 entries.
✅ Interpolated missing info for 1 tracklets.
✅ Processed and saved 1 merged tracklets in: F:\edited visdrone\merged_tracklets
