In [11]:
import pandas as pd
import numpy as np
from scipy import spatial
import ast
from datetime import datetime
import math

In [12]:
training = pd.read_csv('TruckReidDataset/train.txt')
# Each 'features' cell holds the text of a nested list ("[[...]]"). Parse it with
# ast.literal_eval, which only accepts Python literals, rather than eval(), which
# would execute any expression found in the data file. The parsed value becomes a
# 2D array of shape (1, N), so flatten it into a 1D feature vector.
training['features'] = training['features'].apply(
    lambda x: np.array(ast.literal_eval(x)).flatten()
)
training.head()

Unnamed: 0,id,reid,timestamp,filepath,features,direction,drone,position_utm
0,2041,19.0,2025-09-09 13:43:00.333328247,train/02041.jpg,"[-0.0032804568763822317, -0.029575718566775322...",Northbound,3,"[601044.64, 4859593.68]"
1,2471,65.0,2025-09-13 10:45:01.366668701,train/02471.jpg,"[-0.005662569310516119, -0.03177795559167862, ...",Southbound,1,"[604794.84, 4855944.94]"
2,2005,50.0,2025-09-09 17:56:19.533332825,train/02005.jpg,"[-0.01376084890216589, -0.0229105893522501, -0...",Southbound,2,"[602829.93, 4857858.49]"
3,1732,10.0,2025-09-09 10:30:34.466667175,train/01732.jpg,"[-0.01391203235834837, -0.02876005880534649, -...",Northbound,2,"[602829.93, 4857858.49]"
4,2008,34.0,2025-09-09 16:29:21.866668701,train/02008.jpg,"[-0.0010643844725564122, -0.001412697485648095...",Northbound,2,"[602829.93, 4857858.49]"


In [13]:
def parse_timestamp(ts):
    """
    Convert a timestamp string to a datetime.

    Accepts "YYYY-MM-DD HH:MM:SS" with an optional fractional-second part of
    any length (e.g. nanoseconds). datetime only stores microseconds, so the
    fraction is truncated (not rounded) to its first six digits.

    Raises ValueError if the string does not match the expected layout.
    """
    base, dot, frac = ts.partition(".")
    if not dot:
        # No fractional part: a format containing ".%f" would not match,
        # so parse whole-second timestamps without it.
        return datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    frac = frac[:6]  # keep microseconds
    return datetime.strptime(f"{base}.{frac}", "%Y-%m-%d %H:%M:%S.%f")

In [21]:
def valid_speed(truck1, truck2, max_speed_kmh=200):
    """
    Decide whether truck2 is a plausible later sighting of truck1.

    Both arguments are row-like mappings with the keys "direction", "drone",
    "position_utm" and "timestamp". "position_utm" may be either the text of
    an [x, y] list or an already-parsed sequence; "timestamp" is a string
    understood by parse_timestamp.

    The pair is rejected (False) when any of the following holds:
      * the two sightings have different directions;
      * the direction is "Northbound" and truck2's drone number is lower
        than truck1's, or the direction is "Southbound" and it is higher;
      * truck2 was seen before truck1;
      * the straight-line speed between the two positions exceeds
        max_speed_kmh (default 200 km/h).

    Returns True otherwise, including when both timestamps are identical.
    """
    direction = truck1["direction"]
    if direction != truck2["direction"]:
        return False

    # Drone order must be consistent with the travel direction.
    # NOTE(review): presumably drone numbers increase along the northbound
    # direction of travel — confirm against the dataset description.
    if direction == "Northbound" and truck2["drone"] < truck1["drone"]:
        return False
    if direction == "Southbound" and truck2["drone"] > truck1["drone"]:
        return False

    # Positions arrive as strings when read straight from the CSV.
    pos1, pos2 = (
        ast.literal_eval(pos) if isinstance(pos, str) else pos
        for pos in (truck1["position_utm"], truck2["position_utm"])
    )

    # Parse timestamp strings
    t1 = parse_timestamp(truck1["timestamp"])
    t2 = parse_timestamp(truck2["timestamp"])

    # Euclidean distance between the two coordinate pairs
    distance = math.hypot(pos2[0] - pos1[0], pos2[1] - pos1[1])

    # Time difference in seconds (positive when truck2 is the later sighting)
    delta_t = (t2 - t1).total_seconds()

    if delta_t == 0:
        # Simultaneous sightings are accepted without a speed check; this is
        # what lets a row match itself.
        # NOTE(review): this also accepts simultaneous sightings at different
        # positions — confirm that is intended.
        return True

    if delta_t < 0:
        return False

    speed_kmh = (distance / delta_t) * 3.6
    if speed_kmh > max_speed_kmh:
        return False

    return True


In [15]:
def compute_similarity(query, training, threshold=0.75):
    """
    Rank the rows of `training` by cosine similarity to `query`.

    A row is only scored when valid_speed(query, row) accepts it; rejected
    rows are never returned, whatever the threshold.

    Parameters:
        query: row-like mapping with a 'features' vector plus the fields
            valid_speed reads.
        training: DataFrame whose rows have 'features', 'filepath',
            'direction', 'position_utm', 'timestamp' and 'drone'.
        threshold: minimum cosine similarity for a row to be returned.

    Returns:
        A list, ordered by descending score, of single-entry dicts
        {filepath: [score, direction, position_utm, timestamp, drone]}.
    """
    count = len(training)
    # Rejected rows keep the placeholder score -1 so the ranking order is
    # well defined; the `valid` mask, not the placeholder, decides exclusion,
    # because -1 is also a legitimate cosine similarity.
    scores = np.full(count, -1.0)
    valid = np.zeros(count, dtype=bool)

    for i in range(count):
        candidate = training.iloc[i]  # look the row up once per iteration
        if valid_speed(query, candidate):
            valid[i] = True
            scores[i] = 1 - spatial.distance.cosine(query['features'], candidate['features'])

    res = []
    for idx in np.argsort(scores)[::-1]:
        # `not (score >= threshold)` also drops NaN scores.
        if not valid[idx] or not (scores[idx] >= threshold):
            continue
        truck = training.iloc[idx]
        res.append({truck['filepath']: [scores[idx], truck["direction"], truck["position_utm"], truck["timestamp"], truck["drone"]]})

    return res

In [16]:
def print_top_results(results, top_k=10):
    """
    Print the first `top_k` entries of `results`, one per line.

    `results` is a list of single-entry dicts {image_path: values}, as
    produced by compute_similarity. If it holds fewer than `top_k` entries,
    all of them are printed; a non-positive `top_k` prints nothing.
    """
    # Slicing stops at the end of the list, so a short result list no longer
    # raises IndexError. max() keeps a negative top_k from slicing from the end.
    for item in results[:max(top_k, 0)]:
        img_path, values = list(item.items())[0]
        print(f"Image: {img_path}, {values}")

In [17]:
def results_over_threshold(results, threshold=0.8):
    """
    Return the entries of `results` whose score is at least `threshold`.

    Each entry is a dict whose first value is a list with the score in
    position 0. Input order is preserved and the input list is not modified.
    """
    return [
        entry
        for entry in results
        if list(entry.values())[0][0] >= threshold
    ]

In [18]:
def write_results_to_file(results, filename='results.txt'):
    """
    Append one "<image path><TAB><score>" line per result to `filename`.

    `results` is a list of dicts mapping an image path to a list whose first
    element is the score. The file is opened in append mode, so existing
    content is kept and repeated calls accumulate lines.
    """
    with open(filename, 'a') as out:
        out.writelines(
            f"{img_path}\t{values[0]}\n"
            for entry in results
            for img_path, values in entry.items()
        )

## running on the entire training dataset

In [23]:
# Use every training row in turn as the query and compare it against the whole
# training set. Matches are appended to results1.txt, so re-running this cell
# adds to whatever the file already contains.
length = len(training)
for i in range(length):
    img = training.iloc[i]
    results = compute_similarity(img, training, threshold=0.75)
    write_results_to_file(results, filename='results1.txt')
    # i is 0-based: report i + 1 finished queries so the counter ends at 100
    # instead of stopping just short of it.
    print(f"{round(((i + 1)/length)*100, 2)}/100% done", end='\r', flush=True)

99.96/100% done

## running on individual trucks

In [None]:
# Use the first training row as the query for a single-truck example.
queryImg = training.iloc[0]
# NOTE(review): query_feat is not read by any cell visible here — confirm whether it is still needed.
query_feat = queryImg['features']
queryImg

id                                                           2041
reid                                                         19.0
timestamp                           2025-09-09 13:43:00.333328247
filepath                                          train/02041.jpg
features        [-0.0032804568763822317, -0.029575718566775322...
direction                                              Northbound
drone                                                           3
position_utm                              [601044.64, 4859593.68]
Name: 0, dtype: object

In [None]:
# Rank the whole training set against the query, using compute_similarity's default threshold (0.75).
results = compute_similarity(queryImg, training)
results

[{'train/02041.jpg': [np.float64(1.0),
   'Northbound',
   '[601044.64, 4859593.68]',
   '2025-09-09 13:43:00.333328247',
   np.int64(3)]}]

In [None]:
# Append the single-query matches to results1.txt, the same file the full-dataset run writes to.
# NOTE(review): `results` was produced with compute_similarity's default threshold of 0.75,
# so filtering at 0.7 here removes nothing — confirm the intended threshold.
write_results_to_file(results_over_threshold(results, threshold=0.7), filename='results1.txt')