In [1]:
import pandas as pd
import numpy as np
from scipy import spatial

In [2]:
training = pd.read_csv('TruckReidDataset/train.txt')
# Flatten the array because eval(x) returns a list of lists [[...]] which becomes a 2D array (1, N)
training['features'] = training['features'].apply(lambda x: np.array(eval(x)).flatten())
training.head()

Unnamed: 0,id,reid,timestamp,filepath,features,direction,drone,position_utm
0,2041,19.0,2025-09-09 13:43:00.333328247,train/02041.jpg,"[-0.0032804568763822317, -0.029575718566775322...",Northbound,3,"[601044.64, 4859593.68]"
1,2471,65.0,2025-09-13 10:45:01.366668701,train/02471.jpg,"[-0.005662569310516119, -0.03177795559167862, ...",Southbound,1,"[604794.84, 4855944.94]"
2,2005,50.0,2025-09-09 17:56:19.533332825,train/02005.jpg,"[-0.01376084890216589, -0.0229105893522501, -0...",Southbound,2,"[602829.93, 4857858.49]"
3,1732,10.0,2025-09-09 10:30:34.466667175,train/01732.jpg,"[-0.01391203235834837, -0.02876005880534649, -...",Northbound,2,"[602829.93, 4857858.49]"
4,2008,34.0,2025-09-09 16:29:21.866668701,train/02008.jpg,"[-0.0010643844725564122, -0.001412697485648095...",Northbound,2,"[602829.93, 4857858.49]"


In [75]:
def valid_speed(truck1, truck2):

    if(truck1["direction"] != truck2["direction"]):
        return False
    
    if(truck1["direction"] == "Northbound" and truck2["drone"] < truck1["drone"]):
        return False

    if (truck1["direction"] == "Southbound" and truck2["drone"] > truck1["drone"]):
        return False

    return True


In [91]:
def compute_similarity(query, training, threshold=0.75):
    scores = []
    for i in range(len(training)):
        if(not valid_speed(query, training.iloc[i])):
           scores.append(-1)
        else:
            score = 1-spatial.distance.cosine(query['features'], training.iloc[i]['features'])
            scores.append(score)
    scores = np.array(scores)
    rank_ID = np.argsort(scores)[::-1]
    rank_score = scores[rank_ID]

    res = []

    for i in range(len(rank_ID)):
        truck = training.iloc[rank_ID[i]]
        if (scores[rank_ID[i]] >= threshold):
            res.append({truck['filepath']: [scores[rank_ID[i]], truck["direction"], truck["position_utm"], truck["timestamp"], truck["drone"]]})

    return res

In [43]:
def print_top_results(results, top_k=10):
    for i in range(top_k):
        print(f"Image: {list(results[i].keys())[0]}, {list(results[i].values())[0]}")

In [44]:
def results_over_threshold(results, threshold=0.8):
    filtered_results = []
    for item in results:
        score = list(item.values())[0][0]
        if score >= threshold:
            filtered_results.append(item)
    return filtered_results

In [45]:
def write_results_to_file(results, filename='results.txt'):
    with open(filename, 'a') as f:
        for item in results:
            for img_path, values in item.items():
                f.write(f"{img_path}\t{values[0]}\n")

## running the on the entire training dataset

In [94]:
length = len(training)
for i in range(length):
    img = training.iloc[i]
    results = compute_similarity(img, training, threshold=0.75)
    write_results_to_file(results, filename='results1.txt')
    print(f"{round(i/length, 2)}/100% done", end='\r', flush=True)

0.67/100% done

KeyboardInterrupt: 

## running on individual trucks

In [5]:
queryImg = training.iloc[0]
query_feat = queryImg['features']
queryImg

id                                                           2041
reid                                                         19.0
timestamp                           2025-09-09 13:43:00.333328247
filepath                                          train/02041.jpg
features        [-0.0032804568763822317, -0.029575718566775322...
direction                                              Northbound
drone                                                           3
position_utm                              [601044.64, 4859593.68]
Name: 0, dtype: object

In [None]:
results = compute_similarity(query_feat, training)
print_top_results(results, top_k=10)


Image: train/02041.jpg, [np.float64(1.0), 'Northbound', '[601044.64, 4859593.68]', '2025-09-09 13:43:00.333328247', np.int64(3)]
Image: train/01857.jpg, [np.float64(0.7521161732508888), 'Northbound', '[602829.93, 4857858.49]', '2025-09-09 13:37:50.966667175', np.int64(2)]
Image: train/02814.jpg, [np.float64(0.7306936878550699), 'Northbound', '[601044.64, 4859593.68]', '2025-09-13 11:30:03.699996948', np.int64(3)]
Image: train/02520.jpg, [np.float64(0.7193395844313243), 'Northbound', '[604794.84, 4855944.94]', '2025-09-13 13:59:02.333333969', np.int64(1)]
Image: train/01856.jpg, [np.float64(0.700899179926376), 'Northbound', '[602829.93, 4857858.49]', '2025-09-09 13:37:37.066667557', np.int64(2)]
Image: train/02094.jpg, [np.float64(0.6969665737915276), 'Northbound', '[601044.64, 4859593.68]', '2025-09-09 08:00:57.266662597', np.int64(3)]
Image: train/02100.jpg, [np.float64(0.6845774617135623), 'Northbound', '[601044.64, 4859593.68]', '2025-09-09 18:55:10.399993896', np.int64(3)]
Image: t

In [36]:
write_results_to_file(results_over_threshold(results, threshold=0.7), filename='results1.txt')