In [1]:
import ast

import numpy as np
import pandas as pd
from scipy import spatial

In [2]:
training = pd.read_csv('TruckReidDataset/train.txt')
# Each 'features' cell holds the text of a nested list literal ("[[...]]").
# ast.literal_eval only accepts Python literals, so - unlike eval() - a crafted
# value in the data file cannot execute arbitrary code when it is parsed.
# Flatten because the parsed list of lists becomes a 2D array of shape (1, N).
training['features'] = training['features'].apply(
    lambda x: np.array(ast.literal_eval(x)).flatten()
)
training.head()

Unnamed: 0,id,reid,timestamp,filepath,features,direction,drone,position_utm
0,2041,19.0,2025-09-09 13:43:00.333328247,train/02041.jpg,"[-0.0032804568763822317, -0.029575718566775322...",Northbound,3,"[601044.64, 4859593.68]"
1,2471,65.0,2025-09-13 10:45:01.366668701,train/02471.jpg,"[-0.005662569310516119, -0.03177795559167862, ...",Southbound,1,"[604794.84, 4855944.94]"
2,2005,50.0,2025-09-09 17:56:19.533332825,train/02005.jpg,"[-0.01376084890216589, -0.0229105893522501, -0...",Southbound,2,"[602829.93, 4857858.49]"
3,1732,10.0,2025-09-09 10:30:34.466667175,train/01732.jpg,"[-0.01391203235834837, -0.02876005880534649, -...",Northbound,2,"[602829.93, 4857858.49]"
4,2008,34.0,2025-09-09 16:29:21.866668701,train/02008.jpg,"[-0.0010643844725564122, -0.001412697485648095...",Northbound,2,"[602829.93, 4857858.49]"


In [3]:
def compute_similarity(query_feat, training):
    """Rank every row of ``training`` by cosine similarity to ``query_feat``.

    Parameters
    ----------
    query_feat : array-like
        Feature vector of the query image.
    training : pandas.DataFrame
        Must provide a ``'features'`` column (one vector per row, each with
        the same length as ``query_feat``) and a ``'filepath'`` column.

    Returns
    -------
    list of dict
        One single-entry ``{filepath: score}`` dict per row of ``training``,
        ordered from the highest to the lowest score, where ``score`` is
        ``1 - cosine distance``. An empty frame yields an empty list.
    """
    if len(training) == 0:
        return []

    # Stack the per-row vectors into one (n_rows, n_dims) matrix so that all
    # distances come from a single cdist call instead of a Python loop doing
    # one positional row lookup per iteration.
    gallery = np.stack(training['features'].tolist())
    query = np.asarray(query_feat, dtype=float).reshape(1, -1)
    scores = 1 - spatial.distance.cdist(query, gallery, metric='cosine')[0]

    # argsort is ascending; reverse it to get best matches first.
    rank_ID = np.argsort(scores)[::-1]
    filepaths = training['filepath'].to_numpy()[rank_ID]
    return [{path: score} for path, score in zip(filepaths, scores[rank_ID])]

In [4]:
def print_top_results(results, top_k=10):
    """Print the first ``top_k`` entries of ``results``, one per line.

    Parameters
    ----------
    results : list of dict
        Single-entry ``{image_path: score}`` dicts, already in the order
        they should be shown.
    top_k : int, default 10
        Maximum number of entries to print. When ``results`` holds fewer
        entries, all of them are printed instead of raising ``IndexError``.
    """
    # Slicing clamps to the list length; max() keeps a negative top_k
    # printing nothing, exactly as range(top_k) did.
    for entry in results[:max(top_k, 0)]:
        img_path, score = list(entry.items())[0]
        print(f"Image: {img_path}, Score: {score}")

In [9]:
def results_over_threshold(results, threshold=0.8):
    """Return the entries of ``results`` whose score is at least ``threshold``.

    Each entry is a single-entry ``{image_path: score}`` dict; the score is
    the first value of the dict. Input order is preserved and the returned
    list holds the same dict objects as ``results``.
    """
    return [
        entry
        for entry in results
        if tuple(entry.values())[0] >= threshold
    ]

In [15]:
def write_results_to_file(results, filename='results.txt'):
    """Append ``results`` to ``filename`` as tab-separated lines.

    Every ``{image_path: score}`` pair of every dict in ``results`` becomes
    one ``image_path<TAB>score`` line. The file is opened in append mode, so
    existing content is kept and the file is created if it does not exist.
    """
    lines = (
        f"{img_path}\t{score}\n"
        for entry in results
        for img_path, score in entry.items()
    )
    with open(filename, 'a') as out:
        out.writelines(lines)

In [5]:
# Use the first training row as the query image and keep its feature vector.
queryImg = training.iloc[0]
query_feat = queryImg['features']
# Bare last expression so the notebook displays the selected row.
queryImg

id                                                           2041
reid                                                         19.0
timestamp                           2025-09-09 13:43:00.333328247
filepath                                          train/02041.jpg
features        [-0.0032804568763822317, -0.029575718566775322...
direction                                              Northbound
drone                                                           3
position_utm                              [601044.64, 4859593.68]
Name: 0, dtype: object

In [6]:
# Rank every training row against the query and show the ten best matches.
# The query row is itself part of `training`, so it comes back first with score 1.0.
results = compute_similarity(query_feat, training)
print_top_results(results, top_k=10)

Image: train/02041.jpg, Score: 1.0
Image: train/01857.jpg, Score: 0.7521161732508888
Image: train/02814.jpg, Score: 0.7306936878550699
Image: train/02520.jpg, Score: 0.7193395844313243
Image: train/01856.jpg, Score: 0.700899179926376
Image: train/02094.jpg, Score: 0.6969665737915276
Image: train/02100.jpg, Score: 0.6845774617135623
Image: train/02801.jpg, Score: 0.6790316325667095
Image: train/02561.jpg, Score: 0.6736835436952263
Image: train/01993.jpg, Score: 0.6728211109461885


In [18]:
# Append the single-query matches scoring at least 0.7 to results.txt.
# NOTE(review): write_results_to_file opens the file in append mode, so
# re-running this cell adds the same lines to the file again.
write_results_to_file(results_over_threshold(results, threshold=0.7), filename='results.txt')

In [19]:

# Use every training row as a query and append its matches (score >= 0.75)
# to results.txt.
# iterrows() is the explicit row iterator; looping over `training.iloc`
# itself only works through Python's legacy __getitem__/IndexError
# iteration fallback.
# NOTE(review): each query row is also in `training`, so it matches itself,
# and the output lines do not record which query produced them.
for _, img in training.iterrows():
    results = compute_similarity(img["features"], training)
    write_results_to_file(results_over_threshold(results, threshold=0.75), filename='results.txt')
