In [None]:
import pandas as pd
from geopandas import GeoDataFrame, read_file

import sys
sys.path.append("..")
import movingpandas as mpd
mpd.show_versions()

import warnings
warnings.simplefilter("ignore")

import os
import csv
import numpy as np
from scipy.interpolate import interp1d

Mapping file

In [2]:
def filter_and_record_csv_files(folder_path, output_csv):
    
    files = os.listdir(folder_path)
    # filter out .csv files
    csv_files = [file for file in files if file.endswith('.csv')]
    
    # create a new .csv file to store the mapping
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Original Filename", "New ID"])
        
        for idx, csv_file in enumerate(csv_files, start=1):
            writer.writerow([csv_file, idx])


folder_path = './data/csv' 
output_csv = 'output_file.csv'

filter_and_record_csv_files(folder_path, output_csv)
print(f"create mapping file: {output_csv}")

create mapping file: output_file.csv


Calculate ATE and sort

In [3]:
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  #radius of the Earth in km
    phi1, phi2 = np.radians(lat1), np.radians(lat2)
    dphi = np.radians(lat2 - lat1)
    dlambda = np.radians(lon2 - lon1)
    
    a = np.sin(dphi / 2.0) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(dlambda / 2.0) ** 2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    
    return R * c

def interpolate_trajectory(trajectory, num_points):
    lat, lon = trajectory[:, 0], trajectory[:, 1]
    t_original = np.linspace(0, 1, len(lat))
    t_target = np.linspace(0, 1, num_points)
    
    interp_lat = interp1d(t_original, lat, kind='linear')
    interp_lon = interp1d(t_original, lon, kind='linear')
    
    lat_target = interp_lat(t_target)
    lon_target = interp_lon(t_target)
    
    return np.vstack((lat_target, lon_target)).T

def calculate_ate(ground_truth, estimated):
    num_points = max(len(ground_truth), len(estimated))
    
    ground_truth_interp = interpolate_trajectory(ground_truth, num_points)
    estimated_interp = interpolate_trajectory(estimated, num_points)
    
    errors = np.array([haversine(lat1, lon1, lat2, lon2) for (lat1, lon1), (lat2, lon2) in zip(ground_truth_interp, estimated_interp)])
    rmse = np.sqrt(np.mean(errors ** 2))
    
    return rmse


#read mapping csv files
files = 'output_file.csv'
df = pd.read_csv(files)
coordinates = []
for index, row in df.iterrows():
    original_filename = row.iloc[0]
    file_id = row.iloc[1]

    # read original csv file
    file_path = './data/csv/' + original_filename
    
    df1 = pd.read_csv(file_path, delimiter=';')
    # get the first trajectory coordinates
    filtered_df = df1[df1['trajectory_id'] == 1][['X', 'Y']]

    # to numpy array
    coordinates.append(np.array(filtered_df))

print('read csv files done ')
rmse_records = []

# calculate ATE for each pair of consecutive trajectories
for i in range(1, len(coordinates)):
    rmse = calculate_ate(coordinates[i-1], coordinates[i])
    #print('Id:', i-1, 'and', i, '\tATE (RMSE):', rmse, 'km')
    rmse_records.append((i, i+1, rmse))
# sort the results by RMSE
rmse_records_sorted = sorted(rmse_records, key=lambda x: x[2])

# output the top 10 results
for idx, record in enumerate(rmse_records_sorted[:10]):
    id1, id2, rmse = record
    print(f'Id: {id1:4} and {id2:4}\tATE (RMSE): {rmse:.2f} km')
    if idx == 9:
        break

# save the sorted results to a csv file
dfs = pd.DataFrame(rmse_records_sorted, columns=['Id1', 'Id2', 'ATE_RMSE_km'])
csv_filename = 'sorted_rmse_results.csv'
dfs.to_csv(csv_filename, index=False)

print(f'save to: {csv_filename}')

read csv files done 
Id: 6006 and 6007	ATE (RMSE): 0.02 km
Id: 4763 and 4764	ATE (RMSE): 0.02 km
Id:  115 and  116	ATE (RMSE): 0.03 km
Id: 3553 and 3554	ATE (RMSE): 0.04 km
Id: 4711 and 4712	ATE (RMSE): 0.04 km
Id: 3688 and 3689	ATE (RMSE): 0.05 km
Id: 1390 and 1391	ATE (RMSE): 0.05 km
Id: 4013 and 4014	ATE (RMSE): 0.05 km
Id: 4093 and 4094	ATE (RMSE): 0.05 km
Id: 3931 and 3932	ATE (RMSE): 0.06 km
save to: sorted_rmse_results.csv


In [None]:
rmse_file = './sorted_rmse_results.csv'
filename_file = './output_file.csv'
filename_map = {}
pair = []
# read mapping file and store the mapping in a dictionary
with open(filename_file, mode='r', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        new_id = int(row['New ID'])
        original_filename = row['Original Filename']
        filename_map[new_id] = original_filename
count=0
# read the sorted RMSE results and print the original filenames
with open(rmse_file, mode='r', encoding='utf-8') as f:
    
    reader = csv.DictReader(f)
    for row in reader:
        if count>31:
            break
        count+=1
        id1 = int(row['Id1'])
        id2 = int(row['Id2'])
        filename_id1 = filename_map.get(id1, 'Unknown')
        filename_id2 = filename_map.get(id2, 'Unknown')
        
        print(f'Id1: {id1}\tOriginal Filename: {filename_id1}')
        print(f'Id2: {id2}\tOriginal Filename: {filename_id2}')
        print()
        pair.append((filename_id1, filename_id2))
        


In [11]:
import geoviews as gv
bing_maps_tile_source = gv.tile_sources.WMTS("http://ecn.t3.tiles.virtualearth.net/tiles/a{q}.jpeg?g=1")
hvplot_defaults = {'tiles':bing_maps_tile_source, 'frame_height':400, 'frame_width':400, 'colorbar':True}


In [None]:
for i in range(5):
    df = pd.read_csv('data/csv/'+pair[i][0], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot1 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    df = pd.read_csv('data/csv/'+pair[i][1], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot2 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    print(pair[i][0], 'and', pair[i][1])
    display(plot1 + plot2)

<mark>OVERLAPPING TRAJECTORIES sorted by ATE<mark>

![alt text](img/ATE01.png)
![alt text](img/ATE02.png)
![alt text](img/ATE03.png)
![alt text](img/ATE04.png)
![alt text](img/ATE05.png)

In [None]:
for i in range(5,10):
    df = pd.read_csv('data/csv/'+pair[i][0], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot1 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    df = pd.read_csv('data/csv/'+pair[i][1], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot2 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    print(pair[i][0], 'and', pair[i][1])
    display(plot1 + plot2)

![alt text](img/ATE06.png)
![alt text](img/ATE07.png)
![alt text](img/ATE08.png)
![alt text](img/ATE09.png)
![alt text](img/ATE10.png)

In [None]:
for i in range(10,15):
    df = pd.read_csv('data/csv/'+pair[i][0], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot1 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    df = pd.read_csv('data/csv/'+pair[i][1], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot2 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    print(pair[i][0], 'and', pair[i][1])
    display(plot1 + plot2)

![alt text](img/ATE11.png)
![alt text](img/ATE12.png)
![alt text](img/ATE13.png)
![alt text](img/ATE14.png)
![alt text](img/ATE15.png)

In [None]:
for i in range(15,20):
    df = pd.read_csv('data/csv/'+pair[i][0], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot1 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    df = pd.read_csv('data/csv/'+pair[i][1], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot2 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    print(pair[i][0], 'and', pair[i][1])
    display(plot1 + plot2)

![alt text](img/ATE16.png)
![alt text](img/ATE17.png)
![alt text](img/ATE18.png)
![alt text](img/ATE19.png)
![alt text](img/ATE20.png)

In [None]:
for i in range(20,25):
    df = pd.read_csv('data/csv/'+pair[i][0], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot1 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    df = pd.read_csv('data/csv/'+pair[i][1], delimiter=';')
    traj_collection = mpd.TrajectoryCollection(df, 'trajectory_id', t='t', x='X', y='Y')
    plot2 = traj_collection.hvplot(title=str(traj_collection), line_width=[5,.8], **hvplot_defaults)
    print(pair[i][0], 'and', pair[i][1])
    display(plot1 + plot2)

![alt text](img/ATE21.png)
![alt text](img/ATE22.png)
![alt text](img/ATE23.png)
![alt text](img/ATE24.png)
![alt text](img/ATE25.png)