In [None]:
import pandas as pd

# Load the raw GPS fixes; the path is resolved relative to the working directory.
df = pd.read_parquet("gps_data_relaxed_parameters_more.parquet")

# Parse timestamps so they can be subtracted to obtain per-track durations.
df["time"] = pd.to_datetime(df["time"])

# Cast the measurement columns to float *before* inspecting the frame, so the
# dtype summary printed below reflects the frame the later cells work with.
numeric_columns = ["speed", "heading", "latitude", "longitude", "altitude"]
df[numeric_columns] = df[numeric_columns].astype(float)

print(df.head())
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() emitted an extra "None" line.
df.info()


   id_tracking         id                time  type  sequence comment  speed  \
0        88926  107329270 2024-08-01 05:18:02     0        43    None    0.0   
1        88926  107329271 2024-08-01 05:18:05     0        44    None    0.0   
2        88926  107329272 2024-08-01 05:18:08     0        45    None    0.0   
3        88926  107329273 2024-08-01 05:18:10     0        46    None    0.0   
4        88926  107329274 2024-08-01 05:18:12     0        47    None    0.0   

   heading  duration  block_type   log   latitude  longitude  altitude  \
0      0.0         0           0  None  47.069676  15.781769       0.0   
1      0.0         0           0  None  47.069235  15.781806       0.0   
2      0.0         0           0  None  47.068783  15.781572       0.0   
3      0.0         0           0  None  47.068537  15.781260       0.0   
4      0.0         0           0  None  47.068364  15.780865       0.0   

   meta_tag meta_value  
0         0             
1         0             

In [None]:
from sklearn.preprocessing import StandardScaler

# One row of summary features per track: spatial extent, elapsed time, speed
# statistics and heading spread. Named aggregation binds every output column
# to its source column and reducer, so the labels cannot drift out of sync
# with the aggregation spec the way a positional rename can.
track_features = df.groupby("id_tracking").agg(
    lat_mean=("latitude", "mean"),
    lat_min=("latitude", "min"),
    lat_max=("latitude", "max"),
    lon_mean=("longitude", "mean"),
    lon_min=("longitude", "min"),
    lon_max=("longitude", "max"),
    # Elapsed time between the first and last fix of the track, in seconds.
    duration_sec=("time", lambda t: (t.max() - t.min()).total_seconds()),
    speed_mean=("speed", "mean"),
    speed_std=("speed", "std"),
    heading_variability=("heading", "std"),
).reset_index()

# The sample standard deviation is NaN for a track with a single fix. Treat
# such tracks as having zero spread so NaN never reaches the scaler or the
# neighbour search built on the scaled matrix.
std_columns = ["speed_std", "heading_variability"]
track_features[std_columns] = track_features[std_columns].fillna(0.0)

# NOTE(review): heading_variability is a plain std of the heading column; if
# headings are compass degrees this is distorted by the 0/360 wrap-around --
# confirm whether a circular statistic is needed.

# Everything except the identifier is a model feature.
feature_columns = [col for col in track_features.columns if col != "id_tracking"]

# Standardise each feature to zero mean / unit variance so no single feature
# (e.g. duration in seconds) dominates Euclidean distances.
scaler = StandardScaler()
track_features_scaled = scaler.fit_transform(track_features[feature_columns])

# Re-attach the identifier to the scaled matrix; the shared index keeps the
# rows aligned with track_features.
track_features_scaled_df = pd.DataFrame(
    track_features_scaled,
    columns=feature_columns,
    index=track_features.index,
)
track_features_scaled_df["id_tracking"] = track_features["id_tracking"]

print(track_features_scaled_df.head())


   lat_mean   lat_min   lat_max  lon_mean   lon_min   lon_max  duration_sec  \
0  1.480474  1.491130  1.458440  0.632649  0.646100  0.619220     -0.719479   
1  1.481229  1.490236  1.496883  0.657559  0.658506  0.652613     -0.616567   
2  1.745272  1.650029  1.909924  0.629823  0.592652  0.650832     -0.232226   
3  1.312990  1.343624  1.300467  0.684495  0.696467  0.672573      0.510204   
4  2.083682  2.146057  2.032515  0.715293  0.727103  0.699668      0.482902   

   speed_mean  speed_std  heading_variability  id_tracking  
0         0.0        0.0                  0.0           59  
1         0.0        0.0                  0.0           65  
2         0.0        0.0                  0.0           78  
3         0.0        0.0                  0.0           87  
4         0.0        0.0                  0.0          172  


In [None]:
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

# Pick the first two tracks of the feature table. Selecting the column first
# and then the position keeps the identifier's own dtype; taking a whole row
# with .iloc[0] upcasts it to float alongside the float features, which made
# the ids print as e.g. "59.0".
track1_id = track_features["id_tracking"].iloc[0]
track2_id = track_features["id_tracking"].iloc[1]

# Each track becomes an (n_points, 2) array of [latitude, longitude] pairs.
# NOTE(review): points are used in the frame's stored row order and DTW is
# order-sensitive -- confirm the rows of each track are chronological.
track1 = df.loc[df["id_tracking"] == track1_id, ["latitude", "longitude"]].values
track2 = df.loc[df["id_tracking"] == track2_id, ["latitude", "longitude"]].values

# Approximate dynamic-time-warping cost between the two point sequences; the
# alignment path (second return value) is not needed here.
# NOTE(review): the point-wise metric is Euclidean on raw coordinate values,
# so the result is in coordinate units, not metres.
distance, _ = fastdtw(track1, track2, dist=euclidean)

print(f"DTW distance between track {track1_id} and track {track2_id}: {distance}")


DTW distance between track 59.0 and track 65.0: 8.494140141088382


In [None]:
from sklearn.neighbors import NearestNeighbors

# Index the standardised per-track features for Euclidean k-NN lookup.
nn = NearestNeighbors(n_neighbors=5, metric="euclidean").fit(track_features_scaled)

# Candidate route in RAW feature units, in the same order as the feature
# columns of track_features: lat mean/min/max, lon mean/min/max, duration
# (s), speed mean, speed std, heading variability.
new_route_features = [[47.5, 47.4, 47.6, 9.7, 9.6, 9.8, 3600, 30, 5, 0.2]]

# The index was fitted on standardised features, so the query must go through
# the same fitted scaler first; querying with raw values compares numbers on
# incompatible scales and returns meaningless neighbours. A labelled frame is
# used so the scaler sees the same feature names it was fitted with.
feature_names = track_features.columns.drop("id_tracking")
new_route_frame = pd.DataFrame(new_route_features, columns=feature_names)
new_route_scaled = scaler.transform(new_route_frame)

# indices[0] holds row positions into track_features, nearest first.
distances, indices = nn.kneighbors(new_route_scaled)
closest_routes = track_features.iloc[indices[0]]

print("Closest Routes to New Path:")
print(closest_routes)


Closest Routes to New Path:
     id_tracking   lat_mean    lat_min    lat_max   lon_mean    lon_min  \
743        83041  47.474826  47.425455  47.491522   9.671329   9.661372   
726        81581  47.476995  47.464238  47.479114   9.684944   9.670762   
838        99440  48.255467  48.210260  48.298840  14.301365  14.273230   
6            438  48.408271  48.399322  48.416802  12.795485  12.775661   
12           932  48.395934  48.352036  48.445736  12.729184  12.557832   

       lon_max  duration_sec  speed_mean  speed_std  heading_variability  
743   9.777867       21892.0         0.0        0.0                  0.0  
726   9.688858       16184.0         0.0        0.0                  0.0  
838  14.327431        3504.0         0.0        0.0                  0.0  
6    12.820470        3490.0         0.0        0.0                  0.0  
12   12.815932        3475.0         0.0        0.0                  0.0  
