In [1]:
%load_ext autoreload
%autoreload 2

import os

import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import geopandas as gpd
import osmnx as ox
import networkx as nx
import folium
from shapely.geometry import LineString
from shapely import wkt
import numpy as np
import swifter
from generator.preprocess import *
from generator.preprocess import remove_outlier_trajectories
from generator.road_network import RoadNetwork
import glob
from tqdm import tqdm
import torch

In [2]:
"""
 Generate osmnx network from csv
"""

df = pd.read_csv("../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")
df["geometry"] = df["geometry"].swifter.apply(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry="geometry")
gdf["coords"] = gdf["geometry"].swifter.apply(lambda x: list(x.coords))

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [6]:
gdf = ox.io._stringify_nonnumeric_cols(gdf)
gdf["fid"] = np.arange(0, gdf.shape[0], dtype="int")  # id for each edge
gdf.to_file("../osm_data/hanover_temp" + "/edges.shp", encoding="utf-8")

In [4]:
G = nx.from_pandas_edgelist(gdf, "source", "target", True, nx.MultiDiGraph)
sG = [G.subgraph(c) for c in sorted(nx.weakly_connected_components(G), key=len, reverse=True)][0]
sdf = nx.to_pandas_edgelist(sG)
sdf["coords"] = sdf["geometry"].swifter.apply(lambda x: list(x.coords))
sdf.to_csv("../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [11]:
# Folium Heat Map
from folium import plugins
from folium.plugins import HeatMap
from collections import defaultdict
import branca.colormap

m = folium.Map(location=[52.37052, 9.73322],
                    zoom_start = 8)

coords = gdf.loc[:, "coords"].values

for line in coords:
    data = [(c[1], c[0]) for c in line]
    folium.PolyLine(data, color="red", weight=2.5, opacity=0.8).add_to(m)

# coords = gdf["coords"].values
# for line in coords:
#     data = [(c[1], c[0]) for c in line]
#     folium.PolyLine(data, color="green", weight=2.5, opacity=0.8).add_to(m)

# Display the map
#map_porto.save("heatmap_gps_points_porto.html")
m

In [2]:
network = RoadNetwork()
network.load_hanover_temporal(path="../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [7]:
gdf_edges = ox.io._stringify_nonnumeric_cols(network.gdf_edges)
gdf_edges["fid"] = np.arange(
    0, gdf_edges.shape[0], dtype="int"
)  # id for each edge

gdf_edges.to_file("../osm_data/hanover_temp/" + "/edges.shp", encoding="utf-8")

In [4]:
# create dataframe for road segment mapping
df = pd.read_csv("../datasets/trajectories/hanover/hannover_inrix.csv")
rows = []
for _, g in tqdm(df.groupby("id")):
    if g.shape[0] < 5:
        continue
    traj = g[["longitude", "latitude", "time"]].copy()
    traj.loc[:, "time"] = pd.to_datetime(traj["time"], infer_datetime_format=True).dt.floor('s')
    traj.loc[:, "time"] = traj["time"].values.astype(np.int64) // 10 ** 9
    
    data = traj.to_numpy()
    time = data[1:, 2] - data[0, 2]
    time = np.insert(time, 0, 0)
    seq = LineString(data[:, :2])
    assert len(list(seq.coords)) == time.shape[0]
    rows.append((seq, time.astype(int).tolist(), traj.iloc[0, 2], traj.iloc[-1, 2]))

processed_df = pd.DataFrame(rows, columns=["POLYLINE", "timestamp", "start_stamp", "end_stamp"])
processed_df

100%|██████████| 77188/77188 [03:12<00:00, 400.83it/s]


Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp
0,"LINESTRING (9.8288 52.3326, 9.819705 52.331356...","[0, 80, 424, 535, 613, 1094, 1171, 1453, 1528,...",1565974403,1565982018
1,"LINESTRING (9.6472 52.2823, 9.643848 52.283978...","[0, 30, 60, 90, 120, 172, 202, 232, 262, 292, ...",1567075203,1567075868
2,"LINESTRING (9.7376 52.5465, 9.72797 52.541855,...","[0, 138, 300, 485, 668, 801, 882]",1566184436,1566185318
3,"LINESTRING (9.7119 52.343, 9.716048 52.344969,...","[0, 90, 180, 229, 319, 409, 492, 582, 672, 762]",1568621637,1568622399
4,"LINESTRING (9.7585 52.3363, 9.762929 52.34235,...","[0, 79, 141, 224, 377, 453, 580, 685, 763, 881...",1566671810,1566673656
...,...,...,...,...
73924,"LINESTRING (10.0369 53.3593, 10.037012 53.3582...","[0, 5, 8, 14, 15, 19, 25, 28, 30, 34, 39, 44, ...",1569563606,1569569649
73925,"LINESTRING (9.7278 52.3654, 9.727219 52.365731...","[0, 5, 8, 16, 19, 20, 25, 29, 35, 40, 46, 49, ...",1575029942,1575031187
73926,"LINESTRING (9.8726 52.9458, 9.718267 52.63466,...","[0, 599, 703, 718, 733, 748, 763, 778, 1295, 1...",1573224957,1573226965
73927,"LINESTRING (9.738963 52.382663, 9.738996 52.38...","[0, 5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60,...",1574319815,1574320945


In [5]:
from shapely.geometry import LineString, box

df = processed_df.copy()
city_bounds = network.bounds_edges
# df_clipped = clip_trajectories(processed_df.copy(), city_bounds, polyline_convert=True)
# df_clipped = filter_min_points(processed_df.copy(), 5)
bbox = box(*city_bounds)
gdf = gpd.GeoDataFrame(df, crs="epsg:4326", geometry="POLYLINE")

clipped = gpd.clip(gdf, bbox, keep_geom_type=True)

In [6]:
clipped

Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp
39943,"LINESTRING (9.92467 52.39460, 9.92430 52.39465...","[0, 19, 57, 72, 87, 102, 132, 147, 162, 177, 2...",1574720329,1574730046
30662,"LINESTRING (9.92467 52.39465, 9.92384 52.39475...","[0, 2, 6, 11, 15, 16, 21, 26, 30, 32, 37, 41, ...",1566078561,1566082688
18565,"LINESTRING (9.75970 52.39390, 9.76275 52.39302...","[0, 120, 241, 362, 483, 603, 724, 968, 1089, 1...",1573818302,1573824822
37080,"LINESTRING (9.85610 52.40830, 9.88168 52.39904...","[0, 82, 172, 252, 350, 433, 533, 619, 713, 891...",1573512511,1573519625
27615,"LINESTRING (9.92467 52.39508, 9.89775 52.39775...","[0, 135, 317, 497, 680, 860, 1040, 1227, 1408,...",1573121197,1573129512
...,...,...,...,...
44973,"LINESTRING (9.90620 52.39650, 9.90623 52.39651...","[0, 2, 10, 15, 20, 24, 29, 30, 34, 44, 45, 49,...",1574152056,1574166079
52286,"MULTILINESTRING ((9.81024 52.31900, 9.81029 52...","[0, 5, 7, 9, 24, 39, 54, 69, 84, 99, 114, 129,...",1573662945,1573677039
14977,"LINESTRING (9.70990 52.44800, 9.70993 52.44808...","[0, 15, 30, 45, 60, 76, 91, 105, 120, 135, 151...",1566376125,1566386730
54650,"MULTILINESTRING ((9.76860 52.37600, 9.76838 52...","[0, 2, 3, 13, 28, 43, 54, 58, 74, 89, 103, 119...",1567093909,1567105126


In [7]:
print(clipped.loc[0, "POLYLINE"])
print(processed_df.loc[0, "POLYLINE"])

#print(processed_df.loc[0, "POLYLINE"].contains(clipped.loc[0, "POLYLINE"]))

LINESTRING (9.8288 52.3326, 9.819705 52.331356, 9.787736 52.390743, 9.824655 52.408264, 9.850968 52.412861, 9.60824 52.422359, 9.583172600000001 52.422533930869854)
LINESTRING (9.8288 52.3326, 9.819705 52.331356, 9.787736 52.390743, 9.824655 52.408264, 9.850968 52.412861, 9.60824 52.422359, 9.569406 52.42263, 9.542397 52.42115, 9.508204 52.408611, 9.466822 52.381687, 9.435784 52.361549, 9.399967 52.328964, 9.372162 52.30423, 9.339224 52.277271, 9.322924 52.269669, 9.316408 52.265495, 9.313453 52.262424, 9.311065 52.259903, 9.30757 52.257214, 9.302485 52.254589, 9.297933 52.252476, 9.294051 52.250629, 9.289948 52.248016, 9.256525 52.233921, 9.200723 52.220062, 9.16455 52.216946, 9.106794 52.221832, 9.06195 52.220818, 9.003625 52.219688, 8.956563 52.215717, 8.882984 52.211132, 8.845288 52.206764, 8.80543 52.234589, 8.728555 52.206097, 8.650899 52.187443, 8.612735 52.178822, 8.608962 52.179413, 8.58045 52.188202, 8.532821 52.194088, 8.484649 52.195026, 8.421346 52.198208, 8.38777 52.19701

In [21]:
def strictly_increasing(L):
    return all(x+20>=y for x, y in zip(L, L[1:]))


def correct_timestamps(traj, orig_trajs, orig_ts):
    corrected_ts = []
    corrected_traj = []
    idxs = []
    found = False
    for i, g1 in enumerate(traj):
        ridx = 0
        for j, g2 in enumerate(orig_trajs[ridx:]):
            if g1 == g2:
                found = True
                corrected_ts.append(orig_ts[j])
                corrected_traj.append(g1)
                idxs.append(j)
                ridx = j+1
                break
            # if found:
            #     break

    assert len(corrected_traj) == len(corrected_ts)
    # assert strictly_increasing(idxs), (idxs)
    
    return corrected_traj, (np.array(corrected_ts) - corrected_ts[0]).astype(int).tolist()


rows = []
i = 0
orig_polies, orig_ts = processed_df.POLYLINE, processed_df.timestamp
for i, r in tqdm(clipped.iterrows()):
    op = list(orig_polies.loc[r.name].coords)
    ot = orig_ts.loc[r.name]
    if type(r.POLYLINE) == LineString:
        traj = list(r.POLYLINE.coords)
        if len(traj) < 5:
            continue
        ctraj, cts = correct_timestamps(traj, op, ot)
        rows.append([LineString(ctraj), cts, r.start_stamp, r.end_stamp])
    else:
        for line in r.POLYLINE:
            traj = list(line.coords)
            if len(traj) < 5:
                continue
            ctraj, cts = correct_timestamps(traj, op, ot)
            rows.append([LineString(ctraj), cts, r.start_stamp, r.end_stamp])

df = pd.DataFrame(rows, columns=["POLYLINE", "timestamp", "start_stamp", "end_stamp"])

# i

73871it [02:05, 588.99it/s] 


In [22]:
df

Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp
0,"LINESTRING (9.924301 52.394651, 9.919142 52.39...","[0, 15, 45, 75, 90, 105, 120]",1574720329,1574730046
1,"LINESTRING (9.923842 52.394754, 9.922241 52.39...","[0, 6, 10, 11, 17, 21, 25, 27, 31, 36, 40, 41,...",1566078561,1566082688
2,"LINESTRING (9.7597 52.3939, 9.76275 52.393017,...","[0, 120, 241, 362, 483, 603, 724]",1573818302,1573824822
3,"LINESTRING (9.89775 52.397747, 9.875479 52.360...","[0, 178, 356, 573]",1573121197,1573129512
4,"LINESTRING (9.7565 52.3949, 9.758446 52.395981...","[0, 30, 94, 124, 154, 184, 214, 277, 307, 337,...",1574525822,1574533547
...,...,...,...,...
156861,"LINESTRING (9.7686 52.376, 9.768375 52.376103,...","[0, 2, 3, 54, 74, 89, 103, 119, 122, 128, 134,...",1567093909,1567105126
156862,"LINESTRING (9.850325 52.413299, 9.851001 52.41...","[0, 4, 9, 14, 20]",1567093909,1567105126
156863,"LINESTRING (9.847199 52.412983, 9.846692 52.41...","[0, 4, 9, 15, 19, 24, 30, 34, 38, 44, 49, 54, 58]",1567093909,1567105126
156864,"LINESTRING (9.817837 52.428074, 9.813874 52.42...","[0, 6, 11, 15, 19, 25, 30, 35, 40, 45, 50, 55,...",1567093909,1567105126


In [23]:
df["id"] = np.arange(1, df.shape[0]+1)
df["timestamp"] = df["timestamp"].astype(str)
df["timestamp"] = df["timestamp"].str.replace("[", "")
df["timestamp"] = df["timestamp"].str.replace("]", "")
#df_clipped["timestamp"] = df_clipped["timestamp"].str.replace("  ", ", ")
df.to_csv("../datasets/trajectories/hanover/temporal/mapped_id_poly_clipped.csv", sep=";", index=False)

In [24]:
df = pd.read_csv("../datasets/trajectories/hanover/temporal/mapped_id_poly_clipped.csv", sep=";")
df

Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp,id
0,"LINESTRING (9.924301 52.394651, 9.919142 52.39...","0, 15, 45, 75, 90, 105, 120",1574720329,1574730046,1
1,"LINESTRING (9.923842 52.394754, 9.922241 52.39...","0, 6, 10, 11, 17, 21, 25, 27, 31, 36, 40, 41, ...",1566078561,1566082688,2
2,"LINESTRING (9.7597 52.3939, 9.76275 52.393017,...","0, 120, 241, 362, 483, 603, 724",1573818302,1573824822,3
3,"LINESTRING (9.89775 52.397747, 9.875479 52.360...","0, 178, 356, 573",1573121197,1573129512,4
4,"LINESTRING (9.7565 52.3949, 9.758446 52.395981...","0, 30, 94, 124, 154, 184, 214, 277, 307, 337, ...",1574525822,1574533547,5
...,...,...,...,...,...
156861,"LINESTRING (9.7686 52.376, 9.768375 52.376103,...","0, 2, 3, 54, 74, 89, 103, 119, 122, 128, 134, ...",1567093909,1567105126,156862
156862,"LINESTRING (9.850325 52.413299, 9.851001 52.41...","0, 4, 9, 14, 20",1567093909,1567105126,156863
156863,"LINESTRING (9.847199 52.412983, 9.846692 52.41...","0, 4, 9, 15, 19, 24, 30, 34, 38, 44, 49, 54, 58",1567093909,1567105126,156864
156864,"LINESTRING (9.817837 52.428074, 9.813874 52.42...","0, 6, 11, 15, 19, 25, 30, 35, 40, 45, 50, 55, ...",1567093909,1567105126,156865


In [None]:
""" 
Speed data analysis
"""

In [13]:
temporal = pd.read_csv("../datasets/trajectories/hanover/temporal/hannover_traffic.csv")

In [17]:
# General temporal data (Note: Nodes are ordered by line graph)
temporal["time"] = pd.to_datetime(temporal["time"])
max_steps, min_steps = temporal["time"].max(), temporal["time"].min()
pad = 0
data = []
for i, index in tqdm(enumerate(network.line_graph.nodes)):
    row = network.gdf_edges.loc[index]
    temp = temporal[temporal["id"]==row["id"]][["time", "speed"]].sort_values("time")
    temp = temp.set_index('time')
    if min_steps not in temp.index:
        temp.loc[min_steps] = pad
    if max_steps not in temp.index:
        temp.loc[max_steps] = pad
    temp = temp.asfreq('15Min', fill_value=0)
    temp[["length", "speed_limit", "highway_enc"]] = row[["length", "speed_limit", "highway_enc"]]
    data.append(temp.values)

8620it [07:28, 19.24it/s]


In [18]:
x = torch.Tensor(data)
x.shape

torch.Size([8620, 5952, 4])

In [19]:
x.isnan().sum()

tensor(0)

In [20]:
torch.save(x, 'temporal_data.pt')

In [19]:
# map trajectories to network
from ast import literal_eval


df = pd.read_csv("../datasets/trajectories/hanover/mapped_id_poly_clipped.csv", sep=";")
df["timestamp"] = df["timestamp"].apply(literal_eval)
df["POLYLINE"] = df["POLYLINE"].apply(wkt.loads)

In [10]:
network.fmm_trajectorie_mapping(
    network_file="../osm_data/hanover_temp/edges.shp",
    input_file="../datasets/trajectories/hanover/mapped_id_poly_clipped.csv",
    output_file="../datasets/trajectories/hanover/temporal/road-segment-mapping.txt",
    source_field="u",
    target_field="v"
)

gps file : ../datasets/trajectories/hanover/mapped_id_poly_clipped.csv
id column : id
geom column : POLYLINE
timestamp column : timestamp
x column : x
y column : y
GPS point : false

Result file : ../datasets/trajectories/hanover/temporal/road-segment-mapping.txt
Output fields: opath pgeom spdist cpath mgeom duration speed 
Status: success
Time takes 1252.21 seconds
Total points 4607172 matched 3429926
Map match speed 2739.09 points/s 

[2022-09-15 22:16:53.874] [info] [network.cpp:72] Read network from file ../osm_data/hanover_temp/edges.shp
[2022-09-15 22:16:53.952] [info] [network.cpp:170] Number of edges 8620 nodes 5409
[2022-09-15 22:16:53.952] [info] [network.cpp:171] Field index: id 12 source 0 target 1
[2022-09-15 22:16:53.956] [info] [network.cpp:174] Read network done.
[2022-09-15 22:16:53.956] [info] [network_graph.cpp:17] Construct graph from network edges start
[2022-09-15 22:16:53.958] [info] [network_graph.cpp:30] Graph nodes 5409 edges 8620
[2022-09-15 22:16:53.958] [in

In [11]:
# preprocess the mapping especially the speed and distance values need to be verified
df = pd.read_csv("../datasets/trajectories/hanover/temporal/road-segment-mapping.csv", sep=";")
df_prep = remove_outlier_trajectories(df.copy(), min_edges_traversed=3, max_speed=0.000251)
df_prep.to_csv("../datasets/trajectories/hanover/temporal/road_segment_map_final.csv", sep=";")

Pandas Apply:   0%|          | 0/109207 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/109207 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[(df["speed_mean"] > max_speed)]["speed"] = df[(df["speed_mean"] > max_speed)][


Pandas Apply:   0%|          | 0/85984 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["speed_mean"] = df["speed"].swifter.apply(np.mean)


In [8]:
df_prep = pd.read_csv("../datasets/trajectories/hanover/temporal/road_segment_map_final.csv", sep=";")

In [14]:
d = df[df["id"]==1872]
print(d)
print(d["end_stamp"] - d["start_stamp"])
print(d["POLYLINE"].values)

                                              timestamp  start_stamp  \
1871     0    2    5   10   14   16   17   24   27  ...   1574998674   

       end_stamp                                           POLYLINE    id  
1871  1575001061  LINESTRING (9.630023942886398 52.4212434865260...  1872  
1871    2387
dtype: int64
['LINESTRING (9.630023942886398 52.421243486526095, 9.629115 52.421171, 9.628161 52.421147, 9.627631 52.421338, 9.627528 52.421801, 9.627596 52.422034, 9.628007 52.422238, 9.62872 52.422043, 9.630023942886398 52.421243486526095)']


In [16]:
df_prep[df_prep["id"]==1872]

Unnamed: 0.1,Unnamed: 0,id,opath,spdist,pgeom,cpath,mgeom,duration,speed,speed_mean
1,226,1872,468846886882688268826882688268825570,"0.000911354,0.000955415,0.00056174,0.000469219...","LINESTRING(9.63002577495 52.4212081598,9.62911...","(4688, 6882, 8047, 5570)","LINESTRING(9.63002577495 52.4212081598,9.62947...",1530,"[6.0756949851e-05, 3.18471622339e-05]",4.6e-05
