In [1]:
%load_ext autoreload
%autoreload 2

import os

import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import pandas as pd
import geopandas as gpd
import osmnx as ox
import networkx as nx
import folium
from shapely.geometry import LineString
from shapely import wkt
import numpy as np
import swifter
from generator.preprocess import *
from generator.preprocess import remove_outlier_trajectories
from generator.road_network import RoadNetwork
import glob
from tqdm import tqdm
import torch

In [6]:
"""
 Generate osmnx network from csv
"""

df = pd.read_csv("../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")
df["geometry"] = df["geometry"].swifter.apply(wkt.loads)
gdf = gpd.GeoDataFrame(df, geometry="geometry")
gdf["coords"] = gdf["geometry"].swifter.apply(lambda x: list(x.coords))

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [6]:
gdf = ox.io._stringify_nonnumeric_cols(gdf)
gdf["fid"] = np.arange(0, gdf.shape[0], dtype="int")  # id for each edge
gdf.to_file("../osm_data/hanover_temp" + "/edges.shp", encoding="utf-8")

In [4]:
G = nx.from_pandas_edgelist(gdf, "source", "target", True, nx.MultiDiGraph)
sG = [G.subgraph(c) for c in sorted(nx.weakly_connected_components(G), key=len, reverse=True)][0]
sdf = nx.to_pandas_edgelist(sG)
sdf["coords"] = sdf["geometry"].swifter.apply(lambda x: list(x.coords))
sdf.to_csv("../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [None]:
# Folium Heat Map
from folium import plugins
from folium.plugins import HeatMap
from collections import defaultdict
import branca.colormap

m = folium.Map(location=[52.37052, 9.73322],
                    zoom_start = 8)

coords = gdf.iloc[:, -1].values

for line in coords:
    data = [(c[1], c[0]) for c in line]
    folium.PolyLine(data, color="red", weight=2.5, opacity=0.8).add_to(m)

coords = sdf["coords"].values
for line in coords:
    data = [(c[1], c[0]) for c in line]
    folium.PolyLine(data, color="green", weight=2.5, opacity=0.8).add_to(m)

# Display the map
#map_porto.save("heatmap_gps_points_porto.html")
m

In [2]:
network = RoadNetwork()
network.load_hanover_temporal(path="../datasets/trajectories/hanover/temporal/hannover_streetgraph.csv")

Pandas Apply:   0%|          | 0/8620 [00:00<?, ?it/s]

In [7]:
gdf_edges = ox.io._stringify_nonnumeric_cols(network.gdf_edges)
gdf_edges["fid"] = np.arange(
    0, gdf_edges.shape[0], dtype="int"
)  # id for each edge

gdf_edges.to_file("../osm_data/hanover_temp/" + "/edges.shp", encoding="utf-8")

In [36]:
# create dataframe for road segment mapping
df = pd.read_csv("../datasets/trajectories/hanover/hannover_inrix.csv")
rows = []
for _, g in tqdm(df.groupby("id")):
    if g.shape[0] < 5:
        continue
    traj = g[["longitude", "latitude", "time"]].copy()
    traj.loc[:, "time"] = pd.to_datetime(traj["time"], infer_datetime_format=True).dt.floor('s')
    traj.loc[:, "time"] = traj["time"].values.astype(np.int64) // 10 ** 9
    
    data = traj.to_numpy()
    time = data[1:, 2] - data[0, 2]
    time = np.insert(time, 0, 0)
    seq = LineString(data[:, :2])
    assert len(list(seq.coords)) == time.shape[0]
    rows.append((seq, time.astype(int).tolist(), traj.iloc[0, 2], traj.iloc[-1, 2]))

processed_df = pd.DataFrame(rows, columns=["POLYLINE", "timestamp", "start_stamp", "end_stamp"])
processed_df

100%|██████████| 77188/77188 [03:28<00:00, 370.26it/s]


Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp
0,"LINESTRING (9.8288 52.3326, 9.819705 52.331356...","[0, 80, 424, 535, 613, 1094, 1171, 1453, 1528,...",1565974403,1565982018
1,"LINESTRING (9.6472 52.2823, 9.643848 52.283978...","[0, 30, 60, 90, 120, 172, 202, 232, 262, 292, ...",1567075203,1567075868
2,"LINESTRING (9.7376 52.5465, 9.72797 52.541855,...","[0, 138, 300, 485, 668, 801, 882]",1566184436,1566185318
3,"LINESTRING (9.7119 52.343, 9.716048 52.344969,...","[0, 90, 180, 229, 319, 409, 492, 582, 672, 762]",1568621637,1568622399
4,"LINESTRING (9.7585 52.3363, 9.762929 52.34235,...","[0, 79, 141, 224, 377, 453, 580, 685, 763, 881...",1566671810,1566673656
...,...,...,...,...
73924,"LINESTRING (10.0369 53.3593, 10.037012 53.3582...","[0, 5, 8, 14, 15, 19, 25, 28, 30, 34, 39, 44, ...",1569563606,1569569649
73925,"LINESTRING (9.7278 52.3654, 9.727219 52.365731...","[0, 5, 8, 16, 19, 20, 25, 29, 35, 40, 46, 49, ...",1575029942,1575031187
73926,"LINESTRING (9.8726 52.9458, 9.718267 52.63466,...","[0, 599, 703, 718, 733, 748, 763, 778, 1295, 1...",1573224957,1573226965
73927,"LINESTRING (9.738963 52.382663, 9.738996 52.38...","[0, 5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60,...",1574319815,1574320945


In [30]:
city_bounds = network.bounds_edges
# df_clipped = clip_trajectories(processed_df.copy(), city_bounds, polyline_convert=True)
df_clipped = filter_min_points(processed_df.copy(), 5)

Pandas Apply:   0%|          | 0/73929 [00:00<?, ?it/s]

In [29]:
print(df_clipped.loc[39949, "POLYLINE"])
print(processed_df.loc[39949, "POLYLINE"])

print(processed_df.loc[39949, "POLYLINE"].contains(df_clipped.loc[39949, "POLYLINE"]))

LINESTRING (9.720956836999958 52.4540255, 9.720469 52.450954, 9.73161 52.426079, 9.732494 52.424171, 9.7324 52.4213)
LINESTRING (9.7281 52.499, 9.720469 52.450954, 9.73161 52.426079, 9.732494 52.424171, 9.7324 52.4213)
False


In [None]:
# i = 0
# orig_polies = processed_df.POLYLINE
# for n, g in df_clipped.groupby(level=[0]):
#     op = orig_polies.loc[n]
#     print(op, g)
#     gt = g.reset_index(drop=True)
#     ts = gt.loc[0, "timestamp"]
#     for i, r in gt.iterrows():
#         traj = list(r.POLYLINE.coords)
#         print(traj)
#     break

# i

In [32]:
df_clipped["id"] = np.arange(1, df_clipped.shape[0]+1)
df_clipped.drop("coords", inplace=True, axis=1)
df_clipped["timestamp"] = df_clipped["timestamp"].astype(str)
df_clipped["timestamp"] = df_clipped["timestamp"].str.replace("[", "")
df_clipped["timestamp"] = df_clipped["timestamp"].str.replace("]", "")
#df_clipped["timestamp"] = df_clipped["timestamp"].str.replace("  ", ", ")
df_clipped.to_csv("../datasets/trajectories/hanover/temporal/mapped_id_poly_clipped.csv", sep=";", index=False)

In [33]:
df = pd.read_csv("../datasets/trajectories/hanover/temporal/mapped_id_poly_clipped.csv", sep=";")
df

Unnamed: 0,POLYLINE,timestamp,start_stamp,end_stamp,id
0,"LINESTRING (9.8288 52.3326, 9.819705 52.331356...","0, 80, 424, 535, 613, 1094, 1171, 1453, 1528, ...",1565974403,1565982018,1
1,"LINESTRING (9.6472 52.2823, 9.643848 52.283978...","0, 30, 60, 90, 120, 172, 202, 232, 262, 292, 5...",1567075203,1567075868,2
2,"LINESTRING (9.7376 52.5465, 9.72797 52.541855,...","0, 138, 300, 485, 668, 801, 882",1566184436,1566185318,3
3,"LINESTRING (9.7119 52.343, 9.716048 52.344969,...","0, 90, 180, 229, 319, 409, 492, 582, 672, 762",1568621637,1568622399,4
4,"LINESTRING (9.7585 52.3363, 9.762929 52.34235,...","0, 79, 141, 224, 377, 453, 580, 685, 763, 881,...",1566671810,1566673656,5
...,...,...,...,...,...
73924,"LINESTRING (10.0369 53.3593, 10.037012 53.3582...","0, 5, 8, 14, 15, 19, 25, 28, 30, 34, 39, 44, 4...",1569563606,1569569649,73925
73925,"LINESTRING (9.7278 52.3654, 9.727219 52.365731...","0, 5, 8, 16, 19, 20, 25, 29, 35, 40, 46, 49, 5...",1575029942,1575031187,73926
73926,"LINESTRING (9.8726 52.9458, 9.718267 52.63466,...","0, 599, 703, 718, 733, 748, 763, 778, 1295, 16...",1573224957,1573226965,73927
73927,"LINESTRING (9.738963 52.382663, 9.738996 52.38...","0, 5, 10, 15, 20, 25, 30, 35, 45, 50, 55, 60, ...",1574319815,1574320945,73928


In [None]:
""" 
Speed data analysis
"""

In [13]:
temporal = pd.read_csv("../datasets/trajectories/hanover/temporal/hannover_traffic.csv")

In [17]:
# General temporal data (Note: Nodes are ordered by line graph)
temporal["time"] = pd.to_datetime(temporal["time"])
max_steps, min_steps = temporal["time"].max(), temporal["time"].min()
pad = 0
data = []
for i, index in tqdm(enumerate(network.line_graph.nodes)):
    row = network.gdf_edges.loc[index]
    temp = temporal[temporal["id"]==row["id"]][["time", "speed"]].sort_values("time")
    temp = temp.set_index('time')
    if min_steps not in temp.index:
        temp.loc[min_steps] = pad
    if max_steps not in temp.index:
        temp.loc[max_steps] = pad
    temp = temp.asfreq('15Min', fill_value=0)
    temp[["length", "speed_limit", "highway_enc"]] = row[["length", "speed_limit", "highway_enc"]]
    data.append(temp.values)

8620it [07:28, 19.24it/s]


In [18]:
x = torch.Tensor(data)
x.shape

torch.Size([8620, 5952, 4])

In [19]:
x.isnan().sum()

tensor(0)

In [20]:
torch.save(x, 'temporal_data.pt')

In [19]:
# map trajectories to network
from ast import literal_eval


df = pd.read_csv("../datasets/trajectories/hanover/mapped_id_poly_clipped.csv", sep=";")
df["timestamp"] = df["timestamp"].apply(literal_eval)
df["POLYLINE"] = df["POLYLINE"].apply(wkt.loads)

In [10]:
network.fmm_trajectorie_mapping(
    network_file="../osm_data/hanover_temp/edges.shp",
    input_file="../datasets/trajectories/hanover/mapped_id_poly_clipped.csv",
    output_file="../datasets/trajectories/hanover/temporal/road-segment-mapping.txt",
    source_field="u",
    target_field="v"
)

gps file : ../datasets/trajectories/hanover/mapped_id_poly_clipped.csv
id column : id
geom column : POLYLINE
timestamp column : timestamp
x column : x
y column : y
GPS point : false

Result file : ../datasets/trajectories/hanover/temporal/road-segment-mapping.txt
Output fields: opath pgeom spdist cpath mgeom duration speed 
Status: success
Time takes 1252.21 seconds
Total points 4607172 matched 3429926
Map match speed 2739.09 points/s 

[2022-09-15 22:16:53.874] [info] [network.cpp:72] Read network from file ../osm_data/hanover_temp/edges.shp
[2022-09-15 22:16:53.952] [info] [network.cpp:170] Number of edges 8620 nodes 5409
[2022-09-15 22:16:53.952] [info] [network.cpp:171] Field index: id 12 source 0 target 1
[2022-09-15 22:16:53.956] [info] [network.cpp:174] Read network done.
[2022-09-15 22:16:53.956] [info] [network_graph.cpp:17] Construct graph from network edges start
[2022-09-15 22:16:53.958] [info] [network_graph.cpp:30] Graph nodes 5409 edges 8620
[2022-09-15 22:16:53.958] [in

In [11]:
# preprocess the mapping especially the speed and distance values need to be verified
df = pd.read_csv("../datasets/trajectories/hanover/temporal/road-segment-mapping.csv", sep=";")
df_prep = remove_outlier_trajectories(df.copy(), min_edges_traversed=3, max_speed=0.000251)
df_prep.to_csv("../datasets/trajectories/hanover/temporal/road_segment_map_final.csv", sep=";")

Pandas Apply:   0%|          | 0/109207 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/109207 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[(df["speed_mean"] > max_speed)]["speed"] = df[(df["speed_mean"] > max_speed)][


Pandas Apply:   0%|          | 0/85984 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["speed_mean"] = df["speed"].swifter.apply(np.mean)


In [8]:
df_prep = pd.read_csv("../datasets/trajectories/hanover/temporal/road_segment_map_final.csv", sep=";")

In [14]:
d = df[df["id"]==1872]
print(d)
print(d["end_stamp"] - d["start_stamp"])
print(d["POLYLINE"].values)

                                              timestamp  start_stamp  \
1871     0    2    5   10   14   16   17   24   27  ...   1574998674   

       end_stamp                                           POLYLINE    id  
1871  1575001061  LINESTRING (9.630023942886398 52.4212434865260...  1872  
1871    2387
dtype: int64
['LINESTRING (9.630023942886398 52.421243486526095, 9.629115 52.421171, 9.628161 52.421147, 9.627631 52.421338, 9.627528 52.421801, 9.627596 52.422034, 9.628007 52.422238, 9.62872 52.422043, 9.630023942886398 52.421243486526095)']


In [16]:
df_prep[df_prep["id"]==1872]

Unnamed: 0.1,Unnamed: 0,id,opath,spdist,pgeom,cpath,mgeom,duration,speed,speed_mean
1,226,1872,468846886882688268826882688268825570,"0.000911354,0.000955415,0.00056174,0.000469219...","LINESTRING(9.63002577495 52.4212081598,9.62911...","(4688, 6882, 8047, 5570)","LINESTRING(9.63002577495 52.4212081598,9.62947...",1530,"[6.0756949851e-05, 3.18471622339e-05]",4.6e-05
