# Cleaning and smoothing trajectory data with MovingPandas

In [None]:
import os
import warnings
from datetime import datetime, timedelta

import geopandas as gpd
import matplotlib.pyplot as plt
import movingpandas as mpd
import pandas as pd
import torch
from shapely.geometry import LineString, Point
from shapely.wkt import loads
from srai.embedders import Hex2VecEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMPbfLoader
from srai.loaders.osm_loaders.filters import HEX2VEC_FILTER
from srai.neighbourhoods.h3_neighbourhood import H3Neighbourhood
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf

In [None]:
# Shared hvplot styling used by every interactive plot in this notebook.
hvplot_defaults = dict(
    tiles='CartoLight',
    frame_height=320,
    frame_width=320,
    cmap='Viridis',
    colorbar=True,
)
# Trajectory plots extend the defaults: colour by 'speed', thicker line,
# and a fixed colour range of 0-20 so that plots are comparable.
kwargs = dict(hvplot_defaults, c='speed', line_width=7, clim=(0, 20))

In [None]:
gdf = gpd.read_parquet(os.path.join('output_data', 'geolife.parquet'))

In [None]:
gdf['time'] = pd.to_datetime(gdf['time'])
gdf.crs = 'EPSG:4326'

In [None]:
gdf.head()

In [None]:
gdf.shape

In [None]:
def validate_cords(gdf, lon_col='longitude', lat_col='latitude', trip_id_col='trajectory_id'):
    """Drop every trip that contains at least one out-of-range coordinate.

    A row is valid when its longitude lies in [-180, 180] and its latitude
    lies in [-90, 90], bounds included. Rows with a missing coordinate fail
    the comparison and therefore count as invalid. As soon as one row of a
    trip is invalid, all rows of that trip are removed.

    Args:
        gdf: Frame holding the coordinate and trip-id columns.
        lon_col: Name of the longitude column.
        lat_col: Name of the latitude column.
        trip_id_col: Name of the column identifying the trip of each row.

    Returns:
        The rows of ``gdf`` whose trip has no invalid coordinate; the input
        frame itself is not modified.
    """
    lon_ok = gdf[lon_col].between(-180, 180)
    lat_ok = gdf[lat_col].between(-90, 90)
    bad_rows = ~(lon_ok & lat_ok)

    # One bad point disqualifies the whole trip, not just the row.
    bad_trips = gdf.loc[bad_rows, trip_id_col].unique()
    keep = ~gdf[trip_id_col].isin(bad_trips)
    return gdf[keep]

In [None]:
gdf = validate_cords(gdf)

In [None]:
pekin_area = geocode_to_region_gdf("Pekin, China")

In [None]:
gdf_pekin = gdf.sjoin(pekin_area)

In [None]:
gdf_pekin.shape

In [None]:
gdf_merged = gdf.merge(gdf_pekin, how="left", indicator=True)

In [None]:
gdf_outside_pekin = gdf_merged[gdf_merged["_merge"] == "left_only"]

In [None]:
traj_outside_pekin = list(gdf_outside_pekin["trajectory_id"].unique())

In [None]:
gdf_pekin = gdf_pekin[~gdf_pekin["trajectory_id"].isin(traj_outside_pekin)]

In [None]:
# Build one trajectory per 'trajectory_id', using 'time' as the time column.
# x must name the longitude column and y the latitude column.
traj_col = mpd.TrajectoryCollection(gdf_pekin, 'trajectory_id', t='time', x='longitude', y='latitude')

In [None]:
traj_col

In [None]:
traj_gdf = traj_col.to_point_gdf()
fig, ax = plt.subplots(figsize=(10, 10))

traj_gdf.plot(ax=ax, color='blue', markersize=1, label='Trajektorie')

In [None]:
# traj_col.add_speed(overwrite = True)

In [None]:
# traj_col.add_timedelta(overwrite = True)

In [None]:
# traj_col.add_direction(overwrite = True)

In [None]:
results = traj_col.to_point_gdf()
results.head()

In [None]:
results.shape

# Spatial Embedding

In [None]:
HEX_RES = 9

In [None]:
regionalizer = H3Regionalizer(resolution=HEX_RES)
regions = regionalizer.transform(pekin_area)

In [None]:
loader = OSMPbfLoader()
features = loader.load(regions, HEX2VEC_FILTER)

In [None]:
joiner = IntersectionJoiner()
joint = joiner.transform(regions, features)

In [None]:
neighbourhood = H3Neighbourhood(regions)
embedder_hidden_sizes = [150, 100, 50, 10]
embedder = Hex2VecEmbedder(embedder_hidden_sizes)
device = "cuda" if torch.cuda.is_available() else "cpu"

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    embeddings = embedder.fit_transform(
        regions,
        features,
        joint,
        neighbourhood,
        trainer_kwargs={"max_epochs": 5, "accelerator": device},
        batch_size=100,
    )

In [None]:
# Persist the embeddings; the H3 resolution is encoded in the file name.
embeddings.to_parquet(os.path.join('output_data', f'embeddings_{HEX_RES}.parquet'))

# [Generalization](https://movingpandas.github.io/movingpandas-website/1-tutorials/7-generalizing-trajectories.html)

In [None]:
traj_col_dp = mpd.DouglasPeuckerGeneralizer(traj_col).generalize(tolerance=0.0001)

In [None]:
results_dp = traj_col_dp.to_point_gdf()
results_dp.shape

In [None]:
i = 312
traj_col_dp.trajectories[i].hvplot(**kwargs)+traj_col.trajectories[i].hvplot(**kwargs)

# CLEAN

In [None]:
cleaned = traj_col_dp.copy()
cleaned = mpd.OutlierCleaner(cleaned).clean(alpha=1.5)

In [None]:
cleaned.trajectories[i].hvplot(**kwargs)+traj_col_dp.trajectories[i].hvplot(**kwargs)

# SMOOTH

In [None]:
smooth = mpd.KalmanSmootherCV(cleaned).smooth(process_noise_std=0.1, measurement_noise_std=10)
smooth

In [None]:
smooth.trajectories[i].hvplot(**kwargs)+traj_col_dp.trajectories[i].hvplot(**kwargs)


In [None]:
smooth2 = mpd.KalmanSmootherCV(cleaned).smooth(process_noise_std=0.1, measurement_noise_std=2)
smooth2

In [None]:
smooth.trajectories[i].hvplot(**kwargs)+smooth2.trajectories[i].hvplot(**kwargs)+traj_col_dp.trajectories[i].hvplot(**kwargs)


In [None]:
i = 1645
smooth.trajectories[i].hvplot(**kwargs)+smooth2.trajectories[i].hvplot(**kwargs)+traj_col_dp.trajectories[i].hvplot(**kwargs)

In [None]:
smooth2_gdf = smooth2.to_point_gdf()
smooth2_gdf.shape

In [None]:
# Ensure the output directory exists; exist_ok=True makes this a no-op when
# it is already there and avoids the check-then-create race.
os.makedirs('output_data', exist_ok=True)
# Save the smoothed points for downstream use.
smooth2_gdf.to_parquet(os.path.join('output_data', 'geolife_mpd.parquet'))