# Model 1 Experiment 1
This experiment uses the 1,000 input trajectories stored in "./experiment_input/experiment1_1000traj.pkl".

In [1]:
# import the libraries
import numpy as np
import pandas as pd
import folium
from trajectory_clustering import TrajectoryClustering

In [2]:
# Load the 1000-trajectory input sample from a pickled DataFrame and display it.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling,
# so only load this file from a trusted source.
taxi_1000_df = pd.read_pickle("./experiment_input/experiment1_1000traj.pkl")
taxi_1000_df

Unnamed: 0,TRIP_ID,TIMESTAMP,MISSING_DATA,TRAJECTORY,TRAJ_LEN
310308,1378760461620000008,1378760461,False,"[[41.169564, -8.595108], [41.169735, -8.593992...",174
1302708,1397135305620000504,1397135305,False,"[[41.150385, -8.607006], [41.14953, -8.607267]...",119
1066640,1392714116620000403,1392714116,False,"[[41.166387, -8.577999], [41.166414, -8.577981...",113
1616481,1402560252620000041,1402560252,False,"[[41.183451, -8.695215], [41.18346, -8.695206]...",108
1578304,1401954490620000098,1401954490,False,"[[41.20614, -8.572644], [41.207184, -8.573454]...",189
...,...,...,...,...,...
922581,1389803913620000612,1389803913,False,"[[41.146137, -8.61759], [41.146155, -8.617581]...",104
1451965,1399724751620000320,1399724751,False,"[[41.145552, -8.607339], [41.145606, -8.607357...",137
1641519,1403001636620000001,1403001636,False,"[[41.145399, -8.610948], [41.145318, -8.610948...",178
776647,1387039794620000607,1387039794,False,"[[41.163957, -8.662626], [41.16393, -8.662581]...",216


In [3]:
# Project-local helper object; every conversion, clustering and plotting step
# in this notebook is a method call on it.
tc = TrajectoryClustering()

In [4]:
# Extract the trajectories from the input DataFrame and show the first one.
# In the recorded output each trajectory is a list of two-element coordinate
# pairs (presumably [latitude, longitude] - confirm in get_trajectories).
traj = tc.get_trajectories(taxi_1000_df)
traj[0]

[[41.169564, -8.595108],
 [41.169735, -8.593992],
 [41.169078, -8.59302],
 [41.168169, -8.592174],
 [41.16843, -8.59149],
 [41.169537, -8.589645],
 [41.169438, -8.588286],
 [41.169123, -8.587431],
 [41.169132, -8.587449],
 [41.169132, -8.587449],
 [41.16879, -8.586567],
 [41.168439, -8.583903],
 [41.168214, -8.581887],
 [41.167215, -8.582373],
 [41.166621, -8.582247],
 [41.165955, -8.581122],
 [41.165613, -8.579808],
 [41.165406, -8.579106],
 [41.166081, -8.578224],
 [41.165352, -8.57754],
 [41.16402, -8.576343],
 [41.162391, -8.575164],
 [41.16285, -8.575101],
 [41.164038, -8.576136],
 [41.165289, -8.576784],
 [41.165523, -8.57583],
 [41.165262, -8.575407],
 [41.16519, -8.57475],
 [41.165892, -8.574876],
 [41.16645, -8.575992],
 [41.16573, -8.576406],
 [41.165388, -8.57709],
 [41.165361, -8.578062],
 [41.164245, -8.579511],
 [41.164587, -8.581599],
 [41.165955, -8.582895],
 [41.166792, -8.58564],
 [41.168088, -8.588664],
 [41.169609, -8.591184],
 [41.171193, -8.59356],
 [41.173434, -8

In [5]:
# Plot every input trajectory together, before any clustering is applied.
tc.plot_all_trajectories_only(traj)

## Trajectory Clustering
### 1. Convert trajectories from latitude/longitude to UTM coordinates

In [6]:
# Convert every trajectory from latitude/longitude into planar x/y coordinates
# (UTM, per this section's heading) and show the first converted trajectory.
trajectories_xy = tc.latitude_longtitude_coord_conversion(traj)
trajectories_xy[0]

array([[ 533964.65804882, 4557659.55381914],
       [ 534058.18617348, 4557678.97317281],
       [ 534140.06411622, 4557606.4183524 ],
       [ 534211.504957  , 4557505.83960844],
       [ 534268.74815052, 4557535.08311426],
       [ 534422.94093832, 4557658.70271877],
       [ 534536.99393958, 4557648.25079652],
       [ 534608.88230093, 4557613.62128258],
       [ 534607.3676084 , 4557614.61324358],
       [ 534607.3676084 , 4557614.61324358],
       [ 534681.5358231 , 4557576.99782376],
       [ 534905.19684135, 4557539.09715262],
       [ 535074.43375679, 4557514.92964342],
       [ 535034.19648381, 4557403.83179477],
       [ 535045.08297818, 4557337.94072946],
       [ 535139.81467679, 4557264.45957022],
       [ 535250.23012222, 4557227.02440086],
       [ 535309.23270376, 4557204.32922263],
       [ 535382.86167412, 4557279.62122534],
       [ 535440.63547973, 4557198.97095137],
       [ 535541.7730804 , 4557051.58959179],
       [ 535641.56553989, 4556871.23139733],
       [ 5

### 2. Reduce each trajectory to a predefined length (15 points) using the RDP (Ramer–Douglas–Peucker) algorithm

In [7]:
# Simplify each trajectory to 15 points with RDP. The call yields two results:
# presumably the original trajectories that were kept, and their reduced x/y
# coordinates (confirm in rdp_reduce).
# NOTE(review): the literal 15 is repeated in the reshape step below; keep the
# two values in sync when changing the target length.
traj_to_keep, traj_xy_reduced = tc.rdp_reduce(traj, trajectories_xy, 15)

#### Reshape the trajectories into a 2-D array so they can be passed to the HDBSCAN algorithm

In [8]:
# Flatten each reduced trajectory into a single row so the whole set forms one
# 2-D array; the recorded output shows shape (1000, 30), i.e. 15 points x 2
# coordinates per trajectory.
# NOTE(review): the 15 here presumably has to match the length passed to
# rdp_reduce - confirm in reshape_trajectories.
traj_xy_reshaped = tc.reshape_trajectories(15, traj_xy_reduced)

(1000, 30)


### 3. Trajectory Clustering using HDBSCAN 

In [9]:
# Cluster the flattened trajectories with HDBSCAN, then group the kept
# trajectories by cluster label. In the recorded output, label -1 is the
# noise group (310 trajectories).
# NOTE(review): the meaning of the second argument (4) is not visible here -
# possibly a minimum cluster size; confirm in trajectories_hdbscan.
labels = tc.trajectories_hdbscan(traj_xy_reshaped, 4)
traj_dict = tc.map_trajectory_with_cluster_labels(traj_to_keep, labels)

Estimated number of clusters: 9
Estimated number of noise points: 310
label: 1, #traj: 5
label: 5, #traj: 606
label: -1, #traj: 310
label: 7, #traj: 13
label: 0, #traj: 8
label: 6, #traj: 6
label: 8, #traj: 33
label: 4, #traj: 5
label: 3, #traj: 4
label: 2, #traj: 10


#### Plot each group of trajectories 

In [10]:
# Cluster 1: 5 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(1, traj_dict)

In [11]:
# Cluster 0: 8 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(0, traj_dict)

In [12]:
# Cluster 6: 6 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(6, traj_dict)

In [13]:
# Cluster 4: 5 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(4, traj_dict)

In [14]:
# Cluster 3: 4 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(3, traj_dict)

In [15]:
# Cluster 2: 10 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(2, traj_dict)

##### The well-clustered trajectories appear to be long and winding, spanning the entire distance. Many of them travel between cities.
##### Below is an example of such a highway trajectory; it travels between Porto and Espinho.

In [16]:
# Take the first trajectory of cluster 2 and draw it as a polyline on a folium
# map centred on the trajectory's first point.
single_traj = traj_dict[2]['trajectories'][0]
m = folium.Map(location=single_traj[0], zoom_start=11)
folium.PolyLine(single_traj).add_to(m)
# Bare last expression so the notebook renders the map as the cell output.
m

In [17]:
# Cluster 7: 13 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(7, traj_dict)

In [18]:
# Cluster 8: 33 trajectories in the recorded HDBSCAN summary.
tc.plot_group_trajectories_only(8, traj_dict)

#### Noise group

In [19]:
# Label -1 is the HDBSCAN noise group: 310 trajectories in the recorded summary.
tc.plot_group_trajectories_only(-1, traj_dict)

#### This group has not been clustered at a finer granularity

In [20]:
# Cluster 5: the largest group, 606 of the 1000 trajectories in the recorded summary.
tc.plot_group_trajectories_only(5, traj_dict)