# About
* **Author**: Adil Rashitov (adil.rashitov.98@gmail.com)
* **Created at**: 09.12.2022


In [None]:
# Imports / Configs / Global vars

# Import of native python tools
import os
import json
import shutil
from functools import reduce

# Import of base ML stack libs
import numpy as np
import sklearn as sc

# Visualization libraries
# import plotly.express as px

# Logging configuration
import logging

logging.basicConfig(
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format='[ %(asctime)s ][ %(levelname)s ]: %(message)s',
)
# No name is passed, so `logger` is the root logger; its threshold is set to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# IPython configs
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is a deprecated path.
from IPython.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Inject CSS that stretches the notebook `.container` element to full width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Show the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

# Pandas configs
import pandas as pd
import geopandas as gpd
# Raise the pandas display limits to 350 rows and 250 columns.
pd.set_option("display.max_rows", 350)
pd.set_option("display.max_columns", 250)

# Jupyter configs
# Load the autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Switch off the jedi-based tab completer.
%config Completer.use_jedi = False

# Configure project PATH
from pathlib import Path
import sys

# The project root is taken to be the parent of the current working directory.
# pathlib handles the platform's path separator, whereas splitting the cwd on
# '/' leaves Windows-style paths untouched (yielding the cwd, not its parent).
# Kept as `str` because `sys.path` entries are strings.
PROJECT_PATH = str(Path.cwd().parent)

# Make project modules importable; the guard keeps cell re-runs from
# appending the same entry again.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# Data

In [None]:
# Read the two intermediate-layer parquet inputs: GPS records and the route plan.
gps_records = pd.read_parquet(path="../data/02_intermediate/gps_records.parquet")
route_plans = pd.read_parquet(path="../data/02_intermediate/route_plan.parquet")

# Main

In [None]:
# VFHDBSCAN
from gps_activity import ActivityExtractionSession
from gps_activity.extraction.factory.preprocessing import PreprocessingFactory
from gps_activity.extraction.factory.fragmentation import VelocityFragmentationFactory
from gps_activity.extraction.factory.clustering import FDBSCANFactory


# Step 1 - preprocessing: names the source columns (vehicle id, timestamp,
# latitude, longitude) and the source / target coordinate reference systems.
preprocessing = PreprocessingFactory.factory_pipeline(
    source_vehicle_id="plate_no",
    source_datetime="datetime",
    source_lat_column="lat",
    source_lon_column="lon",
    source_crs="EPSG:4326",
    target_crs="EPSG:2326",
)

# Step 2 - velocity-based fragmentation.
# NOTE(review): the unit of `max_velocity_hard_limit` is not visible here - confirm.
fragmentation = VelocityFragmentationFactory.factory_pipeline(
    max_velocity_hard_limit=4,
)

# Step 3 - FDBSCAN clustering.
# NOTE(review): `eps` is presumably a distance in target-CRS units - confirm.
clustering = FDBSCANFactory.factory_pipeline(
    min_samples=3,
    eps=30,
    source_vehicle_id_column="plate_no",
)

# Bundle the three pipelines into a single extraction session.
vfhdbscan = ActivityExtractionSession(
    clustering=clustering,
    fragmentation=fragmentation,
    preprocessing=preprocessing,
)

In [None]:
# STCM
from gps_activity import ActivityExtractionSession
from gps_activity.extraction.factory.preprocessing import PreprocessingFactory
from gps_activity.extraction.factory.fragmentation import VelocityFragmentationFactory
from gps_activity.extraction.factory.clustering import STCMFactory


# Step 1 - preprocessing: names the source columns (vehicle id, timestamp,
# latitude, longitude) and the source / target coordinate reference systems.
preprocessing = PreprocessingFactory.factory_pipeline(
    source_vehicle_id="plate_no",
    source_datetime="datetime",
    source_lat_column="lat",
    source_lon_column="lon",
    source_crs="EPSG:4326",
    target_crs="EPSG:2326",
)

# Step 2 - velocity-based fragmentation.
# NOTE(review): the unit of `max_velocity_hard_limit` is not visible here - confirm.
fragmentation = VelocityFragmentationFactory.factory_pipeline(
    max_velocity_hard_limit=4,
)

# Step 3 - STCM clustering.
# NOTE(review): `eps` is presumably a distance in target-CRS units - confirm.
clustering = STCMFactory.factory_pipeline(
    min_duration_sec=60,
    eps=30,
    source_vehicle_id_column="plate_no",
)

# Bundle the three pipelines into a single extraction session.
stcm = ActivityExtractionSession(
    clustering=clustering,
    fragmentation=fragmentation,
    preprocessing=preprocessing,
)

In [None]:
def split_gps_over_vehicle_id(
    gps_records: pd.DataFrame,
    vehicle_id_column: str = "plate_no",
) -> list:
    """Split GPS records into one DataFrame per vehicle.

    Args:
        gps_records: Frame containing the ``vehicle_id_column`` column.
        vehicle_id_column: Name of the column to split on. Defaults to
            ``"plate_no"``.

    Returns:
        A list of DataFrames, one per distinct vehicle id, ordered by the
        first appearance of each id. Every frame keeps the original row order
        and gets a fresh ``0..n-1`` index. Rows whose vehicle id is missing
        belong to no vehicle and are left out, so no empty frame is returned.

    Raises:
        KeyError: If ``vehicle_id_column`` is not a column of ``gps_records``.
    """
    # A single groupby pass replaces one boolean mask per vehicle.
    # sort=False keeps first-appearance order; observed=True avoids empty
    # groups for unused categories when the id column is categorical.
    per_vehicle = gps_records.groupby(vehicle_id_column, sort=False, observed=True)
    # reset_index returns a new frame, so callers never get a view of the input.
    return [vehicle_records.reset_index(drop=True) for _, vehicle_records in per_vehicle]


# Run each extraction session vehicle by vehicle and stack the per-vehicle
# results into one frame with a fresh index. The records are split anew for
# each session so neither one receives frames the other has already handled.
clustered_gps_stcm = pd.concat(
    [stcm.predict(gps) for gps in split_gps_over_vehicle_id(gps_records.copy())]
).reset_index(drop=True)

# The VFHDBSCAN output must come from the `vfhdbscan` session, not from `stcm`;
# otherwise both saved datasets hold the same STCM result.
clustered_gps_vfhdbscan = pd.concat(
    [vfhdbscan.predict(gps) for gps in split_gps_over_vehicle_id(gps_records.copy())]
).reset_index(drop=True)

In [None]:
# Recreate the output directory from scratch so files from earlier runs cannot
# linger. Done in Python rather than with `!rm` / `!mkdir` so the cell does not
# need a POSIX shell and raises on failure instead of carrying on silently.
CLUSTERED_GPS_DIR = Path("../data/03_primary/clustered_gps/")
if CLUSTERED_GPS_DIR.exists():
    shutil.rmtree(CLUSTERED_GPS_DIR)
CLUSTERED_GPS_DIR.mkdir(parents=True, exist_ok=True)

# Write one parquet file per extraction method, without the DataFrame index.
clustered_gps_stcm.to_parquet(CLUSTERED_GPS_DIR / "vfhstcm.parquet", index=False)
clustered_gps_vfhdbscan.to_parquet(CLUSTERED_GPS_DIR / "vfhdbscan.parquet", index=False)