# About
* **Author**: Adil Rashitov (adil.rashitov.98@gmail.com)
* **Created at**: 20.01.2022


In [None]:
# Imports / Configs / Global vars

# Import of native python tools
import os
import json
from functools import reduce

# Import of base ML stack libs
import numpy as np
import sklearn as sc

# Visualization libraries
# import plotly.express as px

# Logging configuration: timestamped, level-tagged records on the root logger
import logging

_LOG_FORMAT = '[ %(asctime)s ][ %(levelname)s ]: %(message)s'
_LOG_DATE_FORMAT = '%m/%d/%Y %I:%M:%S %p'

logging.basicConfig(format=_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)
# `getLogger()` without a name returns the root logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# Ipython configs
# NOTE: `display` / `HTML` come from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Inject CSS that stretches the `.container` element to the full page width
display(HTML("<style>.container { width:100% !important; }</style>"))
# Echo the value of every expression statement in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

# Pandas configs
import pandas as pd
import geopandas as gpd
# Raise the number of rows / columns pandas prints before truncating output
pd.set_option("display.max_rows", 350)
pd.set_option("display.max_columns", 250)

# Jupyter configs
# Load the autoreload extension and use mode 2, so edits to imported project
# modules are picked up without restarting the kernel.
# (Comments stay on their own lines: text after a magic is parsed as its argument.)
%load_ext autoreload
%autoreload 2
# Turn off jedi-based completion in the IPython completer
%config Completer.use_jedi = False

# Configure project PATH
from pathlib import Path
import sys
# Project root = parent of the current working directory.
# `Path.parent` replaces the former split on '/', which returned the working
# directory itself for paths using backslash separators and an empty string
# for a working directory located directly under the filesystem root.
PROJECT_PATH = str(Path.cwd().parent)


# Make project-local packages importable; the membership check keeps
# re-running this cell from appending duplicates.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# Data

In [None]:
# Locations of the intermediate datasets, relative to the notebook directory
GPS_RECORDS_PATH = "../data/02_intermediate/gps_records.parquet"
ROUTE_PLANS_PATH = "../data/02_intermediate/route_plan.parquet"

# Load both datasets as DataFrames
gps_records = pd.read_parquet(GPS_RECORDS_PATH)
route_plans = pd.read_parquet(ROUTE_PLANS_PATH)

In [None]:
# Distinct values of the vehicle identifier column ("plate_no") in the GPS records
gps_records["plate_no"].unique()

In [None]:
# Calendar dates (as strings) found in the GPS records and in the route plans
_gps_dates = pd.to_datetime(gps_records["datetime"]).dt.date.astype(str).unique()
_plan_dates = pd.to_datetime(route_plans["date"]).dt.date.astype(str).unique()

# Dates covered by both datasets
overlapping_dates = np.intersect1d(list(_gps_dates), list(_plan_dates))

# Show the route plans for inspection
route_plans

In [None]:
from gps_activity import ActivityExtractionSession
from gps_activity.extraction.factory.preprocessing import PreprocessingFactory
from gps_activity.extraction.factory.fragmentation import VelocityFragmentationFactory
from gps_activity.extraction.factory.clustering import FDBSCANFactory



def factory_vfhdbscan(max_velocity_hard_limit: float, eps: float, min_samples: int) -> ActivityExtractionSession:
    """Build an activity-extraction session that clusters with FDBSCAN.

    The session chains three pipelines: preprocessing (column mapping, source
    CRS EPSG:4326, target CRS EPSG:2326), velocity-based fragmentation and
    FDBSCAN clustering.

    The arguments are forwarded to the underlying factories. Previously they
    were ignored and the values 4, 30 and 3 were always used.

    Args:
        max_velocity_hard_limit: Forwarded to
            ``VelocityFragmentationFactory.factory_pipeline``.
        eps: Forwarded to ``FDBSCANFactory.factory_pipeline``.
        min_samples: Forwarded to ``FDBSCANFactory.factory_pipeline``.

    Returns:
        An ``ActivityExtractionSession`` wired with the three pipelines.
    """
    # Bind the extraction preprocessing factory locally: the linkage cell
    # re-imports the name `PreprocessingFactory` from `gps_activity.linker.factory`,
    # which would otherwise be picked up here once that cell has been run.
    from gps_activity.extraction.factory.preprocessing import PreprocessingFactory

    preprocessing = PreprocessingFactory.factory_pipeline(
        source_lat_column="lat",
        source_lon_column="lon",
        source_datetime="datetime",
        source_vehicle_id="plate_no",
        source_crs="EPSG:4326",
        target_crs="EPSG:2326",
    )

    fragmentation = VelocityFragmentationFactory.factory_pipeline(
        max_velocity_hard_limit=max_velocity_hard_limit,
    )
    clustering = FDBSCANFactory.factory_pipeline(
        source_vehicle_id_column="plate_no",
        eps=eps,
        min_samples=min_samples,
    )
    return ActivityExtractionSession(
        preprocessing=preprocessing,
        fragmentation=fragmentation,
        clustering=clustering,
    )


def factory_stcm(max_velocity_hard_limit: float, eps: float, min_duration_sec: float) -> ActivityExtractionSession:
    """Build an activity-extraction session that clusters with STCM.

    The session chains three pipelines: preprocessing (column mapping, source
    CRS EPSG:4326, target CRS EPSG:2326), velocity-based fragmentation and
    STCM clustering.

    Fixes compared with the previous version:
      * the session is returned (it used to be assigned to a local and
        dropped, so the function returned ``None``);
      * the arguments are forwarded instead of the hard-coded 4, 30 and 60;
      * ``STCMFactory`` is imported (it was referenced without an import).

    Args:
        max_velocity_hard_limit: Forwarded to
            ``VelocityFragmentationFactory.factory_pipeline``.
        eps: Forwarded to ``STCMFactory.factory_pipeline``.
        min_duration_sec: Forwarded to ``STCMFactory.factory_pipeline``.

    Returns:
        An ``ActivityExtractionSession`` wired with the three pipelines.
    """
    # Local imports keep the function independent of cell execution order:
    # `STCMFactory` is not imported by the surrounding cell, and the linkage
    # cell re-imports the name `PreprocessingFactory` from
    # `gps_activity.linker.factory`.
    from gps_activity.extraction.factory.clustering import STCMFactory
    from gps_activity.extraction.factory.preprocessing import PreprocessingFactory

    preprocessing = PreprocessingFactory.factory_pipeline(
        source_lat_column="lat",
        source_lon_column="lon",
        source_datetime="datetime",
        source_vehicle_id="plate_no",
        source_crs="EPSG:4326",
        target_crs="EPSG:2326",
    )

    fragmentation = VelocityFragmentationFactory.factory_pipeline(
        max_velocity_hard_limit=max_velocity_hard_limit,
    )
    clustering = STCMFactory.factory_pipeline(
        source_vehicle_id_column="plate_no",
        eps=eps,
        min_duration_sec=min_duration_sec,
    )

    return ActivityExtractionSession(
        preprocessing=preprocessing,
        fragmentation=fragmentation,
        clustering=clustering,
    )

## Main :: linkage

In [None]:
# Linker components definition
from gps_activity.linker.factory import PreprocessingFactory
from gps_activity.models import DataFramePivotFields
from gps_activity.linker.factory import ClusterAggregationFactory
from gps_activity.linker.factory import ClusterAggregationFactory
from gps_activity.linker.factory import JoinValidatorFactory
from gps_activity.linker.factory import SpatialJoinerFactory
from gps_activity.linker.factory import CoverageStatisticsFactory


# Coordinate reference systems passed to the linker pipelines below.
# EPSG:4326 is WGS 84; the constant keeps its "WSG" spelling because the
# following statements reference it under that name.
WSG_84="EPSG:4326"
# EPSG:2326 is the Hong Kong 1980 Grid System
HK_CRS="EPSG:2326"


# Column names of the raw GPS records
gps_pivot_fields = DataFramePivotFields(
    source_vehicle_id="plate_no",
    source_datetime="datetime",
    source_lat="lat",
    source_lon="lon",
)

# Columns that together form the composite key of a GPS record
_gps_composite_keys = [
    gps_pivot_fields.source_vehicle_id,
    gps_pivot_fields.source_datetime,
    gps_pivot_fields.source_lat,
    gps_pivot_fields.source_lon,
]

# Preprocessing pipeline for the GPS side of the linkage
gps_preprocess_pipeline = PreprocessingFactory.factory_pipeline(
    generate_primary_key_for="gps",
    source_composite_keys=_gps_composite_keys,
    source_vehicle_id=gps_pivot_fields.source_vehicle_id,
    source_datetime=gps_pivot_fields.source_datetime,
    source_lat_column=gps_pivot_fields.source_lat,
    source_lon_column=gps_pivot_fields.source_lon,
    source_crs=WSG_84,
    target_crs=HK_CRS,
)


# Column names of the route plans; `service_point_id` acts as the plan key
plans_pivot_fields = DataFramePivotFields(
    plans_pk="service_point_id",
    source_vehicle_id="plate_no",
    source_datetime="date",
    source_lat="lat",
    source_lon="lng",
)

# The plan key alone forms the composite key of a plan record
_plan_composite_keys = [plans_pivot_fields.plans_pk]

# Preprocessing pipeline for the plan side of the linkage
plans_preprocess_pipeline = PreprocessingFactory.factory_pipeline(
    generate_primary_key_for="plan",
    source_composite_keys=_plan_composite_keys,
    source_vehicle_id=plans_pivot_fields.source_vehicle_id,
    source_datetime=plans_pivot_fields.source_datetime,
    source_lat_column=plans_pivot_fields.source_lat,
    source_lon_column=plans_pivot_fields.source_lon,
    source_crs=WSG_84,
    target_crs=HK_CRS,
)

# Cluster aggregation reuses the GPS column mapping and the same CRS pair
_cluster_agg_kwargs = {
    "source_lat_column": gps_pivot_fields.source_lat,
    "source_lon_column": gps_pivot_fields.source_lon,
    "source_datetime": gps_pivot_fields.source_datetime,
    "source_vehicle_id": gps_pivot_fields.source_vehicle_id,
    "source_crs": WSG_84,
    "target_crs": HK_CRS,
}
cluster_agg_pipeline = ClusterAggregationFactory.factory_pipeline(**_cluster_agg_kwargs)


# Spatial join between the two sides: inner join, distance threshold of 80
spatial_joiner = SpatialJoinerFactory.factory_pipeline(
    max_distance=80,
    how="inner",
)

# Join validation: at most 1 day apart, vehicle overlap not enforced
spatial_validator = JoinValidatorFactory.factory_pipeline(
    ensure_vehicle_overlap=False,
    max_days_distance=1,
)

# Coverage statistics extractor with the factory defaults
coverage_stats_extractor = CoverageStatisticsFactory.factory_pipeline()

In [None]:
from gps_activity import ActivityLinkageSession


# Assemble the linkage session from the components defined in the previous cell
_linkage_components = {
    "gps_preprocessor": gps_preprocess_pipeline,
    "plan_preprocessor": plans_preprocess_pipeline,
    "cluster_aggregator": cluster_agg_pipeline,
    "spatial_joiner": spatial_joiner,
    "spatial_validator": spatial_validator,
    "coverage_stats_extractor": coverage_stats_extractor,
}
linkage_session = ActivityLinkageSession(**_linkage_components)

In [None]:
# Run the linkage session on the GPS data and the plans.
# NOTE(review): `gps_stcm` and `plan` are not assigned in any cell visible in
# this notebook — presumably the clustered output of an extraction session and
# the route plans; confirm they are defined before running this cell.
linkage_data_container = linkage_session.transform(gps=gps_stcm, plan=plan)

In [None]:
# Compute coverage statistics for the same inputs and display them.
# NOTE(review): `gps_stcm` and `plan` are not assigned in any cell visible in
# this notebook; confirm they are defined before running this cell.
cov_stats = linkage_session.compute_coverage_stats(gps=gps_stcm, plan=plan)
cov_stats

## Main :: metrics

In [None]:
from gps_activity import ActivityMetricsSession

In [None]:
# Compute metrics from the linkage result produced by `linkage_session.transform`
metrics = ActivityMetricsSession()
computed_metrics = metrics.transform(X=linkage_data_container)
# Display the metrics via the result's `.dict()` method
# (presumably a pydantic-style model — TODO confirm)
computed_metrics.dict()