# About
* **Author**: Adil Rashitov (adil.rashitov.98@gmail.com)
* **Created at**: 08.12.2022


In [None]:
# Imports / Configs / Global vars

# Import of native python tools
import os
import json
from functools import reduce

# Import of base ML stack libs
import numpy as np
import sklearn as sc

# Visualization libraries
# import plotly.express as px

# Logging configuration
import logging

# Root logger, shared by every `logging.*` call in the notebook; emit INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Record layout: "[ <timestamp> ][ <LEVEL> ]: <message>" with a 12-hour clock timestamp.
logging.basicConfig(
    datefmt='%m/%d/%Y %I:%M:%S %p',
    format='[ %(asctime)s ][ %(levelname)s ]: %(message)s',
)


# IPython configs
# `display` / `HTML` come from the public `IPython.display` module: importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell

# Inject CSS so the notebook container uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Show the value of every expression statement in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

# Pandas configs
import pandas as pd
import geopandas as gpd
# Raise the display limits so frames of up to 350 rows / 250 columns are
# rendered in full instead of being truncated.
pd.set_option("display.max_rows", 350)
pd.set_option("display.max_columns", 250)

# Jupyter configs
# autoreload mode 2: reload imported modules before each cell execution, so
# edits to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn off jedi-based tab completion.
%config Completer.use_jedi = False

# Configure project PATH
from pathlib import Path
import sys

# Project root = parent of the current working directory (assumes the notebook
# is started from a direct sub-folder of the project - confirm for your setup).
# pathlib handles the platform's path separator; the previous split on '/'
# returned the working directory itself on Windows.
PROJECT_PATH = str(Path.cwd().parent)

# Make project packages importable; the guard avoids duplicate entries when
# the cell is re-run.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# Data

In [None]:
# Reading GPS
from src import path
from src.gps_formatting_pipeline import factory_raw_gps_formatter_pipeline


# Build the project's formatting pipeline, then load the raw GPS parquet file
# and pass it straight through the pipeline.
gps_formatting_pipeline = factory_raw_gps_formatter_pipeline()
gps_records = gps_formatting_pipeline.fit_transform(
    pd.read_parquet(path.FILE_01_RAW_GPS)
)


# Remove fully identical rows and renumber the index, logging the row count
# on both sides of the operation.
logging.info(f"Amt records before drop duplicates: {gps_records.shape[0]}")
gps_records = gps_records.drop_duplicates()
gps_records = gps_records.reset_index(drop=True)
logging.info(f"Amt records after drop duplicates: {gps_records.shape[0]}")

In [None]:
# Preview the first rows of the de-duplicated GPS records.
gps_records.head()

In [None]:
# Route plan
route_plans = pd.read_parquet(path.FILE_01_RAW_PLANS)

# Parse the YYYYMMDD-formatted `date` column and store it back as strings.
route_plans["date"] = pd.to_datetime(route_plans["date"], format='%Y%m%d').astype(str)

# Log the row count before and after de-duplication. The two messages are
# labelled (as in the GPS records cell) so they can be told apart in the
# output; previously both read "Amt route plans: ...".
logging.info(f"Amt route plans before drop duplicates: {route_plans.shape[0]}")
route_plans = route_plans.drop_duplicates().reset_index(drop=True)
logging.info(f"Amt route plans after drop duplicates: {route_plans.shape[0]}")

In [None]:
# Preview the first rows of the de-duplicated route plans.
route_plans.head()

In [None]:
# Geocodes: load the CSV and keep only the first row seen for each address.
geocodes = pd.read_csv(path.FILE_01_RAW_GEOCODES).drop_duplicates(subset=["address"])

# Main

In [None]:
# Join geocodes & route plan
# Left join keeps every route-plan row. No `on=` is given, so pandas joins on
# the columns the two frames have in common (presumably "address" - verify).
# Afterwards only the first row per service point is kept.
route_plans = pd.merge(route_plans, geocodes, how="left").drop_duplicates(
    subset=["service_point_id"]
)

In [None]:
# Preview the route plans after the geocode join.
route_plans.head()

In [None]:
# Persist both frames as parquet files in the intermediate data folder.
# The paths are relative to the current working directory; index=False leaves
# the frame index out of the written files.
route_plans.to_parquet("../data/02_intermediate/route_plan.parquet", index=False)
gps_records.to_parquet("../data/02_intermediate/gps_records.parquet", index=False)

In [None]:
# Select the GPS records of a single route for visual inspection.
gps = gps_records.loc[gps_records["route_id"] == "2022-06-06 :: Vehicle #1"]

In [None]:
from keplergl import KeplerGl

# Create a KeplerGl instance with the single-route frame passed under the
# "gps" key and the height set to 800.
kepler = KeplerGl(data={"gps": gps}, height=800)

In [None]:
# Bare expression: the KeplerGl object is shown as the cell output.
kepler