# __HoWDe__ 
### _a Home and Work location Detection algorithm for GPS data analytics_

This notebook is a brief tutorial on how to use "HoWDe". It leverages functions contained in the "HoWDe_utils.py" file (source code). 

In [1]:
%config InlineBackend.figure_format = 'retina'
# NB: Home and Work location labelling
# 1. Load pre-computed stop locations (in our case computed using infostop)
# 2. Assign labels to stop locations: Home ("H"), Work ("W"), and Other ("O")
from howde import *

In [3]:
# Folder holding the pre-computed stop locations (parquet files) that HoWDe
# will label. Replace this value with the location of your own data.
# NOTE(review): this is a machine-specific absolute path; it will not exist on
# other machines.
HW_PATH = '/Users/lorentz/JupyterDir/20-04_COVID_all/World_Bank/02-home_work_detection/data/stop_location_results/veraset_location_clustered/'

# Explicit type check rather than `try: assert ... except:`. Assertions are
# removed when Python runs with -O, and a bare `except` would also hide any
# unrelated error raised inside the `try`.
if not isinstance(HW_PATH, str):
    print("Path to data is missing.")

#### 1. Use HoWDe providing pre-loaded data and Spark Session

In [5]:
#### Create the Spark session used to load the DATA that is PASSED to HoWDe
import os
import warnings

from pyspark.sql.types import *
from pyspark.sql import SparkSession

# Silence every Python warning from here on.
warnings.filterwarnings("ignore")

# Set up Spark
driver_memory = 250  # formatted below as "<driver_memory>g" for spark.driver.memory
packages = "data/work/shared/tools/spark-avro_2.12-3.0.0.jar"

# Extra jar handed to PySpark through its submit arguments; set before the
# session is requested below.
os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars {0} pyspark-shell ".format(packages)

# Build the session configuration step by step, then create (or reuse) it.
spark_builder = SparkSession.builder.master("local[50]")
spark_builder = spark_builder.config("spark.sql.files.ignoreCorruptFiles", "true")
spark_builder = spark_builder.config("spark.driver.memory", f"{driver_memory}g")
# Executor memory is a fixed literal here; it is not derived from driver_memory.
spark_builder = spark_builder.config("spark.executor.memory", "250g")
spark = spark_builder.getOrCreate()

# Runtime settings applied to the live session.
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
spark.sparkContext.setLogLevel("ERROR")

# Load DATA: read the parquet files found under HW_PATH
input_data = (
    spark.read
    .format("parquet")
    .load(HW_PATH, pathGlobFilter="*.parquet")
)

# Label the pre-loaded data, reusing the Spark session created above.
# NOTE(review): the output recorded for this cell is
# "TypeError: 'float' object is not iterable". The self-contained example
# further down passes differently named thresholds (bnd_none_day,
# bnd_none_home, bnd_none_work, range_freq_*). Confirm the keyword names and
# whether each threshold must be a scalar or a list against the installed
# HoWDe_labelling signature.
res1 = HoWDe_labelling(
    input_data=input_data,
    spark=spark,
    HW_PATH='./',
    SAVE_PATH=None,
    SAVE_NAME='',
    save_multiple=False,
    edit_config_default=None,
    range_window=42,
    dhn=6,
    dn_H=[0.4, 0.6],
    dn_W=0.8,
    hf_H=0.2,
    hf_W=0.2,
    df_W=0.2,
    driver_memory=250,
)

res1.printSchema()

HoWDe Labelling: computing LABs ...


TypeError: 'float' object is not iterable

#### 2. Use HoWDe in a self-contained way (providing the path to the data and a location to save)

In [None]:
### Let HoWDe load data
# No DataFrame and no Spark session are passed here; only the data path is.
# NOTE(review): the threshold keywords below (bnd_none_*, range_freq_*) differ
# from the ones used in the pre-loaded-data example (dhn, dn_H, dn_W, hf_H,
# hf_W, df_W). Confirm which set the installed HoWDe_labelling accepts.
res2 = HoWDe_labelling(
    input_data=None,
    spark=None,
    HW_PATH=HW_PATH,
    SAVE_PATH=None,
    SAVE_NAME='',
    save_multiple=False,
    edit_config_default=None,
    range_window=42,
    bnd_none_day=6,
    bnd_none_home=[0.4, 0.6],
    bnd_none_work=0.8,
    range_freq_home=0.2,
    range_freq_work_h=0.2,
    range_freq_work_d=0.2,
    driver_memory=250,
)
res2.printSchema()

In [None]:
# Print a preview of the labelled result.
# NOTE(review): assumes res2 is a Spark DataFrame — confirm against HoWDe_labelling's return type.
res2.show()
