# __HoWDe__ 
### _A Home and Work location Detection algorithm for GPS data analytics_

This notebook is a brief tutorial on how to use "HoWDe". It leverages functions contained in `pipeline.py`.

In [None]:
%config InlineBackend.figure_format = 'retina'

from pyspark.sql import SparkSession
from howde import HoWDe_labelling

In [None]:
# Obtain the Spark session for this notebook (an already-running session is reused)
spark = (
    SparkSession.builder
    .appName("HoWDeApp")
    .getOrCreate()
)

# Read the stop-location records from Parquet; replace the placeholder path with your own dataset
input_data = spark.read.parquet("path_to_your_data.parquet")


#### Example 1: Run with default configuration

In [None]:
# Label the stops without overriding any parameter; only verbose output is switched on
labeled_df = HoWDe_labelling(input_data, verbose=True)

# Persist the labelled result as Parquet, replacing the output of any earlier run
(
    labeled_df.write
    .mode("overwrite")
    .parquet("output/default_config.parquet")
)

#### Example 2: Run with a custom configuration

In [None]:
# Gather every overridden parameter in one place so the configuration is easy to review and tweak
custom_params = dict(
    range_window_home=14,
    range_window_work=21,
    dhn=2,
    dn_H=0.5,
    dn_W=0.4,
    hf_H=0.6,
    hf_W=0.3,
    df_W=0.5,
    output_format="stop",
    verbose=True,
)
labeled_df_custom = HoWDe_labelling(input_data, **custom_params)

# Inspect the columns of the labelled output
labeled_df_custom.printSchema()

#### Example 3: Run multiple configurations (grid search)

In [None]:
# dn_H and hf_H are given as lists so that several configurations are evaluated in one call;
# every other parameter is a single fixed value.
grid_params = dict(
    range_window_home=28,
    range_window_work=42,
    dhn=3,
    dn_H=[0.6, 0.7],
    dn_W=0.5,
    hf_H=[0.5, 0.7],
    hf_W=0.4,
    df_W=0.6,
    output_format="stop",
    verbose=True,
)
labeled_outputs = HoWDe_labelling(input_data, **grid_params)

# Each entry is read through its "configs" (parameter set) and "res" (labelled result) keys
for i, run in enumerate(labeled_outputs):
    run_config, run_df = run["configs"], run["res"]

    # Report which parameter combination this result belongs to
    print(f"\nConfiguration {i+1}:")
    for k, v in run_config.items():
        print(f"  {k}: {v}")

    # Preview the first two rows of the result
    print("\nResult sample:")
    run_df.show(2)

    # Write this configuration's result to its own Parquet path
    save_path = f"./output/config_{i+1}.parquet"
    run_df.write.mode("overwrite").parquet(save_path)
    print(f"✅ Saved to: {save_path}")

#### Example 4: Use `edit_config_default` to customize the start/end of work hours

In [None]:
# Settings handed to HoWDe_labelling through edit_config_default.
# The two work-hour entries are the ones this example changes.
custom_config = dict(
    is_time_local=True,      # True: input timestamps are already expressed in local time
    min_stop_t=60,           # shortest stop duration, in seconds
    start_hour_day=6,        # 'home hours' interval: start
    end_hour_day=24,         # 'home hours' interval: end
    start_hour_work=8,       # 'work hours' interval: start  >>> UPDATED
    end_hour_work=18,        # 'work hours' interval: end    >>> UPDATED
    data_for_predict=False,  # True: sliding windows use past-only data (prediction mode)
)

labeled_df_edited = HoWDe_labelling(
    input_data,
    edit_config_default=custom_config,
    range_window_home=28,
    range_window_work=42,
    output_format="stop",
)

# Inspect the columns of the labelled output
labeled_df_edited.printSchema()