# Inferring device residence

Load the necessary libraries.

In [1]:
import os

import datablox_od
import geopandas as gpd
import pandas as pd
from tqdm.notebook import tqdm

# Truncate displayed DataFrames to 5 rows so cell outputs stay short.
pd.set_option("display.max_rows", 5)

# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

Define the input and output folders, creating the residence output folder if it does not exist.

In [2]:
# Root folder for the sample outputs, relative to the notebook's working directory.
SAMPLE_OUTPUT_DIRECTORY = os.path.join("..", "sample_output")

# Subfolder that receives the monthly residence tables.
RESIDENCE_DIRECTORY = os.path.join(SAMPLE_OUTPUT_DIRECTORY, "residence")

# Create the folder (and any missing parents); exist_ok=True makes re-running
# this cell a no-op when the folder is already there.
os.makedirs(name=RESIDENCE_DIRECTORY, exist_ok=True)

Load the Parquet file containing the pings (mapped to Thailand's administrative areas).

To ensure data quality, we will consider only pings with a horizontal accuracy of less than 1 km. ("Accuracy" is a bit of a misnomer: a lower horizontal accuracy value implies a smaller radius of error, i.e., a more precise location.)

In [3]:
# Read the pings table from the sample output folder.
pings_path = os.path.join(SAMPLE_OUTPUT_DIRECTORY, "pings.parquet")
pings = gpd.read_parquet(pings_path)

# The horizontal_accuracy column is in meters, so 1000 is the 1 km cutoff.
# Keep only the rows strictly below it.
is_accurate_enough = pings["horizontal_accuracy"] < 1000
pings = pings.loc[is_accurate_enough]
pings

Unnamed: 0,device_id,horizontal_accuracy,timestamp,geometry,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN
0,AFBDE0F7-34DF-4B2B-9227-1FF9C45C1879,3.0,2019-01-01 17:24:34,POINT (103.16413 14.72815),8865981a25fffff,Buri Ram,Buri Ram#Phlapphla Chai,Buri Ram#Phlapphla Chai#Sadao
1,AFBDE0F7-34DF-4B2B-9227-1FF9C45C1879,3.0,2019-01-01 09:04:29,POINT (102.06894 15.19989),88659bba05fffff,Nakhon Ratchasima,Nakhon Ratchasima#Non Thai,Nakhon Ratchasima#Non Thai#Non Thai
...,...,...,...,...,...,...,...,...
28897519,4CCB1986-85B2-42E6-987F-664A9D24EBA5,16.0,2020-01-01 04:09:37,POINT (100.45427 13.89577),8864a4aa49fffff,Nonthaburi,Nonthaburi#Pak Kret,Nonthaburi#Pak Kret#Tha It
28897520,4CCB1986-85B2-42E6-987F-664A9D24EBA5,16.0,2019-12-31 21:53:39,POINT (100.45433 13.89558),8864a4aa49fffff,Nonthaburi,Nonthaburi#Pak Kret,Nonthaburi#Pak Kret#Tha It


```{note}
The code below has the ``include_statistics`` parameter of ``datablox_od.residence.infer_residence()`` set to ``True``. Hence, the resulting data frames include statistics on the number of days each device issued nighttime pings and the number of nighttime pings from the inferred residence for every month in the observation window, among others. However, turning on this option can significantly increase the runtime and memory consumption. 

If you need to reduce the runtime or memory consumption, consider setting ``include_statistics`` to ``False`` (which is the default).
```

Infer device residence for each month in the period June 2019 to December 2019.

In [4]:
year_of_interest = 2019

# Arguments that are identical for every monthly run; only the target month
# changes inside the loop.
residence_options = dict(
    device_id_column="device_id",
    timestamp_column="timestamp",
    area_column="ADM2_EN",
    start_time="19:00",
    end_time="07:00",
    num_months_in_observation_window=6,
    min_fraction_pings_in_residence=0.5,
    min_num_days_present=30,
    # Adds the per-month statistics columns to the result, at the cost of
    # extra runtime and memory.
    include_statistics=True,
    delete_temp_dir=False,
    n_jobs=-1,
    verbose=False,
)

# Months 6 through 12, i.e. June to December of the year of interest.
for month_of_interest in tqdm(range(6, 13)):
    residence = datablox_od.residence.infer_residence(
        pings,
        year=year_of_interest,
        month=month_of_interest,
        **residence_options,
    )

    # One Parquet file per month, named "<year>-<zero-padded month>.parquet".
    output_name = f"{year_of_interest}-{month_of_interest:02}.parquet"
    output_path = os.path.join(RESIDENCE_DIRECTORY, output_name)
    residence.to_parquet(output_path)

  0%|          | 0/7 [00:00<?, ?it/s]

As an example, display the device residence information for June 2019.

In [5]:
# Read back the residence table written for June 2019; as the last expression
# in the cell, the resulting DataFrame is displayed.
# The file name is a plain string: the original f-string had no placeholders.
pd.read_parquet(os.path.join(RESIDENCE_DIRECTORY, "2019-06.parquet"))

Unnamed: 0_level_0,residence,2019-01_num_days_present,2019-02_num_days_present,2019-03_num_days_present,2019-04_num_days_present,2019-05_num_days_present,2019-06_num_days_present,total_num_days_present,2019-01_num_pings_in_residence,2019-02_num_pings_in_residence,2019-03_num_pings_in_residence,2019-04_num_pings_in_residence,2019-05_num_pings_in_residence,2019-06_num_pings_in_residence,fraction_of_pings_in_residence
device_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
00068053-F5A7-41BE-BE5D-972516D41834,Lamphun#Mae Tha,1,5,8,3,7,20,44,1,10,20,19,106,135,0.970000
0016B09E-B2C4-4E14-BF85-8FD84EBEE301,Songkhla#Hat Yai,13,25,15,8,8,5,74,357,1141,193,40,30,0,0.974543
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FFCD4F7D-FBFB-42AF-8EB9-2227A02CB6FD,Kalasin#Mueang Kalasin,13,15,7,10,10,8,63,47,39,18,40,61,15,0.960699
FFF3C0F8-41CB-4715-A7E9-18CB811791EF,Bangkok#Bangkok Noi,14,3,3,13,12,20,65,35,9,9,73,78,112,0.747045
