# Identifying internal migration

Load the necessary libraries.

In [1]:
import os
from pathlib import Path

import datablox_od
import pandas as pd
from tqdm.notebook import tqdm

# Keep rendered DataFrames short: frames with more than 5 rows are shown truncated.
pd.set_option("display.max_rows", 5)

# Reload imported modules before each cell runs, so edits to local packages
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

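Define the folder names: residence data is read from `sample_output/residence`, and migration results are written to `sample_output/migration` (created if it does not exist).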

In [2]:
# Root folder holding the sample outputs, relative to the notebook's folder.
SAMPLE_OUTPUT_DIRECTORY = os.path.join("..", "sample_output")

# Input (residence) and output (migration) folders live side by side under the root.
RESIDENCE_DIRECTORY, MIGRATION_DIRECTORY = (
    os.path.join(SAMPLE_OUTPUT_DIRECTORY, subfolder)
    for subfolder in ("residence", "migration")
)

# Only the output folder is created here; the residence folder is only read from.
os.makedirs(MIGRATION_DIRECTORY, exist_ok=True)

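Using the residence information from June 2019 to December 2019, we are going to identify the devices that migrated for each month in the period July 2019 to December 2019. Each month's residence is compared with that of the preceding month, and the result is saved under the name of the later month.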

In [3]:
# List the monthly residence files in sorted order.
# Hidden entries (e.g. ".DS_Store", ".ipynb_checkpoints") are skipped: os.listdir
# returns them too, and they would otherwise be passed to pd.read_parquet or
# shift the month pairing.
# NOTE(review): assumes file names sort chronologically (e.g. "2019-06.parquet") - confirm.
residence_files = sorted(
    name for name in os.listdir(RESIDENCE_DIRECTORY) if not name.startswith(".")
)

# Pair every file with the one that follows it: (previous month, current month).
month_pairs = list(zip(residence_files, residence_files[1:]))

for file_from, file_to in tqdm(month_pairs):
    residence_from = pd.read_parquet(os.path.join(RESIDENCE_DIRECTORY, file_from))
    residence_to = pd.read_parquet(os.path.join(RESIDENCE_DIRECTORY, file_to))

    # Both inputs keep the residence in a column named "residence".
    migration = datablox_od.migration.identify_migration(
        residence_from,
        residence_to,
        residence_from_column="residence",
        residence_to_column="residence",
        verbose=False,
    )

    # The result is stored under the name of the later ("to") month.
    migration.to_parquet(
        os.path.join(MIGRATION_DIRECTORY, f"{Path(file_to).stem}.parquet")
    )

  0%|          | 0/6 [00:00<?, ?it/s]

As an example, display migration information for July 2019.

In [4]:
# Read back the migration table saved for July 2019; as the last expression
# of the cell, the resulting DataFrame is rendered as the cell output.
pd.read_parquet(os.path.join(MIGRATION_DIRECTORY, "2019-07.parquet"))

Unnamed: 0_level_0,residence_from,residence_to
device_id,Unnamed: 1_level_1,Unnamed: 2_level_1
02104489-1579-4A12-87BE-511B927660E8,Songkhla#Mueang Songkhla,Trang#Kantang
027CAF6D-AD39-429D-B0DD-A2D1D19DE540,Nakhon Pathom#Sam Phran,Bangkok#Bang Phlat
...,...,...
F7CB99E6-4866-4BE7-9767-8AEBE844C7F8,Samut Sakhon#Mueang Samut Sakhon,Trang#Kantang
F947C63E-2DBD-4AD6-B773-42CA88FDC869,Chachoengsao#Bang Pakong,Samut Prakan#Bang Bo
