# Intelligent Sampler Demo

In [None]:
import os
import pandas as pd
import numpy as np
import requests

In [None]:
from sandag_rsm.logging import logging_start
# Start logging through the project's helper before any sampler call is made.
# NOTE(review): presumably this makes the sampler's log messages visible in the
# notebook output — confirm against sandag_rsm.logging.
logging_start()

In [None]:
from sandag_rsm.sampler import rsm_household_sampler

### Remote I/O

In [None]:
from sandag_rsm.data_load import get_test_file

# Local folder handed to get_test_file as the destination for the demo inputs.
data_dir = "./data-dl/"

# Household, person, TAZ crosswalk and accessibility files used by later cells.
# NOTE(review): get_test_file presumably fetches whichever of these are not
# already present in data_dir — confirm against sandag_rsm.data_load.
demo_inputs = [
    "hh_test.csv.gz",
    "person_test.csv.gz",
    "taz_crosswalk.csv",
    "accessibilities.csv",
    "accessibilities_iter.csv",
]
get_test_file(demo_inputs, data_dir)

### Temp directory for output

In [None]:
import tempfile
# Keep the TemporaryDirectory object bound to a name: the sampler calls below
# write their outputs under tempdir.name, and the directory is removed once the
# object is cleaned up.
tempdir = tempfile.TemporaryDirectory()
# Bare expression so the notebook echoes the generated directory path.
tempdir.name

## Original Full Household File
This file contains the original model household data, with the original (unmerged) TAZs.

In [None]:
# Load the full household file and print a summary of its columns.
household_csv = "data-dl/hh_test.csv.gz"
h0 = pd.read_csv(household_csv)
h0.info()

## Sampling without Accessibility Changes

In [None]:
# First run: prev_iter_access / curr_iter_access are not passed, so the sampler
# uses whatever defaults it defines for them.
h1, p1 = rsm_household_sampler(
    # Inputs
    input_dir="./data-dl",
    input_household="hh_test.csv.gz",
    input_person="person_test.csv.gz",
    taz_crosswalk="taz_crosswalk.csv",
    # Outputs
    output_dir=tempdir.name,
    output_household="sampled_households_1.csv",
    output_person="sampled_person_1.csv",
)

In [None]:
# Print a summary of h1, the first of the two tables returned by the sampler.
h1.info()

In [None]:
# Every household id in the first sample must exist in the full household file.
assert h1["hhid"].isin(h0["hhid"]).all()

## Sampling with Accessibility Changes

In [None]:
# Second run: same household/person inputs as the first run, now also passing
# the previous- and current-iteration accessibility files.
h2, p2 = rsm_household_sampler(
    # Inputs
    input_dir="./data-dl",
    input_household="hh_test.csv.gz",
    input_person="person_test.csv.gz",
    taz_crosswalk="taz_crosswalk.csv",
    # Accessibility files for the two iterations
    prev_iter_access="accessibilities.csv",
    curr_iter_access="accessibilities_iter.csv",
    # Outputs
    output_dir=tempdir.name,
    output_household="sampled_households_2.csv",
    output_person="sampled_person_2.csv",
)

In [None]:
# Every household id in the second sample must exist in the full household file.
assert h2["hhid"].isin(h0["hhid"]).all()

In [None]:
# The two samples must differ: at least one household in h2 is absent from h1.
assert (~h2["hhid"].isin(h1["hhid"])).any()

## Sampling with Study Area Defined

In [None]:
# Third run: same arguments as the accessibility run, plus an explicit study area.
h3, p3 = rsm_household_sampler(
    # Inputs
    input_dir="./data-dl",
    input_household="hh_test.csv.gz",
    input_person="person_test.csv.gz",
    taz_crosswalk="taz_crosswalk.csv",
    # Accessibility files for the two iterations
    prev_iter_access="accessibilities.csv",
    curr_iter_access="accessibilities_iter.csv",
    # NOTE(review): 49 and 50 are presumably zone ids — confirm against the sampler.
    study_area=[49, 50],
    # Outputs
    output_dir=tempdir.name,
    output_household="sampled_households_3.csv",
    output_person="sampled_person_3.csv",
)

## Contrast Results

In [None]:
# Side-by-side household counts per TAZ for the three sampling runs.
# Each value_counts() series carries the same name, so without `keys` the three
# columns would be indistinguishable; label them by the run they came from.
# A TAZ missing from a run shows up as NaN in that run's column.
pd.concat(
    [h1.taz.value_counts(), h2.taz.value_counts(), h3.taz.value_counts()],
    axis=1,
    sort=True,
    keys=["h1", "h2", "h3"],
)