In [2]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests
import acs

# Never truncate long text cells when DataFrames are rendered.
pd.options.display.max_colwidth = None

# Indicator handled by this notebook, and its settings as returned by
# config.get_config for '../config.toml'.
INDICATOR = 'costs_35'
CONFIG = config.get_config(INDICATOR, '../config.toml')
# NOTE(review): CENSUS_API_KEY is not referenced by any cell visible here.
CENSUS_API_KEY = CONFIG['acs_api_key']

# Link to the raw-data directory. Backslashes are replaced with forward
# slashes before the path is used as both the Markdown link text and target.
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(
    Markdown('## Raw data path'),
    Markdown(f"[{raw_dir_path}]({raw_dir_path})"),
)


  from .autonotebook import tqdm as notebook_tqdm


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/housing/housing_costs/costs_35](C:/Users/tan/src/regional-pm-2023/data/raw/housing/housing_costs/costs_35)

# Housing: Housing Costs

## Households with Housing Costs Greater Than 35% of Income

In [3]:
# Show this indicator's description, looked up via the indicators workbook path.
docs.describe_indicator(indicator=INDICATOR,
                        indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Percentage of households with housing costs greater than 35% of income.

nan

In [4]:
# Show the output schema (column names, descriptions, types) for this indicator.
docs.list_schema(indicator=INDICATOR,
                 indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
geography,Geography,Geography of record.,
households,households,"Percentage of households with housing costs greater than 35% of income, in a given year for a given geography.",float


In [5]:
# Show the data sources registered for this indicator.
docs.list_sources(indicator=INDICATOR,
                  indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acs_dp04,American Community Survey API (DP04),US Census Bureau,1.0,


In [6]:
# Keep the update steps in `steps`: later cells show individual steps with
# steps.loc[<n>]. The bare name on the last line renders the whole table.
steps = docs.list_update_steps(indicator=INDICATOR,
                               indicators_xlsx_path=CONFIG['indicators_xlsx_path'])
steps

Unnamed: 0,step
0,Download new year of ACS data and update legacy workbook.
1,Extract legacy data from legacy PM sheet.


In [7]:
# Show any author remarks recorded for this indicator.
docs.list_remarks(indicator=INDICATOR,
                  indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0,author,note
0,,


### Step 0: Download ACS data

Note that finalized 2021 data is not available for the 1-year ACS (and it may never be available), and 2022 data isn't expected until sometime in September 2023.

Unlike the other indicators that use ACS data, this indicator was updated using the legacy procedure (copying the new year of estimates into the legacy sheet).

This unfortunately happened because:
* I thought that a census library could handle all kinds of ACS 1-year data (it only ended up handling detailed tables; subject tables weren't supported, and profile tables didn't seem to work correctly).
* I had errors I couldn't resolve trying to get profile (DP) tables from the API manually.

In [8]:
# Show update step 0 (the row labelled 0 in the `steps` table built above).
display(steps.loc[0])

step    Download  new year of ACS data and update legacy workbook.
Name: 0, dtype: object

[This is](https://data.census.gov/table?q=DP04:+SELECTED+HOUSING+CHARACTERISTICS&g=010XX00US_040XX00US06_050XX00US06073&tid=ACSDP1Y2021.DP04) the ACS data that was saved to the raw data folder.

### Step 1: Extract legacy data

In [9]:
# Show update step 1 (the row labelled 1 in the `steps` table built above).
display(steps.loc[1])

step    Extract legacy data from legacy PM sheet.
Name: 1, dtype: object

In [10]:
# Extract the legacy table and reshape it from one column per geography to one
# row per (year, geography) pair, with the value stored in `households`.
# NOTE(review): nrows=17 is tied to the sheet's current extent; presumably it
# must be increased when a new year is appended. Confirm against the workbook.
costs_35 = (
    pd.read_excel(
        CONFIG['legacy_xlsx_path'],
        sheet_name=CONFIG['legacy_sheet'],
        skiprows=3,       # skip the first 3 rows of the sheet
        nrows=17,         # read at most 17 data rows
        usecols='A:D',    # first column becomes 'year'; the rest are melted into 'geography'
    )
    # The first column is renamed from pandas' placeholder for a blank header.
    .rename({'Unnamed: 0': 'year'}, axis='columns')
    # Parse the year values as timestamps using the '%Y' format.
    .assign(year=lambda frame: pd.to_datetime(frame['year'], format='%Y'))
    .melt(id_vars='year', var_name='geography', value_name='households')
    .set_index(['year', 'geography'])
)
costs_35.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,households
year,geography,Unnamed: 2_level_1
2019-01-01,United States,0.240829
2020-01-01,United States,
2021-01-01,United States,0.253977


### Save Data

In [11]:
# Link to the clean-data directory. Backslashes are replaced with forward
# slashes before the path is used as both the Markdown link text and target.
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(
    Markdown('#### Clean data path'),
    Markdown(f"[{clean_dir_path}]({clean_dir_path})"),
)

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/housing/housing_costs/costs_35](C:/Users/tan/src/regional-pm-2023/data/clean/housing/housing_costs/costs_35)

In [12]:
# Write the extracted table to '<INDICATOR>_odp.csv' in the clean-data directory.
costs_35.to_csv(CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv')