In [60]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

pd.set_option('display.max_colwidth', None)

INDICATOR = 'fmr_income'
CONFIG = config.get_config(INDICATOR, '../config.toml')

display(Markdown('## Raw data path'))
raw_dir_path = str(CONFIG['raw_dir']).replace('\\', '/')
display(Markdown(f"[{raw_dir_path}]({raw_dir_path})"))


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/housing/housing_costs/fmr_income](C:/Users/tan/src/regional-pm-2023/data/raw/housing/housing_costs/fmr_income)

# Housing: Housing Costs

## Income Needed for FMR

In [61]:
docs.describe_indicator(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Annual income needed to afford fair market rent.

nan

In [62]:
docs.list_schema(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
geography,Geography,"Geography of record (""United States"", ""California"", or ""San Diego"").",string
fmr,Fair Market Rent,Fair market rent (for a two bedroom apartment) in a given year for a given geography.,float
income_for_fmr,Income for FMR,Annual income needed to afford fair market rent (for a two bedroom apartment).,float


In [63]:
docs.list_sources(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
oor,Out of Reach,National Low Income Housing Coalition,True,"They only have most recent report as a workbook public, with second most recent as PDF. Some numbers had to be manually pulled."


In [64]:
steps = docs.list_update_steps(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)
steps

Unnamed: 0,step
0,Download new OOR data and extract it.
1,Extract legacy OOR and combine it with new data.


In [65]:
docs.list_remarks(
    indicator=INDICATOR,
    indicators_xlsx_path=CONFIG['indicators_xlsx_path'],
)

Unnamed: 0,author,note
0,,


### Step 0: Download and update legacy sheet.

* 2021: Bottom of page CA-40
* 2022: Second from the bottom on page CA-48.
* 2023: Filter `COUNTY/METRO` for San Diego-Carlsbad MSA.

In [66]:
display(steps.loc[0])

step    Download new OOR data and extract it.
Name: 0, dtype: object

### Step 1: Extract legacy data and combine.

In [67]:
display(steps.loc[1])

step    Extract legacy OOR and combine it with new data.
Name: 1, dtype: object

In [68]:
# Fair market rent (for two bedroom apartment)
fmr = (
    pd.read_excel(
        CONFIG['legacy_xlsx_path'],
        CONFIG['legacy_sheet'],
        usecols='A,E:F',
        skiprows=4,
        nrows=19,
    )
    .rename(
        columns={
            'Unnamed: 0': 'year',
            'San Diego.1': 'San Diego',
            'California.1': 'California',
        }
    )
    .melt(id_vars='year', var_name='geography', value_name='fmr')
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
    .set_index(['year', 'geography'])
)
display(fmr.tail(3))

Unnamed: 0_level_0,Unnamed: 1_level_0,fmr
year,geography,Unnamed: 2_level_1
2021-01-01,California,2030.0
2022-01-01,California,2028.0
2023-01-01,California,2196.935175


In [69]:
# Income needed for FMR
income_for_fmr = (
    pd.read_excel(
        CONFIG['legacy_xlsx_path'],
        CONFIG['legacy_sheet'],
        usecols='A,B:C',
        skiprows=4,
        nrows=19,
    )
    .rename(
        columns={
            'Unnamed: 0': 'year',
        }
    )
    .melt(id_vars='year', var_name='geography', value_name='income_for_fmr')
    .assign(year=lambda df: pd.to_datetime(df.year, format='%Y'))
    .set_index(['year', 'geography'])
)
display(fmr_income.tail(3))

Unnamed: 0_level_0,Unnamed: 1_level_0,fmr,income_for_fmr
year,geography,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01,California,2030.0,81191.0
2022-01-01,California,2028.0,81133.0
2023-01-01,California,2196.935175,87877.406991


In [70]:
fmr_income = fmr.join(income_for_fmr)

### Save Data

In [71]:
display(Markdown('#### Clean data path'))
clean_dir_path = str(CONFIG['clean_dir']).replace('\\', '/')
display(Markdown(f"[{clean_dir_path}]({clean_dir_path})"))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/housing/housing_costs/fmr_income](C:/Users/tan/src/regional-pm-2023/data/clean/housing/housing_costs/fmr_income)

In [72]:
fmr_income.to_csv(
    CONFIG['clean_dir']
    / f'{INDICATOR}_odp.csv'
)