In [63]:
%reload_ext autoreload
%autoreload 2

from IPython.core.display import Markdown
from tqdm.auto import tqdm
import config
import docs
import pandas as pd
import requests

# Never truncate long text cells when rendering DataFrames in this notebook.
pd.set_option('display.max_colwidth', None)

# Indicator key: drives the config lookup here and is passed to the `docs`
# helpers and used in the output file name further down.
INDICATOR = 'homelessness'
CONFIG = config.get_config(INDICATOR, '../config.toml')

# Show the raw-data directory as a clickable Markdown link. Backslashes are
# swapped for forward slashes so a Windows path renders cleanly.
display(Markdown('## Raw data path'))
raw_dir_path = '/'.join(str(CONFIG['raw_dir']).split('\\'))
raw_dir_link = f"[{raw_dir_path}]({raw_dir_path})"
display(Markdown(raw_dir_link))


## Raw data path

[C:/Users/tan/src/regional-pm-2023/data/raw/qol/homelessness/homelessness](C:/Users/tan/src/regional-pm-2023/data/raw/qol/homelessness/homelessness)

# Quality of Life: Homelessness

## Homelessness

In [64]:
# Show the description text for this indicator, looked up via the indicators workbook path.
docs.describe_indicator(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Sheltered and unsheltered homeless individuals.

nan

In [65]:
# Show the documented output schema (column, name, description, type) for this indicator.
docs.list_schema(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,description,type
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
year,Year,Year of record.,datetime
shelter_type,Shelter Type,"Shelter type of record (""Unsheltered"" or ""Sheltered"")",string
homeless,Homeless Individuals,Homeless individuals in a given year under a given shelter type.,int


In [66]:
# Show the data sources registered for this indicator.
docs.list_sources(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0_level_0,name,organization,active,notes
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
weallcount,WeAllCount (PIT),San Diego Regional Task Force on Homelessness,True,Numbers pulled from annual reports into an extract.


In [67]:
# Keep the update steps in `steps`: the step sections below display one row each.
steps = docs.list_update_steps(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])
steps

Unnamed: 0,step
0,Download and extract homelessness data.
1,Transform homelessness data.


In [68]:
# Show the author remarks / caveats recorded for this indicator.
docs.list_remarks(indicator=INDICATOR, indicators_xlsx_path=CONFIG['indicators_xlsx_path'])

Unnamed: 0,author,note
0,TAN,Extract contains more data than we have decided to use. Only sheltered and unsheltered homelessness totals are being reported.
1,TAN,2021 has no publicly reported estimate of unsheletered homeless individuals.


### Step 0: Download and extract data
Data comes from the region's [Task Force on Homelessness](https://www.rtfhsd.org/reports-data/) reports. A manual extract was created from the PDFs; it should include the report pages for the numbers used.

Note that there are extra data categories from the earlier years that we ended up not reporting in Regional PM.

In [69]:
# Show the update step this section implements (row label 0 of `steps`).
download_step = steps.loc[0]
display(download_step)

step    Download and extract homelessness data.
Name: 0, dtype: object

In [70]:
# Read only the four columns needed from the hand-built extract, then keep the
# rows where `subcategory` is 'Total' while excluding rows whose `shelter_type`
# is itself 'Total' (i.e. keep one total row per shelter type).
extract_path = CONFIG['raw_dir'] / 'weallcount/weallcount_extract.csv'
extract_columns = ['year', 'shelter_type', 'subcategory', 'count']
homelessness = pd.read_csv(extract_path, usecols=extract_columns).query(
    "`subcategory` == 'Total' and `shelter_type` != 'Total'"
)
homelessness.tail(4)

Unnamed: 0,year,shelter_type,subcategory,count
3,2011,Sheltered,Total,4039.0
4,2011,Unsheltered,Total,4981.0
9,2012,Sheltered,Total,4371.0
10,2012,Unsheltered,Total,5267.0


### Step 1: Transform data

In [71]:
display(steps.loc[1])

# Shape the filtered extract into the documented schema for this indicator:
# `year`, `shelter_type`, `homeless`.
#   * `subcategory` is always 'Total' after the Step 0 filter, so it is dropped.
#   * `count` is renamed to `homeless` and stored as a nullable integer, so
#     whole-person counts are not written as floats (e.g. 4039.0) and any
#     missing estimate stays missing instead of breaking the cast.
# The chain is safe to re-run: the drop ignores an already-removed column and
# the rename is a no-op once `count` has become `homeless`.
# NOTE(review): the schema lists `year` as datetime; it is left exactly as read
# from the extract here - confirm whether a conversion is expected downstream.
homelessness = (
    homelessness
    .drop(columns='subcategory', errors='ignore')
    .rename(columns={'count': 'homeless'})
    .astype({'homeless': 'Int64'})
)

# Guard against the extract or the filter changing shape unnoticed.
assert set(homelessness.columns) == {'year', 'shelter_type', 'homeless'}, (
    'unexpected columns after transform'
)
homelessness.tail(4)

step    Transform homelessness data.
Name: 1, dtype: object

### Save Data

In [72]:
# Show the clean-data directory as a clickable Markdown link. Backslashes are
# swapped for forward slashes so a Windows path renders cleanly.
display(Markdown('#### Clean data path'))
clean_dir_path = '/'.join(str(CONFIG['clean_dir']).split('\\'))
clean_dir_link = f"[{clean_dir_path}]({clean_dir_path})"
display(Markdown(clean_dir_link))

#### Clean data path

[C:/Users/tan/src/regional-pm-2023/data/clean/qol/homelessness/homelessness](C:/Users/tan/src/regional-pm-2023/data/clean/qol/homelessness/homelessness)

In [74]:
# Write the indicator table to the clean directory as '<INDICATOR>_odp.csv'.
# index=False: the frame's index is only the leftover row position from the raw
# extract (it is not one of the documented columns), so it must not be written
# out as an extra unnamed first column.
clean_csv_path = CONFIG['clean_dir'] / f'{INDICATOR}_odp.csv'
homelessness.to_csv(clean_csv_path, index=False)