# Generate new columns to map 
* Unsheltered density (unsheltered / sqmi)
* Unsheltered rate per 1000 residents (unsheltered / pop * 1000)
* Move to script when finished
* CARE density (encampment calls / sqmi)
* CARE rate per 1000 residents (encampment calls / pop * 1000)
* 3-yr average homeless count
* 3-yr average CARE calls
* change from prior year

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import intake
import boto3

In [2]:
# Location of the public-health-dashboard data in the city data lake.
# NOTE(review): despite its name this holds a full ``s3://`` URL prefix rather
# than a bare bucket name -- confirm how it is consumed before passing it to
# boto3 calls.
bucket_name = 's3://city-of-los-angeles-data-lake/public-health-dashboard/'

# Handles used to reach the data: the project's intake catalog(s) and S3.
catalog = intake.open_catalog('./catalogs/*.yml')
s3 = boto3.client('s3')

In [3]:
# Load the tract geometries together with the yearly homelessness and CARE
# counts; the preview of `df` shown later has one row per GEOID and year.
df = gpd.read_file('../gis/homelessness_care_tracts.geojson')

## Density and rate per 1000 residents

In [4]:
# Generate new columns for density and rate per 1000 residents.
#   <col>_density : count divided by the tract's clipped area
#   pct_<col>     : count per 1000 residents (the `pct_` prefix is kept for
#                   compatibility; the value is a rate, not a percentage)
# Zero denominators are masked to NaN before dividing so that tracts with no
# area or no residents yield a missing value instead of +/-inf, which would
# otherwise distort map classification and summary statistics.
area_nonzero = df['clipped_area'].where(df['clipped_area'] > 0)
pop_nonzero = df['pop'].where(df['pop'] > 0)

for col in ['unsheltered', 'encampment']:
    df[f'{col}_density'] = df[col] / area_nonzero
    df[f'pct_{col}'] = df[col] / pop_nonzero * 1000

df.head()

Unnamed: 0,GEOID,SPA,SD,CD,year,unsheltered,sheltered,tot_homeless,bulky,encampment,illegal,other,pop,full_area,clipped_area,geometry,unsheltered_density,pct_unsheltered,encampment_density,pct_encampment
0,6037139301,2,3,3,2017,16.156,0.0,16.156,1,28,1,0,4603,1.106305,1.106305,"POLYGON ((-118.57150 34.17758, -118.57148 34.1...",14.603564,3.509885,25.30947,6.082989
1,6037139301,2,3,3,2018,1.0,0.0,1.0,0,5,4,0,4603,1.106305,1.106305,"POLYGON ((-118.57150 34.17758, -118.57148 34.1...",0.90391,0.21725,4.519548,1.086248
2,6037139301,2,3,3,2019,4.315,0.0,4.315,6,54,0,0,4603,1.106305,1.106305,"POLYGON ((-118.57150 34.17758, -118.57148 34.1...",3.90037,0.937432,48.811121,11.731479
3,6037139302,2,3,3,2017,4.021,0.0,4.021,0,5,0,0,5389,0.130598,0.130598,"POLYGON ((-118.54073 34.18019, -118.54070 34.1...",30.789116,0.74615,38.285396,0.927816
4,6037139302,2,3,3,2018,3.0,0.0,3.0,0,2,0,0,5389,0.130598,0.130598,"POLYGON ((-118.54073 34.18019, -118.54070 34.1...",22.971238,0.55669,15.314158,0.371126


## Change from prior year

In [5]:
# Change from prior year
# One row per distinct GEOID; used as the left side of the merge that builds
# `change_prior`.
# NOTE(review): only GEOID is selected, so no other tract attributes (e.g.
# SPA, SD, CD) are carried along -- confirm whether that is intended.
tract_characteristics = df[['GEOID']].drop_duplicates()

In [6]:
# Long-format (one row per tract and year) slice with only the unsheltered
# count, ready to be pivoted to one column per year.
unshelt = df[['GEOID', 'year', 'unsheltered']]

In [7]:
# Reshape to one row per tract with one unsheltered-count column per year,
# then give the year columns descriptive names.
unshelt_wide = (
    unshelt
    .pivot(index='GEOID', columns='year', values='unsheltered')
    .rename(columns={
        2017: 'unsheltered2017',
        2018: 'unsheltered2018',
        2019: 'unsheltered2019',
    })
    .reset_index()
)

# Year-over-year change, expressed as an absolute difference in counts.
unshelt_wide['change_1718'] = (
    unshelt_wide['unsheltered2018'] - unshelt_wide['unsheltered2017']
)
unshelt_wide['change_1819'] = (
    unshelt_wide['unsheltered2019'] - unshelt_wide['unsheltered2018']
)

# Attach the wide table to the list of tracts; the left join keeps every
# tract even when it has no pivoted row.
change_prior = pd.merge(
    tract_characteristics,
    unshelt_wide,
    how='left',
    on='GEOID',
    validate='m:1',
)

In [8]:
# Preview the first rows of the tract-level change table.
change_prior.head()

Unnamed: 0,GEOID,unsheltered2017,unsheltered2018,unsheltered2019,change_1718,change_1819
0,6037139301,16.156,1.0,4.315,-15.156,3.315
1,6037139302,4.021,3.0,10.448,-1.021,7.448
2,6037139502,2.926,15.421,3.119,12.495,-12.302
3,6037139600,6.946,2.676,11.488,-4.27,8.812
4,6037139701,9.021,1.0,5.119,-8.021,4.119


## 3-year average

In [9]:
# Average over 3 years
# Per-tract mean of the yearly unsheltered counts and encampment calls.
# NOTE(review): the mean is taken over whichever yearly rows exist for a
# tract, so a tract missing a year is averaged over fewer than three values.
avg = (
    df.groupby('GEOID')[['unsheltered', 'encampment']]
    .mean()
    .reset_index()
)
avg.head()

Unnamed: 0,GEOID,unsheltered,encampment
0,6037101110,9.832667,1.0
1,6037101122,1.539333,0.0
2,6037101210,13.965667,1.0
3,6037101220,24.154667,0.333333
4,6037101300,12.236667,2.666667


## Merge together

In [10]:
# Combine the year-over-year changes with the per-tract averages, giving one
# row per tract. The join key, join type and cardinality are spelled out so
# the merge cannot silently start joining on additional shared column names
# or duplicate tracts if either input changes.
# The `unsheltered` and `encampment` columns contributed by `avg` are
# per-tract means, not single-year counts.
tract_level = pd.merge(
    change_prior,
    avg,
    on='GEOID',
    how='inner',
    validate='1:1',
)
tract_level.head()

Unnamed: 0,GEOID,unsheltered2017,unsheltered2018,unsheltered2019,change_1718,change_1819,unsheltered,encampment
0,6037139301,16.156,1.0,4.315,-15.156,3.315,7.157,29.0
1,6037139302,4.021,3.0,10.448,-1.021,7.448,5.823,6.666667
2,6037139502,2.926,15.421,3.119,12.495,-12.302,7.155333,14.333333
3,6037139600,6.946,2.676,11.488,-4.27,8.812,7.036667,10.333333
4,6037139701,9.021,1.0,5.119,-8.021,4.119,5.046667,2.666667


* At this point, we have 2 DataFrames: `tract_level` (one row per tract) and `df` (one row per tract-year)
* Depending on what needs to be mapped, use one of the 2 DataFrames
* Should Cleanstat be incorporated?