In [None]:
# Input file locations, all relative to the notebook's working directory.
# CSV read into `popu_df`; supplies the popu_estimate_2020 / popu_estimate_2021 columns.
POPU_STATE_PATH = '../data/processed/2020-2021-popu-estimate-by-state.csv'
# CSV read into `coc_df`; only its column names are used (to pick shared columns).
PIT_BY_COC_PATH = '../data/processed/2015-2020-pit-demographic-by-coc.csv'
# CSV read into `state_df`; the per-state homelessness figures by 'Year'.
PIT_BY_STATE_PATH = '../data/processed/2007-2020-pit-estimates-by-state.csv'
# Lookup CSV with 'State' and 'Code' columns.
STATE_CODE_PATH = '../data/raw/state_vs_code.csv'
# Shapefile with a 'STUSPS' column and state geometries.
MAP_PATH = '../data/maps/cb_2018_us_state_20m/cb_2018_us_state_20m.shp'

In [None]:
import pandas as pd
import geopandas as gpd
# Read the three CSV inputs in one pass; each path lines up with the frame
# in the same position on the left-hand side.
popu_df, coc_df, state_df = (
    pd.read_csv(csv_path)
    for csv_path in (POPU_STATE_PATH, PIT_BY_COC_PATH, PIT_BY_STATE_PATH)
)

# Rename the lookup columns ('State' -> 'geographic_area', 'Code' -> 'State')
# so they match the join keys used by the other tables.
state_codes = pd.read_csv(STATE_CODE_PATH).rename(
    columns={'State': 'geographic_area', 'Code': 'State'}
)
state_codes.head()

In [None]:
# Columns that appear in both the CoC-level and state-level tables, with
# 'State' always included regardless of whether the CoC table has it.
selected_cols = (set(coc_df.columns) & set(state_df.columns)) | {'State'}


In [None]:
# PIT by state, restricted to the selected columns.
# A set cannot be used as a column indexer (pandas >= 2.0 raises TypeError, and
# set ordering is arbitrary), so build a list that follows the frame's own
# column order instead.
state_df = state_df[[col for col in state_df.columns if col in selected_cols]]
state_df.head()

In [None]:
# Attach the 'State' code to each population row by matching on
# 'geographic_area', then keep only the population estimates and identifiers.
# A 'State' column left behind by an earlier run of this cell is dropped first;
# otherwise the merge would emit State_x/State_y and the selection below would
# raise a KeyError.
popu_cols = ["popu_estimate_2020", "popu_estimate_2021", "State", "geographic_area"]
popu_df = (
    popu_df
    .drop(columns='State', errors='ignore')
    .merge(state_codes[['geographic_area', 'State']], on='geographic_area')[popu_cols]
)
popu_df.head()

In [None]:
# Keep only the rows whose 'Year' equals 2020.
state_df_2020 = state_df.loc[state_df['Year'].eq(2020)]
state_df_2020.head()

In [None]:
# Load the state boundaries, expose the 'STUSPS' column under the name 'State'
# (the key the other tables are joined on), and keep only that key plus the
# geometry.
usa = gpd.read_file(MAP_PATH)
usa = usa.rename(columns={'STUSPS': 'State'})
usa = usa[['State', 'geometry']]
usa.head()

In [None]:
# Attach each state's boundary geometry by joining on 'State'.
# A 'geometry' column left over from a previous run is dropped first, so that
# re-running this cell does not produce geometry_x/geometry_y duplicates.
popu_df = popu_df.drop(columns='geometry', errors='ignore').merge(usa, on='State')
popu_df.head()

In [None]:
import folium
import branca.colormap as cm
from IPython.core.display import display, HTML


In [None]:
# Promote the merged table to a GeoDataFrame, using its 'geometry' column.
popu_df = gpd.GeoDataFrame(popu_df, geometry="geometry")
# Give the geometries the same CRS as the boundary table they were merged from.
popu_df.crs = usa.crs
# Reproject to EPSG:4326.
# NOTE(review): presumably because the folium map expects lat/lon coordinates — confirm.
popu_df = popu_df.to_crs('EPSG:4326')


In [None]:
# JSON serialisation of the geometries, indexed by 'State'.
# NOTE(review): popu_geo is not referenced by any cell visible in this notebook
# (create_map builds its own JSON from the frame it is given) — confirm before removing.
popu_geo = gpd.GeoSeries(popu_df.set_index('State')['geometry']).to_json()


In [None]:
def create_map(df, viz_col, index_col, legend_name=None):
    """Build a folium choropleth of ``viz_col`` over the geometries in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Table containing ``index_col``, ``viz_col`` and a ``'geometry'`` column.
    viz_col : str
        Column whose values drive the fill colour.
    index_col : str
        Column identifying each shape. The geometries are indexed by it before
        being serialised, and rows are matched through ``feature.id``.
    legend_name : str, optional
        Caption of the colour scale. Defaults to ``viz_col`` so that each map
        is labelled with the quantity it actually shows.

    Returns
    -------
    folium.Map
        Map with a light base layer and the choropleth layer added.
    """
    # Start view: roughly the middle of the United States, zoomed out.
    mymap = folium.Map(
        location=[39.60688671643651, -93.46949406298138],
        zoom_start=3,
        tiles=None,
    )
    folium.TileLayer('CartoDB positron', name="Light Map", control=False).add_to(mymap)

    # Serialise the shapes keyed by ``index_col`` so ``feature.id`` can be
    # matched against the first entry of ``columns`` below.
    geo = gpd.GeoSeries(df.set_index(index_col)['geometry']).to_json()

    folium.Choropleth(
        geo_data=geo,
        name='Choropleth',
        data=df,
        columns=[index_col, viz_col],
        key_on="feature.id",
        fill_color='YlGnBu',
        fill_opacity=0.6,
        line_opacity=1,
        legend_name=viz_col if legend_name is None else legend_name,
        smooth_factor=0,
    ).add_to(mymap)
    return mymap
    
        
    

### Population by State

In [None]:
# Choropleth of the 2020 population estimate, keyed by state code.
create_map(df=popu_df, viz_col='popu_estimate_2020', index_col='State')

In [None]:
viz_cols = ['Overall Homeless', 'Sheltered Total Homeless', 'Unsheltered Homeless']

# 2020 homelessness figures joined to the population table by state.
state_norm = pd.merge(
    state_df_2020[['State', *viz_cols, 'Year']],
    popu_df,
    on='State',
)

# Cast all measures to int32 up front, then express each one as a fraction of
# the state's 2020 population estimate in a matching '<measure> Norm' column.
state_norm = state_norm.astype({name: 'int32' for name in viz_cols})
for col in viz_cols:
    state_norm[col + ' Norm'] = state_norm[col].div(state_norm['popu_estimate_2020'])
    

In [None]:
# Preview the first five rows of the normalised table.
state_norm.head(n=5)

### Overall Homeless Normalized by Population

In [None]:
# Choropleth of overall homelessness as a fraction of 2020 population.
create_map(df=state_norm, viz_col='Overall Homeless Norm', index_col='State')

### Sheltered Total Homeless Normalized by Population

In [None]:
# Choropleth of sheltered homelessness as a fraction of 2020 population.
create_map(df=state_norm, viz_col='Sheltered Total Homeless Norm', index_col='State')

### Unsheltered Homeless Normalized by Population

In [None]:
# Choropleth of unsheltered homelessness as a fraction of 2020 population.
create_map(df=state_norm, viz_col='Unsheltered Homeless Norm', index_col='State')

### **Top 10 states with highest/lowest homelessness rate (normalized by population)**

#### Top 10 states with highest homelessness rate

In [None]:
# Ten states with the largest overall homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Overall Homeless Norm', ascending=False)
    .loc[:, ['geographic_area', 'Overall Homeless Norm', 'Overall Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

In [None]:
# Ten states with the largest sheltered homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Sheltered Total Homeless Norm', ascending=False)
    .loc[:, ['geographic_area', 'Sheltered Total Homeless Norm', 'Sheltered Total Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

In [None]:
# Ten states with the largest unsheltered homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Unsheltered Homeless Norm', ascending=False)
    .loc[:, ['geographic_area', 'Unsheltered Homeless Norm', 'Unsheltered Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

#### Top 10 states with lowest homelessness rate

In [None]:
# Ten states with the smallest overall homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Overall Homeless Norm', ascending=True)
    .loc[:, ['geographic_area', 'Overall Homeless Norm', 'Overall Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

In [None]:
# Ten states with the smallest sheltered homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Sheltered Total Homeless Norm', ascending=True)
    .loc[:, ['geographic_area', 'Sheltered Total Homeless Norm', 'Sheltered Total Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

In [None]:
# Ten states with the smallest unsheltered homelessness share of population,
# shown next to the raw count and the population estimate.
(
    state_norm
    .sort_values(by='Unsheltered Homeless Norm', ascending=True)
    .loc[:, ['geographic_area', 'Unsheltered Homeless Norm', 'Unsheltered Homeless', 'popu_estimate_2020']]
    .reset_index(drop=True)
    .head(10)
)

In [None]:
# Rank every state on each normalised measure (1 = largest share).
# The frame is sorted descending by the '<measure> Norm' column and the numbers
# 1..N are written back through the sorted index labels, so each row receives
# its position in that ordering.
for measure in ('Unsheltered Homeless', 'Sheltered Total Homeless', 'Overall Homeless'):
    state_norm.loc[
        state_norm.sort_values(by=f'{measure} Norm', ascending=False).index,
        f'{measure} Rank',
    ] = list(range(1, len(state_norm) + 1))

In [None]:
# The three rank columns side by side for every state.
state_norm[[
    'geographic_area',
    "Overall Homeless Rank",
    "Unsheltered Homeless Rank",
    "Sheltered Total Homeless Rank",
]]

## Scatterplots: Overall Homeless Rank vs Unsheltered and Sheltered Total Homeless Rank


In [None]:
from matplotlib import pyplot as plt
# Overall rank against unsheltered rank, one labelled point per state.
ax = state_norm.plot.scatter(
    x='Overall Homeless Rank',
    y='Unsheltered Homeless Rank',
    figsize=(10, 10),
    s=20,
    alpha=0.6,
)
# Write each state code 0.05 to the right of its marker.
for code, x_rank, y_rank in zip(
    state_norm['State'],
    state_norm['Overall Homeless Rank'],
    state_norm['Unsheltered Homeless Rank'],
):
    ax.annotate(code, (x_rank + 0.05, y_rank))
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Overall rank against sheltered rank, one labelled point per state.
ax = state_norm.plot.scatter(
    x='Overall Homeless Rank',
    y='Sheltered Total Homeless Rank',
    figsize=(10, 10),
    alpha=0.6,
    s=25,
)
# Write each state code 0.05 to the right of its marker.
for code, x_rank, y_rank in zip(
    state_norm['State'],
    state_norm['Overall Homeless Rank'],
    state_norm['Sheltered Total Homeless Rank'],
):
    ax.annotate(code, (x_rank + 0.05, y_rank))
plt.show()

In [None]:
from matplotlib import pyplot as plt
# Overall rank against unsheltered rank, with marker size taken from the
# 'Sheltered Total Homeless' column.
ax = state_norm.plot.scatter(
    x='Overall Homeless Rank',
    y='Unsheltered Homeless Rank',
    figsize=(25, 12),
    s='Sheltered Total Homeless',
    alpha=0.3,
    label="Sheltered Homelessness",
)
# Write each state code directly at its marker position.
for code, x_rank, y_rank in zip(
    state_norm['State'],
    state_norm['Overall Homeless Rank'],
    state_norm['Unsheltered Homeless Rank'],
):
    ax.annotate(code, (x_rank, y_rank))
plt.show()