In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'notebook' plotting context for the figures drawn below.
sns.set_context('notebook')
# Metapack's Jupyter initialisation hook; what it sets up is not visible in
# this notebook -- see the metapack documentation.
mp.jupyter.init()



In [2]:
# Open the Metapack *source* package for this notebook. The commented-out line
# is the alternative call kept for reference (presumably it opens the built
# package instead -- confirm against the metapack documentation).
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
# Bare last expression: show the package object as the cell output.
pkg

In [3]:
# Load the package reference named 'homeless_survey_source' as a dataframe and
# preview its first rows.
df = pkg.reference('homeless_survey_source').dataframe()
df.head()

Unnamed: 0.1,Unnamed: 0,Survey_Year,Birth_Year,Age,Gender,Ethnicity,Race_Full,Race_Recode,Veteran,Chronic_Time,...,Physical_Disability,Mental_Illness,Alcohol_Abuse,Drug_Abuse,Drug_Alcohol_History,HIV_Positive,Part_Time,Full_Time,Unemployed_Looking,Unemployed_Not_Looking
0,1,Unsheltered 2011,1993.0,18.0,Female,European American,White,European American,0,0,...,0,0,0.0,0.0,1,0,0.0,0.0,1.0,0.0
1,2,Unsheltered 2011,1964.0,46.0,Female,African American,Black-African-American,African American,0,0,...,0,1,0.0,0.0,0,0,0.0,0.0,1.0,0.0
2,3,Unsheltered 2011,1956.0,55.0,Male,European American,White,European American,0,1,...,1,0,0.0,0.0,0,0,0.0,0.0,1.0,0.0
3,4,Unsheltered 2011,1960.0,50.0,Male,European American,White,European American,1,1,...,0,0,1.0,0.0,1,0,0.0,0.0,1.0,0.0
4,5,Unsheltered 2011,1979.0,31.0,Male,Latino,BLANK,Unknown,0,0,...,0,0,0.0,0.0,0,0,0.0,0.0,1.0,0.0


In [4]:
# Collapse the free-text Gender values into single-letter sex codes. Values not
# listed in the mapping are left unchanged by Series.replace.
df['sex'] = df.Gender.replace({
    'Male': 'M',
    'Female': 'F',
    'Unknown': 'U',
    'Transgender': 'U'
})

# Remove leading/trailing whitespace so the keyword tests below see clean text.
df['Race_Full'] = df.Race_Full.str.strip()

# Bound ``apply`` of the stringified race column. Missing values become the
# literal text 'nan', which matches none of the keywords used below.
s = df.Race_Full.astype(str).apply


def _race_flag(*keywords, ignore=()):
    """Build a function that maps a race description to 1 or 0.

    The returned function lower-cases its argument, blanks out every substring
    listed in ``ignore``, and returns 1 when any of ``keywords`` occurs in what
    is left, otherwise 0.
    """
    def flag(v):
        text = v.lower()
        for masked in ignore:
            text = text.replace(masked, ' ')
        return 1 if any(keyword in text for keyword in keywords) else 0
    return flag


df['black'] = s(_race_flag('black', 'african'))
# 'causian' is not a substring of 'caucasian', so the correct spelling is
# tested too; the old spelling stays so anything it matched still matches.
df['white'] = s(_race_flag('european', 'white', 'caucasian', 'causian'))
# 'caucasian' contains the substring 'asian'; mask it before testing so that a
# Caucasian value is not also flagged as Asian.
df['asian'] = s(_race_flag('asian', ignore=('caucasian',)))
df['aian'] = s(_race_flag('indian', 'alaska'))
df['nhopi'] = s(_race_flag('hawaii', 'pacific'))
df['hisp'] = s(_race_flag('hisp', 'latin'))
# Despite its name, 'other' is the number of category flags set on the row;
# raceeth() treats a count above 1 as a multi-category row.
df['other'] = (df.black + df.white + df.asian + df.aian + df.hisp + df.nhopi)

def raceeth(r):
    """Return the combined race/ethnicity label for one row.

    ``r`` must expose the attributes ``hisp``, ``Ethnicity``, ``other``,
    ``black``, ``aian``, ``nhopi``, ``asian`` and ``white``.

    Precedence, first match wins:
      1. ``hisp`` is truthy or ``Ethnicity`` equals 'Latino' -> 'hisp'
      2. ``other`` is greater than 1                         -> 'other'
      3. the first truthy flag among black, aian, nhopi, asian, white
         -> that flag's label ('white' is reported as 'nhwhite')
      4. nothing matched                                     -> 'other'
    """
    if r.hisp or r.Ethnicity == 'Latino':
        return 'hisp'

    if r.other > 1:
        return 'other'

    # (attribute name, returned label), listed in priority order.
    single_flags = (
        ('black', 'black'),
        ('aian', 'aian'),
        ('nhopi', 'nhopi'),
        ('asian', 'asian'),
        ('white', 'nhwhite'),
    )
    for attr, label in single_flags:
        if getattr(r, attr):
            return label

    return 'other'

# Label every row with raceeth(), then discard the intermediate indicator
# columns that existed only to feed it.
df['raceeth'] = df.apply(raceeth, axis=1)
df.drop(
    columns=['black', 'white', 'asian', 'aian', 'nhopi', 'hisp', 'other'],
    inplace=True,
)

from geoid.acs import Tract


def _tract_geoid(tract):
    """Return the ACS tract geoid string for a Census_Tract value, or None.

    Falsy values and NaN yield None. Anything else is truncated to int and
    combined with state 6 / county 37 into a ``Tract`` geoid.
    """
    if not tract or np.isnan(tract):
        return None
    return str(Tract(6, 37, int(tract)))


def _survey_token(value, position):
    """Return the lower-cased whitespace-separated token of ``value`` at ``position``."""
    return value.strip().split()[position].lower()


df['geoid'] = df.Census_Tract.apply(_tract_geoid)

# Survey_Year holds text such as 'Unsheltered 2011': token 0 is the survey
# type and token 1 is the year, which is kept as a string.
df['survey_type'] = df.Survey_Year.apply(_survey_token, args=(0,))
df['year'] = df.Survey_Year.apply(_survey_token, args=(1,))

# Output columns, in display order.
cols = [
    # identification / survey
    'geoid', 'Survey_Year', 'survey_type', 'year',
    # demographics
    'Birth_Year', 'Age', 'Gender', 'sex',
    'Ethnicity', 'Race_Full', 'Race_Recode', 'raceeth',
    # status and homelessness history
    'Veteran', 'Chronic_Time', 'Chronic_Condition', 'Chronic',
    'Adult_With_Child', 'Times_Homeless_3yrs', 'Times_Homeless_Past_Year',
    'Current_Stint_Duration',
    # location
    'SPA', 'Census_Tract',
    # health and abuse indicators
    'Physical_Sexual_Abuse', 'Physical_Disability', 'Mental_Illness',
    'Alcohol_Abuse', 'Drug_Abuse', 'Drug_Alcohol_History', 'HIV_Positive',
    # employment
    'Part_Time', 'Full_Time', 'Unemployed_Looking', 'Unemployed_Not_Looking',
]

df = df[cols]

df.head()

Unnamed: 0,geoid,Survey_Year,survey_type,year,Birth_Year,Age,Gender,sex,Ethnicity,Race_Full,...,Physical_Disability,Mental_Illness,Alcohol_Abuse,Drug_Abuse,Drug_Alcohol_History,HIV_Positive,Part_Time,Full_Time,Unemployed_Looking,Unemployed_Not_Looking
0,,Unsheltered 2011,unsheltered,2011,1993.0,18.0,Female,F,European American,White,...,0,0,0.0,0.0,1,0,0.0,0.0,1.0,0.0
1,,Unsheltered 2011,unsheltered,2011,1964.0,46.0,Female,F,African American,Black-African-American,...,0,1,0.0,0.0,0,0,0.0,0.0,1.0,0.0
2,,Unsheltered 2011,unsheltered,2011,1956.0,55.0,Male,M,European American,White,...,1,0,0.0,0.0,0,0,0.0,0.0,1.0,0.0
3,,Unsheltered 2011,unsheltered,2011,1960.0,50.0,Male,M,European American,White,...,0,0,1.0,0.0,1,0,0.0,0.0,1.0,0.0
4,,Unsheltered 2011,unsheltered,2011,1979.0,31.0,Male,M,Latino,BLANK,...,0,0,0.0,0.0,0,0,0.0,0.0,1.0,0.0


In [5]:
# Count rows per tract geoid, most frequent first. Rows with a missing geoid
# are left out because value_counts drops NA values by default.
df.geoid.value_counts()

14000US06037206300    4278
14000US06037900806    3252
14000US06037232700    1348
14000US06037231100    1221
14000US06037400402    1163
14000US06037408800     889
14000US06037532304     868
14000US06037206200     820
14000US06037267403     805
14000US06037601302     731
14000US06037121802     710
14000US06037530500     698
14000US06037701100     683
14000US06037241110     669
14000US06037900607     646
14000US06037226002     574
14000US06037191410     495
14000US06037208903     456
14000US06037207301     444
14000US06037232400     427
14000US06037192700     422
14000US06037237300     366
14000US06037502100     329
14000US06037207300     325
14000US06037104320     310
14000US06037190901     295
14000US06037601401     274
14000US06037191000     272
14000US06037701902     252
14000US06037920314     246
                      ... 
14000US06037183401       1
14000US06037500100       1
14000US06037219700       1
14000US06037503106       1
14000US06037111206       1
14000US06037400900       1
1