In [1]:
# Import the modules
import pandas as pd
from pathlib import Path

In [2]:
# Locate the two input CSV files: state-level homelessness policy data and
# homeless population counts by state (paths are relative to the notebook).
policy_data_to_load = Path("2021-HNH-State-Crim-Supplement_Edited.csv")
population_data_to_load = Path("Homeless Population by State.csv")

# Read the policy and population CSV files into pandas DataFrames
homeless_laws = pd.read_csv(policy_data_to_load)
homeless_population = pd.read_csv(population_data_to_load)

# Combine both frames on the shared "State" column. The outer join keeps a
# state that appears in only one file; its columns from the other file are NaN.
homeless_merged = pd.merge(homeless_laws, homeless_population, how="outer", on="State")
homeless_merged.head()

Unnamed: 0,State,Sleeping in public state- wide,Sleeping in particular public places,Camping in public state- wide,Camping in particular public places,Sitting/lying in particular public places,"Lodging, living, or sleeping in vehicles (or parking a vehicle used as a lodging / living accom- modation)",Loitering / Loafing / Vagrancy state-wide,Loitering / Loafing in particular public places,Standing in Roadway,...,Panhandling in particular public places,Panhandling in particular ways,Restrictiveness Score,Total,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,False,False,False,False,False,False,True,True,True,...,True,True,6,3752,7.4,539,1270,2482,169,308
1,Alaska,False,False,False,True,False,False,False,False,False,...,False,False,1,2320,31.7,576,560,1760,186,134
2,Arizona,False,False,False,True,False,False,True,True,True,...,True,True,6,13553,18.6,2476,2846,10707,917,857
3,Arkansas,False,False,False,False,False,False,True,True,True,...,True,False,5,2459,8.1,376,328,2131,251,92
4,California,False,False,True,True,True,False,False,True,True,...,False,False,5,171521,43.7,57760,25538,145983,9590,10395


In [3]:
# Give the long source headers shorter, reader-friendly labels.
# With rename's default settings, a key that is not a column is skipped.
homeless_merged = homeless_merged.rename(
    columns={
        "Sleeping in public state- wide": "Sleep in public statewide",
        "Sleeping in particular public places": "Sleep in specific public places",
        "Camping in public state- wide": "Camp in public statewide",
        "Camping in particular public places": "Camp in specific public places",
        "Sitting/lying in particular public places": "Lying in specific public places",
        "Lodging, living, or sleeping in vehicles (or parking a vehicle used as a lodging / living accom- modation)": "Sleep/live in vehicle",
        "Loitering / Loafing / Vagrancy state-wide": "Loitering statewide",
        "Loitering / Loafing in particular public places": "Loiter in specific public places",
        "Standing in Roadway": "Standing in road",
        "Panhandling in public places state- wide": "Panhandling statewide",
        "Panhandling in particular public places": "Panhandling in specific public places",
        "Total": "TotalPopulation",
    }
)

# Preview the first five rows under the new column labels
homeless_merged.head()

Unnamed: 0,State,Sleep in public statewide,Sleep in specific public places,Camp in public statewide,Camp in specific public places,Lying in specific public places,Sleep/live in vehicle,Loitering statewide,Loiter in specific public places,Standing in road,...,Panhandling in specific public places,Panhandling in particular ways,Restrictiveness Score,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,False,False,False,False,False,False,True,True,True,...,True,True,6,3752,7.4,539,1270,2482,169,308
1,Alaska,False,False,False,True,False,False,False,False,False,...,False,False,1,2320,31.7,576,560,1760,186,134
2,Arizona,False,False,False,True,False,False,True,True,True,...,True,True,6,13553,18.6,2476,2846,10707,917,857
3,Arkansas,False,False,False,False,False,False,True,True,True,...,True,False,5,2459,8.1,376,328,2131,251,92
4,California,False,False,True,True,True,False,False,True,True,...,False,False,5,171521,43.7,57760,25538,145983,9590,10395


In [4]:
# Add ChronicPer10k: the per-10,000-residents rate multiplied by the
# chronically homeless count, then divided by TotalPopulation (the column
# renamed from 'Total'), giving a chronic-homelessness rate per 10,000 residents.
homeless_merged['ChronicPer10k'] = (
    homeless_merged['Per10kResidents']
    .mul(homeless_merged['ChronicallyHomeless'])
    .div(homeless_merged['TotalPopulation'])
)
homeless_merged.head()

Unnamed: 0,State,Sleep in public statewide,Sleep in specific public places,Camp in public statewide,Camp in specific public places,Lying in specific public places,Sleep/live in vehicle,Loitering statewide,Loiter in specific public places,Standing in road,...,Panhandling in particular ways,Restrictiveness Score,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans,ChronicPer10k
0,Alabama,False,False,False,False,False,False,True,True,True,...,True,6,3752,7.4,539,1270,2482,169,308,1.06306
1,Alaska,False,False,False,True,False,False,False,False,False,...,False,1,2320,31.7,576,560,1760,186,134,7.870345
2,Arizona,False,False,False,True,False,False,True,True,True,...,True,6,13553,18.6,2476,2846,10707,917,857,3.398037
3,Arkansas,False,False,False,False,False,False,True,True,True,...,False,5,2459,8.1,376,328,2131,251,92,1.238552
4,California,False,False,True,True,True,False,False,True,True,...,False,5,171521,43.7,57760,25538,145983,9590,10395,14.716052


In [5]:
# Keep only the columns needed for the statistical hypothesis tests:
# the state, its restrictiveness score, and the homelessness measures.
homeless_reduced = homeless_merged[
    [
        'State',
        'Restrictiveness Score',
        'TotalPopulation',
        'Per10kResidents',
        'ChronicallyHomeless',
        'ChronicPer10k',
    ]
]
homeless_reduced.head(10)

Unnamed: 0,State,Restrictiveness Score,TotalPopulation,Per10kResidents,ChronicallyHomeless,ChronicPer10k
0,Alabama,6,3752,7.4,539,1.06306
1,Alaska,1,2320,31.7,576,7.870345
2,Arizona,6,13553,18.6,2476,3.398037
3,Arkansas,5,2459,8.1,376,1.238552
4,California,5,171521,43.7,57760,14.716052
5,Colorado,2,10397,17.9,3466,5.967241
6,Connecticut,3,2930,8.1,117,0.323447
7,Delaware,4,2369,23.6,196,1.952554
8,District of Columbia,2,4410,65.8,1257,18.755238
9,Florida,4,25959,11.9,4233,1.940472
