In [1]:
# Import the modules
import pandas as pd
from pathlib import Path

In [2]:
# Locate the two source CSV files: the sheltered homeless counts
# (2021 AHAR Part 1) and the homeless population figures by state
sheltered_data_to_load = Path("2021-AHAR-Part-1_HomelessSheltered.csv")
population_data_to_load = Path("Homeless Population by State.csv")

# Read the sheltered and population CSV files into separate pandas DataFrames
homeless_sheltered = pd.read_csv(sheltered_data_to_load)
homeless_population = pd.read_csv(population_data_to_load)

# Combine the two DataFrames on their shared "State" column. The outer join keeps
# states that appear in only one of the files; their missing values become NaN.
homeless_merged = pd.merge(homeless_sheltered, homeless_population, how="outer", on="State")
homeless_merged.head()

Unnamed: 0,State,Sheltered Homeless Population,Total,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,1626,3752,7.4,539,1270,2482,169,308
1,Alaska,1797,2320,31.7,576,560,1760,186,134
2,Arizona,5460,13553,18.6,2476,2846,10707,917,857
3,Arkansas,1470,2459,8.1,376,328,2131,251,92
4,California,51429,171521,43.7,57760,25538,145983,9590,10395


In [3]:
# Give the 'Total' column a more descriptive name for readers

column_renames = {'Total': 'TotalPopulation'}
homeless_merged = homeless_merged.rename(columns=column_renames)

# Show the first five rows with the renamed column
homeless_merged.head()

Unnamed: 0,State,Sheltered Homeless Population,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,1626,3752,7.4,539,1270,2482,169,308
1,Alaska,1797,2320,31.7,576,560,1760,186,134
2,Arizona,5460,13553,18.6,2476,2846,10707,917,857
3,Arkansas,1470,2459,8.1,376,328,2131,251,92
4,California,51429,171521,43.7,57760,25538,145983,9590,10395


In [4]:
# Add a ChronicPer10k column: chronically homeless population per 10,000 residents,
# computed as Per10kResidents * ChronicallyHomeless / TotalPopulation

homeless_merged['ChronicPer10k'] = (
    homeless_merged['Per10kResidents']
    .mul(homeless_merged['ChronicallyHomeless'])
    .div(homeless_merged['TotalPopulation'])
)
homeless_merged.head()

Unnamed: 0,State,Sheltered Homeless Population,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans,ChronicPer10k
0,Alabama,1626,3752,7.4,539,1270,2482,169,308,1.06306
1,Alaska,1797,2320,31.7,576,560,1760,186,134,7.870345
2,Arizona,5460,13553,18.6,2476,2846,10707,917,857,3.398037
3,Arkansas,1470,2459,8.1,376,328,2131,251,92,1.238552
4,California,51429,171521,43.7,57760,25538,145983,9590,10395,14.716052


In [5]:
# Add a ShelteredPer10k column: sheltered homeless population per 10,000 residents,
# computed as Per10kResidents * Sheltered Homeless Population / TotalPopulation
# NOTE(review): the sheltered counts and the totals come from different source files;
# the District of Columbia row has more sheltered (4430) than total (4410) people,
# so its ShelteredPer10k exceeds Per10kResidents - confirm both files cover the same period.

homeless_merged['ShelteredPer10k'] = (
    homeless_merged['Per10kResidents']
    .mul(homeless_merged['Sheltered Homeless Population'])
    .div(homeless_merged['TotalPopulation'])
)
homeless_merged.head()

Unnamed: 0,State,Sheltered Homeless Population,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans,ChronicPer10k,ShelteredPer10k
0,Alabama,1626,3752,7.4,539,1270,2482,169,308,1.06306,3.20693
1,Alaska,1797,2320,31.7,576,560,1760,186,134,7.870345,24.553836
2,Arizona,5460,13553,18.6,2476,2846,10707,917,857,3.398037,7.493249
3,Arkansas,1470,2459,8.1,376,328,2131,251,92,1.238552,4.842212
4,California,51429,171521,43.7,57760,25538,145983,9590,10395,14.716052,13.103045


In [6]:
# Keep only the columns needed for the statistical hypothesis testing

columns_for_testing = [
    'State',
    'Sheltered Homeless Population',
    'TotalPopulation',
    'Per10kResidents',
    'ChronicallyHomeless',
    'ChronicPer10k',
    'ShelteredPer10k',
]
homeless_reduced = homeless_merged.loc[:, columns_for_testing]
homeless_reduced.head(10)

Unnamed: 0,State,Sheltered Homeless Population,TotalPopulation,Per10kResidents,ChronicallyHomeless,ChronicPer10k,ShelteredPer10k
0,Alabama,1626,3752,7.4,539,1.06306,3.20693
1,Alaska,1797,2320,31.7,576,7.870345,24.553836
2,Arizona,5460,13553,18.6,2476,3.398037,7.493249
3,Arkansas,1470,2459,8.1,376,1.238552,4.842212
4,California,51429,171521,43.7,57760,14.716052,13.103045
5,Colorado,8016,10397,17.9,3466,5.967241,13.80075
6,Connecticut,2165,2930,8.1,117,0.323447,5.985154
7,Delaware,1579,2369,23.6,196,1.952554,15.730013
8,District of Columbia,4430,4410,65.8,1257,18.755238,66.098413
9,Florida,13393,25959,11.9,4233,1.940472,6.139555
