In [2]:
# Import the modules
import pandas as pd
from pathlib import Path

In [3]:
# Load the two source CSV files: the sheltered homeless population and the
# homeless population by state
sheltered_data_to_load = Path("Sheltered Homeless Population.csv")
population_data_to_load = Path("Homeless Population by State.csv")

# Read both CSV files and store them in pandas DataFrames
homeless_sheltered = pd.read_csv(sheltered_data_to_load)
homeless_population = pd.read_csv(population_data_to_load)

# Combine the two DataFrames into a single set on the shared "State" column.
# The outer join keeps states that appear in only one of the files; the
# columns coming from the other file are filled with NaN for those rows.
homeless_merged = pd.merge(homeless_sheltered, homeless_population, how="outer", on="State")
homeless_merged.head()

Unnamed: 0,State,sheltered homeless population,percentage of homeless population that is sheltered,Total,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,2172.0,59.90%,3752.0,7.4,539.0,1270.0,2482.0,169.0,308.0
1,Alaska,1963.0,84.60%,2320.0,31.7,576.0,560.0,1760.0,186.0,134.0
2,Arizona,5526.0,40.80%,13553.0,18.6,2476.0,2846.0,10707.0,917.0,857.0
3,Arkansas,1163.0,47.30%,2459.0,8.1,376.0,328.0,2131.0,251.0,92.0
4,California,56030.0,32.70%,171521.0,43.7,57760.0,25538.0,145983.0,9590.0,10395.0


In [5]:
# Give two columns shorter, clearer labels; every other column keeps its name

homeless_merged = homeless_merged.rename(
    {
        "percentage of homeless population that is sheltered": "percent population sheltered",
        "Total": "TotalPopulation",
    },
    axis="columns",
)

# Show the first five rows to confirm the new column labels
homeless_merged.head()

Unnamed: 0,State,sheltered homeless population,percent population sheltered,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans
0,Alabama,2172.0,59.90%,3752.0,7.4,539.0,1270.0,2482.0,169.0,308.0
1,Alaska,1963.0,84.60%,2320.0,31.7,576.0,560.0,1760.0,186.0,134.0
2,Arizona,5526.0,40.80%,13553.0,18.6,2476.0,2846.0,10707.0,917.0,857.0
3,Arkansas,1163.0,47.30%,2459.0,8.1,376.0,328.0,2131.0,251.0,92.0
4,California,56030.0,32.70%,171521.0,43.7,57760.0,25538.0,145983.0,9590.0,10395.0


In [6]:
# Add a ChronicPer10k column: chronically homeless population per 10,000 residents.
# Computed as Per10kResidents * ChronicallyHomeless / TotalPopulation, i.e. the
# per-10k figure scaled by the chronically homeless share of TotalPopulation.

homeless_merged["ChronicPer10k"] = (
    homeless_merged["Per10kResidents"]
    .mul(homeless_merged["ChronicallyHomeless"])
    .div(homeless_merged["TotalPopulation"])
)
homeless_merged.head()

Unnamed: 0,State,sheltered homeless population,percent population sheltered,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans,ChronicPer10k
0,Alabama,2172.0,59.90%,3752.0,7.4,539.0,1270.0,2482.0,169.0,308.0,1.06306
1,Alaska,1963.0,84.60%,2320.0,31.7,576.0,560.0,1760.0,186.0,134.0,7.870345
2,Arizona,5526.0,40.80%,13553.0,18.6,2476.0,2846.0,10707.0,917.0,857.0,3.398037
3,Arkansas,1163.0,47.30%,2459.0,8.1,376.0,328.0,2131.0,251.0,92.0,1.238552
4,California,56030.0,32.70%,171521.0,43.7,57760.0,25538.0,145983.0,9590.0,10395.0,14.716052


In [8]:
# Add a ShelteredPer10k column: sheltered homeless population per 10,000 residents.
# Computed as Per10kResidents * sheltered homeless population / TotalPopulation,
# i.e. the per-10k figure scaled by the sheltered share of TotalPopulation.

homeless_merged["ShelteredPer10k"] = (
    homeless_merged["Per10kResidents"]
    .mul(homeless_merged["sheltered homeless population"])
    .div(homeless_merged["TotalPopulation"])
)
homeless_merged.head()

Unnamed: 0,State,sheltered homeless population,percent population sheltered,TotalPopulation,Per10kResidents,ChronicallyHomeless,PeopleInFamilyUnits,SoloIndividuals,UnaccompaniedYouths,Veterans,ChronicPer10k,ShelteredPer10k
0,Alabama,2172.0,59.90%,3752.0,7.4,539.0,1270.0,2482.0,169.0,308.0,1.06306,4.283795
1,Alaska,1963.0,84.60%,2320.0,31.7,576.0,560.0,1760.0,186.0,134.0,7.870345,26.822026
2,Arizona,5526.0,40.80%,13553.0,18.6,2476.0,2846.0,10707.0,917.0,857.0,3.398037,7.583826
3,Arkansas,1163.0,47.30%,2459.0,8.1,376.0,328.0,2131.0,251.0,92.0,1.238552,3.830948
4,California,56030.0,32.70%,171521.0,43.7,57760.0,25538.0,145983.0,9590.0,10395.0,14.716052,14.275284


In [9]:
# Keep only the columns needed for the statistical hypothesis testing,
# selecting every row and the listed columns with .loc

homeless_reduced = homeless_merged.loc[
    :,
    [
        "State",
        "sheltered homeless population",
        "percent population sheltered",
        "TotalPopulation",
        "Per10kResidents",
        "ChronicallyHomeless",
        "ChronicPer10k",
        "ShelteredPer10k",
    ],
]
homeless_reduced.head(10)

Unnamed: 0,State,sheltered homeless population,percent population sheltered,TotalPopulation,Per10kResidents,ChronicallyHomeless,ChronicPer10k,ShelteredPer10k
0,Alabama,2172.0,59.90%,3752.0,7.4,539.0,1.06306,4.283795
1,Alaska,1963.0,84.60%,2320.0,31.7,576.0,7.870345,26.822026
2,Arizona,5526.0,40.80%,13553.0,18.6,2476.0,3.398037,7.583826
3,Arkansas,1163.0,47.30%,2459.0,8.1,376.0,1.238552,3.830948
4,California,56030.0,32.70%,171521.0,43.7,57760.0,14.716052,14.275284
5,Colorado,7241.0,69.60%,10397.0,17.9,3466.0,5.967241,12.466471
6,Connecticut,2636.0,90.00%,2930.0,8.1,117.0,0.323447,7.287235
7,Delaware,2215.0,93.50%,2369.0,23.6,196.0,1.952554,22.065851
8,District of Columbia,3720.0,84.40%,4410.0,65.8,1257.0,18.755238,55.504762
9,Florida,14213.0,54.80%,25959.0,11.9,4233.0,1.940472,6.515455
