In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
import os
import json
import time
from scipy.stats import linregress
from pprint import pprint
import seaborn as sns
# Census API Key
from config import api_key

In [2]:
#census_data = c.acs5.get().json()
query_url = f"https://api.census.gov/data/2019/acs/acs5"
display(query_url)
#https://api.census.gov/data#/2019/acs/acs5&{api_key}#?get=NAME,group(B01001)&for=us:1&key={api_key}

'https://api.census.gov/data/2019/acs/acs5'

In [3]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

cols = ["Zipcode", "Household Income", "Population",
        "Median Contract Rent", "Median Gross Rent", "Median Home Value",
        "Median Monthly Owner Costs", "Year"]


years = [2014, 2019]
#years = [2015, 2015, 2016, 2017, 2018, 2019]
#years = [2019]
for year in years:

    c = Census(api_key, year=year)
    census_data = c.acs5.get(("NAME", "B19013_001E", 
                              "B01003_001E",
                              "B25058_001E",
                              "B25064_001E", 
                              "B25077_001E", 
                              "B25088_002E"), {'for': 'zip code tabulation area:*'})

    # Convert to DataFrame
    census_pd = pd.DataFrame(census_data)

    # Column Reordering
    census_pd = census_pd.rename(columns={"B19013_001E": f"Household Income_{year}",
                                          "B01003_001E": f"Population_{year}",
                                          "B25058_001E": f"Median Contract Rent_{year}",
                                          "B25064_001E": f"Median Gross Rent_{year}",
                                          "B25077_001E": f"Median Home Value_{year}",
                                          "B25088_002E": f"Median Monthly Owner Costs_{year}",
                                          "NAME": "Name",
                                          "zip code tabulation area": "Zipcode"})
    
    census_pd = census_pd.drop(columns=['Name'])
    # Final DataFrame
    
    
    if year==2014:
        df = census_pd
    if year==2019:
        df = df.merge(census_pd, how='inner', on=['Zipcode', 'state'])

# Visualize
#print(len(census_pd))
display(df.head())

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,Median Contract Rent_2019,Median Gross Rent_2019,Median Home Value_2019,Median Monthly Owner Costs_2019
0,10833.0,18088.0,282.0,367.0,105400,722.0,72,601,14361.0,17113,292.0,383.0,83900,771.0
1,16353.0,40859.0,329.0,411.0,91200,843.0,72,602,16807.0,37751,293.0,400.0,85300,877.0
2,16323.0,53162.0,267.0,369.0,128700,841.0,72,603,16049.0,47081,328.0,433.0,118400,832.0
3,14138.0,6415.0,234.0,326.0,105800,569.0,72,606,12119.0,6392,196.0,275.0,80800,526.0
4,17265.0,28805.0,324.0,419.0,113700,752.0,72,610,19898.0,26686,338.0,427.0,87600,751.0


In [4]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
df.to_csv("census_data.csv", encoding="utf-8", index=False)

In [5]:
!ls

__pycache__
api_keys.py
aprils_copy_for_plotting.ipynb
census_data.csv
census_data_2014_2019Years.csv
census_data_2014Years.csv
census_data_2017Years.csv
census_data_2019Years.csv
Census_Demo.ipynb
Census_Demo_Barcharts.ipynb
Census_Demo-Copy1.ipynb
config.py
files
Full_Census_Merge.csv
Full_Census_Merge_Metrics.csv
MSa_Agg.csv
MSA_bottom_growth.csv
MSA_least_affordable.csv
MSA_top_growth_atl.csv
msa_top_growth_zip.csv
png1_test.png
README.md
test_mary.txt
Untitled.ipynb
untitled.txt


In [6]:
#create DataFrame for MSA
#https://www.roelpeters.be/solved-dtypewarning-columns-have-mixed-types-specify-dtype-option-on-import-or-set-low-memory-in-pandas/
msa_df = pd.read_csv('../project_1/files/ScanUSZipCode2017A.csv',low_memory=False)
msa_df

Unnamed: 0,ZIP,MA,MANAME
0,400,35620.0,New York-Newark-Jersey City NY-NJ-PA
1,501,35620.0,New York-Newark-Jersey City NY-NJ-PA
2,544,35620.0,New York-Newark-Jersey City NY-NJ-PA
3,1001,44140.0,Springfield MA
4,1002,44140.0,Springfield MA
...,...,...,...
40932,99926,,
40933,99927,,
40934,99928,,
40935,99929,,


In [7]:
#Comparing Data Types between the two diffrent dataframes to make certain they match for the merge
print(msa_df.dtypes)

ZIP         int64
MA        float64
MANAME     object
dtype: object


In [8]:
print(df.dtypes) 
    

Household Income_2014              float64
Population_2014                    float64
Median Contract Rent_2014          float64
Median Gross Rent_2014             float64
Median Home Value_2014              object
Median Monthly Owner Costs_2014    float64
state                               object
Zipcode                             object
Household Income_2019              float64
Population_2019                     object
Median Contract Rent_2019          float64
Median Gross Rent_2019             float64
Median Home Value_2019              object
Median Monthly Owner Costs_2019    float64
dtype: object


In [9]:
#convert Zipcode datatype in concat_df to be int64
#https://www.kite.com/python/answers/how-to-convert-a-pandas-dataframe-column-from-object-to-int-in-python
df["Zipcode"] = df["Zipcode"].astype(object).astype(int)

In [10]:
#Rename ZIP column in MSA to match Zipcode from Census data
#https://note.nkmk.me/en/python-pandas-dataframe-rename/
msa_df.rename(columns={'ZIP': 'Zipcode'}, inplace=True)


In [11]:
#Merge data frames and drop the values in the census data with -666666666
merged_census_df = pd.merge(df, msa_df, how="left", on=["Zipcode", "Zipcode"])
merged_census_df
merged_census_df.drop(merged_census_df[merged_census_df["Household Income_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent_2014"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value_2014"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs_2014"] == -666666666].index, inplace = True) 
merged_census_df.drop(merged_census_df[merged_census_df["Household Income_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Population_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Contract Rent_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Gross Rent_2019"] == -666666666].index, inplace = True)
merged_census_df.drop(merged_census_df[merged_census_df["Median Home Value_2019"] == -666666666].index, inplace = True)       
merged_census_df.drop(merged_census_df[merged_census_df["Median Monthly Owner Costs_2019"] == -666666666].index, inplace = True)

 

In [12]:
# Save as a csv to check full data set
# Note to avoid any issues later, use encoding="utf-8"
merged_census_df.to_csv("census_data_2014_2019Years.csv", encoding="utf-8", index=False)

In [13]:
# Remove zips with no MA
merged_census_df['MA'].replace('', np.nan, inplace = True)
merged_census_df.dropna(subset=['MA'], inplace=True)
merged_census_df.head()

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,Median Contract Rent_2019,Median Gross Rent_2019,Median Home Value_2019,Median Monthly Owner Costs_2019,MA,MANAME
131,60775.0,17141.0,800.0,895.0,212900,1586.0,25,1001,63949.0,17312,1025.0,1148.0,215700,1506.0,44140.0,Springfield MA
132,55082.0,29844.0,1026.0,1117.0,341600,2094.0,25,1002,61159.0,30014,1280.0,1380.0,355500,2330.0,44140.0,Springfield MA
134,68361.0,5113.0,753.0,906.0,208000,1642.0,25,1005,67302.0,5128,831.0,965.0,236400,1555.0,49340.0,Worcester MA-CT
135,74221.0,14774.0,755.0,876.0,255800,1795.0,25,1007,91191.0,15005,909.0,975.0,276900,1884.0,44140.0,Springfield MA
138,82365.0,3667.0,615.0,683.0,270400,1693.0,25,1010,70063.0,3658,703.0,703.0,272200,1880.0,44140.0,Springfield MA


In [14]:
# Create dataframe of home ownership
msa_home_ownership = pd.read_csv('../project_1/files/Census_Home_Ownership.csv',low_memory=False)
msa_home_ownership


Unnamed: 0,MANAME,2009 H%,2010 H%,2011 H%,2012 H%,2013 H%,2014 H%,2015 H%,2016 H%,2017 H%,2018 H%,2019 H%,2020 H%
0,Inside Metropolitan Statistical Areas,65.9,65.4,64.6,63.9,63.4,62.9,62.2,61.9,62.3,62.9,63.1,65.2
1,Akron OH,74.0,74.9,67.5,65.6,70.8,69.5,77.1,76.9,70.9,70.0,66.0,69.0
2,Albany-Schenectady-Troy NY,65.9,61.3,64.1,62.2,61.2,63.7,71.1,72.8,72.4,70.6,67.9,67.5
3,Albuquerque NM,64.3,66.9,67.0,67.9,70.0,69.5,65.7,65.5,67.1,62.8,65.9,64.4
4,Allentown-Bethlehem-Easton PA-NJ,69.2,68.9,73.1,72.1,67.8,68.8,72.4,71.5,75.7,75.5,71.5,68.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,Tucson AZ,61.4,56.0,60.1,63.8,60.1,67.1,65.5,64.3,67.2,64.9,66.1,66.7
66,Tulsa OK,65.2,65.4,66.8,68.3,70.5,70.1,67.8,64.2,64.4,66.5,64.1,65.3
67,Virginia Beach-Norfolk-Newport News VA-N,59.4,59.6,65.3,62.8,63.0,65.8,63.5,61.4,62.3,62.0,63.3,64.1
68,Washington-Arlington-Alexandria DC-VA-MD,64.6,63.1,63.3,62.9,64.7,67.9,67.2,67.3,67.6,66.9,66.0,65.0


In [15]:
# Merge census home ownership with existing data
full_census_merge = pd.merge(merged_census_df, msa_home_ownership, how="left", on=["MANAME", "MANAME"])
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,2011 H%,2012 H%,2013 H%,2014 H%,2015 H%,2016 H%,2017 H%,2018 H%,2019 H%,2020 H%
0,60775.0,17141.0,800.0,895.0,212900,1586.0,25,1001,63949.0,17312,...,,,,,,,,,,
1,55082.0,29844.0,1026.0,1117.0,341600,2094.0,25,1002,61159.0,30014,...,,,,,,,,,,
2,68361.0,5113.0,753.0,906.0,208000,1642.0,25,1005,67302.0,5128,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
3,74221.0,14774.0,755.0,876.0,255800,1795.0,25,1007,91191.0,15005,...,,,,,,,,,,
4,82365.0,3667.0,615.0,683.0,270400,1693.0,25,1010,70063.0,3658,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16683,55268.0,18575.0,982.0,1097.0,187000,1883.0,02,99701,59955.0,17258,...,,,,,,,,,,
16684,76085.0,24311.0,1151.0,1377.0,208900,1902.0,02,99705,80875.0,23131,...,,,,,,,,,,
16685,73871.0,31658.0,944.0,1041.0,227600,1870.0,02,99709,87055.0,29288,...,,,,,,,,,,
16686,88517.0,12134.0,1000.0,1250.0,236400,1924.0,02,99712,88502.0,14837,...,,,,,,,,,,


In [16]:
#Filter out zips not contained in top 75 largest MSAs
full_census_merge.dropna(inplace=True)
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,2011 H%,2012 H%,2013 H%,2014 H%,2015 H%,2016 H%,2017 H%,2018 H%,2019 H%,2020 H%
2,68361.0,5113.0,753.0,906.0,208000,1642.0,25,1005,67302.0,5128,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
14,43583.0,1389.0,599.0,758.0,196800,1473.0,25,1031,38173.0,1135,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
29,80577.0,1791.0,775.0,1039.0,257100,1757.0,25,1068,88571.0,1833,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
40,37126.0,3108.0,665.0,823.0,199600,1573.0,25,1083,60323.0,3094,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
44,48679.0,1078.0,538.0,538.0,250600,1487.0,25,1092,108725.0,1278,...,64.9,63.4,62.7,65.9,64.4,64.1,65.8,61.9,63.3,62.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16538,59325.0,56326.0,900.0,1036.0,185100,1538.0,53,98682,75634.0,60887,...,61.1,59.2,60.0,62.5,64.0,63.7,63.7,63.9,60.9,59.8
16539,58874.0,31908.0,866.0,986.0,250200,1741.0,53,98683,70099.0,33219,...,61.1,59.2,60.0,62.5,64.0,63.7,63.7,63.9,60.9,59.8
16540,57216.0,27442.0,913.0,1065.0,208100,1521.0,53,98684,66781.0,29396,...,61.1,59.2,60.0,62.5,64.0,63.7,63.7,63.9,60.9,59.8
16541,79265.0,26954.0,1058.0,1249.0,257100,1856.0,53,98685,103120.0,29792,...,61.1,59.2,60.0,62.5,64.0,63.7,63.7,63.9,60.9,59.8


In [17]:
# Check curreent dataframe types
print(full_census_merge.dtypes)

Household Income_2014              float64
Population_2014                    float64
Median Contract Rent_2014          float64
Median Gross Rent_2014             float64
Median Home Value_2014              object
Median Monthly Owner Costs_2014    float64
state                               object
Zipcode                              int32
Household Income_2019              float64
Population_2019                     object
Median Contract Rent_2019          float64
Median Gross Rent_2019             float64
Median Home Value_2019              object
Median Monthly Owner Costs_2019    float64
MA                                 float64
MANAME                              object
2009 H%                            float64
2010 H%                            float64
2011 H%                            float64
2012 H%                            float64
2013 H%                            float64
2014 H%                            float64
2015 H%                            float64
2016 H%    

In [18]:
# Convert median monthly owner cost to integer
###############ERIC, the main data frame is called df.  Also, the columns are the original names is _2014 and _2019 as a suffix
full_census_merge["Median Monthly Owner Costs_2014"] = full_census_merge["Median Monthly Owner Costs_2014"].astype(object).astype(float)
full_census_merge["Median Monthly Owner Costs_2019"] = full_census_merge["Median Monthly Owner Costs_2019"].astype(object).astype(float)

In [19]:
# Confirm data type conversion
print(full_census_merge.dtypes)

Household Income_2014              float64
Population_2014                    float64
Median Contract Rent_2014          float64
Median Gross Rent_2014             float64
Median Home Value_2014              object
Median Monthly Owner Costs_2014    float64
state                               object
Zipcode                              int32
Household Income_2019              float64
Population_2019                     object
Median Contract Rent_2019          float64
Median Gross Rent_2019             float64
Median Home Value_2019              object
Median Monthly Owner Costs_2019    float64
MA                                 float64
MANAME                              object
2009 H%                            float64
2010 H%                            float64
2011 H%                            float64
2012 H%                            float64
2013 H%                            float64
2014 H%                            float64
2015 H%                            float64
2016 H%    

In [20]:
# Create homeowner % averages
full_census_merge["Average H%_2014"] = full_census_merge[['2010 H%','2011 H%','2012 H%','2013 H%','2014 H%']].mean(axis=1)
full_census_merge["Average H%_2019"] = full_census_merge[['2015 H%','2016 H%','2017 H%','2018 H%','2019 H%']].mean(axis=1)

In [21]:
# Create weighted cost column
full_census_merge["Weighted Cost_2014"] = full_census_merge["Median Gross Rent_2014"] * (1-full_census_merge["Average H%_2014"] / 100) + full_census_merge["Median Monthly Owner Costs_2014"] * full_census_merge["Average H%_2014"] / 100
full_census_merge["Weighted Cost_2019"] = full_census_merge["Median Gross Rent_2019"] * (1-full_census_merge["Average H%_2019"] / 100) + full_census_merge["Median Monthly Owner Costs_2019"] * full_census_merge["Average H%_2019"] / 100
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,2015 H%,2016 H%,2017 H%,2018 H%,2019 H%,2020 H%,Average H%_2014,Average H%_2019,Weighted Cost_2014,Weighted Cost_2019
2,68361.0,5113.0,753.0,906.0,208000,1642.0,25,1005,67302.0,5128,...,64.4,64.1,65.8,61.9,63.3,62.5,64.48,63.90,1380.5728,1342.0100
14,43583.0,1389.0,599.0,758.0,196800,1473.0,25,1031,38173.0,1135,...,64.4,64.1,65.8,61.9,63.3,62.5,64.48,63.90,1219.0320,1219.8610
29,80577.0,1791.0,775.0,1039.0,257100,1757.0,25,1068,88571.0,1833,...,64.4,64.1,65.8,61.9,63.3,62.5,64.48,63.90,1501.9664,1642.4740
40,37126.0,3108.0,665.0,823.0,199600,1573.0,25,1083,60323.0,3094,...,64.4,64.1,65.8,61.9,63.3,62.5,64.48,63.90,1306.6000,1168.9180
44,48679.0,1078.0,538.0,538.0,250600,1487.0,25,1092,108725.0,1278,...,64.4,64.1,65.8,61.9,63.3,62.5,64.48,63.90,1149.9152,1320.3130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16538,59325.0,56326.0,900.0,1036.0,185100,1538.0,53,98682,75634.0,60887,...,64.0,63.7,63.7,63.9,60.9,59.8,60.92,63.24,1341.8184,1553.8352
16539,58874.0,31908.0,866.0,986.0,250200,1741.0,53,98683,70099.0,33219,...,64.0,63.7,63.7,63.9,60.9,59.8,60.92,63.24,1445.9460,1617.3712
16540,57216.0,27442.0,913.0,1065.0,208100,1521.0,53,98684,66781.0,29396,...,64.0,63.7,63.7,63.9,60.9,59.8,60.92,63.24,1342.7952,1570.6296
16541,79265.0,26954.0,1058.0,1249.0,257100,1856.0,53,98685,103120.0,29792,...,64.0,63.7,63.7,63.9,60.9,59.8,60.92,63.24,1618.7844,1785.2000


In [22]:
# Create Affordability Index
full_census_merge["Affordability_2014"] = full_census_merge["Weighted Cost_2014"] / full_census_merge["Household Income_2014"] * 12
full_census_merge["Affordability_2019"] = full_census_merge["Weighted Cost_2019"] / full_census_merge["Household Income_2019"] * 12
full_census_merge

Unnamed: 0,Household Income_2014,Population_2014,Median Contract Rent_2014,Median Gross Rent_2014,Median Home Value_2014,Median Monthly Owner Costs_2014,state,Zipcode,Household Income_2019,Population_2019,...,2017 H%,2018 H%,2019 H%,2020 H%,Average H%_2014,Average H%_2019,Weighted Cost_2014,Weighted Cost_2019,Affordability_2014,Affordability_2019
2,68361.0,5113.0,753.0,906.0,208000,1642.0,25,1005,67302.0,5128,...,65.8,61.9,63.3,62.5,64.48,63.90,1380.5728,1342.0100,0.242344,0.239281
14,43583.0,1389.0,599.0,758.0,196800,1473.0,25,1031,38173.0,1135,...,65.8,61.9,63.3,62.5,64.48,63.90,1219.0320,1219.8610,0.335644,0.383473
29,80577.0,1791.0,775.0,1039.0,257100,1757.0,25,1068,88571.0,1833,...,65.8,61.9,63.3,62.5,64.48,63.90,1501.9664,1642.4740,0.223682,0.222530
40,37126.0,3108.0,665.0,823.0,199600,1573.0,25,1083,60323.0,3094,...,65.8,61.9,63.3,62.5,64.48,63.90,1306.6000,1168.9180,0.422324,0.232532
44,48679.0,1078.0,538.0,538.0,250600,1487.0,25,1092,108725.0,1278,...,65.8,61.9,63.3,62.5,64.48,63.90,1149.9152,1320.3130,0.283469,0.145723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16538,59325.0,56326.0,900.0,1036.0,185100,1538.0,53,98682,75634.0,60887,...,63.7,63.9,60.9,59.8,60.92,63.24,1341.8184,1553.8352,0.271417,0.246530
16539,58874.0,31908.0,866.0,986.0,250200,1741.0,53,98683,70099.0,33219,...,63.7,63.9,60.9,59.8,60.92,63.24,1445.9460,1617.3712,0.294720,0.276872
16540,57216.0,27442.0,913.0,1065.0,208100,1521.0,53,98684,66781.0,29396,...,63.7,63.9,60.9,59.8,60.92,63.24,1342.7952,1570.6296,0.281627,0.282229
16541,79265.0,26954.0,1058.0,1249.0,257100,1856.0,53,98685,103120.0,29792,...,63.7,63.9,60.9,59.8,60.92,63.24,1618.7844,1785.2000,0.245069,0.207742


In [23]:
full_census_merge.to_csv("Full_Census_Merge.csv", encoding="utf-8", index=False)

In [24]:
# Add population growth metrics by zip code
full_census_merge["Population Growth"] = full_census_merge["Population_2019"] - full_census_merge["Population_2014"]
full_census_merge["Population Growth %"] = full_census_merge["Population Growth"] / full_census_merge["Population_2014"]

TypeError: unsupported operand type(s) for -: 'str' and 'float'

In [None]:
# Add change in income metric by zip code
full_census_merge["Change in Income"] = full_census_merge["Household Income_2019"] - full_census_merge["Household Income_2014"]
full_census_merge["Change in Income %"] = full_census_merge["Change in Income"] / full_census_merge["Household Income_2014"]

In [None]:
# Add change in Weighted Cost
full_census_merge["Change in Weighted Cost"] = full_census_merge["Weighted Cost_2019"] - full_census_merge["Weighted Cost_2014"]
full_census_merge["Change in Weighted Cost %"] = full_census_merge["Change in Weighted Cost"] / full_census_merge["Weighted Cost_2014"]
full_census_merge

In [None]:
# Add change in Affordability
full_census_merge["Change in Affordability"] = full_census_merge["Affordability_2019"] - full_census_merge["Affordability_2014"]
full_census_merge["Change in Affordability %"] = full_census_merge["Change in Affordability"] / full_census_merge["Affordability_2014"]

In [None]:
full_census_merge.to_csv("Full_Census_Merge_Metrics.csv", encoding="utf-8", index=False)

In [None]:
MSA_agg = full_census_merge.groupby(['MANAME']).agg(
    Zip_count = ('Zipcode','count'),
    Population_2014 = ('Population_2014','sum'),
    Population_2019 = ('Population_2019','sum'),
    Population_growth = ('Population Growth','sum'),
    Affordability_2019 = ('Affordability_2019','mean'),
    Income_Growth = ('Change in Income %', 'mean'),
    Weighted_Cost_Change = ('Change in Weighted Cost %','mean'),
    Affordability_Change = ('Change in Affordability %','mean')
).reset_index()    

MSA_agg

In [None]:
MSA_agg["Population_growth_%"] = MSA_agg["Population_growth"] / MSA_agg["Population_2014"]
MSA_agg

In [None]:
MSA_agg.to_csv("MSA_Agg.csv", encoding="utf-8", index=False)

In [None]:
MSA_top_growth=MSA_agg.nlargest(5,'Population_growth_%')
MSA_top_growth

In [None]:
#Data Frame to include atlanta in top 5 comparison
MSA_top_growth_atl = MSA_top_growth.append(MSA_agg[MSA_agg["MANAME"]=="Atlanta-Sandy Springs-Roswell GA"])
MSA_top_growth_atl

In [None]:
#Create DF for Bar Graph to identify 5 slowest growing
MSA_bottom_growth = MSA_agg.nsmallest(5,'Population_growth_%')
MSA_bottom_growth

In [None]:
#Create DF for Bar Graph to identify 3 least affordable
MSA_least_affordable=MSA_agg.nlargest(3,'Affordability_2019')
MSA_least_affordable

In [None]:
# Export CSV's for analysis
MSA_top_growth_atl.to_csv("MSA_top_growth_atl.csv", encoding="utf-8", index=False)
MSA_bottom_growth.to_csv("MSA_bottom_growth.csv", encoding="utf-8", index=False)
MSA_least_affordable.to_csv("MSA_least_affordable.csv", encoding="utf-8", index=False)

In [None]:
top_msa_name = ["Austin-Round Rock TX", "Orlando-Kissimmee-Sanford FL", "Raleigh NC", "Houston-The Woodlands-Sugar Land TX", "San Antonio-New Braunfels TX", "Atlanta-Sandy Springs-Roswell GA"]


In [None]:
msa_top_growth_zip = full_census_merge[full_census_merge.MANAME.isin(top_msa_name)]
msa_top_growth_zip

In [None]:
msa_top_growth_zip.to_csv("msa_top_growth_zip.csv", encoding="utf-8", index=False)

Change in population percentage vs change in affordability

In [None]:
#Data Frame to include atlanta in top 5 comparison
MSA_top_growth_atl = MSA_top_growth.append(MSA_agg[MSA_agg["MANAME"]=="Atlanta-Sandy Springs-Roswell GA"])
MSA_top_growth_atl

In [None]:
x_values = MSA_top_growth_atl["Population_growth_%"]
y_values = MSA_top_growth_atl["Affordability_Change"]
fig1, ax1 = plt.subplots(figsize=(10,8))
plt.scatter(x_values, y_values, alpha = 0.90, edgecolors = 'k', linewidths = 1)
plt.xlabel("Population Growth %")
plt.ylabel("Affordability Change")
plt.title(f"Change in population % vs change in affordability Linear Regression")
plt.grid()
#plt.savefig("../output_data/latitude_v_wind_speed.png", bbox_inches="tight")
plt.show()

In [None]:
#Linear Regression Plots for North

x_values = pd.to_numeric(MSA_top_growth_atl["Population_growth_%"]) #.astype(float)
y_values = pd.to_numeric(MSA_top_growth_atl["Affordability_Change"]) #.astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " +str(round(intercept,2))

# plot for northern hemisphere - max temp v/ latitude linear regression

fig1,ax1 = plt.subplots(figsize=(15,10))
plt.scatter(x_values, y_values, alpha = 0.90, edgecolors = 'k', linewidths = 1)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq,(.10 ,-.04),fontsize=14)
plt.xlabel("Population Growth %")
plt.ylabel("Affordability Change")
plt.title(f"Change in population % vs change in affordability Linear Regression")

plt.grid()

#calculations of p/r values
print(f"The P-Value is: {pvalue}")
print(f"The R-Value is: {rvalue}")
print(f"The r-squared is: {rvalue **2}")
#save plot as png
plt.savefig("png1_test", bbox_inches = "tight")
plt.show()