In [260]:
import numpy as np
from cities.utils.data_grabber import find_repo_root, DataGrabber
import pandas as pd
import numpy as np
import requests

# Repository root; later cells build their data/raw paths from it.
root = find_repo_root()

# Pull the wide-format "gdp" feature through the project's DataGrabber.
# Later cells use its GeoFIPS and GeoName columns.
data = DataGrabber()
data.get_features_wide(["gdp"])
gdp = data.wide["gdp"]

In [261]:
# Geography clause of the query: every county within every state.
area = "county:*&in=state:*"

# ACS DP04 variable codes to download (readable names are assigned in the
# column_mapping cell further down).
variables = "DP04_0045E,DP04_0046E,DP04_0089E,DP04_0134E"

url = f"https://api.census.gov/data/2021/acs/acs5/profile?get={variables}&for={area}"

# Without a timeout requests waits indefinitely, which would stall a
# Restart & Run All if the API stops responding.
response = requests.get(url, timeout=60)

# Fail loudly on HTTP errors (raises requests.HTTPError for 4xx/5xx), then
# insist on a plain 200 so an empty-body success code cannot reach .json().
# Explicit checks are used instead of `assert`, which is removed under -O.
response.raise_for_status()
if response.status_code != 200:
    raise RuntimeError(
        f"Census API returned unexpected HTTP status {response.status_code}"
    )

# NOTE: this rebinds `data`, which previously held the DataGrabber instance.
data = response.json()

# The payload is a list of rows whose first row holds the column names.
df = pd.DataFrame(data[1:], columns=data[0])
df.columns

Index(['DP04_0045E', 'DP04_0046E', 'DP04_0089E', 'DP04_0134E', 'state',
       'county'],
      dtype='object')

In [262]:
# Spot-check the download on one state: state code "09" is Connecticut.
# GeoFIPS codes expected for it:
#
# 09000        Connecticut
# 09001        Fairfield County
# 09003        Hartford County
# 09005        Litchfield County
# 09007        Middlesex County
# 09009        New Haven County
# 09011        New London County
# 09013        Tolland County
# 09015        Windham County
is_connecticut = df["state"] == "09"
df_subset = df.loc[is_connecticut]
df_subset

Unnamed: 0,DP04_0045E,DP04_0046E,DP04_0089E,DP04_0134E,state,county
309,349443,232832,443100,1593,9,1
310,356529,230908,249000,1191,9,3
311,74857,57096,270000,1114,9,5
312,68200,50544,298300,1222,9,7
313,336400,208977,259400,1223,9,9
314,109481,73645,257600,1192,9,11
315,56989,40072,264500,1238,9,13
316,45425,31429,217300,1019,9,15


In [263]:
# Readable column names for the downloaded ACS DP04 variable codes.
column_mapping = {
    "DP04_0045E": "occupied_housing_units",
    "DP04_0046E": "owner_occupied",
    "DP04_0089E": "median_owner_occupied_home_value",
    "DP04_0134E": "median_rent",
}

# Build a renamed copy (the raw `df` is left untouched), derive the integer
# GeoFIPS key by concatenating the state and county code strings, then drop
# the two source columns.
homeownership = (
    df.rename(columns=column_mapping)
    .assign(
        GeoFIPS=lambda frame: (frame["state"] + frame["county"]).astype(np.int64)
    )
    .drop(columns=["state", "county"])
)

homeownership.head()

Unnamed: 0,occupied_housing_units,owner_occupied,median_owner_occupied_home_value,median_rent,GeoFIPS
0,21856,16227,164900,1085,1001
1,87190,67242,226600,1093,1003
2,9088,5654,89500,605,1005
3,7083,5580,102900,744,1007
4,21300,16865,138100,691,1009


In [264]:
# Owner-occupied units as a percentage of all occupied housing units.
owner_units = homeownership["owner_occupied"].astype(float)
occupied_units = homeownership["occupied_housing_units"].astype(float)
homeownership["homeownership_rate"] = owner_units.div(occupied_units).mul(100)

# The two raw counts were only needed to compute the rate.
homeownership = homeownership.drop(
    columns=["owner_occupied", "occupied_housing_units"]
)

In [265]:
# Keep only the counties that also appear in the GDP table, then attach the
# GeoName label that the GDP table carries for each GeoFIPS.
common_fips = np.intersect1d(homeownership["GeoFIPS"].unique(), gdp["GeoFIPS"].unique())
homeownership = homeownership[homeownership["GeoFIPS"].isin(common_fips)]
homeownership = homeownership.merge(
    gdp[["GeoFIPS", "GeoName"]], on="GeoFIPS", how="left"
)

# Identifier columns first, value columns after them in their existing order.
value_columns = list(homeownership.columns.drop(["GeoFIPS", "GeoName"]))
homeownership = homeownership.reindex(columns=["GeoFIPS", "GeoName"] + value_columns)

# Cast the value columns to float by label. Assigning through
# `.iloc[:, 2:] = ...` writes into the existing columns on pandas >= 2.0, so
# columns that arrived as strings would keep object dtype; astype() returns
# genuinely float columns instead.
homeownership = homeownership.astype({column: float for column in value_columns})

homeownership.head()

Unnamed: 0,GeoFIPS,GeoName,median_owner_occupied_home_value,median_rent,homeownership_rate
0,1001,"Autauga, AL",164900.0,1085.0,74.245059
1,1003,"Baldwin, AL",226600.0,1093.0,77.121229
2,1005,"Barbour, AL",89500.0,605.0,62.213908
3,1007,"Bibb, AL",102900.0,744.0,78.780178
4,1009,"Blount, AL",138100.0,691.0,79.178404


In [266]:
# Flag rows where any of the last three columns equals -666666666
# (presumably the Census placeholder for an unavailable estimate - confirm).
last_three_columns = homeownership.iloc[:, -3:]
has_sentinel = (last_three_columns == -666666666.0).any(axis=1)
rows_with_negative_values = homeownership.loc[has_sentinel]
rows_with_negative_values

Unnamed: 0,GeoFIPS,GeoName,median_owner_occupied_home_value,median_rent,homeownership_rate
179,6003,"Alpine, CA",378200.0,-666666666.0,81.797235
1113,22023,"Cameron, LA",152500.0,-666666666.0,92.509025
2519,48033,"Borden, TX",143300.0,-666666666.0,66.326531
2633,48261,"Kenedy, TX",-666666666.0,-666666666.0,10.416667
2637,48269,"King, TX",22600.0,-666666666.0,33.73494
2653,48301,"Loving, TX",-666666666.0,-666666666.0,48.484848
2711,48417,"Shackelford, TX",153100.0,-666666666.0,84.475965
2724,48443,"Terrell, TX",94800.0,-666666666.0,95.070423


In [267]:
# Fixing missing counties: load the replacement rows from the repository and
# discard their `year` column (presumably so the columns line up with
# `homeownership` when the two are concatenated - verify against the CSV).
transplant = pd.read_csv(f"{root}/data/raw/missing_homeownership.csv").drop(
    columns="year"
)

In [268]:
# Remove every county flagged in rows_with_negative_values.
problematic_fips = rows_with_negative_values["GeoFIPS"]
keep_mask = ~homeownership["GeoFIPS"].isin(problematic_fips)

# Append the transplant rows, then restore GeoFIPS/GeoName ordering and a
# clean 0..n-1 index.
homeownership = (
    pd.concat([homeownership[keep_mask], transplant])
    .sort_values(by=["GeoFIPS", "GeoName"])
    .reset_index(drop=True)
)

homeownership.head()

Unnamed: 0,GeoFIPS,GeoName,median_owner_occupied_home_value,median_rent,homeownership_rate
0,1001,"Autauga, AL",164900.0,1085.0,74.245059
1,1003,"Baldwin, AL",226600.0,1093.0,77.121229
2,1005,"Barbour, AL",89500.0,605.0,62.213908
3,1007,"Bibb, AL",102900.0,744.0,78.780178
4,1009,"Blount, AL",138100.0,691.0,79.178404


In [269]:
# Re-run the sentinel check after the transplant; an empty result means no
# row still carries -666666666 in its last three columns.
sentinel_hits = homeownership.iloc[:, -3:] == -666666666.0
rows_with_negative_values = homeownership.loc[sentinel_hits.any(axis=1)]
rows_with_negative_values

Unnamed: 0,GeoFIPS,GeoName,median_owner_occupied_home_value,median_rent,homeownership_rate


In [271]:
variables = ["median_owner_occupied_home_value", "median_rent", "homeownership_rate"]

# Write one CSV per variable into data/raw, each holding the two identifier
# columns plus that single variable.
id_columns = ["GeoFIPS", "GeoName"]
for variable in variables:
    homeownership[id_columns + [variable]].to_csv(
        f"{root}/data/raw/{variable}.csv", index=False
    )

In [270]:
# Number of counties in the final table.
print(len(homeownership))

3071
