In [44]:
# Import Dependencies
import numpy as np
import pandas as pd
import requests
from census import Census
from us import states
import pathlib

# Census API Key: read from a local config module so the key itself is not stored in the notebook
from config import api_key
# Shared Census client used by the query cell below.
# NOTE(review): year=2018 presumably makes 2018 the default data year for requests made through `c` - confirm against the census package docs
c = Census(api_key, year=2018)

In [45]:
#Extract 2018 Census data by tract in Los Angeles County (state 06, county 037)
census_data = c.acs5.get(
    ("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
     "B19301_001E",
     "B17001_002E", "B23025_005E"),
    geo={'for': 'tract:*', 'in': 'state:06 county:037'})

#Create DataFrame
census_pd = pd.DataFrame(census_data)

#Rename Columns
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count",
                                      "tract": "Census Tract"})

#Parse the count columns as numbers. pd.to_numeric(errors="coerce") turns missing
#or unparseable values into NaN, whereas astype(int) raises on None/NaN.
population = pd.to_numeric(census_pd["Population"], errors="coerce")
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
unemployment_count = pd.to_numeric(census_pd["Unemployment Count"], errors="coerce")

#A tract with zero population has no meaningful rate. Using NaN as the denominator
#makes the rate NaN (removable with dropna) instead of inf (which dropna keeps).
population_nonzero = population.where(population != 0)

#NOTE(review): both rates below are percentages of TOTAL population. Official
#poverty/unemployment rates normally use a narrower denominator (e.g. the labor
#force for unemployment) - confirm this simplification is intended.

#Calculate Poverty Rate
census_pd["Poverty Rate"] = (100 * poverty_count / population_nonzero).round(2)

#Calculate Unemployment Rate
census_pd["Unemployment Rate"] = (100 * unemployment_count / population_nonzero).round(2)

#Keep only the columns used downstream ("Unemployment Count" and "NAME" are not kept)
census_pd = census_pd[["Census Tract", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate"]]
census_pd.head()


Unnamed: 0,Census Tract,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,543321,5619.0,42.7,85465.0,34799.0,501.0,8.92,3.51
1,543502,4424.0,38.7,63582.0,32217.0,942.0,21.29,3.12
2,550100,7441.0,36.7,71133.0,24140.0,724.0,9.73,2.35
3,550500,8192.0,38.8,84315.0,36900.0,323.0,3.94,1.97
4,551000,7311.0,40.0,86068.0,36407.0,195.0,2.67,3.69


In [48]:
#Path to the csv file with census tract neighborhood names
#(relative, so it is resolved against the notebook's working directory)
TRACT_CSV_NAME = "la_census_tract.csv"
csv_path = pathlib.Path(TRACT_CSV_NAME)

In [49]:
#Load the census tract lookup file into a DataFrame
#(the rendered output shows Census Tract, Neighborhood, Latitude and Longitude columns)
tract_data_df = pd.read_csv(filepath_or_buffer=csv_path)

#Show the loaded table as the cell output
tract_data_df

Unnamed: 0,Census Tract,Neighborhood,Latitude,Longitude
0,101110,Tujunga,34.259555,-118.293602
1,101122,Tujunga,34.267357,-118.290240
2,101210,Tujunga,34.251998,-118.292687
3,101220,Tujunga,34.251190,-118.281014
4,101300,Tujunga,34.245595,-118.271731
...,...,...,...,...
2339,573902,Long Beach,33.811974,-118.078652
2340,576602,Long Beach,33.765980,-118.170779
2341,599100,Unincorporated Catalina Island,33.388813,-118.423553
2342,980033,San Pedro,33.751351,-118.216154


In [50]:
#Cast the "Census Tract" column to int64; all other columns keep their dtype
#(presumably so the key dtype matches the tract column of tract_data_df for the merge - verify)
census_pd = census_pd.astype({"Census Tract": "int64"})

In [51]:
#Merge census data with the tract-by-neighborhood data.
#how="left" keeps every row of tract_data_df; tracts without a matching census row
#get NaN in the census columns. The join key only needs to be named once.
census_data_complete = pd.merge(tract_data_df, census_pd, how="left", on="Census Tract")

#Drop every row that has a missing value in any column (including unmatched tracts)
census_data_complete = census_data_complete.dropna()

census_data_complete

Unnamed: 0,Census Tract,Neighborhood,Latitude,Longitude,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,101110,Tujunga,34.259555,-118.293602,4314.0,40.6,53077.0,26996.0,679.0,15.74,3.85
1,101122,Tujunga,34.267357,-118.290240,3239.0,52.3,88953.0,46974.0,109.0,3.37,2.90
2,101210,Tujunga,34.251998,-118.292687,6052.0,36.6,32119.0,19902.0,1994.0,32.95,5.30
3,101220,Tujunga,34.251190,-118.281014,3497.0,39.7,41728.0,21844.0,524.0,14.98,3.77
4,101300,Tujunga,34.245595,-118.271731,4297.0,49.7,86914.0,35120.0,391.0,9.10,2.91
...,...,...,...,...,...,...,...,...,...,...,...
2337,124700,Sherman Oaks,34.162467,-118.413865,5697.0,40.2,90795.0,55921.0,286.0,5.02,1.30
2338,503801,La Mirada,33.899103,-117.993649,3847.0,36.9,93398.0,29261.0,381.0,9.90,4.55
2339,573902,Long Beach,33.811974,-118.078652,1979.0,49.9,126250.0,61849.0,41.0,2.07,0.71
2340,576602,Long Beach,33.765980,-118.170779,4369.0,37.5,65192.0,48098.0,789.0,18.06,2.24


In [52]:
#Replace the Census placeholder value with NaN.
#-666666666 is a numeric sentinel, not a real income (see the ACS notes on estimate and
#annotation values). Replacing it with NaN rather than '' keeps the column numeric
#instead of turning it into an object column that mixes floats and strings.
#NOTE(review): only Household Income is cleaned here; other estimate columns may carry the same sentinel - confirm.
spec_char=-666666666.0

census_data_complete['Household Income'] = census_data_complete['Household Income'].replace(spec_char, np.nan)


In [53]:
#Coerce Household Income to numeric; anything that cannot be parsed (e.g. '') becomes NaN
census_data_complete['Household Income'] = pd.to_numeric(census_data_complete['Household Income'], errors='coerce')

#Remove rows without data (NaN).
#dropna() returns a new DataFrame, so the result must be assigned back; otherwise the
#rows are only hidden in this cell's display and are still written to the csv extract.
census_data_complete = census_data_complete.dropna()

census_data_complete

Unnamed: 0,Census Tract,Neighborhood,Latitude,Longitude,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,101110,Tujunga,34.259555,-118.293602,4314.0,40.6,53077.0,26996.0,679.0,15.74,3.85
1,101122,Tujunga,34.267357,-118.290240,3239.0,52.3,88953.0,46974.0,109.0,3.37,2.90
2,101210,Tujunga,34.251998,-118.292687,6052.0,36.6,32119.0,19902.0,1994.0,32.95,5.30
3,101220,Tujunga,34.251190,-118.281014,3497.0,39.7,41728.0,21844.0,524.0,14.98,3.77
4,101300,Tujunga,34.245595,-118.271731,4297.0,49.7,86914.0,35120.0,391.0,9.10,2.91
...,...,...,...,...,...,...,...,...,...,...,...
2336,980031,San Pedro,33.737510,-118.261425,1113.0,44.1,64500.0,9765.0,0.0,0.00,0.00
2337,124700,Sherman Oaks,34.162467,-118.413865,5697.0,40.2,90795.0,55921.0,286.0,5.02,1.30
2338,503801,La Mirada,33.899103,-117.993649,3847.0,36.9,93398.0,29261.0,381.0,9.90,4.55
2339,573902,Long Beach,33.811974,-118.078652,1979.0,49.9,126250.0,61849.0,41.0,2.07,0.71


In [54]:
#Write the merged data to a csv extract: column names are kept as the header row,
#the DataFrame index is not written
census_data_complete.to_csv(path_or_buf="Output_income_data.csv", header=True, index=False)