In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps

In [3]:
from config import census_api
c = Census(census_api, year=2018)

In [4]:
# Pull Census Data
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E",
                          "B23025_005E"), {'for': 'county:*'})

# Bring data into DataFrame
census_df = pd.DataFrame(census_data)

# Remove Puerto Rico
clean_df = census_df[~census_df.NAME.str.contains('Puerto')]

clean_df

Unnamed: 0,NAME,B19013_001E,B01003_001E,B01002_001E,B19301_001E,B17001_002E,B23025_005E,state,county
0,"Washington County, Mississippi",30834.0,47086.0,36.9,19884.0,15496.0,3041.0,28,151
1,"Perry County, Mississippi",39007.0,12028.0,40.9,21611.0,2231.0,337.0,28,111
2,"Choctaw County, Mississippi",37203.0,8321.0,44.0,20589.0,1888.0,301.0,28,019
3,"Itawamba County, Mississippi",40510.0,23480.0,40.0,20629.0,3428.0,414.0,28,057
4,"Carroll County, Mississippi",43060.0,10129.0,47.0,22567.0,1477.0,377.0,28,015
...,...,...,...,...,...,...,...,...,...
3215,"Clayton County, Iowa",52828.0,17672.0,47.1,28486.0,1564.0,198.0,19,043
3216,"Buena Vista County, Iowa",54556.0,20260.0,35.2,26607.0,2507.0,590.0,19,021
3217,"Guthrie County, Iowa",57075.0,10674.0,45.6,28953.0,1175.0,184.0,19,077
3218,"Humboldt County, Iowa",52219.0,9566.0,43.0,29882.0,1103.0,180.0,19,091


In [5]:
# Split 'NAME' column on comma and bring back into dataframe
covid_df = clean_df['NAME'].str.split(", ", n = 1, expand=True)
clean_df['County'] = covid_df[0]
clean_df['State'] = covid_df[1]
covid_clean_df = clean_df.drop(['state', 'county', 'NAME'], axis=1)

# Rename column headers
covid_clean_df = covid_clean_df.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Mean Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_005E": "Unemployment Count"})

# Reorganize column headers
covid_clean_df = covid_clean_df[['County', 'State', 
                                'Mean Household Income', 'Population', 
                                'Median Age', 'Per Capita Income', 
                                'Poverty Count', 'Unemployment Count']]
                                             
covid_clean_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0,County,State,Mean Household Income,Population,Median Age,Per Capita Income,Poverty Count,Unemployment Count
0,Washington County,Mississippi,30834.0,47086.0,36.9,19884.0,15496.0,3041.0
1,Perry County,Mississippi,39007.0,12028.0,40.9,21611.0,2231.0,337.0
2,Choctaw County,Mississippi,37203.0,8321.0,44.0,20589.0,1888.0,301.0
3,Itawamba County,Mississippi,40510.0,23480.0,40.0,20629.0,3428.0,414.0
4,Carroll County,Mississippi,43060.0,10129.0,47.0,22567.0,1477.0,377.0
...,...,...,...,...,...,...,...,...
3215,Clayton County,Iowa,52828.0,17672.0,47.1,28486.0,1564.0,198.0
3216,Buena Vista County,Iowa,54556.0,20260.0,35.2,26607.0,2507.0,590.0
3217,Guthrie County,Iowa,57075.0,10674.0,45.6,28953.0,1175.0,184.0
3218,Humboldt County,Iowa,52219.0,9566.0,43.0,29882.0,1103.0,180.0


In [6]:
# Clean data for merging and plots
covid_clean_df['County'].replace(' County', '', regex=True, inplace=True)
covid_clean_df.head()

# Change state names to abbreviations
us_state_abbrev = {
'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA', 'Colorado': 'CO',
'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID',
'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS',
'Missouri': 'MO', 'Montana': 'MT', 'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'}
covid_clean_df['State'] = covid_clean_df['State'].map(us_state_abbrev).fillna(covid_clean_df['State'])
covid_clean_df.reset_index()


covid_clean_df.head()

Unnamed: 0,County,State,Mean Household Income,Population,Median Age,Per Capita Income,Poverty Count,Unemployment Count
0,Washington,MS,30834.0,47086.0,36.9,19884.0,15496.0,3041.0
1,Perry,MS,39007.0,12028.0,40.9,21611.0,2231.0,337.0
2,Choctaw,MS,37203.0,8321.0,44.0,20589.0,1888.0,301.0
3,Itawamba,MS,40510.0,23480.0,40.0,20629.0,3428.0,414.0
4,Carroll,MS,43060.0,10129.0,47.0,22567.0,1477.0,377.0


In [14]:
avg_income = covid_clean_df['Mean Household Income'].mean()
tot_pop = covid_clean_df['Population'].sum()
avg_age = covid_clean_df['Median Age'].mean()
avg_pci = covid_clean_df['Per Capita Income'].mean()
pov_count = covid_clean_df['Poverty Count'].sum()
employ_count = covid_clean_df['Unemployment Count'].sum()

us = {'County': ['United States'], 'State': ['US'],
        'Mean Household Income': [avg_income],
        'Population': [tot_pop],
        'Median Age': [avg_age],
         'Per Capita Income': [avg_pci],
         'Poverty Count': [pov_count],
         'Unemployment Count': [employ_count]}

us_df = pd.DataFrame(data=us)

covid_clean_df = covid_clean_df.append(us_df)
covid_clean_df

Unnamed: 0,County,State,Mean Household Income,Population,Median Age,Per Capita Income,Poverty Count,Unemployment Count
0,Washington,MS,30834.000000,47086.0,36.900000,19884.000000,15496.0,3041.0
1,Perry,MS,39007.000000,12028.0,40.900000,21611.000000,2231.0,337.0
2,Choctaw,MS,37203.000000,8321.0,44.000000,20589.000000,1888.0,301.0
3,Itawamba,MS,40510.000000,23480.0,40.000000,20629.000000,3428.0,414.0
4,Carroll,MS,43060.000000,10129.0,47.000000,22567.000000,1477.0,377.0
...,...,...,...,...,...,...,...,...
3216,Buena Vista,IA,54556.000000,20260.0,35.200000,26607.000000,2507.0,590.0
3217,Guthrie,IA,57075.000000,10674.0,45.600000,28953.000000,1175.0,184.0
3218,Humboldt,IA,52219.000000,9566.0,43.000000,29882.000000,1103.0,180.0
3219,Washington,IA,61769.000000,22143.0,40.800000,29857.000000,2021.0,482.0


In [15]:
covid_clean_df.to_csv("clean_data/census_data.csv", index=True, header=True)