In [1]:
from IPython.display import display, Image 
import pandas as pd  
import matplotlib.pyplot as plt
import numpy as np 

from census import Census 
from us import states

import fiona 
import geopandas as gpd 
from shapely.geometry import Point, Polygon 

### 1. Streamlining Census API Acquisition

I want to get data at the census tract level for several counties at once, but when I try to do so using a list (process similar to variable codes) I get an error message.

In [2]:
my_api_key = '34e40301bda77077e24c859c6c6c0b721ad73fc7' 
c = Census(my_api_key)

In [3]:
code = ["NAME","B01001_001E", "B17001_001E"]
counties = ["061", "047", "005"]
nyc = c.acs5.state_county_tract(code,  states.NY.fips, counties, Census.ALL)
nyc = pd.DataFrame(nyc)

CensusException: error: invalid 'in' argument

In the meantime I've been doing the requests for every county seperately then 'concatenating' them together.

In [5]:
code = ["NAME", "B01003_001E", "B00002_001E", "B16010_015E", "B16010_041E"]
mnht = c.acs5.state_county_tract(code,  states.NY.fips, "061", Census.ALL)
mnht = pd.DataFrame(mnht)
blkn = c.acs5.state_county_tract(code,  states.NY.fips, "047", Census.ALL)
blkn = pd.DataFrame(blkn)
brx = c.acs5.state_county_tract(code,  states.NY.fips, "005", Census.ALL)
brx = pd.DataFrame(brx)

In [6]:
nyc = pd.concat([mnht, blkn, brx])

### 2. Looping over DataFrames

Suppose I were working on my Manhattan and Brooklyn DataFrames separately, but wanted to do the same operations on them. 

The loop below doesn't work:

In [7]:
for j in [mnht, blkn, brx]:
    j = j.rename(columns = {'NAME': 'name', 'B01003_001E': 'pop', 'B00002_001E': 'housing_units', 
                            "B16010_015E": "edu_hs", "B16010_041E": "edu_college"})

In [8]:
mnht.columns

Index(['B00002_001E', 'B01003_001E', 'B16010_015E', 'B16010_041E', 'NAME',
       'county', 'state', 'tract'],
      dtype='object')

So I repeated the operation over the DFs.

The same loop works for specific columns however:

In [9]:
mnht["B16010_015E"][0:2]

0      0.0
1    584.0
Name: B16010_015E, dtype: float64

In [10]:
for j in [mnht, blkn, brx]:
    j["B16010_015E"] = j["B16010_015E"] / j["B01003_001E"]

In [11]:
mnht["B16010_015E"][0:2]

0         NaN
1    0.209244
Name: B16010_015E, dtype: float64

### 3. Adding Shapefile Data

I then wanted to map the data. I retrieved the shapefiles from [NYC Planning](https://www1.nyc.gov/site/planning/data-maps/open-data/districts-download-metadata.page), "Census Tracts 2010 (Clipped to Shoreline)". My current code points towards the user directory. 

In [12]:
nyc_map = gpd.read_file(r"nyct2010_18a\\nyct2010.shx")

I wanted to automate the process using the in-class zipfile code, but got an error message:

In [13]:
import requests, io             # internet and input tools  
import zipfile as zf            # zip file tools 
import shutil                   # file management tools 
import os                       # operating system tools (check files)

In [14]:
url = 'https://www1.nyc.gov/assets/planning/download/zip/data-maps/open-data/nyct2010_18a.zip'
r = requests.get(url) 
print('Response status code:', r.status_code) #200 is good

Response status code: 200


In [15]:
mlz = zf.ZipFile(io.BytesIO(r.content)) 

In [16]:
mlz.namelist()

['nyct2010_18a/nyct2010.shp',
 'nyct2010_18a/nyct2010.dbf',
 'nyct2010_18a/nyct2010.shx',
 'nyct2010_18a/nyct2010.prj',
 'nyct2010_18a/nyct2010.shp.xml']

In [17]:
tract_shape = gpd.read_file(mlz.open(mlz.namelist()[3]))

AttributeError: 'ZipExtFile' object has no attribute 'split'