In [1]:
# Dependencies 
import requests
import json
import pandas as pd
from census import Census


# Census API Key
from api_key import my_key
c = Census(my_key, year=2016)

In [2]:
# Run Census Search to retrieve data on all zip codes (2016 ACS5 Census)
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B02001_002E",
                          "B02001_003E", "B02001_004E", "B02001_005E",
                          "B02001_006E","B02001_008E", "B03001_003E", "B01002_001E", 
                          "B01002_002E","B01002_003E"), 
                          {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

In [3]:
census_pd.head()

Unnamed: 0,B01002_001E,B01002_002E,B01002_003E,B01003_001E,B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_008E,B03001_003E,B19013_001E,NAME,zip code tabulation area
0,45.0,41.6,48.1,17423.0,15974.0,256.0,5.0,703.0,24.0,184.0,1314.0,56714.0,ZCTA5 01001,1001
1,23.2,23.3,23.2,29970.0,23459.0,1711.0,140.0,3502.0,17.0,911.0,1870.0,48923.0,ZCTA5 01002,1002
2,19.9,19.9,19.9,11296.0,8698.0,660.0,30.0,1538.0,0.0,240.0,526.0,2499.0,ZCTA5 01003,1003
3,44.1,41.6,47.4,5228.0,5063.0,105.0,0.0,32.0,18.0,8.0,77.0,70568.0,ZCTA5 01005,1005
4,42.5,40.2,43.8,14888.0,13906.0,125.0,0.0,443.0,0.0,380.0,305.0,80502.0,ZCTA5 01007,1007


In [5]:
# Column Reordering
census_pd = census_pd.rename(columns={"B19013_001E": "Income" , 
                                      "B01003_001E": "Population", 
                                      "B02001_002E": "Population White",
                                      "B02001_003E": "Population Black",
                                      "B02001_004E": "Population American Indian", 
                                      "B02001_005E": "Population Asian",
                                      "B02001_006E": "Population Hawaiian",
                                      "B02001_008E": "Population 2 or more Races", 
                                      "B03001_003E": "Population Hispanic", 
                                      "B01002_001E": "Age", 
                                      "B01002_002E": "Median Male Age",
                                      "B01002_003E": "Median Female Age",
                                      "NAME": "Name", 
                                      "zip code tabulation area": "Zipcode"})

    
                      

# Final DataFrame
census_pd = census_pd[["Zipcode","Income",  "Population","Population White",  
                                    "Population White",
                                    "Population Black",
                                    "Population American Indian", 
                                    "Population Asian",
                                    "Population Hawaiian",
                                    "Population 2 or more Races", 
                                    "Population Hispanic", 
                                    "Age", 
                                    "Median Male Age",
                                    "Median Female Age"
                                   ]]

# Visualize
print(len(census_pd))
census_pd.head(50)

33120


Unnamed: 0,Zipcode,Income,Population,Population White,Population White.1,Population Black,Population American Indian,Population Asian,Population Hawaiian,Population 2 or more Races,Population Hispanic,Age,Median Male Age,Median Female Age
0,1001,56714.0,17423.0,15974.0,15974.0,256.0,5.0,703.0,24.0,184.0,1314.0,45.0,41.6,48.1
1,1002,48923.0,29970.0,23459.0,23459.0,1711.0,140.0,3502.0,17.0,911.0,1870.0,23.2,23.3,23.2
2,1003,2499.0,11296.0,8698.0,8698.0,660.0,30.0,1538.0,0.0,240.0,526.0,19.9,19.9,19.9
3,1005,70568.0,5228.0,5063.0,5063.0,105.0,0.0,32.0,18.0,8.0,77.0,44.1,41.6,47.4
4,1007,80502.0,14888.0,13906.0,13906.0,125.0,0.0,443.0,0.0,380.0,305.0,42.5,40.2,43.8
5,1008,67250.0,1194.0,1182.0,1182.0,0.0,0.0,7.0,0.0,5.0,4.0,48.5,49.4,46.9
6,1009,-666666666.0,237.0,237.0,237.0,0.0,0.0,0.0,0.0,0.0,0.0,62.2,61.5,66.6
7,1010,83273.0,3718.0,3703.0,3703.0,9.0,0.0,6.0,0.0,0.0,43.0,46.6,46.3,47.2
8,1011,74722.0,1523.0,1462.0,1462.0,15.0,0.0,11.0,0.0,17.0,51.0,41.0,39.9,43.0
9,1012,60417.0,528.0,520.0,520.0,0.0,0.0,0.0,2.0,6.0,11.0,50.5,50.7,48.0


In [5]:
# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
census_pd.to_csv("census_data.csv", encoding="utf-8", index=False)

In [6]:
#To select rows whose column value is in list 
Zip_code = [33114, 33002, 33018, 33017, 33016, 33015, 33014, 33013, 33012, 33011, 33010, 33092, 
         33090, 33039, 33035, 33034, 33031, 33032, 33030, 33033, 33149, 33181, 33182, 33183, 
         33184, 33185, 33186, 33187, 33188, 33189, 33190, 33180, 33179, 33178, 33166, 33167, 
         33168, 33169, 33172, 33173, 33174, 33175, 33176, 33177, 33165, 33193, 33194, 33247, 
         33255, 33256, 33257, 33261, 33265, 33266, 33269, 33280, 33283, 33245, 33243, 33242, 
         33196, 33197, 33199, 33206, 33222, 33231, 33233, 33234, 33238, 33299, 33296, 33164, 
         33101, 33130, 33131, 33132, 33133, 33134, 33135, 33136, 33137, 33138, 33129, 33128, 
         33127, 33102, 33106, 33111, 33112, 33116, 33122, 33124, 33125, 33126, 33170, 33156, 
         33158, 33157, 33161, 33155, 33162, 33153, 33152, 33151, 33150, 33163, 33142, 33143, 
         33144, 33145, 33146, 33147, 33159, 33109, 33139, 33141, 33154, 33140, 33119, 33239, 
         33056, 33160, 33055, 33054]
census_pd.Zipcode.isin(Zip_code)


0        False
1        False
2        False
3        False
4        False
5        False
6        False
7        False
8        False
9        False
10       False
11       False
12       False
13       False
14       False
15       False
16       False
17       False
18       False
19       False
20       False
21       False
22       False
23       False
24       False
25       False
26       False
27       False
28       False
29       False
         ...  
33090    False
33091    False
33092    False
33093    False
33094    False
33095    False
33096    False
33097    False
33098    False
33099    False
33100    False
33101    False
33102    False
33103    False
33104    False
33105    False
33106    False
33107    False
33108    False
33109    False
33110    False
33111    False
33112    False
33113    False
33114    False
33115    False
33116    False
33117    False
33118    False
33119    False
Name: Zipcode, Length: 33120, dtype: bool

In [9]:
census_pd= census_pd[census_pd.Zipcode.isin(Zip_code)]
census_pd


Unnamed: 0,Zipcode,Income,Population,Population White,Population White.1,Population Black,Population American Indian,Population Asian,Population Hawaiian,Population 2 or more Races,Population Hispanic,Age,Median Male Age,Median Female Age


In [11]:
census_pd.dtypes


Zipcode                        object
Income                        float64
Population                    float64
Population White              float64
Population White              float64
Population Black              float64
Population American Indian    float64
Population Asian              float64
Population Hawaiian           float64
Population 2 or more Races    float64
Population Hispanic           float64
Age                           float64
Median Male Age               float64
Median Female Age             float64
dtype: object

In [27]:

census_pd = census_pd.astype({"Zipcode": int})



Unnamed: 0,Zipcode,Income,Population,Population White,Population White.1,Population Black,Population American Indian,Population Asian,Population Hawaiian,Population 2 or more Races,Population Hispanic,Age,Median Male Age,Median Female Age


In [24]:
census_pd.dtypes


Zipcode                         int32
Income                        float64
Population                    float64
Population White              float64
Population White              float64
Population Black              float64
Population American Indian    float64
Population Asian              float64
Population Hawaiian           float64
Population 2 or more Races    float64
Population Hispanic           float64
Age                           float64
Median Male Age               float64
Median Female Age             float64
dtype: object

In [30]:
census_pd[census_pd.Zipcode.isin(Zip_code)]



Unnamed: 0,Zipcode,Income,Population,Population White,Population White.1,Population Black,Population American Indian,Population Asian,Population Hawaiian,Population 2 or more Races,Population Hispanic,Age,Median Male Age,Median Female Age
