In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
from config import census_key


c = Census(census_key, year=2013)

In [51]:
file = 'zipcodes_stl2.csv'
df = pd.read_csv(file)
df_small = df.iloc[0:5,:]
df_small

Unnamed: 0,ZIP Code,County,Latitude,Longitude
0,63101,Saint Louis City,38.631551,-90.193
1,63102,Saint Louis City,38.6352,-90.18702
2,63103,Saint Louis City,38.631451,-90.21415
3,63104,Saint Louis City,38.610701,-90.21362
4,63105,Saint Louis,38.645484,-90.32888


In [52]:
df_census = pd.DataFrame()

for zip in df_small['ZIP Code']:
    census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                                  "B19301_001E",
                                  "B17001_002E"), {'for': 'zip code tabulation area:'+str(zip)})
#     print(census_data)
    df_new = pd.DataFrame(census_data)
    df_census = df_census.append(df_new)
# df_census.head()
# Column Reordering
df_census = df_census.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "ZIP Code"})

# Add in Poverty Rate (Poverty Count / Population)
df_census["Poverty Rate"] = 100 * \
    df_census["Poverty Count"].astype(
        int) / df_census["Population"].astype(int)

# Final DataFrame
df_census = df_census[["ZIP Code", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count","Poverty Rate"]]

df_census['ZIP Code'] = pd.to_numeric(df_census['ZIP Code'])
df_census.head()

Unnamed: 0,ZIP Code,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,63101,2613.0,30.4,54417.0,42701.0,610.0,23.344814
0,63102,2216.0,33.6,54018.0,32318.0,142.0,6.407942
0,63103,6824.0,30.4,34719.0,30274.0,1340.0,19.636577
0,63104,19520.0,31.2,45498.0,29780.0,5417.0,27.751025
0,63105,17361.0,33.8,86031.0,57408.0,1206.0,6.946604


In [53]:
df_merge = pd.merge(df_census, df_small, on=('ZIP Code'))
df_merge
df_merge.dtypes

ZIP Code               int64
Population           float64
Median Age           float64
Household Income     float64
Per Capita Income    float64
Poverty Count        float64
Poverty Rate         float64
County                object
Latitude             float64
Longitude            float64
dtype: object

In [5]:
#New Dependencies

import json
import pprint
# import requests
# import sys
import urllib
from urllib.parse import quote
    
from config import yelp_key

In [6]:
API_KEY=yelp_key

API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
MATCH_PATH = '/v3/businesses/matches'
SEARCH_PHONE_PATH = '/v3/businesses/search/phone'
BUSINESS_PATH = '/v3/businesses/'  # Business ID will come after slash.

# Defaults for our simple example.
# DEFAULT_TERM = 'dinner'
# DEFAULT_LOCATION = 'San Francisco, CA'
# SEARCH_LIMIT = 5

In [7]:
def request(host, path, api_key, url_params=None):
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }
#     print(u'Querying {0} ...'.format(url))
#     print(headers)

    response = requests.request('GET', url, headers=headers, params=url_params)
#     my_url=url
    return response.json() #, my_url

In [84]:
df_merge.dtypes
# df_merge['Latitude'] = df_merge['Latitude'].astype(str)
# df_merge['Longitude'] = df_merge['Longitude'].astype(str)
# df_merge.dtypes

df_businesses = pd.DataFrame()
df_merge.dtypes

ZIP Code               int64
Population           float64
Median Age           float64
Household Income     float64
Per Capita Income    float64
Poverty Count        float64
Poverty Rate         float64
County                object
Latitude             float64
Longitude            float64
dtype: object

In [105]:
len(df_small)
for index, row in df_small.iterrows():
        row_zip = row[0]
        row_county = row[1]
        row_lat = row[2]
        row_long = row[3]
        print(index, row_zip, row_lat, row_long)

0 63101 38.631551 -90.193
1 63102 38.6352 -90.18701999999999
2 63103 38.631451 -90.21415
3 63104 38.610701 -90.21361999999999
4 63105 38.645484 -90.32888


In [111]:
df_bus_total = pd.DataFrame()
prices = []
dict_total = {}

for index, row in df_merge.iterrows():
    row_zip = row[0]
    row_long = row[9]
    row_lat = row[8]
    print(index, row_zip, row_lat, row_long)
#     print(index, row[0])
#     url_params={"city":"St. Louis County", 'zip_code':'63131','latitude':'38.618582','longitude':'-90.43643'}
    url_params={"city":row_lat, 'zip_code':row_zip,'latitude':row_lat,'longitude':row_long}
    dict_businesses = request(API_HOST, SEARCH_PATH, API_KEY, url_params)

    df_businesses['business_id']=[business["id"] for business in dict_businesses['businesses']]
    df_businesses['name']=[business["name"]for business in dict_businesses['businesses']]
    df_businesses['rating']=[business["rating"]for business in dict_businesses['businesses']]
    df_businesses['zip_code']=[business['location']["zip_code"]for business in dict_businesses['businesses']]
    df_businesses['review_count']=[business["review_count"]for business in dict_businesses['businesses']]
#     try:
#         df_businesses['price']=[business["price"]for business in dict_businesses['businesses']]
#     except:
#         df_businesses['price']='not found'
    dict_total.update(dict_businesses)


    df_bus_total = df_bus_total.append(df_businesses)
# df_businesses
# pprint.pprint(dict_businesses)

0 63101 38.631551 -90.193
1 63102 38.6352 -90.18701999999999
2 63103 38.631451 -90.21415
3 63104 38.610701 -90.21361999999999
4 63105 38.645484 -90.32888


In [112]:
for b in dict_total['businesses']:
    try:
        my_price = b['price']
        prices.append(my_price)
    except:
        prices.append('no price')
# df_businesses['price'] = prices
# df_businesses

df_businesses['price'] = prices
df_bus_total.head(50)

Unnamed: 0,business_id,name,rating,zip_code,review_count,price
0,8YDJraW_cg5IPTPisPfB-A,City Museum,4.5,63103,1244,no price
1,iRIHK8-EwpeffwvoO4nzIA,Broadway Oyster Bar,4.5,63102,1736,$$
2,Mr7Aov2n7wPCpwaUxk8lCw,Mango,4.0,63101,901,$$
3,2BMk_drsikKWslJCXmQtjQ,Rooster,4.0,63101,1754,$$
4,WCdSajl5Q0qywpv7K5jHdQ,Sugarfire Smoke House,4.5,63101,881,$$
5,NHb6QTrJOnyIj-Zd0fYzFg,Bogart's Smokehouse,4.5,63104,1348,$$
6,4r3Ck65DCG1T6gpWodPyrg,Bridge Tap House and Wine Bar,4.0,63101,697,$$
7,P2XJbQZmf1zvWp9L_THdjQ,Sauce on the Side,4.5,63101,387,$$
8,M0r9lUn2gLFYgIwIfG8-bQ,Baileys' Range,3.5,63101,1443,$$
9,QV7QOLww8ym3E2zBgdE2Ow,Medina Mediterranean Grill,4.5,63103,266,$


In [116]:
df_bus_total = df_bus_total.sort_values('business_id')
df_bus_total = df_bus_total.reset_index(drop=True)
len(df_bus_total)

100