## Data

Based on the definition of our problem, the factors that will influence our decision are:
* number of existing restaurants in the neighborhood (any type of restaurant)
* number of and distance to Cajun restaurants in the neighborhood, if any
* distance of neighborhood from city center

We decided to use a regularly spaced grid of locations, centered on the city center, to define our neighborhoods.

The following data sources will be needed to extract/generate the required information:
* centers of candidate areas will be generated algorithmically and approximate addresses of centers of those areas will be obtained using **Google Maps API reverse geocoding**
* number of restaurants and their type and location in every neighborhood will be obtained using **Foursquare API**
* the coordinates of the Houston city center will be obtained using **Google Maps API geocoding**

In [5]:
!pip install folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 5.6 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [6]:
from bs4 import BeautifulSoup
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Turn off the chained-assignment warning for the whole notebook.
pd.options.mode.chained_assignment = None  # default='warn'

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this as `pandas.json_normalize`;
# confirm this import path still works with the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# `requests` is already imported above; this repeat has no additional effect.
import requests
from pandas import DataFrame

# Confirmation message shown once every import above has succeeded.
print('Libraries imported.')

Libraries imported.


In [7]:
# Download every HTML table found on the ZIP-code listing page (FIPS=48201).
url = 'https://www.ciclt.net/sn/clt/capitolimpact/gw_ziplist.aspx?FIPS=48201'
tbl = pd.read_html(url)

# The tables at positions 2 through 6 are the ones stacked into the data set.
tbl1, tbl2, tbl3, tbl4, tbl5 = (tbl[position] for position in range(2, 7))

# Stack the five tables and renumber the rows 0..n-1.
data = pd.concat([tbl1, tbl2, tbl3, tbl4, tbl5]).reset_index(drop=True)
data.shape

(248, 3)

In [18]:
# Rename 'Zip Code' to 'Zip', keep the first row per ZIP code, and store the
# ZIP codes as strings -- written as one chain that yields a new frame.
data = (
    data
    .rename(columns={'Zip Code': 'Zip'})
    .drop_duplicates(subset=['Zip'])
    .assign(Zip=lambda frame: frame['Zip'].astype(str))
)
data

Unnamed: 0,Zip,City,County
0,77001,Houston,Harris County
1,77002,Houston,Harris County
2,77003,Houston,Harris County
3,77004,Houston,Harris County
4,77005,Houston,Harris County
6,77006,Houston,Harris County
7,77007,Houston,Harris County
8,77008,Houston,Harris County
9,77009,Houston,Harris County
10,77010,Houston,Harris County


In [9]:
!pip install geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="houston_explorer")
from geopy.extra.rate_limiter import RateLimiter

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [46]:
# Wrap the geocoder so consecutive look-ups are at least one second apart.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Build one free-text query per row: "<City>, <County>, <Zip>".
data['Full_Address'] = data['City'] + ', ' + data['County'] + ', ' + data['Zip']

# Geocode every query once, then pull the coordinates out of each result.
# A look-up without a result leaves None in both coordinate columns.
# NOTE(review): the table displayed after this step shows many different ZIP
# codes with identical coordinates (29.758938, -95.367697), so this query may
# be resolving to the city rather than to the ZIP code -- confirm.
geocoded = data['Full_Address'].apply(geocode)
data['location'] = geocoded
data['Latitude'] = geocoded.apply(lambda hit: hit.latitude if hit else None)
data['Longitude'] = geocoded.apply(lambda hit: hit.longitude if hit else None)

In [53]:
# Remove the geocoding helper columns, then drop every row that is still
# missing a value (e.g. ZIP codes the geocoder could not resolve).
# The output recorded for this cell has no 'Full_Address'/'location' columns,
# so the drop had been executed before it was commented out; it is restored
# here so a fresh "Restart & Run All" reproduces that result.
# errors='ignore' keeps the cell safe to re-run after the columns are gone.
data = data.drop(columns=['Full_Address', 'location'], errors='ignore').dropna()
data.reset_index()

Unnamed: 0,index,Zip,City,County,Latitude,Longitude
0,0,77001,Houston,Harris County,29.758938,-95.367697
1,1,77002,Houston,Harris County,29.758938,-95.367697
2,2,77003,Houston,Harris County,29.758938,-95.367697
3,3,77004,Houston,Harris County,29.758938,-95.367697
4,4,77005,Houston,Harris County,29.758938,-95.367697
5,6,77006,Houston,Harris County,29.758938,-95.367697
6,7,77007,Houston,Harris County,29.772412,-95.399544
7,8,77008,Houston,Harris County,29.79927,-95.414815
8,9,77009,Houston,Harris County,29.794114,-95.370413
9,10,77010,Houston,Harris County,29.758938,-95.367697


In [56]:
# Extract the coordinate pairs as a plain list of [latitude, longitude] lists.
coordinate_columns = ['Latitude', 'Longitude']
locations = data[coordinate_columns]
locationlist = locations.values.tolist()

# Number of coordinate pairs that will be used.
len(locationlist)

188

In [75]:
# Base map for the analysis, centred on latitude 29.7 / longitude -95.3.
map_center = [29.7, -95.3]
houston = folium.Map(location=map_center, zoom_start=11)
houston

In [76]:
import os
from getpass import getpass

# Foursquare credentials are secrets and must not be stored in the notebook.
# They are read from the environment (FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET); when a variable is unset or empty the user is
# prompted instead, and the typed value is not echoed.
# NOTE(review): the key pair that was previously hard-coded here has been
# exposed and should be revoked / rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

## Call API

In [77]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Collect the venues Foursquare reports around each given location.

    Parameters
    ----------
    names : iterable
        Label of each location (stored in the 'Neighborhood' column).
    latitudes, longitudes : iterable
        Coordinates of each location, aligned with ``names``.
    radius : int, optional
        Search radius passed to the API as ``radius`` (default 500).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per (location, venue) pair with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        is empty, but still has these columns, when no venue was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    # Several locations can share identical coordinates; remember the answer
    # per coordinate pair so the same point is requested only once.
    items_by_point = {}
    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        point = (lat, lng)
        if point not in items_by_point:
            # Let requests build and encode the query string.
            query = {
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            }
            response = requests.get(endpoint, params=query, timeout=timeout)
            # Fail with the HTTP error itself rather than a later KeyError.
            response.raise_for_status()

            # A reply without any group simply contributes no venues.
            groups = response.json().get('response', {}).get('groups') or []
            items_by_point[point] = groups[0].get('items', []) if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items_by_point[point]:
            venue = item['venue']
            # Some venues carry no category; record None instead of failing.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the columns explicitly keeps the schema even with zero rows.
    return pd.DataFrame(rows, columns=columns)

## Return All Venues

In [78]:
# Query the venues around every geocoded ZIP code, labelling each result row
# with the ZIP code it was found for.
houston_venues = getNearbyVenues(data['Zip'], data['Latitude'], data['Longitude'])

77001
77002
77003
77004
77005
77006
77007
77008
77009
77010
77011
77012
77013
77014
77015
77016
77017
77018
77019
77020
77021
77022
77023
77024
77025
77026
77027
77028
77029
77030
77031
77032
77033
77034
77035
77036
77037
77038
77039
77040
77041
77042
77043
77044
77045
77046
77047
77048
77049
77050
77051
77054
77055
77056
77057
77058
77059
77060
77061
77062
77063
77064
77065
77066
77067
77068
77069
77070
77071
77072
77073
77074
77075
77076
77077
77078
77079
77080
77081
77082
77083
77084
77085
77086
77087
77088
77089
77090
77091
77092
77093
77094
77095
77096
77098
77099
77201
77202
77203
77205
77207
77210
77213
77215
77217
77219
77220
77221
77222
77223
77225
77227
77228
77229
77231
77233
77235
77236
77237
77238
77240
77241
77243
77245
77248
77249
77251
77253
77255
77256
77257
77258
77259
77261
77263
77265
77279
77282
77287
77315
77325
77336
77337
77338
77339
77345
77346
77347
77373
77375
77377
77379
77383
77388
77389
77391
77396
77401
77402
77410
77411
77413
77429
77433
77447
77449
7745

## Display All Venues

In [79]:
# Preview the first rows of the collected venue table.
houston_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,77001,29.758938,-95.367697,Mango Tree Thai Bistro,29.758251,-95.365387,Thai Restaurant
1,77001,29.758938,-95.367697,Jason's Deli,29.757464,-95.365543,Food Truck
2,77001,29.758938,-95.367697,City Hall Farmer's Market,29.759164,-95.368446,Farmers Market
3,77001,29.758938,-95.367697,5411 Empanadas,29.758986,-95.36859,Empanada Restaurant
4,77001,29.758938,-95.367697,Becks Prime,29.758185,-95.366172,Burger Joint


In [82]:
# (rows, columns) of the venue table: one row per venue returned for a location.
houston_venues.shape

(3812, 7)