# Web Scraping and GIS

Scrape data from websites and create choropleth (heat) maps. Example code from RitVikMath (https://github.com/ritvikmath/StarbucksStoreScraping) scrapes information about Starbucks locations in Los Angeles.

## Web Scraping

In [19]:
!pip -q install folium
!pip -q install pandas
!pip -q install shapely

import requests
import re
import pandas as pd
import folium
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import json

In [20]:
# zipcodes in LA, one per line. The context manager closes the file handle,
# and blank lines are dropped so no empty zip ends up in the list.
with open('Data/laZips.txt', 'r') as f:
    laZips = [line.strip() for line in f if line.strip()]

In [21]:
# Patterns are compiled once at definition time rather than on every call.
# One store snippet runs from its "storeNumber" key up to the next "slug" key.
_STORE_BLOCK_RE = re.compile(r'"storeNumber":.*?"slug"')
_STORE_FIELDS_RE = re.compile(
    r'"storeNumber":"(.*?)".*?"name":"(.*?)".*?"latitude":(.*?),'
    r'.*?"longitude":(.*?)}.*?"city":"(.*?)"'
    r'.*?"countrySubdivisionCode":"(.*?)".*?"postalCode":"(.*?)"'
)


def processResponse(r):
    """Extract store records from the text of a store-locator response.

    Args:
        r: Response body as a string.

    Returns:
        A list containing one 7-item list of strings per store, ordered as
        [storeNumber, name, latitude, longitude, city,
        countrySubdivisionCode, postalCode]. Snippets that are missing any
        of these fields are skipped instead of raising IndexError.
    """
    storeInfo = []
    for store in _STORE_BLOCK_RE.findall(r):
        #parse out info about each store
        fields = _STORE_FIELDS_RE.search(store)
        if fields is None:
            # incomplete snippet: nothing usable to record
            continue
        storeInfo.append(list(fields.groups()))
    return storeInfo

In [22]:
# Query the store locator once per zip code and collect every parsed store.
# Neighbouring zips can return the same store, so allStores may hold duplicates.
allStores = []
for idx, z in enumerate(laZips):
    if idx % 10 == 0:
        # progress indicator every 10 zips
        print(idx, '/', len(laZips))
    #search for 100 stores centered in given zip
    r = requests.get(
        'https://www.starbucks.com/store-locator?map=34.216464,-118.656046,11z&place=' + z,
        timeout=30,  # without a timeout a stalled connection would hang the loop forever
    )
    if r.status_code != 200:
        # abort the run, but say which request failed and how
        raise SystemExit('store-locator request for zip {} returned HTTP {}'.format(z, r.status_code))
    storeInfoList = processResponse(r.text)
    for storeInfo in storeInfoList:
        # keep only the first 5 characters of the postal code
        storeInfo[6] = storeInfo[6][:5]
    allStores += storeInfoList

0 / 322
10 / 322
20 / 322
30 / 322
40 / 322
50 / 322
60 / 322
70 / 322
80 / 322
90 / 322
100 / 322
110 / 322
120 / 322
130 / 322
140 / 322
150 / 322
160 / 322
170 / 322
180 / 322
190 / 322
200 / 322
210 / 322
220 / 322
230 / 322
240 / 322
250 / 322
260 / 322
270 / 322
280 / 322
290 / 322
300 / 322
310 / 322
320 / 322


In [23]:
#construct non-duplicated list of stores, keeping the first occurrence of
#each store number; a set gives O(1) membership checks instead of scanning a list
seenStoreIds = set()
laStores = []
for store in allStores:
    if store[0] not in seenStoreIds:
        seenStoreIds.add(store[0])
        laStores.append(store)

In [24]:
#load the LA GeoJSON and turn its boundary into a shapely polygon
with open('Data/laMap.json') as f:
    laArea = json.load(f)
#coordinates[0][0] of the first feature is used as the polygon's point list
#(presumably the outer ring of a MultiPolygon -- confirm against the file)
firstFeature = laArea['features'][0]
boundaryRing = firstFeature['geometry']['coordinates'][0][0]
laPolygon = Polygon(boundaryRing)

In [25]:
#keep store if and only if it is within the LA polygon
#(the point is built as (longitude, latitude), i.e. store[3] then store[2])
keepLAStores = [
    store
    for store in laStores
    if laPolygon.contains(Point(float(store[3]), float(store[2])))
]

In [26]:
# store counts before and after the polygon filter
len(laStores), len(keepLAStores)

(930, 750)

In [27]:
# one row per kept store; column names follow the field order of each store record
sbuxColumns = ['id', 'strLocation', 'latitude', 'longitude', 'city', 'state', 'zip']
dfSbux = pd.DataFrame(keepLAStores, columns=sbuxColumns)

In [28]:
# the scraped coordinates are strings; convert both columns to float
for coordCol in ('latitude', 'longitude'):
    dfSbux[coordCol] = dfSbux[coordCol].apply(float)

In [29]:
# base map centered at (34.0522, -118.2437) with one circle marker per store
laMap = folium.Map(location=[34.0522,-118.2437], tiles='Stamen Toner', zoom_start=10)
for lat, lon in zip(dfSbux.latitude, dfSbux.longitude):
    marker = folium.CircleMarker((lat, lon), radius=3, weight=2, fill_color='red', fill_opacity=.9)
    marker.add_to(laMap)

In [30]:
# overlay the loaded LA GeoJSON on the map
folium.GeoJson(laArea).add_to(laMap)

<folium.features.GeoJson at 0x7ff8e9311b50>

In [31]:
# last expression in the cell: shows the map as the cell output
laMap

## Heat Map

In [32]:
import folium
import pandas as pd
import json
from folium import plugins

In [33]:
# load the store table; its latitude/longitude columns are plotted below
df = pd.read_csv('Data/starbucksInLACounty.csv')

In [34]:
# load the LA GeoJSON used for the map overlay
# NOTE(review): the Web Scraping section reads 'Data/laMap.json' while this
# cell reads 'Data/laMap.geojson' -- confirm both files exist or unify the path
with open('Data/laMap.geojson') as f:
    laArea = json.load(f)

In [35]:
# base map with the LA GeoJSON overlay
laMap = folium.Map(location=[34.0522,-118.2437], tiles='Stamen Toner', zoom_start=9)

folium.GeoJson(laArea).add_to(laMap)

# one semi-transparent red marker per store
for lat, lon in zip(df.latitude, df.longitude):
    folium.CircleMarker((lat, lon), radius=3, weight=2, color='red', fill_color='red', fill_opacity=.5).add_to(laMap)

# heat layer over the same points. DataFrame.as_matrix() was removed in
# pandas 1.0 and add_children() is a deprecated alias of add_child(), so the
# coordinates are taken via .values and the layer is attached with add_child().
laMap.add_child(plugins.HeatMap(df[['latitude', 'longitude']].values.tolist(), radius=25))


  
  


In [36]:
# last expression in the cell: shows the map with the heat layer as the cell output
laMap