In [1]:
# Dependencies
import json
import time
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from prettyprinter import pprint
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from webdriver_manager.chrome import ChromeDriverManager

# Google developer API key
from config import gkey

In [2]:
# Resolve the ChromeDriver binary via webdriver_manager and open a
# non-headless Chrome session that the scraping cells below drive.
# NOTE(review): nothing in this notebook closes the session; consider calling
# browser.quit() once scraping is finished.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389


 


[WDM] - Trying to download new driver from http://chromedriver.storage.googleapis.com/89.0.4389.23/chromedriver_win32.zip
[WDM] - Driver has been saved in cache [C:\Users\brook\.wdm\drivers\chromedriver\win32\89.0.4389.23]


### Generate Dataframe / URL List for All Cities Where Giant Food Operates Grocery Stores

In [3]:
# Lower-case state codes; each one is appended to the store-locator base URL.
store_states = [
    'dc',
    'de',
    'md',
    'va',
]

In [4]:
# Visit each state's directory page and collect one {'city', 'state'} record
# for every city link listed on it.
cities_list = []

for store_state in store_states:

    url_state = 'https://stores.giantfood.com/' + store_state
    browser.visit(url_state)

    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # Every <li class="DirectoryList-item"> is treated as one city entry.
    for store_city in soup.find_all('li', class_="DirectoryList-item"):
        try:
            city_link = store_city.find('a', class_="DirectoryList-itemLink Link--secondary")
            cities_list.append({'city': city_link.text, 'state': store_state})
        except Exception as e:
            # Best-effort scrape: report the entry that could not be parsed
            # (e.g. no matching link) and continue with the next one.
            print(e)

In [5]:
# Tabulate the scraped cities and derive each city's page URL:
# base URL + state code + lower-cased city name with spaces turned into hyphens.
cities_df = pd.DataFrame.from_dict(cities_list).assign(
    url=lambda frame: (
        'https://stores.giantfood.com/'
        + frame['state']
        + '/'
        + frame['city'].str.lower().str.replace(' ', '-')
    )
)
cities_df.head()

Unnamed: 0,city,state,url
0,Washington,dc,https://stores.giantfood.com/dc/washington
1,Bear,de,https://stores.giantfood.com/de/bear
2,Middletown,de,https://stores.giantfood.com/de/middletown
3,Millsboro,de,https://stores.giantfood.com/de/millsboro
4,Millville,de,https://stores.giantfood.com/de/millville


### Generate Dataframe / Addresses List for All Giant Food Grocery Stores

In [6]:
# Series of per-city page URLs to visit in the next cell.
city_urls = cities_df['url']

In [7]:
# Visit every city page and collect one address record per store listed on it.
stores_list = []

for city_url in city_urls:

    browser.visit(city_url)

    # Fixed one-second pause after each page load (presumably to let the page
    # finish rendering and to avoid hammering the site).
    time.sleep(1)

    soup = BeautifulSoup(browser.html, 'html.parser')

    # NOTE(review): class_ is given the full multi-class string, so only <li>
    # elements whose class attribute matches it exactly are picked up.
    for store in soup.find_all('li', class_="LocationList-item l-col-xs-12 l-col-sm-6 l-col-md-3-up"):
        try:
            # The dict is built in full before appending, so a store with any
            # missing address part is skipped entirely.
            stores_list.append({
                'street_address': store.find('span', class_='c-address-street-1').text,
                'city': store.find('span', class_='c-address-city').text,
                'state': store.find('abbr', class_='c-address-state').text,
                'zip_code': store.find('span', class_='c-address-postal-code').text,
            })
        except Exception as e:
            # Best-effort scrape: report the problem and continue.
            print(e)

In [8]:
# One row per scraped store: street_address, city, state, zip_code.
stores_df = pd.DataFrame(stores_list)
stores_df

Unnamed: 0,street_address,city,state,zip_code
0,1050 Brentwood Road,Washington,DC,20018
1,"1345 Park Road, NW",Washington,DC,20010
2,1400 7th Street NW,Washington,DC,20001
3,1535 Alabama Avenue SE,Washington,DC,20032
4,300 H Street NE,Washington,DC,20002
...,...,...,...,...
159,317 Worth Avenue,Stafford,VA,22554
160,21000 Southbank Street,Sterling,VA,20165
161,21800 Town Center Plaza,Sterling,VA,20164
162,359 Maple Avenue East,Vienna,VA,22180


### Generate Dataframe / Addresses / Geocoding API Request URL List for All Giant Food Grocery Stores

#### Example from Google API documentation

https://maps.googleapis.com/maps/api/geocode/json?address=1600+Amphitheatre+Parkway,+Mountain+View,+CA&key=YOUR_API_KEY

In [9]:
# Build one Google Geocoding API request URL per store from
# "<street>,<city>,<state>".
# The address is percent-encoded with quote_plus so that characters with a
# special meaning in a URL ('#', '&', '?', '%', ...) cannot truncate or corrupt
# the query string. Spaces still become '+' and commas are kept literal, so
# ordinary addresses produce exactly the same URL as plain space replacement.
# NOTE: the resulting column embeds the API key; avoid displaying it in full
# or writing it to disk.
stores_df['geocode_url'] = (
    'https://maps.googleapis.com/maps/api/geocode/json?address='
    + (
        stores_df['street_address'] + ',' + stores_df['city'] + ',' + stores_df['state']
    ).map(lambda address: quote_plus(address, safe=','), na_action='ignore')
    + '&key=' + gkey
)

In [10]:
# Preview the first rows of the store table.
stores_df.head()

Unnamed: 0,street_address,city,state,zip_code,geocode_url
0,1050 Brentwood Road,Washington,DC,20018,https://maps.googleapis.com/maps/api/geocode/j...
1,"1345 Park Road, NW",Washington,DC,20010,https://maps.googleapis.com/maps/api/geocode/j...
2,1400 7th Street NW,Washington,DC,20001,https://maps.googleapis.com/maps/api/geocode/j...
3,1535 Alabama Avenue SE,Washington,DC,20032,https://maps.googleapis.com/maps/api/geocode/j...
4,300 H Street NE,Washington,DC,20002,https://maps.googleapis.com/maps/api/geocode/j...


### Generate Dataframe / Addresses / Geocoordinates List for All Giant Food Grocery Stores

In [11]:
# Series of Geocoding API request URLs, one per store row.
geo_urls = stores_df.loc[:, 'geocode_url']

In [12]:
# Geocode every store: load its request URL in the browser, read the JSON text
# from the page's first <pre> element and keep the first result's
# {'lat': ..., 'lng': ...} location.
#
# Exactly one entry is appended per URL. When a lookup fails for any reason
# (page error, no <pre>, invalid JSON, empty 'results'), a placeholder with
# None coordinates is stored instead, so the list stays the same length and in
# the same order as geo_urls and coordinates cannot end up attached to the
# wrong store.
geocoordinates_list = []

for position, geo_url in enumerate(geo_urls):

    # Default used when this lookup does not produce a location.
    store_geocoordinates_dict = {'lat': None, 'lng': None}

    try:
        browser.visit(geo_url)

        # Fixed one-second pause after each request.
        time.sleep(1)

        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')

        geo_superset = json.loads(soup.find("pre").contents[0])
        store_geocoordinates_dict = geo_superset['results'][0]['geometry']['location']

    except Exception as e:
        # Best-effort: report which row failed (by position, not by URL, since
        # the URL contains the API key) and keep the placeholder.
        print('Geocoding failed for row {}: {}'.format(position, e))

    geocoordinates_list.append(store_geocoordinates_dict)

In [13]:
# Sanity check: display the container type of the geocoding results.
type(geocoordinates_list)

list

In [14]:
# Expand the list of location dicts into a frame (one column per dict key)
# and preview the first rows.
df = pd.DataFrame(data=geocoordinates_list)
df.head(n=5)

Unnamed: 0,lat,lng
0,38.919442,-76.992261
1,38.930742,-77.031897
2,38.908749,-77.022149
3,38.844913,-76.980958
4,38.90052,-77.001296


In [15]:
# Attach the geocoding results to the store table.
# The results are matched to stores purely by position, so refuse to continue
# when the counts differ instead of attaching coordinates to the wrong rows.
if len(geocoordinates_list) != len(stores_df):
    raise ValueError(
        'Expected {} geocoding results (one per store) but got {}'.format(
            len(stores_df), len(geocoordinates_list)
        )
    )

# The raw location dict is kept alongside the separate lat / lng columns.
stores_df['geocoordinates'] = geocoordinates_list
stores_df['lat'] = df['lat']
stores_df['lng'] = df['lng']

# Remove the request URLs (they contain the API key) before the table is saved.
# errors='ignore' keeps this cell re-runnable after the column is already gone.
stores_df = stores_df.drop(columns=['geocode_url'], errors='ignore')
stores_df.head()

Unnamed: 0,street_address,city,state,zip_code,geocoordinates,lat,lng
0,1050 Brentwood Road,Washington,DC,20018,"{'lat': 38.9194422, 'lng': -76.9922606}",38.919442,-76.992261
1,"1345 Park Road, NW",Washington,DC,20010,"{'lat': 38.9307421, 'lng': -77.0318972}",38.930742,-77.031897
2,1400 7th Street NW,Washington,DC,20001,"{'lat': 38.9087489, 'lng': -77.0221494}",38.908749,-77.022149
3,1535 Alabama Avenue SE,Washington,DC,20032,"{'lat': 38.8449126, 'lng': -76.98095769999999}",38.844913,-76.980958
4,300 H Street NE,Washington,DC,20002,"{'lat': 38.9005198, 'lng': -77.0012965}",38.90052,-77.001296


### Save Dataframe as both CSV and JSON

In [16]:
# Write the store table to CSV with a header row and without the index column.
stores_df.to_csv('giantfood_address_geos.csv', header=True, index=False)

In [17]:
# Write the store table to JSON as a list of row objects.
stores_df.to_json('giantfood_address_geos.json', orient='records')