In [1]:
from bs4 import BeautifulSoup as bbs
import requests as rq
import pandas as pd

import os
import sys

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
def web_driver(baseurl):
    """Start a Firefox WebDriver session and navigate it to ``baseurl``.

    Args:
        baseurl: URL to load once the browser has started.

    Returns:
        The ``webdriver.Firefox`` instance after ``driver.get(baseurl)`` has
        returned.

    Raises:
        Whatever ``webdriver.Firefox`` or ``driver.get`` raise. If navigation
        fails, the browser session is shut down before the error is re-raised
        so that no orphaned browser is left behind.
    """
    if 'linux' in sys.platform.lower():
        driver = webdriver.Firefox()
    else:
        # NOTE(review): the meaning of this positional argument depends on the
        # installed Selenium version (it is not an ``executable_path`` keyword).
        # Confirm that ``<cwd>/driver`` is what the constructor expects here.
        path = os.path.join(os.getcwd(), 'driver')
        driver = webdriver.Firefox(path)

    # The previous version built a WebDriverWait here but never used it, so no
    # explicit wait was ever applied; the dead local has been removed.
    try:
        driver.get(baseurl)
    except Exception:
        # Do not leak the browser if the initial page load fails.
        driver.quit()
        raise
    return driver


def extract_data(driver):
    """Read the currently displayed store's name and address from the page.

    The page source of ``driver`` is parsed with BeautifulSoup and the first
    ``div.c-store-box`` element is inspected.

    Args:
        driver: Object exposing a ``page_source`` attribute containing HTML.

    Returns:
        A two-item list ``[name, address]``. Each item is the text of the
        matching element (``h3.c-store-box__name`` / ``p.c-store-box__content``)
        or ``None`` when that element is missing. ``[None, None]`` is returned
        when the page has no ``div.c-store-box`` at all.
    """
    soup = bbs(driver.page_source, 'html.parser')
    store_box = soup.find('div', {'class': 'c-store-box'})
    if store_box is None:
        # ``find`` returns None when nothing matches; previously this fell
        # through to ``None.find(...)`` and raised AttributeError.
        return [None, None]

    name = store_box.find('h3', {'class': 'c-store-box__name'})
    address = store_box.find('p', {'class': 'c-store-box__content'})
    if name:
        name = name.text
    if address:
        address = address.text

    return [name, address]

In [3]:
def country_filter(driver, country='United States'):
    """Return the first ``.js-country`` element whose text equals ``country``.

    Args:
        driver: Object exposing ``find_elements(by, value)``.
        country: Exact text to look for (no stripping or case folding).

    Returns:
        The first matching element, or ``None`` when no element matches.
    """
    candidates = driver.find_elements('css selector', ".js-country")
    return next(
        (entry for entry in candidates if entry.text == country),
        None,
    )


def city_list(driver):
    """Collect every ``.js-city`` element that has non-blank text.

    Args:
        driver: Object exposing ``find_elements(by, value)``.

    Returns:
        A list of the matching elements, in the order the driver returned
        them, skipping those whose text is empty or whitespace only.
    """
    return [
        element
        for element in driver.find_elements('css selector', ".js-city")
        if element.text.strip()
    ]


def store_list(driver):
    """Collect every ``.js-store`` element that has non-blank text.

    Args:
        driver: Object exposing ``find_elements(by, value)``.

    Returns:
        A list of the matching elements, in the order the driver returned
        them, skipping those whose text is empty or whitespace only.
    """
    return [
        element
        for element in driver.find_elements('css selector', ".js-store")
        if element.text.strip()
    ]


def full_pipeline(driver, country_name='United States'):
    """Walk the location menu for one country and collect every store.

    The function opens the ``.c-location__hamburger-wrapper`` menu, clicks the
    country entry returned by ``country_filter``, then clicks through each city
    entry and each store entry beneath it, calling ``extract_data`` after every
    store click.

    Args:
        driver: WebDriver already showing the store-locator page.
        country_name: Text of the country entry to open.

    Returns:
        A dict mapping each city entry's text to a list of ``[name, address]``
        lists, one per store. Cities for which no store entry was found do not
        appear in the dict.

    Raises:
        AttributeError: if ``country_filter`` finds no matching entry (it then
            returns ``None`` and ``.click()`` fails).
        Any exception raised by the driver or by ``extract_data``; nothing is
        swallowed.
    """
    hamburger_menu = driver.find_element(
        'css selector', ".c-location__hamburger-wrapper")
    hamburger_menu.click()
    country_data = country_filter(driver, country_name)
    country_data.click()

    all_data = {}
    my_cities = city_list(driver)
    length_city = len(my_cities)
    for i in range(length_city):
        current_city = my_cities[i].text
        my_cities[i].click()
        my_stores = store_list(driver)
        length_store = len(my_stores)
        for j in range(length_store):
            my_stores[j].click()
            # setdefault replaces the former bare ``except:`` used to detect a
            # missing key; that handler also hid unrelated failures and
            # silently re-ran extract_data.
            all_data.setdefault(current_city, []).append(extract_data(driver))
            # Step back, reopen the same city and re-query its store entries
            # before the next iteration (presumably the earlier references are
            # no longer usable after navigating -- confirm against the site).
            driver.find_element('css selector', ".js-location-arrow").click()
            my_cities[i].click()
            my_stores = store_list(driver)
        # Leave the city's store list and re-query the city entries.
        driver.find_element('css selector', ".js-location-arrow").click()
        my_cities = city_list(driver)

    # Step back once more and toggle the hamburger menu again.
    driver.find_element('css selector', ".js-location-arrow").click()
    driver.find_element('css selector', ".c-location__hamburger-wrapper").click()
    return all_data

In [4]:
# Store-locator page to scrape; web_driver() starts Firefox and loads it.
baseurl = 'https://marouchocolate.com/where-to-buy/'
driver = web_driver(baseurl)

In [5]:
# Drive the browser through every city/store entry listed under 'United States'.
# Result: {city entry text: [[name, address], ...]}.
all_data = full_pipeline(driver,'United States')

In [6]:
# quit() ends the whole WebDriver session (all windows plus the driver
# process); close() only closes the current window and can leave the
# driver process running.
driver.quit()

In [7]:
# Flatten {city: [[name, address], ...]} into rows of [city, name, address],
# keeping the dict's iteration order.
data = [
    [city] + store
    for city, stores in all_data.items()
    for store in stores
]

In [8]:
# Tabulate the flattened rows; the column order mirrors each row's
# [city, name, address] layout.
df = pd.DataFrame(
    data=data,
    columns=['City', 'Name', 'Address'],
)

In [9]:
# Show the assembled store table as this cell's output.
df

Unnamed: 0,City,Name,Address
0,Alabama,Honeycreeper Chocolate,"1029 Glen View Rd, Birmingham, AL 35222, USA"
1,Alaska,Summit Spice & Tea Company,"3131 Denali St, Anchorage, AK 99503, USA"
2,Alaska,Non-Essentials,"17869 Christian Ridge Rd, Palmer, AK 99645, USA"
3,Arizona,The Loft Cinema,"3233 E Speedway Blvd, Tucson, AZ 85716, USA"
4,Arizona,Bright Side Bookshop,"18 N San Francisco St, Flagstaff, AZ 86001, USA"
...,...,...,...
314,Wisconsin,Madison Chocolate Company,"729 Glenway St, Madison, WI 53711, USA"
315,Wisconsin,Green Road Pottery,"1910 Green Rd, Stoughton, WI 53589, USA"
316,Wisconsin,Chocolate Sommelier,"829 E Brady St, Milwaukee, WI 53202, USA"
317,Wyoming,Moose Enterprise,"12170 Dornans Rd, Moose, WY 83012, USA"
