In [1]:
import pandas as pd
import numpy as np
import time
from lxml import html
import os
import re
import yaml
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from sqlalchemy import create_engine, text
import pymysql

In [2]:
def launch_selenium(lang='fr'):
    """Start a maximised Chrome session with the project's extensions loaded.

    Parameters
    ----------
    lang : str
        'fr' adds ``--lang=fr-FR``, 'en' adds ``--lang=en``; any other value
        adds no language switch.

    Returns
    -------
    The Chrome WebDriver, focused on the single window left open.
    """
    chrome_options = Options()

    if lang == 'fr':
        chrome_options.add_argument("--lang=fr-FR")
    elif lang == 'en':
        chrome_options.add_argument("--lang=en")

    chrome_options.add_experimental_option(
        "prefs",
        {"translate": {"enabled": "true"}},
    )

    chrome_options.add_argument('--ignore-certificate-errors')
    chrome_options.add_argument("--start-maximized")
    # Incognito mode is deliberately not used: extensions cannot be used with it.
    for crx_path in ('extensions/J2TEAM_Cookies.crx', 'extensions/setupvpn.crx'):
        chrome_options.add_extension(crx_path)

    driver = webdriver.Chrome('driver/chromedriver_89.exe', options=chrome_options)

    # Close every window except one (presumably tabs opened by the extensions
    # at start-up), always closing the most recently listed handle.
    while len(driver.window_handles) > 1:
        driver.switch_to.window(driver.window_handles[-1])
        time.sleep(0.5)
        driver.close()
        time.sleep(0.5)
    driver.switch_to.window(driver.window_handles[-1])

    return driver

In [3]:
def get_page(driver):
    """Parse the driver's current page source into an lxml HTML tree and return it."""
    return html.fromstring(driver.page_source)


def get_items(driver, xpath):
    """Return the result of evaluating `xpath` on the driver's current page.

    The page source is re-parsed on every call (via get_page), so the result
    reflects the page as it is at call time.
    """
    tree = get_page(driver)
    return tree.xpath(xpath)

def get_value(item, config, elem):
    """Return the first match of the XPath stored under ``config[elem]``.

    Parameters
    ----------
    item : object exposing ``.xpath(expr)`` (an lxml element or tree)
    config : mapping of field name -> XPath expression
    elem : field name to look up in `config`

    Returns
    -------
    The first XPath result, passed through ``clean_text`` when the expression
    contains ``text()``. ``None`` when nothing matches, when `elem` is not in
    `config`, or when evaluating the expression fails.
    """
    try:
        xpath_elem = config[elem]

        matches = item.xpath(xpath_elem)
        if len(matches) == 0:
            return None
        v = matches[0]

        if 'text()' in xpath_elem:
            v = clean_text(v)

        return v
    except Exception:
        # Best-effort lookup: a bad key or XPath yields None. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate so a long
        # crawl can still be interrupted.
        return None

def format_url(domain, url):
    """Turn a root-relative URL into an absolute ``https://www.<domain>`` URL.

    Parameters
    ----------
    domain : str
        Bare domain, e.g. ``'amazon.com'``.
    url : str
        URL as found in the page.

    Returns
    -------
    - ``'https://www.' + domain + url`` when `url` starts with ``/``;
    - `url` unchanged when it is already absolute (http/https) or is any other
      non-empty string (previously these cases fell through and returned None);
    - ``np.nan`` when `url` is empty or not a string, or `domain` is not a string.
    """
    if not isinstance(url, str) or not url:
        return np.nan
    if url.startswith(('http://', 'https://')):
        return url
    if url.startswith('/'):
        try:
            return 'https://www.' + domain + url
        except TypeError:
            # Non-string domain: keep the original "unusable input -> NaN" contract.
            return np.nan
    return url
    
def clean_text(txt):
    """Strip surrounding whitespace from `txt` and remove every newline.

    Returns the cleaned string, or ``np.nan`` when `txt` is not text
    (e.g. None, a number or bytes).
    """
    try:
        return txt.strip().replace('\n', '')
    except (AttributeError, TypeError):
        # AttributeError: no .strip() (None, numbers); TypeError: bytes input.
        # Anything else (including KeyboardInterrupt) is no longer swallowed.
        return np.nan

In [4]:
# Lower-case keywords which, when found in a store name, mark the store as
# brand-operated (is_partner = 0). Built once instead of on every call.
_OFFICIAL_STORE_KEYWORDS = {
    'Audemars Piguet': ['ap house', 'audemars piguet'],
    'Ferrari': ['ferrari'],
    'Breguet': ['breguet boutique', 'breguet store', 'boutique breguet'],
    'Rolex': ['rolex boutique', 'boutique rolex', 'rolex shop'],
    'Tag Heuer': ['tag heuer'],
    'Zenith': ['zenith'],
    'Omega': ['omega'],
    'Tudor': ['boutique'],
    'Chopard': ['boutique'],
    'Bvlgari': ['boutique']
}


def extract_partner(x, brand):
    """Flag a store as partner (1) or brand-operated (0) from its name.

    Parameters
    ----------
    x : store name; any value is accepted and converted with ``str``.
    brand : key of the keyword table (e.g. 'Audemars Piguet').

    Returns
    -------
    0 when any of the brand's keywords occurs in the lower-cased name,
    otherwise 1.

    Raises
    ------
    KeyError
        If `brand` has no entry in the keyword table.
    """
    official_kw_list = _OFFICIAL_STORE_KEYWORDS[brand]
    # Collapse every whitespace run to ONE space. The previous code deleted
    # runs of 2+ whitespace characters, gluing words together so that
    # "AP  House" became "aphouse" and no longer matched 'ap house'.
    name = re.sub(r'\s+', ' ', str(x).lower())
    return 0 if any(kw in name for kw in official_kw_list) else 1

# COLLECT STORES

In [5]:
driver = launch_selenium('en')

In [45]:
# Load the scraping configuration. Later cells select one entry per brand
# (e.g. config_all['CHANEL']) and read its 'start_url' list and XPath strings.
with open('./config/config_store_locator.yaml', 'r', encoding='utf-8') as file:
    config_all = yaml.safe_load(file)

# CHANEL

In [24]:
config = config_all['CHANEL']

In [25]:
driver.get(config['start_url'][0])

In [26]:
rows_list = []

- Filter Chanel stores on Fashion only
- Zoom out on the map, but no further than '10z' (check the zoom level in the map URL): it is the widest zoom at which stores are still listed
- Search each city in turn, using the list of cities already scraped in the database

In [151]:
# Harvest the stores currently listed on the page and add them to rows_list.
# Boutiques and retailers use different item / store-name XPaths and a
# different is_partner flag, but share the address and city XPaths.
for label, items_key, name_key, partner_flag in (
    ('boutiques:', 'items', 'store_name', '0'),
    ('retailers:', 'items_r', 'store_name_r', '1'),
):
    found = get_items(driver, config[items_key])
    print(label, len(found))

    for item in found:
        address = get_value(item, config, 'address')
        store_name = get_value(item, config, name_key)
        city = get_value(item, config, 'city')
        rows_list.append({
            'store_name': store_name,
            'city': city,
            'address': address,
            'brand': 'Chanel',
            'is_partner': partner_flag
        })

# rows_list keeps growing across re-runs of this cell; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 0
retailers: 2
total stores: 298


In [152]:
df.head()

Unnamed: 0,store_name,city,address,brand,is_partner
0,CHANEL WESTFIELD SYDNEY,"2000, Sydney, New South Wales","Shop 4026/28, Level 4, Cnr Pitt St Mall and Ma...",Chanel,0
1,CHANEL DAVID JONES SHOES,"2000, Sydney, New South Wales",86-108 Castlereagh Street,Chanel,0
2,CHANEL SYDNEY,"2000, Sydney, New South Wales",70 Castlereagh Street,Chanel,0
3,CHANEL BONDI JUNCTION,"2022, Bondi Junction, New South Wales",500 Oxford Street,Chanel,0
8,CHANEL CHADSTONE,"3148, Chadstone, Victoria",1341 Dandenong Road,Chanel,0


In [153]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'CHANEL'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/CHANEL_092021.xlsx


# DIOR

In [154]:
config = config_all['DIOR']

In [155]:
driver.get(config['start_url'][0])

In [156]:
rows_list = []

- Filter Dior stores on Women's Fashion only (Men's Fashion should be included)
- Zoom out to continent and country level, and use the "Search in this area" button after each move
- Search Hawaii and Guam / Northern Mariana Islands manually

In [243]:
# Add every store listed on the current page to rows_list.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

for item in items:
    address = get_value(item, config, 'address')
    store_name = get_value(item, config, 'store_name')
    city = get_value(item, config, 'city')
    # 'city2' is an alternative XPath that takes precedence whenever it matches.
    city2 = get_value(item, config, 'city2')
    if city2 is not None:
        city = city2
    rows_list.append({
        'store_name': store_name,
        'city': city,
        'address': address,
        'brand': 'Dior',
        'is_partner': '0'
    })

# rows_list accumulates across re-runs of this cell; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 5
total stores: 258


In [244]:
df.head()

Unnamed: 0,store_name,city,address,brand,is_partner
0,dior - paris la samaritaine,75001 paris,9 rue de la monnaie,Dior,0
1,dior - paris le bon marche,75007 paris,5 rue de babylone,Dior,0
2,dior - paris saint-honoré,75001 paris,"261, rue saint-honoré",Dior,0
3,dior - paris galeries lafayette,75009 paris,"40, boulevard haussmann",Dior,0
4,dior - paris royale,75008 paris,25 rue royale,Dior,0


In [245]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'DIOR'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/DIOR_092021.xlsx


# HERMES

In [246]:
config = config_all['HERMES']

In [247]:
driver.get(config['start_url'][0])

In [248]:
rows_list = []

- In Map view, zoom out as far as possible and click "Find" without entering any search text, so that all stores are shown
- Then switch to List view and load all the data by clicking "See more" until no more results appear

In [249]:
# Turn every store entry on the fully loaded list view into one row.
items = get_items(driver, config["items"])

rows_list.extend(
    {
        'country': get_value(item, config, 'country'),
        'store_name': get_value(item, config, 'store_name'),
        'address_city': get_value(item, config, 'address_city'),
        'brand': 'Hermès',
        'is_partner': '0',
    }
    for item in items
)

# Re-running the cell appends the same rows again; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

total stores: 308


In [250]:
df.head()

Unnamed: 0,country,store_name,address_city,brand,is_partner
0,France,Hermès Aix en Provence,"26, place de Verdun, Aix-en-Provence, 13100,",Hermès,0
1,France,Hermès Biarritz,"19, Avenue Edouard VII, Biarritz, 64200,",Hermès,0
2,France,Hermès Bordeaux,"2, Place Gambetta, Bordeaux, 33000 ,",Hermès,0
3,France,Hermès Cannes,"52 boulevard de la Croisette, Cannes, 06400 ,",Hermès,0
4,France,Hermès Courchevel,"Le Coeur de Courchevel , 1850 Rue du Rocher, ...",Hermès,0


In [251]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'HERMES'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/HERMES_092021.xlsx


# LOUIS VUITTON

- Use the webscraper.io browser extension with the sitemap below
- To load all the data, zoom out as far as possible in the map view
- Set the request interval to 20000 (ms) so that the map has 20 seconds to refresh its stores

In [260]:
# webscraper.io sitemap for the Louis Vuitton store locator.
# Kept as a bare string literal for reference only: running the cell just
# echoes the text and nothing in this notebook reads it.
"""
{"_id":"LOUIS_VUITTON_102020","startUrl":["https://eu.louisvuitton.com/eng-e1/stores"],"selectors":[{"id":"elem","type":"SelectorElement","parentSelectors":["_root"],"selector":"li.storeItem","multiple":true,"delay":0},{"id":"store_name","type":"SelectorText","parentSelectors":["elem"],"selector":"h3","multiple":false,"regex":"","delay":0},{"id":"store_address","type":"SelectorText","parentSelectors":["elem"],"selector":"p","multiple":false,"regex":"","delay":0}]}
"""

'\n{"_id":"LOUIS_VUITTON_102020","startUrl":["https://eu.louisvuitton.com/eng-e1/stores"],"selectors":[{"id":"elem","type":"SelectorElement","parentSelectors":["_root"],"selector":"li.storeItem","multiple":true,"delay":0},{"id":"store_name","type":"SelectorText","parentSelectors":["elem"],"selector":"h3","multiple":false,"regex":"","delay":0},{"id":"store_address","type":"SelectorText","parentSelectors":["elem"],"selector":"p","multiple":false,"regex":"","delay":0}]}\n'

# AUDEMARS PIGUET

- Filter on "Where to buy" before scraping

In [252]:
config = config_all['AUDEMARS PIGUET']

In [253]:
driver.get(config['start_url'][0])

In [254]:
rows_list = []

- Zoom out on the map to get all store points (include service centers as well)
- "Show list results" gives the number of stores you need to collect

In [339]:
# Add every store in the currently displayed result list to rows_list.
# is_partner is not set here; a later cell derives it from the store name.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

for item in items:
    row = {
        field: get_value(item, config, field)
        for field in ('store_name', 'address_city', 'status')
    }
    row['brand'] = 'Audemars Piguet'
    rows_list.append(row)

# rows_list accumulates across re-runs of this cell; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 20
total stores: 151


In [340]:
df['is_partner'] = df['store_name'].apply(lambda x : extract_partner(x, 'Audemars Piguet'))

In [341]:
df

Unnamed: 0,store_name,address_city,status,brand,is_partner
0,AP House Barcelona,"Passeig de Gracia 37, 08006 Barcelona",,Audemars Piguet,0
1,AP House Madrid,"Calle de Serrano, 66, 28001 Madrid",,Audemars Piguet,0
2,Audemars Piguet Boutique Monaco,"2 avenue des Spélugues, 98000 Monaco",,Audemars Piguet,0
3,Audemars Piguet Boutique Geneva Fusterie,"Place de la Fusterie 12, 1204 Geneva",,Audemars Piguet,0
4,Audemars Piguet Boutique Geneva Montres Prestige,"Quai du mont-blanc 19, Fairmont Grand Hôtel, 1...",,Audemars Piguet,0
...,...,...,...,...,...
777,AP House St Barthélemy,"Rue de la République, Gustavia, 97133 Gustavia",,Audemars Piguet,0
783,Joyeria Bauer,"Carrera 15 n° 93-60, Bogota",,Audemars Piguet,1
796,Chronos,"Chronos, 11300 Montevideo",,Audemars Piguet,1
797,Eve Joyerias,"Puerto Madero Este, 1107 Buenos Aires",,Audemars Piguet,1


In [342]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'AUDEMARS_PIGUET'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/AUDEMARS_PIGUET_092021.xlsx


# LAMBORGHINI

In [343]:
config = config_all['LAMBORGHINI']

In [344]:
driver.get(config['start_url'][0])

In [345]:
rows_list = []

- Zoom out on the map and click "Clear Filters" so that all stores are listed

In [377]:
# Add every dealer listed on the current page to rows_list.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

rows_list.extend(
    {
        'address': get_value(item, config, 'address'),
        'store_name': get_value(item, config, 'store_name'),
        'brand': 'Lamborghini',
        'is_partner': '0',
    }
    for item in items
)

# rows_list accumulates across re-runs of this cell; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 23
total stores: 154


In [378]:
df

Unnamed: 0,address,store_name,brand,is_partner
0,ul. Polczynska 120 B 00-347 Warszawa PL 00-347...,Exclusive Cars Vertriebs Gmbh,Lamborghini,0
1,Via de Las Dos Castillas 9 28224 Pozuelo De Al...,Jemercar Tecnicos Del Automovil Services SL,Lamborghini,0
2,"Rua São Francisco 582, Alcabideche 2645-019 Al...",Siva SA,Lamborghini,0
3,Pr. Mohammed Bin Abdul Aziz Street P.O. Box :3...,Saudi Arabian Marketing & Agencies Co. Ltd,Lamborghini,0
4,"Al-Tilal Complex, Pepsi Road 80, Shuwaikh Indu...",Fouad Alghanim & Sons Automotive,Lamborghini,0
...,...,...,...,...
701,4-34-4 HARADA HIGASHI-KU FUKUOKA-SHI 812 0063 ...,RPM Co. Ltd,Lamborghini,0
702,"44-1, Shinden, Nanakita, Izumi-ku, Sendai-shi,...",Tajima Motor Corporation Co,Lamborghini,0
734,382 Swan St. Richmond 'Vic 3141 Richmond VIC A...,Zagame Automotive Group,Lamborghini,0
735,"622(Samsung-dong) Yeongdong-daero, Gangnam-gu ...","Sqda Motors Co., Ltd",Lamborghini,0


In [379]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'LAMBORGHINI'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/LAMBORGHINI_092021.xlsx


# FERRARI

In [380]:
config = config_all['FERRARI']

In [381]:
driver.get(config['start_url'][0])

In [382]:
rows_list = []

- Click on "SALE" to get stores

In [383]:
# Each item is one country block; its 'store_name_city' XPath yields strings
# that are split into a store name and a city.
items = get_items(driver, config["items"])

for item in items:
    country = get_value(item, config, 'country')

    for store_name_city in item.xpath(config['store_name_city']):
        store_name = store_name_city.split(',')[0]
        # An entry without ", " used to raise IndexError and abort the whole
        # harvest; it is now kept with an empty city instead.
        parts = store_name_city.split(', ')
        city = parts[1] if len(parts) > 1 else None

        rows_list.append({
            'country': country,
            'store_name': store_name,
            'city': city,
            'brand': 'Ferrari'
        })

# is_partner is derived from store_name in a later cell.
df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))

total stores: 262


In [384]:
df['is_partner'] = df['store_name'].apply(lambda x : extract_partner(x, 'Ferrari'))

In [385]:
df.head()

Unnamed: 0,country,store_name,city,brand,is_partner
0,U.A.E.,Al Tayer Motors L.L.C,Dubai,Ferrari,1
1,U.A.E.,Premier Motors L.L.C.,Abu Dhabi,Ferrari,1
2,Bahrain,Euro Motors,Sitra,Ferrari,1
3,India,Select Cars,New Delhi,Ferrari,1
4,India,Navnit Motors,Mumbai,Ferrari,1


In [386]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'FERRARI'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/FERRARI_092021.xlsx


# PATEK PHILIPPE

In [387]:
config = config_all['PATEK PHILIPPE']

In [388]:
rows_list = []

- Change urls to get both boutiques and retailers

In [389]:
#BOUTIQUES
driver.get(config['start_url'][0])

In [390]:
# Boutique page (start_url[0]): every listed store is brand-operated (is_partner '0').
items = get_items(driver, config["items"])

for item in items:
    # The address XPath may yield several text parts; they are joined with ", ".
    address_parts = item.xpath(config['address'])
    rows_list.append({
        'country': get_value(item, config, 'country'),
        'store_name': get_value(item, config, 'store_name'),
        'city': get_value(item, config, 'city'),
        'address': ', '.join(address_parts),
        'brand': 'Patek Philippe',
        'is_partner': '0'
    })

print(len(rows_list), 'boutiques collected')

12 boutiques collected


In [391]:
#RETAILERS
driver.get(config['start_url'][1])

In [392]:
# Retailer page (start_url[1]): same layout as the boutique page, flagged is_partner '1'.
items = get_items(driver, config["items"])

for item in items:
    # The address XPath may yield several text parts; they are joined with ", ".
    address_parts = item.xpath(config['address'])
    rows_list.append({
        'country': get_value(item, config, 'country'),
        'store_name': get_value(item, config, 'store_name'),
        'city': get_value(item, config, 'city'),
        'address': ', '.join(address_parts),
        'brand': 'Patek Philippe',
        'is_partner': '1'
    })

# rows_list now holds the boutiques from the earlier cell plus these retailers.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

total stores: 389


In [393]:
df.head()

Unnamed: 0,country,store_name,city,address,brand,is_partner
0,Germany,Deutsche Patek Philippe GmbH,München,Brienner Str. 12,Patek Philippe,0
1,Singapore,Geneva Master Time Marketing LLP,Singapore,"501, Orchard Road, 16-01 Wheelock Place",Patek Philippe,0
2,Thailand,Geneva Master Time Representative Office,Bangkok,"Unit 5B-3, 5th Floor Gaysorn, 999 Ploenchit Ro...",Patek Philippe,0
3,Mexico,Grupo Mondi S.A. de C.V.,Mexico D.F.,"Av. Popocatépetl 204 - Col. General Anaya, Del...",Patek Philippe,0
4,Hong Kong SAR China,Libertas Limited,Kowloon,"15/F., The Peninsula Office Tower, 18 Middle R...",Patek Philippe,0


In [394]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'PATEK_PHILIPPE'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/PATEK_PHILIPPE_092021.xlsx


# BREGUET

In [395]:
config = config_all['BREGUET']

In [396]:
driver.get(config['start_url'][0])

In [397]:
rows_list = []

- Click on retailers
- Click on "Load more results" until you reach the end

In [487]:
# Add every retailer in the fully loaded result list to rows_list.
# is_partner is not set here; a later cell derives it from the store name.
items = get_items(driver, config["items"])

for item in items:
    row = {
        field: get_value(item, config, field)
        for field in ('store_name', 'address', 'city')
    }
    row['brand'] = 'Breguet'
    rows_list.append(row)

# rows_list accumulates across re-runs of this cell; duplicates are dropped here.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

total stores: 389


In [488]:
df['is_partner'] = df['store_name'].apply(lambda x : extract_partner(x, 'Breguet'))

In [489]:
df

Unnamed: 0,store_name,address,city,brand,is_partner
0,Suïssa at The Embassy,"Avinguda Meritxell, 31.",AD500 Andorra la Vella,Breguet,1
1,BOUTIQUE DOS RELÓGIOS PLUS,"Edifício ESCOM, secundo piso35-37 Loja I Rua M...",Luanda,Breguet,1
2,CHRONOGRAPH,"5, Northern Avenue",0001 Yerevan,Breguet,1
3,Monards 101 Collins,101 Collins Street,VIC 3000 Melbourne,Breguet,1
4,Monards Crown Casino,"Shop 15, 8 Whiteman Street","VIC 3006 Southbank, Melbourne",Breguet,1
...,...,...,...,...,...
385,ZADOK JEWELERS,1749 Post Oak Boulevard,TX 77056 Houston,Breguet,1
386,Chronos,26 de Marzo 3548,11300 Montevideo,Breguet,1
387,«Geneva» boutique,"Mirso Ulugbek district\tMustaqillik Shoh Str, ...",100000 Tashkent,Breguet,1
389,SWISS MASTER TIME CO LTD,56 Ly Thai ToHoan Kiem District,Hanoi,Breguet,1


In [490]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'BREGUET'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/BREGUET_092021.xlsx


# VACHERON CONSTANTIN

In [491]:
config = config_all['VACHERON CONSTANTIN']

In [492]:
driver.get(config['start_url'][0])

In [493]:
rows_list = []

In [494]:
# Crawl the Vacheron Constantin store directory.
#
# Every entry on the start page is followed; each visited page is either
#   * another directory page (it matches config["items"]) -> follow its entries,
#   * a page listing several stores (config["multi"])     -> open each store page,
#   * a single store page                                 -> collect it.
# The helpers below replace six hand-copied versions of the same code. They read
# the notebook globals `driver`, `config` and `rows_list`.
VC_BASE_URL = 'https://stores.vacheron-constantin.com/'
# Directory entries are followed at most this many levels below the start page.
VC_MAX_DEPTH = 3


def _vc_store_row(page, country):
    """Build one output row from a parsed store page."""
    return {
        'city': get_value(page, config, 'city'),
        'address': get_value(page, config, 'address'),
        'store_name': get_value(page, config, 'store_name'),
        'brand': 'Vacheron Constantin',
        'country': country,
        'is_partner': get_value(page, config, 'is_partner'),
    }


def _vc_visit(entry, country, depth):
    """Open the page behind a directory entry and collect the stores under it.

    `depth` is 1 for entries of the start page and grows by one per level.
    """
    url = get_value(entry, config, 'url')
    if url is None:
        # Previously this crashed on `str + None` and aborted the crawl.
        print('skipping directory entry without url, country:', country)
        return

    driver.get(VC_BASE_URL + url)
    time.sleep(2)  # presumably gives the page time to render before it is read
    page = get_page(driver)

    children = get_items(driver, config["items"])
    if len(children) > 0:
        if depth >= VC_MAX_DEPTH:
            # Deeper levels were silently ignored before; make the gap visible.
            print('not following', len(children), 'entries below level', depth, 'at', url)
            return
        for child in children:
            _vc_visit(child, country, depth + 1)
        return

    multis = get_items(driver, config["multi"])
    if len(multis) == 0:
        # Single store page.
        rows_list.append(_vc_store_row(page, country))
        return

    for multi in multis:
        final_url = get_value(multi, config, 'urld')
        if final_url is None:
            print('skipping store link without url at', url)
            continue
        # Links are relative ("../"); drop the prefix and resolve against the site root.
        final_url = re.sub(r'\.\./', '', final_url)
        driver.get(VC_BASE_URL + final_url)
        rows_list.append(_vc_store_row(get_page(driver), country))


items = get_items(driver, config["items"])

for item in items:
    country = get_value(item, config, 'country')
    _vc_visit(item, country, 1)

df = pd.DataFrame(rows_list).drop_duplicates()

# Service centers are not stores; drop them, then encode the store type.
df = df[df['is_partner'] != 'Vacheron Constantin Service Center'].copy()
# Only the is_partner column is recoded. The earlier frame-wide replace also
# wrote '0' into missing city / address / store_name cells.
df['is_partner'] = df['is_partner'].replace({
    'Vacheron Constantin Authorized Retailer': '1',
    'Vacheron Constantin Boutique': '0',
}).fillna('0')

print('total stores:', len(df))

total stores: 313


In [495]:
df.head()

Unnamed: 0,city,address,store_name,brand,country,is_partner
0,Casablanca,"3, Angle Boulevard Abdelkrim El Khattabi",Quantième - Quantième,Vacheron Constantin,Morocco,1
1,Johannesburg,"Shop HL15,Melrose Arch",Elegance - Elegance,Vacheron Constantin,South Africa,1
2,Johannesburg,"Shop U77 Upper floor,Sandton City",Elegance - Elegance,Vacheron Constantin,South Africa,1
3,Johannesburg,Ballyclare Drive,Johannesburg,Vacheron Constantin,South Africa,0
4,Melbourne,88 Collins Street,Melbourne - Collins Street,Vacheron Constantin,Australia,0


In [496]:
# Export the collected stores to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'VACHERON_CONSTANTIN'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the save does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# The context manager closes the workbook even if to_excel raises.
# NOTE(review): newer pandas deprecates passing `options=` directly; there it is
# presumably engine_kwargs={'options': {...}} - confirm against the installed version.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/VACHERON_CONSTANTIN_092021.xlsx


# CARTIER

BOUTIQUES

In [497]:
config = config_all['CARTIER_BOUTIQUES']

In [498]:
driver.get(config['start_url'][0])

In [499]:
rows_list = []

In [500]:
items = get_items(driver, config["items"])

for item in items:
    country = get_value(item, config, 'country')
    url = get_value(item, config, 'url')
    new_url = 'https://stores.cartier.com/' + url
    
    driver.get(new_url)
    time.sleep(2)
    page = get_page(driver)
    
    items_lvl2 = get_items(driver, config["items"])
    if len(items_lvl2) == 0:
        multis = get_items(driver, config["multi"])
        if len(multis) == 0:
            #final_page
            #collect
            city = get_value(page, config, 'city')
            address = get_value(page, config, 'address')
            store_name = get_value(page, config, 'store_name')
            store_name2 = get_value(page, config, 'store_name2')
            dict1 = {'city': city,
                    'address': address,
                    'store_name': store_name+' '+store_name2,
                    'brand': 'Cartier',
                    'country': country,
                    'is_partner': 0
                    }
            rows_list.append(dict1)     
        else:
            for multi in multis:
                final_url = get_value(multi, config, 'urld')
                final_url = re.sub('\.\./', '', final_url)
                driver.get('https://stores.cartier.com/' + final_url)
                page = get_page(driver)
                #final_page
                #collect
                city = get_value(page, config, 'city')
                address = get_value(page, config, 'address')
                store_name = get_value(page, config, 'store_name')
                store_name2 = get_value(page, config, 'store_name2')
                dict1 = {'city': city,
                        'address': address,
                        'store_name': store_name+' '+store_name2,
                        'brand': 'Cartier',
                        'country': country,
                        'is_partner': 0
                        }
                rows_list.append(dict1)     
    else:
        for item_lvl2 in items_lvl2:
            url = get_value(item_lvl2, config, 'url')
            new_url = 'https://stores.cartier.com/' + url
            driver.get(new_url)
            time.sleep(2)
            page = get_page(driver)
            
            items_lvl3 = get_items(driver, config["items"])
            if len(items_lvl3) == 0:
                multis = get_items(driver, config["multi"])
                if len(multis) == 0:
                    #final_page
                    #collect
                    city = get_value(page, config, 'city')
                    address = get_value(page, config, 'address')
                    store_name = get_value(page, config, 'store_name')
                    store_name2 = get_value(page, config, 'store_name2')
                    dict1 = {'city': city,
                            'address': address,
                            'store_name': store_name+' '+store_name2,
                            'brand': 'Cartier',
                            'country': country,
                            'is_partner': 0
                            }
                    rows_list.append(dict1)     
                else:
                    for multi in multis:
                        final_url = get_value(multi, config, 'urld')
                        final_url = re.sub('\.\./', '', final_url)
                        driver.get('https://stores.cartier.com/' + final_url)
                        page = get_page(driver)
                        #final_page
                        #collect
                        city = get_value(page, config, 'city')
                        address = get_value(page, config, 'address')
                        store_name = get_value(page, config, 'store_name')
                        store_name2 = get_value(page, config, 'store_name2')
                        dict1 = {'city': city,
                                'address': address,
                                'store_name': store_name+' '+store_name2,
                                'brand': 'Cartier',
                                'country': country,
                                'is_partner': 0
                                }
                        rows_list.append(dict1)     
            else:
                for item_lvl3 in items_lvl3:
                    url = get_value(item_lvl3, config, 'url')
                    new_url = 'https://stores.cartier.com/' + url
                    driver.get(new_url)
                    time.sleep(2)
                    page = get_page(driver)

                    items_lvl4 = get_items(driver, config["items"])
                    if len(items_lvl4) == 0:
                        multis = get_items(driver, config["multi"])
                        if len(multis) == 0:
                            #final_page
                            #collect
                            city = get_value(page, config, 'city')
                            address = get_value(page, config, 'address')
                            store_name = get_value(page, config, 'store_name')
                            store_name2 = get_value(page, config, 'store_name2')
                            dict1 = {'city': city,
                                    'address': address,
                                    'store_name': store_name+' '+store_name2,
                                    'brand': 'Cartier',
                                    'country': country,
                                    'is_partner': 0
                                    }
                            rows_list.append(dict1)     
                        else:
                            for multi in multis:
                                final_url = get_value(multi, config, 'urld')
                                final_url = re.sub('\.\./', '', final_url)
                                driver.get('https://stores.cartier.com/' + final_url)
                                page = get_page(driver)
                                #final_page
                                #collect
                                city = get_value(page, config, 'city')
                                address = get_value(page, config, 'address')
                                store_name = get_value(page, config, 'store_name')
                                store_name2 = get_value(page, config, 'store_name2')
                                dict1 = {'city': city,
                                        'address': address,
                                        'store_name': store_name+' '+store_name2,
                                        'brand': 'Cartier',
                                        'country': country,
                                        'is_partner': 0
                                        }
                                rows_list.append(dict1)
                                
df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))

total stores: 270


In [501]:
len(df)

270

In [502]:
df.head()

Unnamed: 0,city,address,store_name,brand,country,is_partner
0,Oranjestad,Renaissance Mall,Boutique Cartier Aruba Oranjestad - Renaissanc...,Cartier,Aruba,0
1,Sydney,74 Castlereagh Street,Boutique Cartier Sydney Flagship Sydney - 74 C...,Cartier,Australia,0
2,Sydney,"The Rocks , 155 George Street",Boutique Cartier Boutique -DFS Galleria Sydney...,Cartier,Australia,0
3,Brisbane,172 Edward Street,Boutique Cartier Brisbane Brisbane - 172 Edwar...,Cartier,Australia,0
4,Chadstone,Ground Floor G066/1341,Boutique Cartier Chadstone Chadstone - Ground ...,Cartier,Australia,0


RETAILERS

In [505]:
config = config_all['CARTIER_RETAILERS']

In [506]:
driver.get(config['start_url'][0])

- Click on Resellers to only extract Retailers
- Cartier has no retailers for jewelry (only watches)

In [507]:
# Load the lookup that drives the retailer search below; its items are iterated
# as (ci, co) pairs — presumably city -> country, going by the file name (TODO confirm).
with open('./config/city_country.yaml', 'r', encoding='utf-8') as file:
    dict_cc = yaml.safe_load(file)

In [508]:
# Query the Cartier store locator once per (ci, co) pair of dict_cc and append
# every result card to rows_list, which is shared with the boutique cell above.
# df is then rebuilt from the full list with exact duplicate rows removed.
from urllib.parse import quote_plus  # local import: the notebook's import cell is outside this section

search_url = ("https://stores.cartier.com/en_eu/search?q={city}%2C+{country}"
              "&category=storeLocatorSearch&r=500&storetype=true&watches=true")

for ci, co in dict_cc.items():
    # quote_plus still turns spaces into '+', and additionally percent-encodes
    # accents, '&', '/' ... that would otherwise corrupt the query string.
    new_url = search_url.format(city=quote_plus(str(ci)), country=quote_plus(str(co)))
    driver.get(new_url)
    time.sleep(3)  # pause before get_items reads driver.page_source

    items = get_items(driver, config["items"])

    for item in items:
        country = get_value(item, config, 'country')
        store_name = get_value(item, config, 'store_name')
        # the address xpath can return several text nodes; join them into one string
        address = ', '.join(item.xpath(config['address']))
        city = get_value(item, config, 'city')

        rows_list.append({
            'country': country,
            'store_name': store_name,
            'city': city,
            'address': address,
            'brand': 'Cartier',
            # int, like the 0 used for the boutique rows, so the column keeps a single type
            'is_partner': 1
        })

df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))

total stores: 928


In [509]:
df.head()

Unnamed: 0,city,address,store_name,brand,country,is_partner
0,Oranjestad,Renaissance Mall,Boutique Cartier Aruba Oranjestad - Renaissanc...,Cartier,Aruba,0
1,Sydney,74 Castlereagh Street,Boutique Cartier Sydney Flagship Sydney - 74 C...,Cartier,Australia,0
2,Sydney,"The Rocks , 155 George Street",Boutique Cartier Boutique -DFS Galleria Sydney...,Cartier,Australia,0
3,Brisbane,172 Edward Street,Boutique Cartier Brisbane Brisbane - 172 Edwar...,Cartier,Australia,0
4,Chadstone,Ground Floor G066/1341,Boutique Cartier Chadstone Chadstone - Ground ...,Cartier,Australia,0


In [510]:
# Save the Cartier rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'CARTIER'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/CARTIER_092021.xlsx


# ROLEX

In [7]:
config = config_all['ROLEX']

In [8]:
driver.get(config['start_url'][0])

In [9]:
rows_list = []

In the browser window, run any search (a random one is fine) before continuing.

In [10]:
import pycountry
#! pip3 install pycountry #to install pycountry

# Build `countries` (country, alpha_2, alpha_3) from pycountry and derive
# `countries_list`, the unique country names used by the scraping loop below.

# Use the short common_name when pycountry provides one, else the regular name.
# getattr with a default replaces the former bare `except:`, which would also
# have hidden unrelated errors (and KeyboardInterrupt).
s = [
    {
        'country': getattr(i, 'common_name', i.name),
        'alpha_2': i.alpha_2,
        'alpha_3': i.alpha_3,
    }
    for i in pycountry.countries
]
countries = pd.DataFrame(s)

# pycountry name -> name substituted for it before the dealer-page URLs are built
country_renames = {
    'China': 'Mainland China',
    'Virgin Islands, British': 'British Virgin Islands',
    'Virgin Islands, U.S.': 'US Virgin Islands',
    'Korea, Republic of': 'South Korea',
    "Korea, Democratic People's Republic of": 'North Korea',
    "Côte d'Ivoire": 'Ivory Coast',
    'Iran, Islamic Republic of': 'Iran',
    "Lao People's Democratic Republic": 'Laos',
    "Russian Federation": 'Russia',
    "Hong Kong": 'Hong Kong SAR',
    "Taiwan": 'Taiwan Region',
    "Macao": 'Macau SAR',
}
# Exact whole-value replacement; names missing from the mapping are left untouched.
countries['country'] = countries['country'].replace(country_renames)

countries_list = list(countries['country'].unique())

In [11]:
countries_list

['Aruba',
 'Afghanistan',
 'Angola',
 'Anguilla',
 'Åland Islands',
 'Albania',
 'Andorra',
 'United Arab Emirates',
 'Argentina',
 'Armenia',
 'American Samoa',
 'Antarctica',
 'French Southern Territories',
 'Antigua and Barbuda',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Burundi',
 'Belgium',
 'Benin',
 'Bonaire, Sint Eustatius and Saba',
 'Burkina Faso',
 'Bangladesh',
 'Bulgaria',
 'Bahrain',
 'Bahamas',
 'Bosnia and Herzegovina',
 'Saint Barthélemy',
 'Belarus',
 'Belize',
 'Bermuda',
 'Bolivia',
 'Brazil',
 'Barbados',
 'Brunei Darussalam',
 'Bhutan',
 'Bouvet Island',
 'Botswana',
 'Central African Republic',
 'Canada',
 'Cocos (Keeling) Islands',
 'Switzerland',
 'Chile',
 'Mainland China',
 'Ivory Coast',
 'Cameroon',
 'Congo, The Democratic Republic of the',
 'Congo',
 'Cook Islands',
 'Colombia',
 'Comoros',
 'Cabo Verde',
 'Costa Rica',
 'Cuba',
 'Curaçao',
 'Christmas Island',
 'Cayman Islands',
 'Cyprus',
 'Czechia',
 'Germany',
 'Djibouti',
 'Dominica',
 'Denmark',
 'D

In [12]:
# Open the Rolex dealer page of every country in countries_list, append one row
# per dealer card found to rows_list, then rebuild df without duplicate dealers.
countries_list_l = countries_list

# Literal substrings removed/replaced to turn a country name into the URL slug.
# str.replace is used instead of re.sub on purpose: as a regex, '(frenchpart)'
# is a capture group, so only the word was removed and an empty '()' was left
# in the slug of names ending in '(French part)' / '(Dutch part)'.
slug_replacements = (
    (' ', ''),
    ('.', ''),
    ('(frenchpart)', ''),
    ('(dutchpart)', ''),
    ('è', 'e'),
    ('é', 'e'),
)

for e, country in enumerate(countries_list_l):
    country = str(country).lower()
    for old, new in slug_replacements:
        country = country.replace(old, new)
    country = country.strip()
    url = 'https://www.rolex.com/rolex-dealers/' + country + '.html#mode=list'
    driver.get(url)
    time.sleep(5)  # pause before get_items reads driver.page_source
    items = get_items(driver, config["items"])
    print(e+1, '-', len(items), 'stores found for', country)

    for item in items:
        store_name = get_value(item, config, 'store_name')
        info1 = get_value(item, config, 'info1')
        info2 = get_value(item, config, 'info2')
        info3 = get_value(item, config, 'info3')
        info4 = get_value(item, config, 'info4')
        info5 = get_value(item, config, 'info5')

        rows_list.append({
            # URL the browser actually ended up on, which may differ from `url`
            'url': driver.current_url,
            'store_name': store_name,
            'info1': info1,
            'info2': info2,
            'info3': info3,
            'info4': info4,
            'info5': info5,
            'brand': 'Rolex'
        })

df = pd.DataFrame(rows_list)
# 'url' is left out of the subset, so the same dealer seen on two pages is kept once
df.drop_duplicates(subset=['brand', 'info1', 'info2', 'info3', 'info4', 'info5', 'store_name'], inplace=True)
print('total stores:', len(df))

1 - 1 stores found for aruba
2 - 0 stores found for afghanistan
3 - 0 stores found for angola
4 - 0 stores found for anguilla
5 - 0 stores found for ålandislands
6 - 0 stores found for albania
7 - 2 stores found for andorra
8 - 24 stores found for unitedarabemirates
9 - 10 stores found for argentina
10 - 0 stores found for armenia
11 - 0 stores found for americansamoa
12 - 0 stores found for antarctica
13 - 0 stores found for frenchsouthernterritories
14 - 1 stores found for antiguaandbarbuda
15 - 21 stores found for australia
16 - 15 stores found for austria
17 - 2 stores found for azerbaijan
18 - 0 stores found for burundi
19 - 13 stores found for belgium
20 - 0 stores found for benin
21 - 0 stores found for bonaire,sinteustatiusandsaba
22 - 0 stores found for burkinafaso
23 - 0 stores found for bangladesh
24 - 1 stores found for bulgaria
25 - 3 stores found for bahrain
26 - 4 stores found for bahamas
27 - 0 stores found for bosniaandherzegovina
28 - 1 stores found for saintbarthelem

233 - 0 stores found for unitedstatesminoroutlyingislands
234 - 3 stores found for uruguay
235 - 336 stores found for unitedstates
236 - 0 stores found for uzbekistan
237 - 0 stores found for holysee(vaticancitystate)
238 - 0 stores found for saintvincentandthegrenadines
239 - 8 stores found for venezuela
240 - 1 stores found for britishvirginislands
241 - 1 stores found for usvirginislands
242 - 2 stores found for vietnam
243 - 1 stores found for vanuatu
244 - 0 stores found for wallisandfutuna
245 - 0 stores found for samoa
246 - 3 stores found for yemen
247 - 10 stores found for southafrica
248 - 0 stores found for zambia
249 - 0 stores found for zimbabwe
total stores: 1705


In [424]:
#correct manually some wrongly scraped info (not necessary)
items = get_items(driver, config["items"])
print(len(items), 'stores found')

for item in items:
    store_name = get_value(item, config, 'store_name')
    info1 = get_value(item, config, 'info1')
    info2 = get_value(item, config, 'info2')
    info3 = get_value(item, config, 'info3')
    info4 = get_value(item, config, 'info4')
    info5 = get_value(item, config, 'info5')

    rows_list.append({
        'url': driver.current_url,
        'store_name': store_name,
        'info1': info1,
        'info2': info2,
        'info3': info3,
        'info4': info4,
        'info5': info5,
        'brand': 'Rolex'
    })
    
df = pd.DataFrame(rows_list)
df.drop_duplicates(subset=['brand', 'info1', 'info2', 'info3', 'info4', 'info5', 'store_name'], inplace=True)
print('total stores:', len(df))

0 stores found
total stores: 1707


In [13]:
df['is_partner'] = df['store_name'].apply(lambda x : extract_partner(x, 'Rolex'))

In [14]:
df

Unnamed: 0,url,store_name,info1,info2,info3,info4,info5,brand,is_partner
0,https://www.rolex.com/rolex-dealers/aruba.html...,‭Gandelman‬,"Renaissance Mall, 82 L.G. Smith Boulevard",Oranjestad 00000,Aruba,,,Rolex,1
1,https://www.rolex.com/rolex-dealers/andorra.ht...,‭JOIERIA BERNA‬,Avenida Meritxell 51-55,AD500 Andorra la Vella,Andorra,Andorra,,Rolex,1
2,https://www.rolex.com/rolex-dealers/andorra.ht...,‭JOIERIA GENEVE‬,Avenida Meritxell 124,AD500 Andorra la Vella,Andorra,Andorra,,Rolex,1
3,https://www.rolex.com/rolex-dealers/unitedarab...,‭Mohammed Rasool Khoory & Sons Galleria Mall‬,The Galleria Al Maryah Island,P.O Box 126 Abu Dhabi,United Arab Emirates,,,Rolex,1
4,https://www.rolex.com/rolex-dealers/unitedarab...,‭Mohammed Rasool Khoory & Sons‬,"Abu Dhabi International Airport, Terminal 3",P.O. Box: 126 Abu Dhabi,United Arab Emirates,,,Rolex,1
...,...,...,...,...,...,...,...,...,...
2377,https://www.rolex.com/rolex-dealers/guadeloupe...,‭ZEGG & CERLATI‬,1 Place du Casino,98000 MONACO,,,,Rolex,1
2383,https://www.rolex.com/rolex-dealers/guadeloupe...,‭Zimson‬,"128-A, Race Course",Coimbatore Tamil Nadu,641018,India,,Rolex,1
2384,https://www.rolex.com/rolex-dealers/guadeloupe...,‭Zong Chong Watch Co. - Bo Ai Branch‬,"154 Bo’ai Rd.,",Taipei City Zhongzheng District,10043,Taiwan Region,,Rolex,1
2385,https://www.rolex.com/rolex-dealers/guadeloupe...,‭Zong Chong Watch Co. - Zhong Xiao Branch‬,"508 Sec. 4, Zhongxiao E. Rd.,",Taipei City Xinyi District,11071,Taiwan Region,,Rolex,1


In [15]:
len(df)

1705

In [16]:
# Save the Rolex rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'ROLEX'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings
# (df has a 'url' column) into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/ROLEX_092021.xlsx


# TAG HEUER

In [31]:
config = config_all['TAGHEUER']

In [32]:
driver.get(config['start_url'][0])

In [33]:
rows_list = []

In [34]:
# Read the per-country store-list paths from the start page, open each one and
# collect name + three address lines for every store card into rows_list.
store_fields = ('store_name', 'store_address1', 'store_address2', 'store_address3')

country_codes = get_items(driver, config["country_code"])
print(len(country_codes))

for country_path in country_codes:

    print(country_path)
    # entries are site-relative paths such as /fr/en/stores/ar
    driver.get("https://www.tagheuer.com/" + country_path)
    time.sleep(3)

    for item in get_items(driver, config["items"]):
        row = {field: get_value(item, config, field) for field in store_fields}
        row['brand'] = "Tag Heuer"
        rows_list.append(row)

df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))
# partner flag is derived from the store name by extract_partner
df['is_partner'] = df['store_name'].apply(lambda name : extract_partner(name, 'Tag Heuer'))

40
/fr/en/stores/ar
/fr/en/stores/au
/fr/en/stores/at
/fr/en/stores/be
/fr/en/stores/br
/fr/en/stores/ca
/fr/en/stores/cn
/fr/en/stores/co
/fr/en/stores/hr
/fr/en/stores/cz
/fr/en/stores/dk
/fr/en/stores/fi
/fr/en/stores/fr
/fr/en/stores/de
/fr/en/stores/gr
/fr/en/stores/hk
/fr/en/stores/in
/fr/en/stores/id
/fr/en/stores/il
/fr/en/stores/it
/fr/en/stores/jp
/fr/en/stores/my
/fr/en/stores/mx
/fr/en/stores/nl
/fr/en/stores/no
/fr/en/stores/pl
/fr/en/stores/pt
/fr/en/stores/ru
/fr/en/stores/sa
/fr/en/stores/sg
/fr/en/stores/za
/fr/en/stores/kr
/fr/en/stores/es
/fr/en/stores/se
/fr/en/stores/ch
/fr/en/stores/tw
/fr/en/stores/tr
/fr/en/stores/ae
/fr/en/stores/gb
/fr/en/stores/us
total stores: 2719


In [35]:
len(df)

2719

In [36]:
# Save the Tag Heuer rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'TAGHEUER'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/TAGHEUER_092021.xlsx


# ZENITH

In [7]:
config = config_all['ZENITH']

In [8]:
driver.get(config['start_url'][0])

In [9]:
rows_list = []

In [10]:
# Read the country codes offered by the store locator, query the locator once
# per code (boutiques and retailers together) and collect every store card.
store_fields = ('store_name', 'store_address1', 'store_address2', 'store_address3')

country_codes = get_items(driver, config["country_code"])
print("Number of countries=",len(country_codes))

for country_code in country_codes:
    print(country_code)
    locator_url = "https://www.zenith-watches.com/en_us/store-locator?filter=official,retailer&store=&country=" + country_code + "&address=&latlng="
    driver.get(locator_url)
    time.sleep(3)

    for item in get_items(driver, config["items"]):
        row = {field: get_value(item, config, field) for field in store_fields}
        row['brand'] = "Zenith"
        rows_list.append(row)

df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))
# partner flag is derived from the store name by extract_partner
df['is_partner'] = df['store_name'].apply(lambda name : extract_partner(name, 'Zenith'))

Number of countries= 95
AD
AO
AR
AM
AW
AU
AT
AZ
BS
BH
BD
BY
BE
BM
VG
BG
CA
KY
CN
CO
CR
HR
CY
CZ
DK
DO
EG
EE
FI
FR
GE
DE
GR
GU
GT
HK
HU
IN
ID
IE
IL
IT
JM
JP
KZ
KE
KW
KG
LV
LB
LT
LU
MO
MY
MV
MT
MU
MX
MC
MA
NP
NL
NG
MP
NO
OM
PY
PE
PH
PL
PT
QA
RO
RU
RS
SG
SK
SI
ZA
KR
ES
KN
MF
SE
CH
TW
TH
TR
TM
UA
AE
GB
US
UZ
VE
total stores: 667


In [11]:
len(df)

667

In [12]:
# Save the Zenith rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'ZENITH'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/ZENITH_092021.xlsx


# OMEGA

In [288]:
config = config_all['OMEGA']

In [292]:
driver.get(config['start_url'][0])

In [293]:
rows_list = []

In [294]:
# Walk the store directory: start page -> country pages -> city pages, and
# collect name + up to seven address lines for every store card on a city page.
store_fields = (
    'store_name',
    'store_address1',
    'store_address2',
    'store_address3',
    'store_address4',
    'store_address5',
    'store_address6',
    'store_address7',
)

country_codes = get_items(driver, config["country_code"])
print("Number of countries=",len(country_codes))

for country_url in country_codes:
    # the values are passed to driver.get as-is, i.e. they are used as page URLs
    driver.get(country_url)
    time.sleep(3)

    for city_url in get_items(driver,config["city_code"]):
        print(city_url)
        driver.get(city_url)
        time.sleep(3)
        #Need to scroll down to load more elements
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)

        items = get_items(driver, config["items"])
        print("Items=",len(items),"\n")

        for item in items:
            row = {field: get_value(item,config,field) for field in store_fields}
            row['brand'] = "Omega"
            rows_list.append(row)

df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))
# partner flag is derived from the store name by extract_partner
df['is_partner'] = df['store_name'].apply(lambda name : extract_partner(name, 'Omega'))

Number of countries= 129
https://www.omegawatches.com/store/country/algeria/city/dz-alger
Items= 4 

https://www.omegawatches.com/store/country/algeria/city/oran
Items= 2 

https://www.omegawatches.com/store/country/andorra/city/andorra-la-vella
Items= 2 

https://www.omegawatches.com/store/country/angola/city/luanda
Items= 1 

https://www.omegawatches.com/store/country/antigua-and-barbuda/city/st-john's
Items= 1 

https://www.omegawatches.com/store/country/argentina/city/bahia-blanca
Items= 1 

https://www.omegawatches.com/store/country/argentina/city/buenos-aires
Items= 7 

https://www.omegawatches.com/store/country/argentina/city/cordoba
Items= 1 

https://www.omegawatches.com/store/country/argentina/city/rosario
Items= 1 

https://www.omegawatches.com/store/country/aruba/city/noord
Items= 1 

https://www.omegawatches.com/store/country/aruba/city/oranjestad
Items= 2 

https://www.omegawatches.com/store/country/aruba/city/palm-beach
Items= 1 

https://www.omegawatches.com/store/count

Items= 1 

https://www.omegawatches.com/store/country/czech-republic/city/brno
Items= 1 

https://www.omegawatches.com/store/country/czech-republic/city/karlovy-vary
Items= 1 

https://www.omegawatches.com/store/country/czech-republic/city/ostrava-1
Items= 1 

https://www.omegawatches.com/store/country/czech-republic/city/praha
Items= 2 

https://www.omegawatches.com/store/country/denmark/city/aalborg
Items= 1 

https://www.omegawatches.com/store/country/denmark/city/arhus-c
Items= 1 

https://www.omegawatches.com/store/country/denmark/city/hellerup
Items= 1 

https://www.omegawatches.com/store/country/denmark/city/kobenhavn
Items= 1 

https://www.omegawatches.com/store/country/denmark/city/kobenhavn-k
Items= 3 

https://www.omegawatches.com/store/country/denmark/city/odense
Items= 1 

https://www.omegawatches.com/store/country/dominican-republic/city/punta-cana
Items= 1 

https://www.omegawatches.com/store/country/ecuador/city/guayaquil
Items= 1 

https://www.omegawatches.com/store/co

Items= 2 

https://www.omegawatches.com/store/country/germany/city/neuss
Items= 1 

https://www.omegawatches.com/store/country/germany/city/neustadt
Items= 1 

https://www.omegawatches.com/store/country/germany/city/nurnberg
Items= 1 

https://www.omegawatches.com/store/country/germany/city/oberhausen
Items= 2 

https://www.omegawatches.com/store/country/germany/city/oberstaufen
Items= 1 

https://www.omegawatches.com/store/country/germany/city/oberstdorf
Items= 1 

https://www.omegawatches.com/store/country/germany/city/oldenburg
Items= 2 

https://www.omegawatches.com/store/country/germany/city/osnabruck
Items= 1 

https://www.omegawatches.com/store/country/germany/city/posthausen
Items= 1 

https://www.omegawatches.com/store/country/germany/city/recklinghausen
Items= 1 

https://www.omegawatches.com/store/country/germany/city/reutlingen
Items= 1 

https://www.omegawatches.com/store/country/germany/city/rottach-egern
Items= 1 

https://www.omegawatches.com/store/country/germany/city/

Items= 1 

https://www.omegawatches.com/store/country/italy/city/mantova
Items= 1 

https://www.omegawatches.com/store/country/italy/city/messina
Items= 1 

https://www.omegawatches.com/store/country/italy/city/milano
Items= 2 

https://www.omegawatches.com/store/country/italy/city/milano-marittima
Items= 1 

https://www.omegawatches.com/store/country/italy/city/modena
Items= 1 

https://www.omegawatches.com/store/country/italy/city/napoli
Items= 1 

https://www.omegawatches.com/store/country/italy/city/nettuno
Items= 1 

https://www.omegawatches.com/store/country/italy/city/padova
Items= 1 

https://www.omegawatches.com/store/country/italy/city/palermo
Items= 1 

https://www.omegawatches.com/store/country/italy/city/parma
Items= 1 

https://www.omegawatches.com/store/country/italy/city/pavia
Items= 1 

https://www.omegawatches.com/store/country/italy/city/perugia
Items= 1 

https://www.omegawatches.com/store/country/italy/city/pescara
Items= 1 

https://www.omegawatches.com/store/coun

Items= 2 

https://www.omegawatches.com/store/country/malaysia/city/kota-kinabalu
Items= 2 

https://www.omegawatches.com/store/country/malaysia/city/kuala-lumpur
Items= 5 

https://www.omegawatches.com/store/country/malaysia/city/pahang
Items= 1 

https://www.omegawatches.com/store/country/malaysia/city/penang
Items= 1 

https://www.omegawatches.com/store/country/malaysia/city/selangor
Items= 2 

https://www.omegawatches.com/store/country/malta/city/la-valletta
Items= 1 

https://www.omegawatches.com/store/country/malta/city/luqa
Items= 1 

https://www.omegawatches.com/store/country/malta/city/st-julians
Items= 1 

https://www.omegawatches.com/store/country/mauritius/city/le-morne
Items= 1 

https://www.omegawatches.com/store/country/mauritius/city/port-louis
Items= 1 

https://www.omegawatches.com/store/country/mauritius/city/trou-d'eau-douce
Items= 1 

https://www.omegawatches.com/store/country/mexico/city/boca-del-rio
Items= 1 

https://www.omegawatches.com/store/country/mexico/cit

Items= 2 

https://www.omegawatches.com/store/country/republic-of-korea/city/jeju
Items= 2 

https://www.omegawatches.com/store/country/republic-of-korea/city/seoul
Items= 17 

https://www.omegawatches.com/store/country/republic-of-korea/city/sungnam-si
Items= 1 

https://www.omegawatches.com/store/country/republic-of-korea/city/suwon
Items= 1 

https://www.omegawatches.com/store/country/republic-of-korea/city/ulsan
Items= 1 

https://www.omegawatches.com/store/country/republic-of-korea/city/yongin-si
Items= 1 

https://www.omegawatches.com/store/country/reunion/city/saint-denis
Items= 1 

https://www.omegawatches.com/store/country/romania/city/bucharest
Items= 3 

https://www.omegawatches.com/store/country/russia/city/barnaul
Items= 1 

https://www.omegawatches.com/store/country/russia/city/blagoveschensk
Items= 1 

https://www.omegawatches.com/store/country/russia/city/chelyabinsk
Items= 1 

https://www.omegawatches.com/store/country/russia/city/irkutsk
Items= 1 

https://www.omegawa

Items= 1 

https://www.omegawatches.com/store/country/sweden/city/stockholm
Items= 4 

https://www.omegawatches.com/store/country/sweden/city/sundbyberg
Items= 1 

https://www.omegawatches.com/store/country/sweden/city/uppsala
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/arosa
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/ascona
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/baden
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/basel
Items= 2 

https://www.omegawatches.com/store/country/switzerland/city/bern
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/biel-bienne
Items= 2 

https://www.omegawatches.com/store/country/switzerland/city/brugg-ag
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/burgenstock
Items= 1 

https://www.omegawatches.com/store/country/switzerland/city/chur
Items= 2 

https://www.omegawatches.com/store/country/switzerland/

Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/derby
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/dudley
Items= 2 

https://www.omegawatches.com/store/country/united-kingdom/city/dundee
Items= 2 

https://www.omegawatches.com/store/country/united-kingdom/city/edinburgh
Items= 3 

https://www.omegawatches.com/store/country/united-kingdom/city/essex
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/exeter
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/gateshead
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/gatwick-airport
Items= 2 

https://www.omegawatches.com/store/country/united-kingdom/city/glasgow
Items= 3 

https://www.omegawatches.com/store/country/united-kingdom/city/greenhithe
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/guernsey
Items= 1 

https://www.omegawatches.com/store/country/united-kingdom/city/guildford
Items

In [295]:
len(df)

1416

In [296]:
df

Unnamed: 0,store_name,store_address1,store_address2,store_address3,store_address4,store_address5,store_address6,store_address7,brand,is_partner
0,GOTTFRIED Bab Ezzouar - MS DIFFUSION,Centre Commercial Bab Ezzouar,DZ - Alger,Algeria,,,,,Omega,1
1,GOTTFRIED Dely Brahim - MS DIFFUSION,"Bois des Cares ndeg2, Dely Brahim",DZ - Alger,Algeria,,,,,Omega,1
2,GOTTFRIED Golf - MS DIFFUSION,"2, Rue Shakespeare - El Mouradia",DZ - Alger,Algeria,,,,,Omega,1
3,GOTTFRIED Sidi Yahia - MS DIFFUSION,"Chemin Sidi Yahia ndeg6, Sidi Yahia",DZ - Alger,Algeria,,,,,Omega,1
4,GOTTFRIED Es Senia - MS DIFFUSION,"Centre Commercial El Kerma, 52 showroom B",Oran,Algeria,,,,,Omega,1
...,...,...,...,...,...,...,...,...,...,...
1413,Swiss Watches Union Agency,Sana'a Trade Center,Algiers St,Sana'a,Yemen,,,,Omega,1
1414,Swiss Watches Union Agency,Sana'a Trade Center,Algiers St.,Sana'a,Yemen,,,,Omega,1
1415,Swiss Watches Union Agency,Arafat Commercial Market,26 September St,Ta'izz,Yemen,,,,Omega,1
1416,Swiss Watches Union Agency,Arafat Commercial Market,26 September St.,Taiz,Yemen,,,,Omega,1


In [297]:
# Save the Omega rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'OMEGA'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/OMEGA_092021.xlsx


# Hublot

In [35]:
config = config_all['HUBLOT']

In [36]:
driver.get(config['start_url'][0])

In [37]:
rows_list = []

In [38]:
# The start page lists boutiques and retailers under two different xpaths.
# Both groups are read the same way; only the config key, the printed label and
# the is_partner flag differ.
store_groups = (
    # (config key, label printed with the count, is_partner value)
    ("items_boutique", "Number of items_boutique:", '0'),
    ("items_retailer", "Number of items_retailer:", '1'),
)

for items_key, count_label, partner_flag in store_groups:
    group_items = get_items(driver, config[items_key])
    print(count_label,len(group_items))

    for item in group_items:
        rows_list.append({
            'store_name': get_value(item,config,'store_name'),
            'store_address': get_value(item,config,'store_address'),
            'brand': "Hublot",
            'is_partner': partner_flag,
        })

df = pd.DataFrame(rows_list)
df.drop_duplicates(inplace=True)
print('total stores:', len(df))

Number of items_boutique: 135
Number of items_retailer: 628
total stores: 751


In [39]:
len(df)

751

In [40]:
# Save the Hublot rows held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'HUBLOT'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the monthly folder on first use instead of failing when the workbook is written.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings into hyperlinks.
# NOTE(review): pandas >= 1.3 deprecates `options=` in favour of
# engine_kwargs={'options': {...}} and pandas 2.0 removes it; switch when upgrading.
# The context manager closes (and therefore saves) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/HUBLOT_092021.xlsx


# TUDOR

In [46]:
# Make the 'TUDOR' entry of config_all the active scraping configuration.
config = config_all['TUDOR']

In [47]:
# Navigate the browser to the first start URL of the active configuration.
driver.get(config['start_url'][0])

In [48]:
# Empty accumulator of store rows (one dict per store). The scraping cell
# only appends to it, so repeated runs of that cell keep adding rows until
# this cell is run again.
rows_list = []

- Zoom out on the map to get as many store points as possible
- Move across the map
- Don't forget Hawaii and the Northern Mariana Islands

In [124]:
# Read the Tudor store items currently present in the page and append one row
# per item to rows_list, then rebuild the de-duplicated table from all rows
# accumulated so far.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

# Config keys extracted for every item, in output-column order.
tudor_fields = (
    'store_name1',
    'store_name2',
    'store_address1',
    'store_address2',
    'store_address3',
    'store_address4',
    'store_address5',
    'store_address6',
    'store_address7',
    'store_partner',
)

for item in items:
    row = {field: get_value(item, config, field) for field in tudor_fields}
    row['brand'] = 'Tudor'
    rows_list.append(row)

df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 2
total stores: 1730


In [125]:
# Derive the is_partner column from the scraped store_partner text.
# NOTE(review): extract_partner is defined outside this section; presumably it
# maps the label to a partner flag for the given brand — confirm.
df['is_partner'] = df['store_partner'].apply(lambda x : extract_partner(x, 'Tudor'))

In [126]:
# Display df, now including the is_partner column.
df

Unnamed: 0,store_name1,store_name2,store_address1,store_address2,store_address3,store_address4,store_address5,store_address6,store_address7,store_partner,brand,is_partner
0,‭Alain Joaillier‬,,35 Rue du Temple,17000 La Rochelle,Charente Maritime,France,,,,Official TUDOR Watch retailer,Tudor,1
1,‭Alain Joaillier‬,,27 Rue Gambetta,Royan Charente Maritime,17200,France,,,,Official TUDOR Watch retailer,Tudor,1
2,‭Anshindo Horloger La Paix Paris‬,,8 Rue de la Paix,75002 Paris,Paris,France,,,,Official TUDOR Watch retailer,Tudor,1
3,‭Antoine de Macedo Horloger‬,,201 Boulevard Saint Germain,Paris Paris,75006,France,,,,Official TUDOR Watch retailer,Tudor,1
4,‭Arije‬,,50 Rue Pierre Charron,75008 Paris,Paris,France,,,,Official TUDOR Watch retailer,Tudor,1
...,...,...,...,...,...,...,...,...,...,...,...,...
7433,‭The Royal Shop‬,,"The Royal Shop, 32 Broad Street",Bridgetown 11000,Saint Michael,Barbados,,,,Official TUDOR Watch retailer,Tudor,1
7495,‭Casa Banchero Jockey Plaza‬,,"Av. La Paz 1010, Miraflores",Lima,Lima 150122,Peru,,,,Official TUDOR Watch retailer,Tudor,1
7496,‭Casa Banchero Miraflores‬,,Centro Comercial Jockey Plaza - Av. Javier Pra...,Lima Lima,15023,Peru,,,,Official TUDOR Watch retailer,Tudor,1
7504,‭RELÓGIOS ROLEX LTDA‬,,"Avenida Paulista, 2006",14° andar,01310-926 Sao Paulo,Brazil,,,,Tudor affiliate,Tudor,1


In [127]:
# Export `df` to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'TUDOR'
mmyyyy = '092021'  # used for both the sub-folder and the file-name suffix
fpath = '../raw/{0}/{1}_{0}.xlsx'.format(mmyyyy, brand)
print('saving file in folder:', fpath)

# Create the target folder if it is missing so the export does not fail
# only when the workbook is finally written to disk.
os.makedirs(os.path.dirname(fpath), exist_ok=True)

# `options=` was deprecated in pandas 1.3 and removed in 2.0; workbook options
# for xlsxwriter are passed through `engine_kwargs` (requires pandas >= 1.3).
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings
# into hyperlinks. The `with` block closes the writer even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/TUDOR_092021.xlsx


# ULYSSE NARDIN

In [128]:
# Make the 'ULYSSE NARDIN' entry of config_all the active scraping configuration.
config = config_all['ULYSSE NARDIN']

In [129]:
# Navigate the browser to the first start URL of the active configuration.
driver.get(config['start_url'][0])

In [132]:
# Empty accumulator of store rows (one dict per store). The scraping cell
# only appends to it, so repeated runs of that cell keep adding rows until
# this cell is run again.
rows_list = []

- Filter Retailers
- Zoom out as far as possible on the map

In [133]:
# Read the Ulysse Nardin store items currently present in the page and append
# one row per item to rows_list; every row is stored with is_partner '1'.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

# Config keys extracted for every item, in output-column order.
un_fields = (
    'store_name',
    'store_address1',
    'store_address2',
    'store_address3',
    'country',
)

for item in items:
    row = {field: get_value(item, config, field) for field in un_fields}
    row['brand'] = 'Ulysse Nardin'
    row['is_partner'] = '1'
    rows_list.append(row)

# Rebuild the table from all accumulated rows and drop exact duplicates.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 396
total stores: 395


In [134]:
# Preview the first rows of df as a sanity check before exporting.
df.head()

Unnamed: 0,store_name,store_address1,store_address2,store_address3,country,brand,is_partner
0,Tiempo Global S.A.,"Plaza Colonia, Escazù",,SAN JOSE,Costa Rica,Ulysse Nardin,1
1,UHRSACHEN (MAEGLI – TICK DIFFERENT AG)),Kramgasse 19,3011.0,Bern,Switzerland,Ulysse Nardin,1
2,LES AMBASSADEURS SA,Rue du Rhône 62,1204.0,Genève,Switzerland,Ulysse Nardin,1
3,Les Ambassadeurs SA,Via Nassa 5,6900.0,Lugano,Switzerland,Ulysse Nardin,1
4,LES AMBASSADEURS AG,Bahnhofstrasse 64,8001.0,Zuerich,Switzerland,Ulysse Nardin,1


In [135]:
# Export `df` to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'ULYSSE_NARDIN'
mmyyyy = '092021'  # used for both the sub-folder and the file-name suffix
fpath = '../raw/{0}/{1}_{0}.xlsx'.format(mmyyyy, brand)
print('saving file in folder:', fpath)

# Create the target folder if it is missing so the export does not fail
# only when the workbook is finally written to disk.
os.makedirs(os.path.dirname(fpath), exist_ok=True)

# `options=` was deprecated in pandas 1.3 and removed in 2.0; workbook options
# for xlsxwriter are passed through `engine_kwargs` (requires pandas >= 1.3).
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings
# into hyperlinks. The `with` block closes the writer even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/ULYSSE_NARDIN_092021.xlsx


# GIRARD PERREGAUX

In [136]:
# Make the 'GIRARD PERREGAUX' entry of config_all the active scraping configuration.
config = config_all['GIRARD PERREGAUX']

In [137]:
# Navigate the browser to the first start URL of the active configuration.
driver.get(config['start_url'][0])

In [138]:
# Empty accumulator of store rows (one dict per store). The scraping cell
# only appends to it, so repeated runs of that cell keep adding rows until
# this cell is run again.
rows_list = []

- Filter Retailers
- Zoom out as far as possible on the map, then pan across it to collect stores

In [245]:
# Read the Girard Perregaux store items currently present in the page and
# append one row per item to rows_list; every row is stored with is_partner '1'.
items = get_items(driver, config["items"])
print('boutiques:', len(items))

# Config keys extracted for every item, in output-column order.
gp_fields = (
    'store_name',
    'store_address1',
    'store_address2',
    'store_address3',
)

for item in items:
    row = {field: get_value(item, config, field) for field in gp_fields}
    row['brand'] = 'Girard Perregaux'
    row['is_partner'] = '1'
    rows_list.append(row)

# Rebuild the table from all accumulated rows and drop exact duplicates.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

boutiques: 0
total stores: 290


In [246]:
# Preview the first rows of df as a sanity check before exporting.
df.head()

Unnamed: 0,store_name,store_address1,store_address2,store_address3,brand,is_partner
0,JOYERIA JAEL,Compostela 8,15004 A Coruña,Spain,Girard Perregaux,1
1,JOYERÍA ABRINES,Tetuan 1,41001 Sevilla,Spain,Girard Perregaux,1
2,Massimo Bianco,Calle del Teatro 38,03001 Alicante,Spain,Girard Perregaux,1
3,Miguel Munoz Joyeros,Calle Reyes Catolicos 29,18001 Granada,Spain,Girard Perregaux,1
4,Rabat - Barcelona,Passeig de Gracia 94,08008 Barcelona,Spain,Girard Perregaux,1


In [247]:
# Export `df` to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'GIRARD_PERREGAUX'
mmyyyy = '092021'  # used for both the sub-folder and the file-name suffix
fpath = '../raw/{0}/{1}_{0}.xlsx'.format(mmyyyy, brand)
print('saving file in folder:', fpath)

# Create the target folder if it is missing so the export does not fail
# only when the workbook is finally written to disk.
os.makedirs(os.path.dirname(fpath), exist_ok=True)

# `options=` was deprecated in pandas 1.3 and removed in 2.0; workbook options
# for xlsxwriter are passed through `engine_kwargs` (requires pandas >= 1.3).
# strings_to_urls=False stops xlsxwriter from converting URL-looking strings
# into hyperlinks. The `with` block closes the writer even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter',
                    engine_kwargs={'options': {'strings_to_urls': False}}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/GIRARD_PERREGAUX_092021.xlsx


# LONGINES

In [37]:
# Make the 'LONGINES' entry of config_all the active scraping configuration.
config = config_all['LONGINES']

In [38]:
# Navigate the browser to the first start URL of the active configuration.
driver.get(config['start_url'][0])

In [39]:
# Empty accumulator of store rows (one dict per store); the crawl cell
# appends to it, so re-run this cell first to start from scratch.
rows_list = []

In [40]:
# Crawl the Longines retailer locator: country page -> region page -> city page.
# On each city page, collect boutiques, retailers and distributors into
# rows_list, then build the de-duplicated table `df`.
base_url = "https://www.longines.com/retailers/"

# (config key holding the item XPath, is_partner flag stored on the rows):
# boutiques are flagged '0', retailers and distributors '1'.
store_groups = (
    ("items_boutique", '0'),
    ("items_retailer", '1'),
    ("items_distribu", '1'),
)
# Config keys extracted for every store item, in output-column order.
row_fields = ('store_name1', 'store_name2', 'store_address1', 'store_address2')

country_codes = get_items(driver, config["country_code"])
print("Number of countries=", len(country_codes))

for country_code in country_codes:
    print(country_code)
    driver.get(base_url + country_code.lower())
    time.sleep(3)  # fixed pause after navigation (presumably to let the page load)

    country_region_codes = get_items(driver, config["country_region_code"])
    for country_region_code in country_region_codes:
        # The first entry is the "Choose a Region" placeholder ("0"); skip it.
        if country_region_code == "0":
            continue
        print(country_region_code)
        driver.get(base_url + country_region_code.lower())
        time.sleep(3)

        country_region_city_codes = get_items(driver, config["country_region_city_code"])
        for country_region_city_code in country_region_city_codes:
            # The first entry is the "Choose a City" placeholder ("0"); skip it.
            if country_region_city_code == "0":
                continue
            print(country_region_city_code)
            driver.get(base_url + country_region_city_code.lower())
            time.sleep(3)

            # Same row layout for the three store groups; only the XPath key
            # and the is_partner flag differ.
            for items_key, partner_flag in store_groups:
                items = get_items(driver, config[items_key])
                print("Number of " + items_key + ":", len(items))

                for item in items:
                    row = {field: get_value(item, config, field) for field in row_fields}
                    row['brand'] = "Longines"
                    row['is_partner'] = partner_flag
                    rows_list.append(row)

# Rows repeated across city pages collapse here because only exact duplicates
# are dropped.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

Number of countries= 140
af
af/kabul
af/kabul/kabul
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
al
al/tirana
al/tirana/tirana
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
dz
dz/algiers
dz/algiers/alger
Number of items_boutique: 0
Number of items_retailer: 4
Number of items_distribu: 1
dz/oran
dz/oran/oran
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ad
ad/andorralavella
ad/andorralavella/andorralavella
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
ao
ao/provincedebenguela
ao/provincedebenguela/benguela
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
ao/provincedehuambo
ao/provincedehuambo/huambo
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
ao/provincedehuila
ao/provincedehuila/lubango
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_dis

Number of items_distribu: 0
at/styriaaut/graz
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
at/styriaaut/hartberg
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
at/styriaaut/leoben
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
at/styriaaut/lieboch
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
at/tyrol
at/tyrol/innsbruck
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
at/tyrol/ischgl
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
at/tyrol/jenbach
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
at/tyrol/kitzbuhel
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
at/tyrol/riedimoberinntal
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
at/tyrol/seefeld
Number of it

Number of items_distribu: 1
bz
bz/belize
bz/belize/corozal
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
bo
bo/santacruz
bo/santacruz/santacruz
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ba
ba/banjaluka
ba/banjaluka/banjaluka
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ba/kantonsarajevo
ba/kantonsarajevo/sarajevo
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
ba/kantonsarajevo/tuzla
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
br
br/riodejaneiro
br/riodejaneiro/riodejaneiro
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
br/saopaulo
br/saopaulo/saopaulo
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
vg
vg/tortola
vg/tortola/tortola
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
bg
bg/bu

Number of items_distribu: 1
cl/ohigginsregion
cl/ohigginsregion/rancagua
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
cl/santiagometropolitanregion
cl/santiagometropolitanregion/santiago
Number of items_boutique: 1
Number of items_retailer: 18
Number of items_distribu: 1
cl/tarapacaregion
cl/tarapacaregion/iquique
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
cl/valparaiso
cl/valparaiso/valparaiso
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
cl/valparaiso/vinadelmar
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
cn
cn/anhui
cn/anhui/anqing
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/anhui/bengbu
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 7
cn/anhui/chuzhou
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/anhui/fuyang


Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 7
cn/hunan
cn/hunan/changde
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/hunan/changsha
Number of items_boutique: 2
Number of items_retailer: 6
Number of items_distribu: 7
cn/hunan/chenzhou
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/hunan/hengyang
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 7
cn/hunan/shaoyang
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/hunan/xiangtan
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/hunan/yueyang
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/hunan/zhuzhou
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/innermongolia
cn/innermongolia/baotou
Number of items_boutique: 0
Number of items_retailer:

Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 7
cn/sichuan/nanchong
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 7
cn/sichuan/suining
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/tianjin
cn/tianjin/tianjin
Number of items_boutique: 4
Number of items_retailer: 4
Number of items_distribu: 7
cn/xinjiang
cn/xinjiang/karamay
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/xinjiang/kuerle
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/xinjiang/wulumuqi
Number of items_boutique: 0
Number of items_retailer: 4
Number of items_distribu: 7
cn/xinjiang/yili
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/xizangtibet
cn/xizangtibet/lasa
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 7
cn/yunnan
cn/yunnan/kunming
Number of items_boutiqu

Number of items_distribu: 1
dk/hillerodmunicipality
dk/hillerodmunicipality/hillerod
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/holstebromunicipality
dk/holstebromunicipality/holstebro
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/horsensmunicipality
dk/horsensmunicipality/horsens
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/horsholmmunicipality
dk/horsholmmunicipality/horsholm
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/kobenhavn
dk/kobenhavn/kobenhavn
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
dk/kobenhavn/kobenhavnk
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/kobenhavn/kobenhavns
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
dk/lyngbytaarbaekmunicipality
dk/lyngbytaarbaekmunicipality/lyngby
Numbe

Number of items_retailer: 1
Number of items_distribu: 1
fr/hautrhin
fr/hautrhin/colmar
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
fr/hautrhin/mulhouse
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
fr/hautsdeseine
fr/hautsdeseine/levalloisperret
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
fr/herault
fr/herault/montpellier
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
fr/illeetvilaine
fr/illeetvilaine/rennes
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
fr/indreetloire
fr/indreetloire/tours
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
fr/isere
fr/isere/grenoble
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
fr/loire
fr/loire/saintetienne
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
f

Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
de/badenwurttemberg/pforzheim
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/badenwurttemberg/ravensburg
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/badenwurttemberg/reutlingen
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/badenwurttemberg/schramberg
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
de/badenwurttemberg/schwaikheim
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
de/badenwurttemberg/stuttgart
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
de/badenwurttemberg/sulzbach
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
de/badenwurttemberg/titisee
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de

Number of items_distribu: 1
de/niedersachsen/nordhorn
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/niedersachsen/oldenburg
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/niedersachsen/osnabruck
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/niedersachsen/schneverdingenotlunzen
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
de/niedersachsen/vechta
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/niedersachsen/wolfsburg
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
de/nordrheinwestfalen
de/nordrheinwestfalen/aachen
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
de/nordrheinwestfalen/alsdorf
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
de/nordrheinwestfalen/arnsberg
Number of items_b

Number of items_retailer: 10
Number of items_distribu: 1
gr/attica/chalandri
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/attica/glyfada
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/attica/kallithea
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/attica/marousi
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/attica/neaionia
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/attica/piraeus
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
gr/centralgreece
gr/centralgreece/chalkida
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/crete
gr/crete/agiosnicolaos
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gr/crete/chersonissos
Number of items_boutique: 0
Number of items_retailer: 2
Numbe

Number of items_retailer: 1
Number of items_distribu: 1
ir/mazandaran
ir/mazandaran/sari
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ir/tabriz
ir/tabriz/tabriz
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ir/tehran
ir/tehran/tajrish
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ir/tehran/tehran
Number of items_boutique: 0
Number of items_retailer: 19
Number of items_distribu: 1
iq
iq/albasra
iq/albasra/albasra
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 2
iq/almosul
iq/almosul/almosul
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 2
iq/baghdd
iq/baghdd/baghdad
Number of items_boutique: 0
Number of items_retailer: 12
Number of items_distribu: 2
iq/kurdistanregion
iq/kurdistanregion/erbil
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 2
iq/kurdistanregion/sulaimaniya

Number of items_retailer: 2
Number of items_distribu: 1
it/emiliaromagna/pavullonelfrignano
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/emiliaromagna/piacenza
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/emiliaromagna/ravenna
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
it/emiliaromagna/reggioemilia
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/emiliaromagna/riccione
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/emiliaromagna/rimini
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
it/emiliaromagna/rolo
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
it/emiliaromagna/sassuolo
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/emiliaromagna/traversetolo
Number of items_boutique: 0

Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/fano
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/pesaro
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/portosangiorgio
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/sanbenedettodeltronto
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/senigallia
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/marche/villasantantonio
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
it/molise
it/molise/campobasso
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/molise/termoli
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
it/navarra
it/navarra/borgomanero
Number of items_boutiqu

Number of items_distribu: 1
it/tuscany/siena
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
it/tuscany/terentola
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/tuscany/viareggio
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/tuscany/vicopisanoloccaprona
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/umbria
it/umbria/foligno
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/umbria/perugia
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
it/umbria/spoleto
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/umbria/terni
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/valledaosta
it/valledaosta/aosta
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
it/veneto

Number of items_distribu: 1
kz/kyzylorda
kz/kyzylorda/kyzylorda
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
kz/mangystau
kz/mangystau/aktau
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
kz/southkazakhstan
kz/southkazakhstan/shimkent
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ke
ke/nairobi
ke/nairobi/nairobi
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
kw
kw/ahmadi
kw/ahmadi/eqaila
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
kw/ahmadi/fahaheel
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
kw/alasimah
kw/alasimah/kuwait
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
kw/alasimah/kuwaitcity
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
kw/farwaniyah
kw/farwaniyah/alrai
Number of items

Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
mx/nuevoleon/sanpedrogarzagarcia
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
mx/oaxaca
mx/oaxaca/oaxaca
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
mx/puebla
mx/puebla/puebla
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
mx/queretaro
mx/queretaro/queretaro
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
mx/quintanaroo
mx/quintanaroo/cancun
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
mx/quintanaroo/playadelcarmen
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
mx/sanluispotosi
mx/sanluispotosi/sanluispotosi
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
mx/sinaloa
mx/sinaloa/matzalan
Number of items_boutique: 0
Number of items_retailer: 1
Num

nl/zuidholland/alphenadrijn
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
nl/zuidholland/denhaag
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
nl/zuidholland/leiden
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
nl/zuidholland/leidschendam
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
nl/zuidholland/middelharnis
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
nl/zuidholland/oegstgeest
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
nl/zuidholland/rotterdam
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
nl/zuidholland/sgravenzande
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
nz
nz/auckland
nz/auckland/auckland
Number of items_boutique: 0
Number of items_retailer: 7
Number of items_distribu: 0


Number of items_retailer: 4
Number of items_distribu: 1
pl/pomeranianvoivodeship/gdynia
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
pl/silesianvoivodeship
pl/silesianvoivodeship/bielskobiaa
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
pl/silesianvoivodeship/chorzow
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
pl/silesianvoivodeship/czstochowa
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
pl/silesianvoivodeship/katowice
Number of items_boutique: 0
Number of items_retailer: 4
Number of items_distribu: 1
pl/warmianmasurianvoivodeship
pl/warmianmasurianvoivodeship/olsztyn
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
pl/westpomeranianvoivodeship
pl/westpomeranianvoivodeship/szczecin
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
pt
pt/aveiro
pt/aveiro/aveiro
Numb

Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
ru/krasnoyarskkrai
ru/krasnoyarskkrai/krasnoyarsk
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
ru/lipetskoblast
ru/lipetskoblast/lipetsk
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ru/moscow
ru/moscow/moscow
Number of items_boutique: 3
Number of items_retailer: 27
Number of items_distribu: 1
ru/moscowoblast
ru/moscowoblast/kolomna
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ru/murmanskoblast
ru/murmanskoblast/murmansk
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ru/nizhnynovgorodoblast
ru/nizhnynovgorodoblast/nizhnynovgorod
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
ru/northossetiaalania
ru/northossetiaalania/vladikavkaz
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ru/n

Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
si/ljubljana
si/ljubljana/ljubljana
Number of items_boutique: 1
Number of items_retailer: 4
Number of items_distribu: 0
si/piran
si/piran/portoroz
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
si/radovljica
si/radovljica/bled
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
si/styriaslo
si/styriaslo/celje
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
za
za/easterncape
za/easterncape/eastlondon
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
za/easterncape/portelizaberth
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
za/gauteng
za/gauteng/bedfordview
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
za/gauteng/bryanston
Number of items_boutique: 0
Number of items_retailer: 0
Number of items

Number of items_distribu: 0
es/cataluna/santcugatdelvalles
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
es/cataluna/sitges
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
es/cataluna/tarragona
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
es/cataluna/tarrasa
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
es/ceuta
es/ceuta/ceuta
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
es/comunidaddemadrid
es/comunidaddemadrid/alcaladehenares
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
es/comunidaddemadrid/alcobendas
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 0
es/comunidaddemadrid/alcorcon
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
es/comunidaddemadrid/getafe
Number of items_boutique: 0
Number o

Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/aargau/rheinfelden
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/aargau/spreitenbach
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/aargau/wohlen
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/appenzellinnerrhoden
ch/appenzellinnerrhoden/appenzell
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/baselland
ch/baselland/arlesheim
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/baselland/reinachbl
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/baselstadt
ch/baselstadt/basel
Number of items_boutique: 0
Number of items_retailer: 4
Number of items_distribu: 1
ch/bern
ch/bern/aarberg
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/bern

Number of items_distribu: 1
ch/ticino/locarno
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ch/ticino/lugano
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
ch/uri
ch/uri/altdorf
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/valais
ch/valais/cransmontana
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ch/valais/martigny
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/valais/sierre
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/valais/sion
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
ch/valais/visp
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
ch/valais/zermatt
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
ch/vaud
ch/vaud/grandson
Number of items_b

tr/mula/fethiye
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
vi
vi/stthomas
vi/stthomas/charlotteamalie
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
vi/stthomas/stthomas
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
vi/tortola
vi/tortola/tortola
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
ua
ua/dnipropetrovskoblast
ua/dnipropetrovskoblast/dnipro
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 0
ua/donetskoblast
ua/donetskoblast/kramatorsk
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 0
ua/donetskoblast/mariupul
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
ua/kharkivoblast
ua/kharkivoblast/kharkov
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 0
ua/kyivcity
ua/kyivcity/kiev
Number of items_bo

Number of items_distribu: 1
gb/hertfordshire
gb/hertfordshire/tring
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
gb/hertfordshire/watford
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/highland
gb/highland/dundee
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/highland/inverness
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/highwycombe
gb/highwycombe/highwycombe
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
gb/jersey
gb/jersey/jersey
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/kent
gb/kent/bluewater
Number of items_boutique: 0
Number of items_retailer: 3
Number of items_distribu: 1
gb/kent/canterbury
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/kent/maidstone
Number of items_boutique: 0
Number of items_retail

gb/westyorkshire/halifax
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/westyorkshire/ilkley
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/westyorkshire/leeds
Number of items_boutique: 0
Number of items_retailer: 6
Number of items_distribu: 1
gb/westyorkshire/wakefield
Number of items_boutique: 0
Number of items_retailer: 0
Number of items_distribu: 1
gb/windsorandmaidenhead
gb/windsorandmaidenhead/windsor
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
gb/worcestershire
gb/worcestershire/shrewsbury
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/worcestershire/worcester
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
gb/york
gb/york/york
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us
us/alabama
us/alabama/birminghamal
Number of items_boutique: 0
Number of i

Number of items_distribu: 1
us/hamilton
us/hamilton/hamilton
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/hawaii
us/hawaii/honolulu
Number of items_boutique: 1
Number of items_retailer: 5
Number of items_distribu: 1
us/illinois
us/illinois/chicago
Number of items_boutique: 0
Number of items_retailer: 5
Number of items_distribu: 1
us/illinois/glenview
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/illinois/northriverside
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/illinois/oakbrook
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us/illinois/oakbrookterrace
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/illinois/schaumburg
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us/illinois/skokie
Number of items_boutique: 0
Number of items_retailer: 1
Number of i

Number of items_distribu: 1
us/newyork/whiteplains
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us/northcarolina
us/northcarolina/charlotte
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us/northcarolina/durham
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/northcarolina/raleigh
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/northcarolina/winstonsalem
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/northdakota
us/northdakota/fargo
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/ohio
us/ohio/cincinnati
Number of items_boutique: 0
Number of items_retailer: 2
Number of items_distribu: 1
us/ohio/columbus
Number of items_boutique: 0
Number of items_retailer: 1
Number of items_distribu: 1
us/ohio/fairlawn
Number of items_boutique: 0
Number of items_retailer: 1
N

In [41]:
# Row count of df, the frame that the next cell exports as the LONGINES file.
len(df)

4196

In [42]:
# Export the scraped stores held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'LONGINES'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the export does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False is forwarded to xlsxwriter so URL-looking strings stay plain text.
# NOTE(review): newer pandas (>= 1.3) expects engine_kwargs={'options': {...}} instead of
# options=...; switch when the pandas version is upgraded.
# The context manager closes (and therefore writes) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/LONGINES_092021.xlsx


# RICHARD MILLE

In [248]:
# Select the 'RICHARD MILLE' entry of config_all; the cells below read its
# 'start_url', 'country_code' and 'items' keys.
config = config_all['RICHARD MILLE']

In [249]:
# Load the first URL listed under config['start_url'] in the Selenium browser.
driver.get(config['start_url'][0])

In [250]:
# Empty accumulator; the scraping cell below appends one dict per store to it.
rows_list = []

In [251]:
# Click through every country entry of the Richard Mille locator and record the
# stores that get_items finds in the page after each click.
country_codes = driver.find_elements_by_xpath(config["country_code"])

print("Number of countries=",len(country_codes))

# Fields read through get_value for each store, in output-column order.
store_fields = ('store_name', 'store_address1', 'store_address2', 'store_address3', 'country')

# XPath positions are 1-based, so iterate 1..N rather than 0..N-1.
for position in range(1, len(country_codes) + 1):
    # Look the entry up again by position on every pass instead of reusing
    # the elements collected above.
    country_xpath = config["country_code"] + '[' + str(position) + ']'
    driver.find_element_by_xpath(country_xpath).click()
    time.sleep(3)  # fixed pause after the click, before the page source is parsed

    items = get_items(driver, config["items"])
    print("Number of items:",len(items))

    for item in items:
        record = {field: get_value(item, config, field) for field in store_fields}
        record['brand'] = "Richard Mille"
        record['is_partner'] = '1'
        rows_list.append(record)

# One row per distinct store record.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))

Number of countries= 24
Number of items: 0
Number of items: 1
Number of items: 1
Number of items: 5
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 6
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 2
Number of items: 1
Number of items: 1
Number of items: 1
Number of items: 2
Number of items: 1
Number of items: 8
Number of items: 1
total stores: 41


In [252]:
# Row count of df after the de-duplication done in the scraping cell.
len(df)

41

In [253]:
# Export the scraped stores held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'RICHARD_MILLE'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the export does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False is forwarded to xlsxwriter so URL-looking strings stay plain text.
# NOTE(review): newer pandas (>= 1.3) expects engine_kwargs={'options': {...}} instead of
# options=...; switch when the pandas version is upgraded.
# The context manager closes (and therefore writes) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/RICHARD_MILLE_092021.xlsx


# CHOPARD

In [254]:
# Select the 'CHOPARD' entry of config_all; the cells below read its
# 'start_url', 'country_code' and 'items' keys.
config = config_all['CHOPARD']

In [263]:
# Load the first URL listed under config['start_url'] in the Selenium browser.
driver.get(config['start_url'][0])

In [264]:
# Empty accumulator; the scraping cell below appends one dict per store to it.
rows_list = []

In [265]:
# Read the country codes from the start page, then load the store locator once
# per country and record every store found on that page.
country_codes = get_items(driver, config["country_code"])
print("Number of countries=",len(country_codes))

for country_code in country_codes:
    print(country_code)
    # The locator takes the country as a query-string parameter.
    url_country = "https://www.chopard.com/intl/storelocator" + "?country=" + country_code

    driver.get(url_country)
    time.sleep(3)  # fixed pause after navigation, before the page source is parsed

    items = get_items(driver, config["items"])
    print("Items=",len(items))
    for item in items:
        record = {
            field: get_value(item, config, field)
            for field in ('store_name', 'store_address1', 'store_address2')
        }
        record['brand'] = "Chopard"
        rows_list.append(record)

# One row per distinct store record.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))
# is_partner is derived from the store name by extract_partner (defined elsewhere in the notebook).
df['is_partner'] = df['store_name'].apply(lambda name: extract_partner(name, 'Chopard'))

Number of countries= 126
AL
Items= 1
DZ
Items= 2
AD
Items= 2
AR
Items= 4
AM
Items= 1
AW
Items= 3
AU
Items= 12
AT
Items= 16
AZ
Items= 2
BS
Items= 0
BH
Items= 5
BB
Items= 1
BE
Items= 11
BA
Items= 2
BR
Items= 0
VG
Items= 1
BG
Items= 1
KH
Items= 1
CA
Items= 12
KY
Items= 1
TW
Items= 20
CO
Items= 7
CG
Items= 0
CR
Items= 1
HR
Items= 1
CW
Items= 3
CY
Items= 2
CZ
Items= 3
CI
Items= 1
DK
Items= 3
DO
Items= 0
EC
Items= 3
EG
Items= 3
EE
Items= 1
FI
Items= 4
FR
Items= 79
GF
Items= 1
GE
Items= 1
DE
Items= 89
GI
Items= 1
GR
Items= 8
GU
Items= 0
GT
Items= 3
HT
Items= 1
HN
Items= 0
HK
Items= 28
HU
Items= 1
IN
Items= 11
ID
Items= 4
IQ
Items= 1
IE
Items= 1
IL
Items= 2
IT
Items= 72
JM
Items= 2
JP
Items= 47
JO
Items= 2
KZ
Items= 2
KW
Items= 9
KG
Items= 0
LV
Items= 1
LB
Items= 4
LI
Items= 2
LT
Items= 1
LU
Items= 2
CN
Items= 50
MO
Items= 6
MY
Items= 2
MV
Items= 1
MT
Items= 2
MU
Items= 0
MX
Items= 16
MD
Items= 1
MC
Items= 1
MN
Items= 0
ME
Items= 1
MA
Items= 5
NP
Items= 1
NL
Items= 18
NC
Items= 0
NZ
Items= 3
N

In [266]:
# Row count of df after the de-duplication done in the scraping cell.
len(df)

986

In [267]:
# Export the scraped stores held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'CHOPARD'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the export does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False is forwarded to xlsxwriter so URL-looking strings stay plain text.
# NOTE(review): newer pandas (>= 1.3) expects engine_kwargs={'options': {...}} instead of
# options=...; switch when the pandas version is upgraded.
# The context manager closes (and therefore writes) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/CHOPARD_092021.xlsx


# BVLGARI

In [298]:
# Select the 'BVLGARI' entry of config_all; the cells below read its
# 'start_url', 'country' and 'items' keys.
config = config_all['BVLGARI']

In [299]:
# Load the first URL listed under config['start_url'] in the Selenium browser.
driver.get(config['start_url'][0])

In [300]:
# Empty accumulator; the scraping cell below appends one dict per store to it.
rows_list = []

In [301]:
# Read the per-country locator URLs from the start page, then visit each one
# and record every store found on it.
countries_url = get_items(driver, config["country"])
print("Number of countries=",len(countries_url))

for country_url in countries_url:
    print(country_url)
    driver.get(country_url)
    time.sleep(3)  # fixed pause after navigation, before the page source is parsed

    for item in get_items(driver, config["items"]):
        record = {
            field: get_value(item, config, field)
            for field in ('store_name', 'store_boutique', 'store_address')
        }
        record['brand'] = "Bvlgari"
        rows_list.append(record)

# One row per distinct store record.
df = pd.DataFrame(rows_list).drop_duplicates()
print('total stores:', len(df))
# is_partner is derived from the store_boutique field by extract_partner (defined elsewhere in the notebook).
df['is_partner'] = df['store_boutique'].apply(lambda boutique: extract_partner(boutique, 'Bvlgari'))

Number of countries= 98
https://www.bulgari.com/en-int/storelocator/angola
https://www.bulgari.com/en-int/storelocator/egypt
https://www.bulgari.com/en-int/storelocator/morocco
https://www.bulgari.com/en-int/storelocator/r%C3%A9union
https://www.bulgari.com/en-int/storelocator/south+africa
https://www.bulgari.com/en-int/storelocator/tunisia
https://www.bulgari.com/en-int/storelocator/azerbaijan
https://www.bulgari.com/en-int/storelocator/bahrain
https://www.bulgari.com/en-int/storelocator/cambodia
https://www.bulgari.com/en-int/storelocator/georgia
https://www.bulgari.com/en-int/storelocator/hong+kong+s.a.r.
https://www.bulgari.com/en-int/storelocator/india
https://www.bulgari.com/en-int/storelocator/indonesia
https://www.bulgari.com/en-int/storelocator/iraq
https://www.bulgari.com/en-int/storelocator/israel
https://www.bulgari.com/en-int/storelocator/japan
https://www.bulgari.com/en-int/storelocator/jordan
https://www.bulgari.com/en-int/storelocator/kazakhstan
https://www.bulgari.com/

In [302]:
# Row count of df after the de-duplication done in the scraping cell.
len(df)

1006

In [303]:
# Export the scraped stores held in df to ../raw/<mmyyyy>/<brand>_<mmyyyy>.xlsx.
brand = 'BVLGARI'
mmyyyy = '092021'
fpath = '../raw/' + mmyyyy + '/'+ brand + '_'+ mmyyyy + '.xlsx'
print('saving file in folder:', fpath)
# Create the month folder on first use so the export does not fail on a missing directory.
os.makedirs(os.path.dirname(fpath), exist_ok=True)
# strings_to_urls=False is forwarded to xlsxwriter so URL-looking strings stay plain text.
# NOTE(review): newer pandas (>= 1.3) expects engine_kwargs={'options': {...}} instead of
# options=...; switch when the pandas version is upgraded.
# The context manager closes (and therefore writes) the workbook even if to_excel raises.
with pd.ExcelWriter(fpath, engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

saving file in folder: ../raw/092021/BVLGARI_092021.xlsx


# CLEAN AND SAVE

In [6]:
import pandas as pd
import numpy as np
import os
import re

In [7]:
# Consolidate every per-brand raw export of the month into one frame `df` with
# the columns brand / store_name / store_address / is_partner.
path_raw_data = '../raw/092021'

# Brands whose address is built with plain `+` concatenation of these columns,
# in this order. With `+`, a NaN in any part makes the whole address NaN.
CONCAT_ADDRESS_COLUMNS = {
    'PATEK PHILIPPE': ['country', 'city', 'address'],
    'AUDEMARS PIGUET': ['address_city'],
    'BREGUET': ['city', 'address'],
    'CARTIER': ['country', 'city', 'address'],
    'CHANEL': ['city', 'address'],
    'DIOR': ['city', 'address'],
    'FERRARI': ['country', 'city'],
    'HERMES': ['country', 'address_city'],
    'LAMBORGHINI': ['address'],
    'ROLEX': ['info5', 'info4', 'info3', 'info2', 'info1'],
    'VACHERON CONSTANTIN': ['country', 'city', 'address'],
}

# Brands whose NaN cells are blanked (whole frame) before the concatenation, so
# a missing part gives an empty segment instead of a NaN address.
BLANK_NAN_BRANDS = {'CARTIER', 'ROLEX'}

# Brands whose address is the ', '-join of the non-null values of these columns.
JOINED_ADDRESS_COLUMNS = {
    'OMEGA': ['store_address1', 'store_address2', 'store_address3', 'store_address4',
              'store_address5', 'store_address6', 'store_address7'],
    'TAGHEUER': ['store_address1', 'store_address2', 'store_address3'],
    'ZENITH': ['store_address1', 'store_address2', 'store_address3'],
    'CHOPARD': ['store_address1', 'store_address2'],
    'GIRARD PERREGAUX': ['store_address1', 'store_address2', 'store_address3'],
    'LONGINES': ['store_address1', 'store_address2'],
    'RICHARD MILLE': ['store_address1', 'store_address2', 'store_address3'],
    'TUDOR': ['store_address1', 'store_address2', 'store_address3',
              'store_address4', 'store_address5', 'store_address6'],
    'ULYSSE NARDIN': ['store_address1', 'store_address2', 'store_address3', 'country'],
}

# Brands whose store name is split over two columns and joined the same way.
JOINED_NAME_COLUMNS = {
    'LONGINES': ['store_name1', 'store_name2'],
    'TUDOR': ['store_name1', 'store_name2'],
}

OUTPUT_COLUMNS = ['brand', 'store_name', 'store_address', 'is_partner']


def _concat_columns(frame, columns):
    """Return the given columns chained with ', ' using `+` (NaN propagates)."""
    combined = frame[columns[0]]
    for column in columns[1:]:
        combined = combined + ', ' + frame[column]
    return combined


def _join_non_null(frame, columns):
    """Return, for each row, the non-null values of `columns` joined with ', '."""
    return frame[columns].apply(lambda row: ', '.join(row.dropna()), axis=1)


def _read_raw_file(path):
    """Read one raw export; return None when the extension is not .xlsx or .csv."""
    extension = os.path.splitext(path)[1]
    if extension == '.xlsx':
        # No `encoding` argument: it is meaningless for .xlsx and is rejected
        # by recent pandas versions.
        return pd.read_excel(path)
    if extension == '.csv':
        return pd.read_csv(path, sep=',', encoding='utf8')
    return None


def _harmonise_brand_frame(fname, dfc):
    """Apply the brand-specific fixes so the frame exposes OUTPUT_COLUMNS.

    fname is the brand key derived from the file name (e.g. 'PATEK PHILIPPE').
    Brands without an entry in any table are returned unchanged.
    """
    if fname == 'LOUIS VUITTON':
        # TODO: add extract retailers algorithm
        dfc['is_partner'] = 0
        dfc['brand'] = 'Louis Vuitton'

    if fname in BLANK_NAN_BRANDS:
        dfc = dfc.replace(np.nan, '')
    if fname == 'ROLEX':
        # Strip the invisible directional-formatting characters U+202C / U+202D.
        dfc['store_name'] = dfc['store_name'].apply(lambda x: re.sub('[\u202c\u202d]', '', x))
    if fname == 'TAGHEUER':
        # Cells holding only a comma are treated as missing so the join skips them.
        dfc = dfc.replace(',', np.nan)

    if fname in JOINED_NAME_COLUMNS:
        dfc['store_name'] = _join_non_null(dfc, JOINED_NAME_COLUMNS[fname])
    if fname in CONCAT_ADDRESS_COLUMNS:
        dfc['store_address'] = _concat_columns(dfc, CONCAT_ADDRESS_COLUMNS[fname])
    if fname in JOINED_ADDRESS_COLUMNS:
        dfc['store_address'] = _join_non_null(dfc, JOINED_ADDRESS_COLUMNS[fname])
    return dfc


brand_frames = []

# sorted() makes the row order of the consolidated frame reproducible;
# os.listdir returns entries in arbitrary order.
for f in sorted(os.listdir(path_raw_data)):
    print(f)
    # Exclude the already-consolidated file (its name contains 'INSERT').
    if 'INSERT' in f:
        continue

    dfc = _read_raw_file(os.path.join(path_raw_data, f))
    if dfc is None:
        # Not a raw export: skip it instead of re-processing the previous file's data.
        continue

    # 'PATEK_PHILIPPE_092021.xlsx' -> 'PATEK PHILIPPE'
    stem = os.path.splitext(f)[0]
    fname = re.sub(r'_[0-9]{5,}$', '', stem).replace('_', ' ')

    dfc = _harmonise_brand_frame(fname, dfc)
    brand_frames.append(dfc[OUTPUT_COLUMNS])

# Concatenate once at the end rather than growing the frame inside the loop.
df = pd.concat(brand_frames, ignore_index=True) if brand_frames else pd.DataFrame()

print('total length:', len(df))

AUDEMARS_PIGUET_092021.xlsx
BREGUET_092021.xlsx
BVLGARI_092021.xlsx
CARTIER_092021.xlsx
CHANEL_092021.xlsx
CHOPARD_092021.xlsx
DIOR_092021.xlsx
FERRARI_092021.xlsx
GIRARD_PERREGAUX_092021.xlsx
HERMES_092021.xlsx
HUBLOT_092021.xlsx
LAMBORGHINI_092021.xlsx
LONGINES_092021.xlsx
LOUIS_VUITTON_092021.csv
OMEGA_092021.xlsx
PATEK_PHILIPPE_092021.xlsx
RICHARD_MILLE_092021.xlsx
ROLEX_092021.xlsx
TAGHEUER_092021.xlsx
TUDOR_092021.xlsx
ULYSSE_NARDIN_092021.xlsx
VACHERON_CONSTANTIN_092021.xlsx
ZENITH_092021.xlsx
total length: 19877


In [8]:
# Preview the first rows of the consolidated frame built in the previous cell.
df.head()

Unnamed: 0,brand,store_name,store_address,is_partner
0,Audemars Piguet,AP House Barcelona,"Passeig de Gracia 37, 08006 Barcelona",0
1,Audemars Piguet,AP House Madrid,"Calle de Serrano, 66, 28001 Madrid",0
2,Audemars Piguet,Audemars Piguet Boutique Monaco,"2 avenue des Spélugues, 98000 Monaco",0
3,Audemars Piguet,Audemars Piguet Boutique Geneva Fusterie,"Place de la Fusterie 12, 1204 Geneva",0
4,Audemars Piguet,Audemars Piguet Boutique Geneva Montres Prestige,"Quai du mont-blanc 19, Fairmont Grand Hôtel, 1...",0


In [9]:
# Spot-check the consolidated rows whose brand is 'Lamborghini'.
df.loc[df['brand'] == 'Lamborghini']

Unnamed: 0,brand,store_name,store_address,is_partner
5627,Lamborghini,Exclusive Cars Vertriebs Gmbh,ul. Polczynska 120 B 00-347 Warszawa PL 00-347...,0
5628,Lamborghini,Jemercar Tecnicos Del Automovil Services SL,Via de Las Dos Castillas 9 28224 Pozuelo De Al...,0
5629,Lamborghini,Siva SA,"Rua São Francisco 582, Alcabideche 2645-019 Al...",0
5630,Lamborghini,Saudi Arabian Marketing & Agencies Co. Ltd,Pr. Mohammed Bin Abdul Aziz Street P.O. Box :3...,0
5631,Lamborghini,Fouad Alghanim & Sons Automotive,"Al-Tilal Complex, Pepsi Road 80, Shuwaikh Indu...",0
...,...,...,...,...
5776,Lamborghini,RPM Co. Ltd,4-34-4 HARADA HIGASHI-KU FUKUOKA-SHI 812 0063 ...,0
5777,Lamborghini,Tajima Motor Corporation Co,"44-1, Shinden, Nanakita, Izumi-ku, Sendai-shi,...",0
5778,Lamborghini,Zagame Automotive Group,382 Swan St. Richmond 'Vic 3141 Richmond VIC A...,0
5779,Lamborghini,"Sqda Motors Co., Ltd","622(Samsung-dong) Yeongdong-daero, Gangnam-gu ...",0


In [10]:
# Write the consolidated frame next to the raw exports. The 'INSERT' marker in
# the name is what the consolidation loop checks to skip this file on a re-run.
NAME_SL = 'SL_ALL_092021_INSERT'
# strings_to_urls=False is forwarded to xlsxwriter so URL-looking strings stay plain text.
# NOTE(review): newer pandas (>= 1.3) expects engine_kwargs={'options': {...}} instead of
# options=...; switch when the pandas version is upgraded.
# The context manager closes (and therefore writes) the workbook even if to_excel raises.
with pd.ExcelWriter(os.path.join(path_raw_data, NAME_SL + '.xlsx'), engine='xlsxwriter', options={'strings_to_urls': False}) as writer:
    df.to_excel(writer, index=False)

# INSERTION AND PARSING

- insert stores : insert_stores.py
- python3 -m store_locator.insert_stores data_to_push/SL_ALL_102020_INSERT.xlsx 2020-10-26 -p

- parse stores : parse_stores.py
- python3 -m store_locator.parse_stores "Q4 October 2020" --brand "Audemars Piguet" -p

# CHECK UPDATED STORES STATUS

When collecting data and checking changes between 2 months you have:
- New stores (genuinely new stores that now appear on the website)
- Closed stores (genuinely closed stores that no longer appear on the website)
- Closed stores detected only because you forgot to search a specific place while scraping
- New stores detected only because someone changed the address; each of these should be offset by a matching closed store

- Sort by store_name and check store_name and store_address similarity

To find new/closed pairs that are only due to a change in the text of the store name or the store address:
- Check "new" status: change the status from "new" to "-" and discard the corresponding closed store (by setting discarded to 1)

- Check closed status : check that you have not forgotten any place when scraping