## Import Module

In [87]:
from selenium import webdriver
from bs4 import BeautifulSoup
from tqdm import tqdm_notebook as tqdm
import pandas as pd
import threading
import queue
import time
import re

## Necessary Data

In [88]:
city_name_list = ["桃園市", 
                  "基隆市", 
                  "新竹市", 
                  "新竹縣", 
                  "宜蘭縣", 
                  "臺中市", 
                  "苗栗縣", 
                  "彰化縣", 
                  "南投縣", 
                  "雲林縣",
                  "臺南市",
                  "高雄市",
                  "嘉義市",
                  "嘉義縣",
                  "屏東縣",
                  "臺東縣",
                  "花蓮縣",
                  "澎湖縣",
                  "金門縣",
                  "連江縣"]

In [89]:
job_queue = queue.Queue()

for city in city_name_list:
    job_queue.put(city)

## Worker Class

In [90]:
class Worker(threading.Thread):
    
    def __init__(self, job_queue, base_url):
        
        super().__init__()
        
        self.job_queue = job_queue
        self.base_url = base_url
        self.city_name = None
        self.driver = None
        
        
    def init_store_space(self):
        
        self.final_dict = {}
        self.name = []
        self.phone = []
        self.address = []
        
    
    def run(self):
        
        self.init_webdriver()
        
        while self.job_queue.qsize() >= 1:
            
            self.init_store_space()
            
            self.city_name = self.job_queue.get()
            
            if self.process_page() is False:
                continue
            
            if self.scrape_data() is False:
                continue
            
            self.convert_df()
            
        self.destruct_web_driver()
    
    
    def init_webdriver(self):
        
        self.driver = webdriver.Chrome("C:\chromedriver_win32\chromedriver")
        
    
    def destruct_web_driver(self):
        
        self.driver.close()
        
        
    def process_page(self):
        
        self.driver.get("https://www.panasonic.com/tw/consumer/where-to-buy.html")
        time.sleep(3.5)
        
        try:
            self.driver.find_element_by_xpath('//*[@id="category1"]/option[1]').click()
            self.driver.find_element_by_xpath('//*[@id="a-search"]').send_keys(self.city_name)
            self.driver.find_element_by_xpath('//*[@id="searchsubmit"]').click()
            time.sleep(3.5)
            return True
        except:
            print(self.city_name + " " + "failed in process_page.")
            return False
        
    
    def scrape_data(self):
        
        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")
        soup = soup.find_all("div", class_="shop")
        
        if len(soup) == 0:
            print(self.city_name + " " + "failed in scrape_data.")
            return False
        
        print(self.city_name + " " + "Scraped !")
        for idx in tqdm(range(len(soup))):
            
            shop = soup[idx]
            
            try:
                na = shop.find("h3", class_="subheader3").text
            except:
                na = None
            self.name.append(na)
            
            try:
                add = shop.find_all("p", class_="bodycopy1")[1].text
            except:
                add = None
            self.address.append(add)
            
            try:
                ph = shop.find_all("p", class_="bodycopy1")[3].text
                ph = re.findall("\d+-\d+", ph)[0]
            except:
                ph = None
            self.phone.append(ph)
            
        self.final_dict["公司名稱"] = self.name
        self.final_dict["地址"] = self.address
        self.final_dict["電話"] = self.phone
        
        
    def convert_df(self):
        
        df = pd.DataFrame.from_dict(self.final_dict)
        df.to_excel(self.city_name+".xlsx")
        

## Main Driver

In [91]:
url = "https://www.panasonic.com/tw/consumer/where-to-buy.html"

worker1 = Worker(job_queue, url)
worker1.start()

worker2 = Worker(job_queue, url)
worker2.start()

worker3 = Worker(job_queue, url)
worker3.start()

worker4 = Worker(job_queue, url)
worker4.start()

worker5 = Worker(job_queue, url)
worker5.start()

新竹縣 Scraped !
基隆市 Scraped !
新竹市 Scraped !
宜蘭縣 Scraped !


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))







Exception in thread []:
Traceback (most recent call last):
  File "c:\users\johnny\appdata\local\programs\python\python37\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "<ipython-input-90-b4019475c2ff>", line 34, in run
    if self.scrape_data() is False:
  File "<ipython-input-90-b4019475c2ff>", line 87, in scrape_data
    self.name.append(na)
AttributeError: 'str' object has no attribute 'append'

Exception in thread []:
Traceback (most recent call last):
  File "c:\users\johnny\appdata\local\programs\python\python37\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "<ipython-input-90-b4019475c2ff>", line 34, in run
    if self.scrape_data() is False:
  File "<ipython-input-90-b4019475c2ff>", line 87, in scrape_data
    self.name.append(na)
AttributeError: 'str' object has no attribute 'append'

Exception in thread []:
Traceback (most recent call last):
  File "c:\users\johnny\appdata\local\programs\python\python37\lib\threading.py", line 

桃園市 Scraped !


HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




Exception in thread []:
Traceback (most recent call last):
  File "c:\users\johnny\appdata\local\programs\python\python37\lib\threading.py", line 926, in _bootstrap_inner
    self.run()
  File "<ipython-input-90-b4019475c2ff>", line 34, in run
    if self.scrape_data() is False:
  File "<ipython-input-90-b4019475c2ff>", line 87, in scrape_data
    self.name.append(na)
AttributeError: 'str' object has no attribute 'append'



## Debug

In [9]:
driver = webdriver.Chrome("C:\chromedriver_win32\chromedriver")

In [58]:
driver.get("https://www.panasonic.com/tw/consumer/where-to-buy.html")

In [59]:
driver.find_element_by_xpath('//*[@id="category1"]/option[1]').click()

In [60]:
driver.find_element_by_xpath('//*[@id="a-search"]').send_keys("雲林縣")

In [61]:
driver.find_element_by_xpath('//*[@id="searchsubmit"]').click()

In [62]:
html = driver.page_source

In [63]:
soup = BeautifulSoup(html, "html.parser")

In [64]:
soup

<html class="js flexbox canvas canvastext webgl no-touch geolocation postmessage websqldatabase indexeddb hashchange history draganddrop websockets rgba hsla multiplebgs backgroundsize borderimage borderradius boxshadow textshadow opacity cssanimations csscolumns cssgradients cssreflections csstransforms csstransforms3d csstransitions fontface generatedcontent video audio localstorage sessionstorage webworkers applicationcache svg inlinesvg smil svgclippaths fullscreen" lang="zh" style="" xml:lang="zh" xmlns="http://www.w3.org/1999/xhtml"><!--  [if lt IE 7 ]> <html lang="en" class="no-js ie6"> <![endif]   --><!--  [if IE 7 ]> <html lang="en" class="no-js ie7"> <![endif]      --><!--  [if IE 8 ]> <html lang="en" class="no-js ie8"> <![endif]      --><!--  [if IE 9 ]> <html lang="en" class="no-js ie9"> <![endif]      --><!-- Build Version: 2.3.6.10_0 --><head class="at-element-marker" prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# article: http://ogp.me/ns/article#"><script src="h

In [65]:
soup = soup.find_all("div", class_="shop")

In [67]:
len(soup)

15

In [68]:
soup[14].find("h3", class_="subheader3").text

'宏達電氣行'

In [69]:
soup[14].find_all("p", class_="bodycopy1")[1].text

'雲林縣西螺鎮河南里埔心路81號'

In [70]:
phone = soup[14].find_all("p", class_="bodycopy1")[3].text

In [71]:
re.findall("\d+-\d+", phone)

['05-5862959']