# Web Scraping Practice - 博客來

### Web scraping with Python using Beautiful Soup & requests

In [1]:
import csv
import requests
from bs4 import BeautifulSoup

### Fetch the bookstore listing page (https://www.books.com.tw/web/books_bmidm_0207/?loc=P_0005_2_007) and scrape each book's information

In [2]:
# Listing page to scrape.
LISTING_URL = 'https://www.books.com.tw/web/books_bmidm_0207/?loc=P_0005_2_007'

# Send a browser-like User-Agent with the request.
# NOTE(review): presumably the site rejects the default requests User-Agent -- confirm.
# Named request_headers (not headers) so it cannot be clobbered by the CSV
# column list, which keeps this cell safe to re-run at any point.
request_headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36',
}

# requests has no default timeout; without one a stalled connection blocks the kernel forever.
resp = requests.get(LISTING_URL, headers=request_headers, timeout=30)

# Fail loudly on 4xx/5xx instead of silently parsing an error page into zero items.
resp.raise_for_status()

soup = BeautifulSoup(resp.text, 'html.parser')

# Every element with class "item" is a candidate book entry.
items = soup.select('.item')

In [3]:
# Build one dict per listing entry (title / author / publisher) and collect them in row_list.
row_list = []
skipped_count = 0  # entries that lack the links needed to build a complete record

for item in items:
    title_links = item.select('h4 a')
    info_links = item.select('.info a')

    # The first '.info a' link is read as the author and the second as the publisher,
    # so a usable entry needs a title link and at least two info links.
    # Incomplete entries are skipped (and counted) rather than silently ignored.
    if not title_links or len(info_links) < 2:
        skipped_count += 1
        continue

    title = title_links[0].text
    author = info_links[0].text
    publisher = info_links[1].text

    row_list.append({
        'title': title,
        'author': author,
        'publisher': publisher,
    })

# Surface how many entries were dropped so an unexpected page-layout change is noticeable.
if skipped_count:
    print('Skipped {} of {} entries without title/author/publisher links'.format(skipped_count, len(items)))

### Save the scraped data as a CSV file.

In [4]:
# CSV column order; the names match the keys of each dict in row_list.
headers = ['title', 'author', 'publisher']

# newline='' hands line-ending control to the csv module (otherwise blank rows
# appear between records on Windows). The explicit UTF-8 encoding keeps the
# Chinese text writable regardless of the platform's default locale encoding.
with open('books.csv', 'w', newline='', encoding='utf-8') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=headers)
    dict_writer.writeheader()
    dict_writer.writerows(row_list)

print(row_list)

[{'title': '歡迎來到志祺七七!不搞笑、談時事，資訊設計原來很可以：從50人的資訊設計公司到日更YouTuber的瘋狂技能樹', 'author': '張志祺,林欣婕', 'publisher': '天下文化'}, {'title': '【獨家親簽】歡迎來到志祺七七!不搞笑、談時事，資訊設計原來很可以：從50人的資訊設計公司到日更YouTuber的瘋狂技能樹', 'author': '張志祺,林欣婕', 'publisher': '天下文化'}, {'title': '15招市場搶攤法', 'author': '周紹賢', 'publisher': '優品文化事業有限公司'}, {'title': '網紅影響力：自媒體如何塑造我們的數位時代', 'author': '莎拉．麥柯克戴爾', 'publisher': '遠流'}, {'title': '超人氣FB+IG+LINE社群經營與行銷力：用225招快速聚粉，飆升流量變業績!', 'author': '文淵閣工作室', 'publisher': '碁峰'}, {'title': '訂閱經濟的獲利實例：包包、西裝、手錶、眼鏡、汽車到房子……超過20個案例，讓顧客從買一次變成一直買。', 'author': '日經xTREND', 'publisher': '大是文化'}, {'title': '迎戰微型創業新零售，跨境電商全攻略：批貨技巧→品牌形塑→跨國交易，打造業績無上限的獲利心法', 'author': '陳若甯,黃偉宙', 'publisher': '麥浩斯'}, {'title': 'STP行銷策略之Python商業應用實戰|網路爬蟲x機器學習x數據分析', 'author': '楊超霆,羅凱揚,蘇宇暉,鍾皓軒', 'publisher': '碁峰'}, {'title': 'Telegram行動行銷|操作技巧x品牌貼圖x經營心法', 'author': '劉滄碩', 'publisher': '碁峰'}, {'title': '社群大數據：網路聲量、口碑及輿情分析2/e', 'author': '楊立偉', 'publisher': '前程文化'}, {'title': '社群營銷的魔法：社群媒體營銷聖經', 'author': '陳威樺', 'publisher': '集夢坊'}, {'tit