In [1]:
from bs4 import BeautifulSoup
import requests
import pymysql

class Stock:
    """Scrape quote rows from Yahoo Taiwan Stock and store them in MySQL.

    Construct with any number of stock-number strings, call ``scrape()`` to
    fetch one row per stock, and optionally pass those rows to ``save()``.
    """

    # Seconds to wait for the quote page; without a timeout requests.get()
    # can block forever on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # Constructor
    def __init__(self, *stock_numbers):
        """Remember the stock numbers (strings such as '2451') to scrape."""
        self.stock_numbers = stock_numbers

    # Scrape
    def scrape(self):
        """Fetch the quote page of every stock number and extract one row each.

        Returns:
            list[tuple[str, ...]]: one tuple per stock number, made of the
            data date followed by the stripped text of the first 11 ``<td>``
            cells of the page's third table (12 fields, the same count as the
            columns listed in the INSERT statement of ``save()``).

        Raises:
            Whatever ``requests.get`` raises (including a timeout), and
            AttributeError / IndexError when the page does not contain the
            expected ``<font class="tt">`` element or third table.
        """
        result = []  # final result

        for stock_number in self.stock_numbers:

            response = requests.get(
                "https://tw.stock.yahoo.com/q/q?s=" + stock_number,
                timeout=self.REQUEST_TIMEOUT)
            # The "加到投資組合" label is removed from the HTML before parsing
            # so it does not end up in the extracted cell text.
            soup = BeautifulSoup(response.text.replace("加到投資組合", ""), "lxml")

            # Data date: the last 9 characters of the <font class="tt"> text.
            stock_date = soup.find(
                "font", {"class": "tt"}).getText().strip()[-9:]

            # Third table on the page (indexes start at 0).
            table = soup.find_all("table")[2]
            # First 11 cells of that table; with the date they form 12 fields.
            tds = table.find_all("td")[0:11]

            result.append((stock_date,) +
                tuple(td.getText().strip() for td in tds))

        return result

    # Store in MySQL
    def save(self, stocks):
        """Insert every row of ``stocks`` into the ``market`` table.

        Args:
            stocks: iterable of 12-item sequences in the column order of the
                INSERT statement below (the shape ``scrape()`` returns).

        All rows are committed together after the last insert. Any exception
        is printed rather than raised (best-effort behaviour), and the
        connection is always closed afterwards.
        """
        ## Connection settings
        # NOTE(review): "******" is a masked placeholder; supply the real
        # password from outside the notebook instead of committing it.
        db_settings = {
            "host": "127.0.0.1",
            "port": 3306,
            "user": "root",
            "password": "******",
            "db": "stock",
            "charset": "utf8"
        }
        conn = None  # stays None when connect() itself fails
        try:
            conn = pymysql.connect(**db_settings)

            ## Database operations need a cursor object
            with conn.cursor() as cursor:
                sql = """INSERT INTO market(
                                market_date,
                                stock_name,
                                market_time,
                                final_price,
                                buy_price,
                                sell_price,
                                ups_and_downs,
                                lot,
                                yesterday_price,
                                opening_price,
                                highest_price,
                                lowest_price)
                         VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

                for stock in stocks:
                    cursor.execute(sql, stock)
                conn.commit()
        except Exception as ex:
            print("Exception:", ex)
        finally:
            # Release the connection on success and on failure alike.
            if conn is not None:
                conn.close()
stock = Stock('2451', '2454')  # create a Stock object for two stock numbers
print(stock.scrape())  # print the scraped rows
# stock.save(stock.scrape())  # store the scraped rows in the MySQL database


[('109/12/16', '2451創見', '14:30', '64.3', '64.2', '64.3', '△0.5', '1,358', '63.8', '63.9', '64.4', '63.6'), ('109/12/16', '2454聯發科', '14:30', '701', '701', '702', '0.00', '6,244', '701', '711', '714', '701')]


## BeautifulSoup開發網頁爬蟲的實用技巧

In [2]:
import requests
from bs4 import BeautifulSoup
# Download the ETtoday travel category page (the path is the URL-encoded
# form of "桃園") and parse it; later cells query the resulting `soup`.
response = requests.get(
    "https://travel.ettoday.net/category/%E6%A1%83%E5%9C%92/",
    timeout=10)  # do not let an unresponsive server hang the kernel
response.raise_for_status()  # stop here instead of parsing an HTTP error page
soup = BeautifulSoup(response.text, "html.parser")
print(soup.prettify())  # print the re-indented HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html itemscope="" itemtype="http://schema.org/ItemList" xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://ogp.me/ns/fb#">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="zh-TW" http-equiv="Content-Language"/>
  <link href="//cdn2.ettoday.net/style/travel/images/touch-icon.png" rel="apple-touch-icon" sizes="180x180"/>
  <link href="//cdn2.ettoday.net/style/travel/images/touch-icon.png" rel="shortcut icon" sizes="192x192"/>
  <title>
   桃園相關新聞懶人包, 照片, 影片, 報導, 資訊及訊息 -第1頁| ETtoday 旅遊雲 | ETtoday新聞雲
  </title>
  <meta content="桃園相關新聞懶人包, 照片, 影片, 報導, 資訊及訊息" name="description"/>
  <!-- facebook 專用-->
  <meta content="zh_TW" property="og:locale"/>
  <meta content="146858218737386" property="fb:app_id"/>
  <meta content="ETtoday 旅遊雲" property="og:site_name"/>
  <meta content="https://www.facebook.com/ETtodayTRAVEL" property

### 以HTML標籤及屬性搜尋節點

* find()

In [3]:
# find() returns only the first tag that matches, here the first <h3>.
result = soup.find("h3")
print(result)

<h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>
</h3>


* find_all()

In [4]:
# find_all() returns a list of matches: <h3> tags whose itemprop attribute is
# "headline", capped at three results by limit=3.
result = soup.find_all("h3", itemprop="headline", limit=3)
print(result)

[<h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>
</h3>, <h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876155.htm" itemprop="url">到山上過「紅花耶誕節」！埃及風5米聖誕樹..</a>
</h3>, <h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876035.htm" itemprop="url">全台獨創「企鵝號誌燈」在桃園！2種企鵝變..</a>
</h3>]


In [5]:
# A list of tag names matches any of them; limit=2 keeps the first two hits
# among <h3> and <p> tags.
result = soup.find_all(["h3", "p"], limit=2)
print(result)

[<h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>
</h3>, <p class="summary" itemprop="description">桃園人免跑新北耶誕城，位於南崁的台茂購物中心明（16）晚將有「超大型光雕秀」試映，城堡外牆將打上長達70公尺的光雕展演，以雪人穿越時空過耶誕為主題，俏皮而炫麗，從16日起至27日每晚6至9點，每個整點播放一次，陪大家度過奇幻色彩的耶誕。</p>]


* select_one()

In [6]:
# Locate the first headline <h3>, then use the CSS selector "a" to pick the
# first <a> element inside it.
result = soup.find("h3", itemprop="headline")
print(result.select_one("a"))

<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>


* select()

In [7]:
# select() returns a list of every element matching the CSS selector, here
# all <a> elements inside the first <div itemprop="itemListElement">.
result = soup.find("div", itemprop="itemListElement")
print(result.select("a"))

[<a class="pic" href="https://travel.ettoday.net/article/1876978.htm">
<img data-original="https://cdn2.ettoday.net/images/5335/c5335728.jpg" itemprop="image" onerror="this.src='//cdn2.ettoday.net/style/travel/images/fb_ettoday_travel_logo.jpg'" src="https://cdn2.ettoday.net/style/misc/loading_200x150.gif"/>
</a>, <a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>]


### 以CSS類別(class)搜尋節點

In [8]:
# class_ (with a trailing underscore, since `class` is a Python keyword)
# filters by CSS class. Despite its name, `titles` holds the first
# <p class="summary"> element here.
titles = soup.find("p", class_="summary")
print(titles)

<p class="summary" itemprop="description">桃園人免跑新北耶誕城，位於南崁的台茂購物中心明（16）晚將有「超大型光雕秀」試映，城堡外牆將打上長達70公尺的光雕展演，以雪人穿越時空過耶誕為主題，俏皮而炫麗，從16日起至27日每晚6至9點，每個整點播放一次，陪大家度過奇幻色彩的耶誕。</p>


In [9]:
# Same class filter with find_all(): a list of up to three
# <p class="summary"> elements.
titles = soup.find_all("p", class_="summary", limit=3)
print(titles)

[<p class="summary" itemprop="description">桃園人免跑新北耶誕城，位於南崁的台茂購物中心明（16）晚將有「超大型光雕秀」試映，城堡外牆將打上長達70公尺的光雕展演，以雪人穿越時空過耶誕為主題，俏皮而炫麗，從16日起至27日每晚6至9點，每個整點播放一次，陪大家度過奇幻色彩的耶誕。</p>, <p class="summary" itemprop="description">家長們又有遛小孩新去處！全台首家「巧虎夢想樂園」將在12月17日試營運，即日起接受線上預約，12月31日前，凡購買兒童票即可享有一位陪同成人免費。樂園開幕後，將成為繼Xpark水族館、JETS嘉年華之後最受矚目焦點。                                                <em content="2020-12-14T14:19:00+08:00" itemprop="datePublished">(2020-12-14 14:19)</em>
</p>, <p class="summary" itemprop="description">想從郊山健行往前跨一步成為山系人嗎？那就從新北第一高峰塔曼山開始吧！塔曼山是入門的中級山岳，適合從健行舒適圈剛進入登山界的你。走在塔曼山的步道裡，就像進入了電影«阿凡達»的魔幻世界一般。先從入門中級山開始訓練以後嘉明湖、松蘿湖都不是你對手啦！                                                <em content="2020-12-08T11:19:00+08:00" itemprop="datePublished">(2020-12-08 11:19)</em>
</p>]


In [10]:
# The CSS selector ".summary" matches elements by class; with limit=3 the
# printed result is the same as the find_all() call in the previous cell.
titles = soup.select(".summary", limit=3)
print(titles)

[<p class="summary" itemprop="description">桃園人免跑新北耶誕城，位於南崁的台茂購物中心明（16）晚將有「超大型光雕秀」試映，城堡外牆將打上長達70公尺的光雕展演，以雪人穿越時空過耶誕為主題，俏皮而炫麗，從16日起至27日每晚6至9點，每個整點播放一次，陪大家度過奇幻色彩的耶誕。</p>, <p class="summary" itemprop="description">家長們又有遛小孩新去處！全台首家「巧虎夢想樂園」將在12月17日試營運，即日起接受線上預約，12月31日前，凡購買兒童票即可享有一位陪同成人免費。樂園開幕後，將成為繼Xpark水族館、JETS嘉年華之後最受矚目焦點。                                                <em content="2020-12-14T14:19:00+08:00" itemprop="datePublished">(2020-12-14 14:19)</em>
</p>, <p class="summary" itemprop="description">想從郊山健行往前跨一步成為山系人嗎？那就從新北第一高峰塔曼山開始吧！塔曼山是入門的中級山岳，適合從健行舒適圈剛進入登山界的你。走在塔曼山的步道裡，就像進入了電影«阿凡達»的魔幻世界一般。先從入門中級山開始訓練以後嘉明湖、松蘿湖都不是你對手啦！                                                <em content="2020-12-08T11:19:00+08:00" itemprop="datePublished">(2020-12-08 11:19)</em>
</p>]


### 搜尋父節點

In [11]:
# Start from the first <a itemprop="url"> and collect its <h3> ancestors;
# find_parents() returns them as a list.
result = soup.find("a", itemprop="url")
parents = result.find_parents("h3")
print(parents)

[<h3 itemprop="headline">
<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>
</h3>]


### 搜尋前面的同層(兄弟)節點

In [12]:
# find_previous_siblings() looks only at siblings that come before the
# headline <h3> under the same parent, and returns the matching <a> tags.
result = soup.find("h3", itemprop="headline")
previous_node = result.find_previous_siblings("a")
print(previous_node)

[<a class="pic" href="https://travel.ettoday.net/article/1876978.htm">
<img data-original="https://cdn2.ettoday.net/images/5335/c5335728.jpg" itemprop="image" onerror="this.src='//cdn2.ettoday.net/style/travel/images/fb_ettoday_travel_logo.jpg'" src="https://cdn2.ettoday.net/style/misc/loading_200x150.gif"/>
</a>]


### 搜尋後面的同層(兄弟)節點

In [13]:
# find_next_siblings() looks only at siblings that come after the headline
# <h3> under the same parent, and returns the matching <p> tags.
result = soup.find("h3", itemprop="headline")
next_node = result.find_next_siblings("p")
print(next_node)

[<p class="summary" itemprop="description">桃園人免跑新北耶誕城，位於南崁的台茂購物中心明（16）晚將有「超大型光雕秀」試映，城堡外牆將打上長達70公尺的光雕展演，以雪人穿越時空過耶誕為主題，俏皮而炫麗，從16日起至27日每晚6至9點，每個整點播放一次，陪大家度過奇幻色彩的耶誕。</p>]


### 取得屬性值

In [14]:
# Collect every headline <h3> and print the <a> element inside each one.
titles = soup.find_all("h3", itemprop="headline")
for title in titles:
    print(title.select_one("a"))

<a href="https://travel.ettoday.net/article/1876978.htm" itemprop="url">桃園人注意！「70米城堡光雕秀」16日晚..</a>
<a href="https://travel.ettoday.net/article/1876155.htm" itemprop="url">到山上過「紅花耶誕節」！埃及風5米聖誕樹..</a>
<a href="https://travel.ettoday.net/article/1876035.htm" itemprop="url">全台獨創「企鵝號誌燈」在桃園！2種企鵝變..</a>
<a href="https://travel.ettoday.net/article/1875985.htm" itemprop="url">桃園「巧虎夢想樂園」17日試營運！31日前買孩童送大人票</a>
<a href="https://travel.ettoday.net/article/1871574.htm" itemprop="url">台版阿凡達「魔幻森林」太仙！登新北最高峰　順遊秘境莊園賞雲瀑</a>
<a href="https://travel.ettoday.net/article/1871179.htm" itemprop="url">美翻！桃園「落羽松秘境」免費看　350株落羽松倒映水池超有意境</a>
<a href="https://travel.ettoday.net/article/1870849.htm" itemprop="url">小7最新卡娜赫拉聯名店！水族館、機場搬進店　兔兔化身空服員超萌</a>
<a href="https://travel.ettoday.net/article/1865745.htm" itemprop="url">免費餵Q萌小鹿！桃園浪漫水上哈比屋　落羽松、黃金花海任你逛</a>
<a href="https://travel.ettoday.net/article/1869997.htm" itemprop="url">免人擠人！楊梅私人庭園有紫爆仙草花　50元入園送仙草茶、小禮物</a>


In [15]:
# Reuses `titles` from the previous cell; get("href") reads the link target
# from each headline's <a> element.
for title in titles:
    print(title.select_one("a").get("href"))

https://travel.ettoday.net/article/1876978.htm
https://travel.ettoday.net/article/1876155.htm
https://travel.ettoday.net/article/1876035.htm
https://travel.ettoday.net/article/1875985.htm
https://travel.ettoday.net/article/1871574.htm
https://travel.ettoday.net/article/1871179.htm
https://travel.ettoday.net/article/1870849.htm
https://travel.ettoday.net/article/1865745.htm
https://travel.ettoday.net/article/1869997.htm


### 取得連結文字

In [16]:
# getText() returns the text inside each headline's <a> element, i.e. the
# visible link caption.
titles = soup.find_all("h3", itemprop="headline")
for title in titles:
    print(title.select_one("a").getText())

桃園人注意！「70米城堡光雕秀」16日晚..
到山上過「紅花耶誕節」！埃及風5米聖誕樹..
全台獨創「企鵝號誌燈」在桃園！2種企鵝變..
桃園「巧虎夢想樂園」17日試營運！31日前買孩童送大人票
台版阿凡達「魔幻森林」太仙！登新北最高峰　順遊秘境莊園賞雲瀑
美翻！桃園「落羽松秘境」免費看　350株落羽松倒映水池超有意境
小7最新卡娜赫拉聯名店！水族館、機場搬進店　兔兔化身空服員超萌
免費餵Q萌小鹿！桃園浪漫水上哈比屋　落羽松、黃金花海任你逛
免人擠人！楊梅私人庭園有紫爆仙草花　50元入園送仙草茶、小禮物
