## 證交所爬蟲

In [1]:
import requests
import pandas as pd
import datetime as dt # 時間套件
from dateutil.relativedelta import relativedelta

In [2]:
# Stock ticker to query.
stock_id = '2453'
# Today's date as YYYYMMDD; the endpoint is queried with this date and
# the recorded output shows the daily rows of that month.
date = dt.date.today().strftime("%Y%m%d")
# Fetch the daily trading data from the TWSE website.
# The query string is passed via `params` so the URL contains no stray
# whitespace (a backslash continuation inside a string literal keeps the
# next line's indentation as part of the string).
stock_data = requests.get(
    'https://www.twse.com.tw/rwd/zh/afterTrading/STOCK_DAY',
    params={'date': date, 'stockNo': stock_id},
    timeout=10,  # do not hang the notebook on an unresponsive server
)
# Fail loudly on HTTP errors instead of trying to parse an error page.
stock_data.raise_for_status()
json_data = stock_data.json()
# 'fields' holds the column names and 'data' the rows.
df = pd.DataFrame(data=json_data['data'],
                  columns=json_data['fields'])
df
# Alternative: show only the last rows.
#df.tail()

Unnamed: 0,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,成交筆數
0,113/09/02,321254,18068837,56.5,56.7,55.9,55.9,-0.3,270
1,113/09/03,455533,25411001,56.1,56.6,55.0,55.2,-0.7,402
2,113/09/04,759838,40073231,53.0,53.8,51.2,52.4,-2.8,690
3,113/09/05,444128,23418646,52.9,53.5,51.8,52.3,-0.1,375
4,113/09/06,310343,16275762,52.4,52.8,51.8,52.7,0.4,292
5,113/09/09,346807,18137213,51.6,53.3,51.5,53.3,0.6,317
6,113/09/10,795172,41082349,53.3,53.3,50.7,51.1,-2.2,737
7,113/09/11,149980,7748959,51.5,52.0,51.3,51.8,0.7,156
8,113/09/12,294880,15478702,52.9,52.9,52.2,52.4,0.6,237
9,113/09/13,459543,24493519,52.7,53.9,52.7,53.5,1.1,377


In [3]:
# Number of months of data to fetch (the current month counts as one).
month_num = 3
date_now = dt.datetime.now()

# First day of each of the last `month_num` months as YYYYMMDD, oldest
# first, so the concatenated rows end up in chronological order.
date_list = [
    (date_now - relativedelta(months=i)).replace(day=1).strftime('%Y%m%d')
    for i in reversed(range(month_num))
]

# Endpoint without query string; the parameters are passed separately so
# the URL contains no stray whitespace from a line continuation.
url = 'https://www.twse.com.tw/rwd/zh/afterTrading/BWIBBU'

# Collect one DataFrame per month and concatenate once at the end.
monthly_frames = []

# Fetch the consecutive months one request at a time.
for date in date_list:
    try:
        response = requests.get(
            url,
            params={'date': date, 'stockNo': stock_id},
            timeout=10,  # do not hang the notebook on an unresponsive server
        )
        json_data = response.json()
        df = pd.DataFrame(data=json_data['data'],
                          columns=json_data['fields'])
    except Exception as e:
        # Best effort: skip the month but show why it failed.
        print(f"無法取得{date}的資料, 可能資料量不足.")
        print(f"  ({type(e).__name__}: {e})")
    else:
        monthly_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when
# every request failed.
if monthly_frames:
    all_df = pd.concat(monthly_frames, ignore_index=True)
else:
    all_df = pd.DataFrame()

all_df.head()

Unnamed: 0,日期,殖利率(%),股利年度,本益比,股價淨值比,財報年/季
0,113年07月01日,3.83,112,22.47,3.21,113/1
1,113年07月02日,3.83,112,22.47,3.21,113/1
2,113年07月03日,3.82,112,22.51,3.22,113/1
3,113年07月04日,3.79,112,22.72,3.25,113/1
4,113年07月05日,3.52,112,24.44,3.5,113/1


### ⚠️ 如果過度頻繁爬取證交所的資料（約連續 50 次）會被偵測並封鎖 IP，可以在每次請求之間搭配 `time.sleep()` 延遲，避免被認為是機器人程式。

## 用 BeautifulSoup4 取得 Yahoo 股市資料

In [4]:
from datetime import datetime
from bs4 import BeautifulSoup
import time

In [6]:
def yahoo_stock(stock_id):
    """Scrape the real-time quote panel of a stock from Yahoo Taiwan.

    Downloads ``https://tw.stock.yahoo.com/quote/{stock_id}.TW``, locates
    the ``<section id="qsp-overview-realtime-info">`` element and turns
    each ``<li>`` of its first ``<ul>`` into one column of a single-row
    DataFrame.

    Args:
        stock_id: Ticker inserted into the quote URL, e.g. ``'8374'``.

    Returns:
        A one-row ``pandas.DataFrame`` whose first two columns are
        ``'日期'`` (taken from the ``<time>`` element) and ``'股號'``
        (``stock_id``), followed by one column per quote field.

    Raises:
        requests.RequestException: If the request fails, times out or
            returns an HTTP error status.
        ValueError: If the expected section, ``<time>`` or ``<ul>``
            element is missing from the page.
    """
    url = f'https://tw.stock.yahoo.com/quote/{stock_id}.TW'
    # Download the page; the timeout keeps a stalled request from hanging.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    # Parse the HTML content.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Look the section up once and fail with a clear message instead of
    # an AttributeError on None when the page layout is not as expected.
    section = soup.find('section', {'id': 'qsp-overview-realtime-info'})
    if section is None:
        raise ValueError(
            f"section 'qsp-overview-realtime-info' not found at {url}")
    time_element = section.find('time')
    item_list = section.find('ul')
    if time_element is None or item_list is None:
        raise ValueError(
            f"<time> or <ul> element missing in quote section at {url}")

    fields = []
    datas = []
    for item in item_list.find_all('li'):
        for num, span in enumerate(item.find_all('span')):
            text = span.text
            # Skip spans without text.
            if text == '':
                continue
            # The first span of an item is the field name; any other
            # non-empty span contributes a value.
            if num == 0:
                fields.append(text)
            else:
                datas.append(text)

    # Build the single-row DataFrame.
    df = pd.DataFrame([datas], columns=fields)
    # Prepend the timestamp and ticker columns.
    # NOTE: 'datatime' (sic) is the attribute key the page is read with;
    # the recorded output shows it resolving to a timestamp, so it is kept.
    df.insert(0, '日期', time_element['datatime'])
    df.insert(1, '股號', stock_id)
    return df

# Rebind the notebook-level ticker and display its Yahoo quote row.
stock_id = '8374' # Ace Pillar (company name per the original note) -- TODO confirm
yahoo_stock(stock_id)

Unnamed: 0,日期,股號,成交,開盤,最高,最低,均價,成交金額(億),昨收,漲跌幅,漲跌,總量,昨量,振幅
0,2024/09/23 14:30,8374,138.5,142.5,143.5,138.0,139.9,4.38,142.5,2.81%,4.0,3132,7087,3.86%
