In [9]:
import requests
import datetime
import pandas as pd
import numpy as np
from io import StringIO
import time

In [10]:
def parseData(date):
    """Download the TWSE ``MI_INDEX`` CSV report for ``date`` and tabulate it.

    The response text is searched for the table whose header line contains
    the column name ``證券代號``; that table is parsed and returned with a
    ``(stock_id, date)`` MultiIndex.

    Args:
        date: Object with ``strftime`` (``datetime.date`` / ``datetime.datetime``);
            formatted as ``YYYYMMDD`` for the request and stored in the
            ``date`` index level via ``pd.to_datetime``.

    Returns:
        pandas.DataFrame: One row per table line, indexed by
        ``(stock_id, date)``. Every cell is a string with ``,`` removed
        (missing cells therefore become the string ``'nan'``). An empty
        DataFrame is returned when the response body is empty or no header
        line is found.

    Raises:
        Nothing is caught here: errors from ``requests.get`` and
        ``pd.read_csv`` propagate to the caller.
    """
    date_str = date.strftime('%Y%m%d')

    data = requests.get('https://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date='+date_str+'&type=ALLBUT0999', timeout=10)
    if data.text == '':
        return pd.DataFrame()

    lines = data.text.split('\n')

    # Same selection rule as before: the last line among the first 1000 that
    # mentions the column name. ``None`` (not 0) marks "not found", so a
    # header on the very first line is still usable.
    header_pos = None
    for pos, line in enumerate(lines[:1000]):
        if '證券代號' in line:
            header_pos = pos
    if header_pos is None:
        return pd.DataFrame()

    # Parse from the header line onward instead of passing a row offset to
    # read_csv: pandas ignores blank lines when counting the header row, so a
    # physical line number only matched when exactly one blank line came first.
    # Every '=' is stripped (presumably the feed writes codes as ="0050" --
    # TODO confirm against a live response).
    table_text = '\n'.join(lines[header_pos:]).replace('=', '')
    # Read the code column as text so leading zeros can never be lost to
    # numeric type inference.
    df = pd.read_csv(StringIO(table_text), header=0, dtype={'證券代號': str})

    stock_ids = df['證券代號'].astype(str).str.replace(' ', '', regex=False)
    df = df.set_index(stock_ids).drop(['證券代號'], axis=1)
    # Drop fully empty columns (e.g. the one created by a trailing comma) and
    # rows that carry nothing besides their first field.
    df = df.dropna(axis=1, how='all').dropna(axis=0, how='all')
    df.index.name = 'stock_id'
    df['date'] = pd.to_datetime(date)
    df = df.reset_index().set_index(['stock_id', 'date'])
    # Remove thousands separators; values stay strings.
    df = df.apply(lambda s: s.astype(str).str.replace(',', '', regex=False))

    return df

def crawl_price(date, delay=4):
    """Fetch one day's table via ``parseData``, report the outcome, then pause.

    Args:
        date: Passed straight through to ``parseData``.
        delay: Seconds to sleep after the fetch attempt, whether it succeeded,
            came back empty, or raised. Defaults to 4, the value the original
            code intended to use between requests.

    Returns:
        pandas.DataFrame: The frame from ``parseData`` when it has at least
        one row, otherwise a new empty DataFrame.

    Raises:
        Whatever ``parseData`` raises; the pause still happens first.
    """
    try:
        price_data = parseData(date)
    finally:
        # The pause previously sat after both ``return`` statements and was
        # unreachable, so consecutive calls were never throttled.
        time.sleep(delay)

    if len(price_data) != 0:
        print('成功')
        return price_data

    print('沒資料（可能是本日沒開盤/資料來源未更新/ip被擋）')
    return pd.DataFrame()

#price = crawl_price(datetime.date(2021,8,2))
#price    

In [11]:
import datetime
import pandas as pd
import os
import csv

# Make sure the CSV exists and has a header row before it is read below.
csv_file = './price.csv'
if not os.path.exists(csv_file) or os.stat(csv_file).st_size == 0:
    # Write the header as UTF-8 explicitly: pandas reads and writes this file
    # as UTF-8 by default, whereas open() would otherwise fall back to the
    # platform's locale encoding for these non-ASCII column names.
    with open(csv_file, 'w', newline='', encoding='utf-8') as fwrite:
        writer = csv.writer(fwrite)
        writer.writerow(['stock_id', 'date', '證券名稱', '成交股數', '成交筆數', 
                         '成交金額', '開盤價', '最高價', '最低價', 
                         '收盤價', '漲跌(+/-)', '漲跌價差', 
                         '最後揭示買價', '最後揭示買量', 
                         '最後揭示賣價', '最後揭示賣量', 
                         '本益比'])

# Date range to crawl: from `start` up to and including today.
start = '20241101'
end = datetime.datetime.now().strftime('%Y%m%d')  # today's date, time of day dropped

datestart = datetime.datetime.strptime(start, '%Y%m%d')
dateend = datetime.datetime.strptime(end, '%Y%m%d')

# Load what has been saved so far. stock_id is read as text so codes such as
# '0050' cannot lose their leading zeros before the file is rewritten below.
price_df = pd.read_csv(csv_file, index_col=['stock_id', 'date'], parse_dates=['date'],
                       dtype={'stock_id': str})

# Dates already on disk, computed once. Every date in the range is visited
# exactly once, so this does not need refreshing inside the loop.
saved_dates = pd.to_datetime(price_df.index.get_level_values('date')).unique()

# Walk the range one calendar day at a time.
while datestart <= dateend:
    print(datestart)
    if datestart not in saved_dates:
        try:
            price_df_new = crawl_price(datestart)  # crawl_price is defined in an earlier cell
            # Only merge and rewrite the file when the crawl returned rows;
            # an empty result leaves both the frame and the CSV untouched.
            if len(price_df_new) != 0:
                price_df = pd.concat([price_df, price_df_new])
                # Saved after every new day so an interrupted run keeps its progress.
                price_df.to_csv(csv_file)
        except Exception as e:
            print(f"抓取 {datestart} 的價格資料時發生錯誤: {e}")
    else:
        print('已存過日期')
    
    datestart += datetime.timedelta(days=1)

2024-11-01 00:00:00
已存過日期
2024-11-02 00:00:00
沒資料（可能是本日沒開盤/資料來源未更新/ip被擋）
2024-11-03 00:00:00
沒資料（可能是本日沒開盤/資料來源未更新/ip被擋）
2024-11-04 00:00:00
已存過日期
2024-11-05 00:00:00
沒資料（可能是本日沒開盤/資料來源未更新/ip被擋）


In [12]:
# Reload the saved table from disk, indexed by (stock_id, date) with the
# date column parsed as datetimes, and show it as the cell's output.
price_df = pd.read_csv(
    './price.csv',
    parse_dates=['date'],
    index_col=['stock_id', 'date'],
)
price_df

Unnamed: 0_level_0,Unnamed: 1_level_0,證券名稱,成交股數,成交筆數,成交金額,開盤價,最高價,最低價,收盤價,漲跌(+/-),漲跌價差,最後揭示買價,最後揭示買量,最後揭示賣價,最後揭示賣量,本益比
stock_id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0015,2007-01-02,富邦,367000,57,3487410,9.35,9.58,9.35,9.58,+,0.10,9.53,100.0,9.57,5.0,0.00
0050,2007-01-02,元大台灣50,4907000,543,285899200,57.80,58.50,57.75,58.45,+,0.70,58.40,3.0,58.45,14.0,0.00
0051,2007-01-02,元大中型100,2536000,330,83528580,32.98,33.15,32.70,33.15,+,0.15,33.15,303.0,33.16,6.0,0.00
0052,2007-01-02,富邦科技,1441100,177,55797180,38.35,38.90,38.35,38.90,+,0.66,38.90,130.0,38.99,1.0,0.00
01001T,2007-01-02,土銀富邦R1,126000,30,1543290,12.21,12.27,12.15,12.25,+,0.10,12.21,25.0,12.25,16.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9944,2024-11-04,新麗,120019,67,2477974,20.60,20.70,20.50,20.70,+,0.05,20.55,5.0,20.70,22.0,60.88
9945,2024-11-04,潤泰新,2849305,3652,133343507,47.20,47.25,46.50,47.00,-,0.15,46.90,32.0,47.00,20.0,9.67
9946,2024-11-04,三發地產,358913,278,9160051,25.35,25.80,25.10,25.45,-,0.10,25.45,3.0,25.55,9.0,34.39
9955,2024-11-04,佳龍,3549273,2509,125573988,34.15,36.30,33.95,35.00,+,0.75,35.00,5.0,35.10,3.0,0.00
