In [27]:
import requests
from io import StringIO
import pandas as pd
from datetime import datetime
from chinese_calendar import is_workday
import time, random, json

# Query window (both ends inclusive) and market selector.
# mode: "all" fetches both markets, "listed" only the listed market,
# "opt" only the OTC market.
start, end = "2022-09-17", "2022-09-22"
mode = "all"


# Collect every working day inside the query window.
start, end = (datetime.strptime(bound, "%Y-%m-%d") for bound in (start, end))

dates = pd.date_range(start, end)
# Boolean mask, one entry per calendar day, True where the day is a workday.
work = list(map(is_workday, dates))
dates = dates[work]
# Same days rendered as compact YYYYMMDD strings.
dates_str = dates.strftime("%Y%m%d").tolist()


# Fetch the institutional-investor trading data for every working day.
# `features` is the unified column layout that both markets are mapped onto
# during the cleaning step.
features = ['Date', '證券代號', '證券名稱', '外資(不含外資自營)', '外資自營', '外資', '投信', '自營(自行買賣)', '自營(避險)', '自營', '三大法人']

# Seconds to wait for each HTTP response, so a stalled server cannot hang
# the whole run.
REQUEST_TIMEOUT = 30

# Per-day frames are collected in lists and concatenated once after the loop
# instead of re-copying the growing frame on every iteration.
listed_frames = []
otc_frames = []
for day, day_str in zip(dates, dates_str):
    # Listed market
    if mode in ("all", "listed"):
        r = requests.get(
            f"http://www.tse.com.tw/fund/T86?response=csv&date={day_str}&selectType=ALLBUT0999",
            timeout=REQUEST_TIMEOUT,
        )
        r.raise_for_status()

        # An empty body cannot be parsed by read_csv (EmptyDataError);
        # skip such days instead of aborting the whole run.
        if r.text.strip():
            df1_0 = pd.read_csv(StringIO(r.text), header=1, thousands=",")
            # Drop columns that are entirely empty, then every row that
            # still has a missing value.
            df1_0 = df1_0.dropna(how='all', axis=1).dropna(how='any')
            df1_0.insert(0, "Date", day)
            listed_frames.append(df1_0)

        # Small random pause between requests.
        time.sleep(random.uniform(0, 0.5))

    # OTC market
    if mode in ("all", "opt"):
        # The OTC endpoint takes the date in the ROC calendar
        # (year = Gregorian year - 1911). Month AND day are zero-padded so
        # the string always has the same yyy/mm/dd shape.
        roc_date = f"{day.year - 1911}/{day.month:02d}/{day.day:02d}"

        r = requests.get(
            f"http://www.tpex.org.tw/web/stock/3insti/daily_trade/3itrade_hedge_result.php?l=zh-tw&se=AL&t=D&d={roc_date}",
            timeout=REQUEST_TIMEOUT,
        )
        r.raise_for_status()

        data = json.loads(r.text)
        rows = data["aaData"]
        # Days without any rows contribute nothing; skip them so they do not
        # add column-less frames to the concatenation.
        if rows:
            df2_0 = pd.DataFrame(rows)
            df2_0.insert(0, "Date", day)
            otc_frames.append(df2_0)

        # Small random pause between requests.
        time.sleep(random.uniform(0, 0.5))

# pd.concat raises on an empty list, so fall back to an empty frame when a
# market was not requested or returned no data.
df1_1 = pd.concat(listed_frames, ignore_index=True) if listed_frames else pd.DataFrame()
df2_1 = pd.concat(otc_frames, ignore_index=True) if otc_frames else pd.DataFrame()


# Data cleaning: map both raw frames onto the shared `features` layout,
# then merge them into a single frame sorted by date.
## Listed market
if mode in ("all", "listed") and not df1_1.empty:
    # .copy() gives an independent frame, so the edits below never act on a
    # selection of the raw data.
    df1_1 = df1_1[['Date', '證券代號', '證券名稱', '外陸資買賣超股數(不含外資自營商)', '外資自營商買賣超股數', '投信買賣超股數', '自營商買賣超股數(自行買賣)', '自營商買賣超股數(避險)', '自營商買賣超股數', '三大法人買賣超股數']].copy()
    # Total foreign net buy/sell = foreign (excluding dealers) + foreign dealers.
    df1_1.insert(5, "外資買賣超股數", (df1_1["外陸資買賣超股數(不含外資自營商)"] + df1_1["外資自營商買賣超股數"]))
    df1_1.columns = features

    # Strip the `=` and `"` characters from the security codes.
    # This must stay inside the branch: when the listed market is not
    # fetched, df1_1 has no columns and the lookup would raise KeyError.
    df1_1['證券代號'] = df1_1['證券代號'].str.replace('[="]', '', regex=True)

## OTC market
if mode in ("all", "opt") and not df2_1.empty:
    # Pick the raw columns by position and rename them to the shared layout.
    df2_1 = df2_1.iloc[:, [0, 1, 2, 5, 8, 11, 14, 17, 20, 23, 24]].copy()
    df2_1.columns = features

    # The figures are strings with thousands separators. Convert column by
    # column so each one ends up with a numeric dtype instead of object.
    for col in features[3:]:
        df2_1[col] = pd.to_numeric(df2_1[col].astype(str).str.replace(",", "", regex=False))

df = pd.concat([df1_1, df2_1], ignore_index=True)
# When nothing was fetched at all there is no "Date" column to sort on.
if "Date" in df.columns:
    df = df.sort_values("Date")
df = df.reset_index(drop=True)

df

Unnamed: 0,Date,證券代號,證券名稱,外資(不含外資自營),外資自營,外資,投信,自營(自行買賣),自營(避險),自營,三大法人
0,2022-09-19,00632R,元大台灣50反1,-10520000.0,0.0,-10520000.0,0.0,3133000.0,50123545.0,53256545.0,42736545.0
1,2022-09-19,731718,昇達科永豐1B購01,0.0,0.0,0.0,0.0,0.0,-17000.0,-17000.0,-17000.0
2,2022-09-19,731724,欣銓元富1A購01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2022-09-19,731725,長聖元富21購02,0.0,0.0,0.0,0.0,0.0,-10000.0,-10000.0,-10000.0
4,2022-09-19,731731,矽瑪凱基1A購01,0.0,0.0,0.0,0.0,0.0,9000.0,9000.0,9000.0
...,...,...,...,...,...,...,...,...,...,...,...
18776,2022-09-22,6156,松上,11000.0,0.0,11000.0,0.0,0.0,0.0,0.0,11000.0
18777,2022-09-22,6151,晉倫,1000.0,0.0,1000.0,0.0,0.0,0.0,0.0,1000.0
18778,2022-09-22,6150,撼訊,-19000.0,0.0,-19000.0,0.0,0.0,121.0,121.0,-18879.0
18779,2022-09-22,6167,久正,74000.0,0.0,74000.0,0.0,0.0,-1.0,-1.0,73999.0
