In [197]:
import requests
import pickle
import yfinance as yf
import numpy as np
from sqlalchemy import create_engine
import pandas as pd
import pandas_market_calendars as mcal
import pandas as pd
from sqlalchemy import create_engine
import sqlite3
import time


class SQLiteBuilder:
    def build_SQLite_db(self):
        # 使用 with 確保連線自動關閉
        with sqlite3.connect("options_data.db") as conn:
            cursor = conn.cursor()
            
            # 建立選擇權資料表，使用 contractID + date 作為 PRIMARY KEY
            cursor.execute("""
            CREATE TABLE IF NOT EXISTS options_data (
                contractID TEXT,
                symbol TEXT,
                expiration DATE,
                strike FLOAT,
                type TEXT,
                last FLOAT,
                mark FLOAT,
                bid FLOAT,
                bid_size INT,
                ask FLOAT,
                ask_size INT,
                volume INT,
                open_interest INT,
                date DATE,
                implied_volatility FLOAT,
                delta FLOAT,
                gamma FLOAT,
                theta FLOAT,
                vega FLOAT,
                rho FLOAT,
                PRIMARY KEY (contractID, date) ON CONFLICT IGNORE -- 設定複合主鍵
            );
            """)
            
            conn.commit()  # 確保變更寫入資料庫
            print("✅ 資料表建立成功！")

    def create_index():
        with sqlite3.connect("options_data.db") as conn:
            cursor = conn.cursor()
            
            # 在 date 欄位建立索引，加快查詢速度
            cursor.execute("CREATE INDEX IF NOT EXISTS idx_date ON options_data(date);")
            
            conn.commit()
            print("✅ 已成功建立索引 idx_date！")
            
#SQLiteBuilder.create_index()
    

class OptionFetcher:
    def __init__(self, ticker):
        self.ticker = ticker

    def fetch_option_dataframe(self, date):
        apikey = '9AR0HMV75NBXMOU1'
        url = f'https://www.alphavantage.co/query?function=HISTORICAL_OPTIONS&symbol={self.ticker}&date={date}&apikey={apikey}'
        r = requests.get(url)
        data = r.json() #dict_keys(['endpoint', 'message', 'data'])
        return pd.DataFrame(data['data'])
        


class OptionInserter:
    def insert(self, option_dataframe):
        # 使用 SQLAlchemy 連線引擎
        engine = create_engine("sqlite:///options_data.db")
        with engine.begin() as conn:
            option_dataframe.to_sql("options_data", conn, if_exists="append", index=False, method="multi")
            #print("✅ 所有交易日的數據已成功存入 SQLite！")
        




class Calendar:
    def __init__(self, start_date, end_date):
        # 取得美股（NYSE）交易日曆
        nyse = mcal.get_calendar('NYSE')
        # 獲取該時間範圍內的交易日
        schedule = nyse.valid_days(start_date=start_date, end_date=end_date)
        # 轉為 DataFrame
        trading_days = schedule.tz_convert(None).date
        self.trading_days = [str(trading_day) 
                             for trading_day in trading_days]

    def count_trading_days(self, start, end):
        # 計算 start ~ end 之間的交易日數量
        return sum(1 for day in pd.date_range(start, end) if day.date() in self.trading_days)

✅ 已成功建立索引 idx_date！


In [291]:
#start_list = ['2015-01-01', '2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01', '2020-01-01', '2021-01-01', '2022-01-01']
#end_list =   ['2015-12-31', '2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31', '2022-12-31']
start_list = ['2014-01-01']
end_list =   ['2014-12-31']


option_inserter = OptionInserter()
option_fetcher = OptionFetcher('AAPL')

for i in [0]:
    start_time = time.perf_counter()  # 記錄開始時間
    calendar = Calendar(start_list[i], end_list[i])
    for trade_day in calendar.trading_days:
        option_data = option_fetcher.fetch_option_dataframe(trade_day)
        #print(option_data)
        option_inserter.insert(option_data)
        print(trade_day)
    
    end_time = time.perf_counter()  # 記錄結束時間
    elapsed_time = end_time - start_time  # 計算執行時間
    print(f"總執行時間: {elapsed_time:.2f} 秒")

2014-01-02
2014-01-03
2014-01-06
2014-01-07
2014-01-08
2014-01-09
2014-01-10
2014-01-13
2014-01-14
2014-01-15
2014-01-16
2014-01-17
2014-01-21
2014-01-22
2014-01-23
2014-01-24
2014-01-27
2014-01-28
2014-01-29
2014-01-30
2014-01-31
2014-02-03
2014-02-04
2014-02-05
2014-02-06
2014-02-07
2014-02-10
2014-02-11
2014-02-12
2014-02-13
2014-02-14
2014-02-18
2014-02-19
2014-02-20
2014-02-21
2014-02-24
2014-02-25
2014-02-26
2014-02-27
2014-02-28
2014-03-03
2014-03-04
2014-03-05
2014-03-06
2014-03-07
2014-03-10
2014-03-11
2014-03-12
2014-03-13
2014-03-14
2014-03-17
2014-03-18
2014-03-19
2014-03-20
2014-03-21
2014-03-24
2014-03-25
2014-03-26
2014-03-27
2014-03-28
2014-03-31
2014-04-01
2014-04-02
2014-04-03
2014-04-04
2014-04-07
2014-04-08
2014-04-09
2014-04-10
2014-04-11
2014-04-14
2014-04-15
2014-04-16
2014-04-17
2014-04-21
2014-04-22
2014-04-23
2014-04-24
2014-04-25
2014-04-28
2014-04-29
2014-04-30
2014-05-01
2014-05-02
2014-05-05
2014-05-06
2014-05-07
2014-05-08
2014-05-09
2014-05-12
2014-05-13

In [281]:
class OptionQuery:
    def __init__(self,ticker):
        self.price = yf.download(ticker, start='2010-01-01', end='2025-02-01', progress=False)
        self.ticker = ticker
        
    def _get_close_price(self, DATE):
        return self.price.loc[DATE]['Close']

    def _get_raw_option_df(self, DATE):
        with sqlite3.connect("options_data.db") as conn:
            cursor = conn.cursor()
        
            # 執行查詢
            cursor.execute("SELECT * FROM options_data WHERE date = ?", (DATE,))
            
            # 取得所有符合條件的資料
            rows = cursor.fetchall()
            # 取得欄位名稱
            columns = [desc[0] for desc in cursor.description]
        
        # 轉成 DataFrame 方便顯示
        return pd.DataFrame(rows, columns=columns)

    def _get_normalized_option_data(self, DATE):
        df = self._get_raw_option_df(DATE)
        # 計算剩餘天數
        df["expiration"] = pd.to_datetime(df["expiration"])
        df["date"] = pd.to_datetime(df["date"])
        # 計算交易日數
        df["days_to_expiration"] = (df["expiration"] - df["date"]).dt.days / 365
        # 刪除不需要欄位
        df = df.drop(columns=["contractID", "date", "expiration", "symbol"])
        df = pd.get_dummies(df, columns=["type"])#.astype(int)
        df[["type_call", "type_put"]] = df[["type_call", "type_put"]].astype(int)
        df['open_interest'] =  df['open_interest'] / df['open_interest'].sum() *100
        df['bid_size'] = df['bid_size'] / df['bid_size'].sum() * 100
        df['ask_size'] = df['ask_size'] / df['ask_size'].sum() * 100
        
        df['volume'] = df['volume'] / df['volume'].sum() * 100
        df[['last', 'mark', 'bid', 'ask']] = df[['last', 'mark', 'bid', 'ask']].div(df['strike'], axis=0)
        close_price = self._get_close_price(DATE)
        df['strike'] =  df['strike'] / close_price

        return df

        

    def option_data(self, DATE):
        return self._get_normalized_option_data(DATE)



query = OptionQuery('AAPL')
e = query.option_data('2018-10-12')

e

KeyError: "None of [Index(['type_call', 'type_put'], dtype='object')] are in the [columns]"

      strike      last      mark       bid  bid_size       ask  ask_size  \
0       70.0  0.524286  0.518571  0.515000  0.016548  0.522143  0.016045   
1       70.0  0.000143  0.000143  0.000000  0.000000  0.000143  0.016446   
2       75.0  0.000000  0.417333  0.414000  0.016548  0.420667  0.016045   
3       75.0  0.000000  0.000133  0.000000  0.000000  0.000133  0.018452   
4       80.0  0.341250  0.328375  0.325625  0.065365  0.331250  0.032491   
...      ...       ...       ...       ...       ...       ...       ...   
1465   165.0  0.177455  0.385455  0.380000  0.036820  0.390909  0.037304   
1466   170.0  0.028235  0.026765  0.023824  0.141073  0.029706  0.072603   
1467   170.0  0.199706  0.400294  0.395294  0.027305  0.405294  0.019655   
1468   175.0  0.022857  0.023429  0.020571  0.095566  0.026286  0.074207   
1469   175.0  0.195943  0.414457  0.409429  0.026477  0.419429  0.021259   

        volume  open_interest  implied_volatility    delta    gamma    theta  \
0     0

In [245]:


DATE = '2015-01-06'

# 下載 AAPL 的當天數據

price = yf.download(ticker, start=DATE, end=pd.to_datetime(DATE) + pd.Timedelta(days=1), progress=False)
# 連接 SQLite 資料庫

# 計算剩餘天數
df["expiration"] = pd.to_datetime(df["expiration"])
df["date"] = pd.to_datetime(df["date"])


# 計算交易日數
df["days_to_expiration"] = (df["expiration"] - df["date"]).dt.days / 365



df = df.drop(columns=["contractID", "date", "expiration", "symbol"])
df = pd.get_dummies(df, columns=["type"])#.astype(int)
df[["type_call", "type_put"]] = df[["type_call", "type_put"]].astype(int)
# 刪除不需要的欄位

df['open_interest'] =  df['open_interest'] / df['open_interest'].sum() *100
df['bid_size'] = df['bid_size'] / df['bid_size'].sum() * 100
df['ask_size'] = df['ask_size'] / df['ask_size'].sum() * 100

df['volume'] = df['volume'] / df['volume'].sum() * 100
df[['last', 'mark', 'bid', 'ask']] = df[['last', 'mark', 'bid', 'ask']].div(df['strike'], axis=0)

df['strike'] /= price['Close'].iloc[0]
#df.to_numpy()

NameError: name 'ticker' is not defined

In [244]:
price = yf.download('AAPL', start='2010-01-01', end='2025-02-01', progress=False)
price.loc['2010-01-05']['Close']

np.float64(7.656428813934326)

In [196]:
df.to_numpy()

array([[7.000e+01, 3.670e+01, 3.630e+01, ..., 3.000e+00, 1.000e+00,
        0.000e+00],
       [7.000e+01, 1.000e-02, 1.000e-02, ..., 3.000e+00, 0.000e+00,
        1.000e+00],
       [7.500e+01, 0.000e+00, 3.130e+01, ..., 3.000e+00, 1.000e+00,
        0.000e+00],
       ...,
       [1.700e+02, 3.395e+01, 6.805e+01, ..., 7.450e+02, 0.000e+00,
        1.000e+00],
       [1.750e+02, 4.000e+00, 4.100e+00, ..., 7.450e+02, 1.000e+00,
        0.000e+00],
       [1.750e+02, 3.429e+01, 7.253e+01, ..., 7.450e+02, 0.000e+00,
        1.000e+00]], shape=(1470, 18))

In [288]:
import sqlite3
import pandas as pd

DATE = '2019-02-15'

# 連接 SQLite 資料庫
with sqlite3.connect("options_data.db") as conn:
    cursor = conn.cursor()

    # 執行查詢，直接在 SQL 語句中篩選 volume != 0
    cursor.execute("SELECT * FROM options_data WHERE date = ? AND volume != 0", (DATE,))
    
    # 取得所有符合條件的資料
    rows = cursor.fetchall()

    # 取得欄位名稱
    columns = [desc[0] for desc in cursor.description]

# 轉成 DataFrame 方便顯示
df = pd.DataFrame(rows, columns=columns)
if df.empty:
    print('None')


In [289]:
df

Unnamed: 0,contractID,symbol,expiration,strike,type,last,mark,bid,bid_size,ask,ask_size,volume,open_interest,date,implied_volatility,delta,gamma,theta,vega,rho
0,AAPL190215C00100000,AAPL,2019-02-15,100.0,call,70.44,70.33,70.15,30,70.50,10,7,13,2019-02-15,3.31605,1.00000,0.00000,-0.02400,0.00000,0.00274
1,AAPL190215C00110000,AAPL,2019-02-15,110.0,call,60.13,60.40,60.20,10,60.60,10,13,13,2019-02-15,2.84515,1.00000,0.00000,-0.02640,0.00000,0.00301
2,AAPL190215C00115000,AAPL,2019-02-15,115.0,call,55.61,55.33,55.15,30,55.50,10,2,4,2019-02-15,2.60970,1.00000,0.00000,-0.02760,0.00000,0.00315
3,AAPL190215C00120000,AAPL,2019-02-15,120.0,call,50.13,50.40,50.20,10,50.60,10,18,162,2019-02-15,2.37425,1.00000,0.00000,-0.02880,0.00000,0.00329
4,AAPL190215P00120000,AAPL,2019-02-15,120.0,put,0.01,0.01,0.00,0,0.01,15,1,5602,2019-02-15,2.32542,-0.00162,0.00025,-0.19852,0.00047,-0.00001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
608,AAPL210618C00230000,AAPL,2021-06-18,230.0,call,8.81,8.95,7.80,296,10.10,329,5,156,2019-02-15,0.22150,0.29125,0.00594,-0.05209,0.89410,0.95198
609,AAPL210618C00240000,AAPL,2021-06-18,240.0,call,7.15,7.10,6.60,7,7.60,127,90,189,2019-02-15,0.21861,0.24549,0.00552,-0.04666,0.82035,0.81286
610,AAPL210618C00250000,AAPL,2021-06-18,250.0,call,5.80,5.78,5.30,203,6.25,146,100,168,2019-02-15,0.21861,0.20873,0.00504,-0.04212,0.74859,0.69708
611,AAPL210618C00280000,AAPL,2021-06-18,280.0,call,3.05,2.83,2.25,270,3.40,40,48,130,2019-02-15,0.21373,0.11830,0.00355,-0.02774,0.51622,0.40544


In [9]:
selected_data = [contract for contract in data if not contract['volume']=='0']

In [16]:
len(selected_data)

617

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# ---------------------------
# 1) 建立一個假資料集 (ToyDataset)
# ---------------------------
class ToyDataset(Dataset):
    """
    每筆資料代表「某一天」：
      - features: shape = (num_contracts, dim_feat)
      - label: 該天的目標值 (例如明日漲跌幅)
    """
    def __init__(self, num_days=10, dim_feat=7, max_contracts=8):
        super().__init__()
        self.data = []
        self.labels = []
        
        # 隨機產生 num_days 筆資料
        for _ in range(num_days):
            # 當天有多少筆合約 (1 ~ max_contracts 隨機)
            n_contracts = torch.randint(1, max_contracts+1, (1,)).item()
            # 建立 (n_contracts, dim_feat) 特徵
            feats = torch.randn(n_contracts, dim_feat)
            # 目標 (回歸問題, e.g. 漲跌幅)
            label = torch.randn(1)  # shape = (1,)
            
            self.data.append(feats)
            self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """
        回傳
          - features: (n_i, dim_feat)
          - label: shape (1,)
        """
        return self.data[idx], self.labels[idx]

# ---------------------------
# 2) 自訂 collate_fn：做 Padding & Mask
# ---------------------------
def collate_fn(batch):
    """
    batch: list of (features, label), 其中
       features.shape = (n_i, dim_feat)
       label.shape = (1,)
    目標：把不等長的 features 對齊成 (B, max_len, dim_feat)
         並建立 mask (B, max_len)，True 代表要忽略 (padding)。
    """
    # 1. 找出此 batch 中最大的合約數量
    batch_size = len(batch)
    lengths = [sample[0].shape[0] for sample in batch]  # 每天合約數
    max_len = max(lengths)
    
    # 2. 準備一個張量放 padded features
    dim_feat = batch[0][0].shape[1]
    padded_feats = torch.zeros(batch_size, max_len, dim_feat)
    # mask: True 代表該位置是 padding，False 代表有意義的資料
    mask = torch.ones(batch_size, max_len, dtype=torch.bool)
    
    # 3. 把每筆資料複製到 padded_feats 中
    labels = []
    for i, (feats, label) in enumerate(batch):
        n_i = feats.shape[0]
        padded_feats[i, :n_i, :] = feats
        mask[i, :n_i] = False  # 前 n_i 是實際資料
        labels.append(label)
    
    # 4. labels 轉成 tensor，shape = (batch_size, 1)
    labels = torch.cat(labels, dim=0).view(-1, 1)
    
    return padded_feats, mask, labels

# ---------------------------
# 3) 定義 Set Transformer 模組 (支援 mask)
# ---------------------------
class MAB(nn.Module):
    """
    MAB(Q, K) = Multi-Head Attention Block with residual + layer norm
    加入 key_mask: (B, Nk) 的布林遮罩，True 表示要忽略 padding。
    """
    def __init__(self, dim_Q, dim_K, dim_out, num_heads, ln=True):
        super().__init__()
        self.num_heads = num_heads
        self.dim_out = dim_out

        # 轉換維度
        self.W_Q = nn.Linear(dim_Q, dim_out)
        self.W_K = nn.Linear(dim_K, dim_out)
        self.W_V = nn.Linear(dim_K, dim_out)

        # feedforward
        self.fc = nn.Sequential(
            nn.Linear(dim_out, dim_out),
            nn.ReLU(),
            nn.Linear(dim_out, dim_out),
        )

        self.ln1 = nn.LayerNorm(dim_out) if ln else nn.Identity()
        self.ln2 = nn.LayerNorm(dim_out) if ln else nn.Identity()

    def forward(self, Q, K, key_mask=None):
        """
        Q, K shape = (B, Nq, d_in), (B, Nk, d_in)
        key_mask shape = (B, Nk) or None
           - True 表示 K 的該位置是 padding，需要忽略
        """
        B, Nq, _ = Q.shape
        Nk = K.shape[1]
        
        # 線性投影
        Q_ = self.W_Q(Q)  # (B, Nq, dim_out)
        K_ = self.W_K(K)  # (B, Nk, dim_out)
        V_ = self.W_V(K)  # (B, Nk, dim_out)

        # 分多頭
        d = self.dim_out
        d_head = d // self.num_heads
        Q_ = Q_.view(B, Nq, self.num_heads, d_head).transpose(1, 2)  # (B, h, Nq, d_head)
        K_ = K_.view(B, Nk, self.num_heads, d_head).transpose(1, 2)  # (B, h, Nk, d_head)
        V_ = V_.view(B, Nk, self.num_heads, d_head).transpose(1, 2)

        # 注意力 scores: (B, h, Nq, Nk)
        scores = torch.matmul(Q_, K_.transpose(-2, -1)) / (d_head**0.5)

        if key_mask is not None:
            # key_mask: (B, Nk) -> 擴展到 (B, 1, 1, Nk)
            exp_mask = key_mask.unsqueeze(1).unsqueeze(2)
            # True 的地方填入 -inf
            scores = scores.masked_fill(exp_mask, float('-inf'))

        attn = torch.softmax(scores, dim=-1)  # (B, h, Nq, Nk)
        H = torch.matmul(attn, V_)            # (B, h, Nq, d_head)

        # 拼回 (B, Nq, dim_out)
        H = H.transpose(1, 2).contiguous().view(B, Nq, d)
        
        # 殘差 + LayerNorm
        H = self.ln1(H + Q)
        
        # 前饋
        H2 = self.fc(H)
        H = self.ln2(H + H2)
        return H

class SAB(nn.Module):
    """ Self-Attention Block: SAB(X) = MAB(X, X) """
    def __init__(self, dim_in, dim_out, num_heads, ln=True):
        super().__init__()
        self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln)

    def forward(self, X, mask=None):
        """
        X: (B, N, dim_in)
        mask: (B, N) or None
           - True 表示 padding
        """
        return self.mab(X, X, key_mask=mask)

class PMA(nn.Module):
    """
    PMA(k=1): 用於將整個集合聚合成 1 個向量
    PMA(S, X) = MAB(S, X)
    其中 S 是可學參數 (num_seeds, dim_in)
    """
    def __init__(self, dim_in, num_heads, num_seeds=1, ln=True):
        super().__init__()
        self.num_seeds = num_seeds
        self.dim_in = dim_in
        self.S = nn.Parameter(torch.Tensor(num_seeds, dim_in))
        nn.init.xavier_uniform_(self.S)

        self.mab = MAB(dim_in, dim_in, dim_in, num_heads, ln=ln)

    def forward(self, X, mask=None):
        B = X.shape[0]
        # S shape = (num_seeds, dim_in) -> (B, num_seeds, dim_in)
        S = self.S.unsqueeze(0).expand(B, self.num_seeds, self.dim_in)
        # PMA = MAB(S, X)
        H = self.mab(S, X, key_mask=mask)  # shape = (B, num_seeds, dim_in)
        return H

class SetTransformer(nn.Module):
    """
    結合 SAB + PMA，最後做回歸
    """
    def __init__(self, dim_input, num_heads=4, ln=True):
        super().__init__()
        hidden_dim = 128

        self.sab1 = SAB(dim_input, hidden_dim, num_heads, ln=ln)
        self.sab2 = SAB(hidden_dim, hidden_dim, num_heads, ln=ln)

        self.pma = PMA(hidden_dim, num_heads, num_seeds=1, ln=ln)
        self.fc = nn.Linear(hidden_dim, 1)  # 預測 1 維(回歸)

    def forward(self, X, mask=None):
        """
        X: (B, N, dim_input)
        mask: (B, N) True=padding
        """
        H = self.sab1(X, mask=mask)   # (B, N, hidden_dim)
        H = self.sab2(H, mask=mask)   # (B, N, hidden_dim)
        
        # PMA 輸出 shape = (B, 1, hidden_dim)
        H = self.pma(H, mask=mask)    # (B, 1, hidden_dim)
        H = H.squeeze(1)              # (B, hidden_dim)
        out = self.fc(H)              # (B, 1)
        return out

# ---------------------------
# 4) 測試 + 訓練流程示例
# ---------------------------
if __name__ == "__main__":
    # 產生一個 toy dataset: 20 天，特徵維度=7，每天最多 10 筆合約
    dataset = ToyDataset(num_days=20, dim_feat=7, max_contracts=10)
    # 建立 dataloader，一次 batch=4 天
    loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

    # 建置模型
    model = SetTransformer(dim_input=7, num_heads=4, ln=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()  # 回歸用 MSE

    # 假裝訓練 2 epoch
    for epoch in range(2):
        for i, (batch_feats, batch_mask, batch_labels) in enumerate(loader):
            # batch_feats: (B, max_len, dim_feat)
            # batch_mask: (B, max_len), True=padding
            # batch_labels: (B, 1)
            
            optimizer.zero_grad()

            # forward
            preds = model(batch_feats, mask=batch_mask)  # (B, 1)
            
            loss = criterion(preds, batch_labels)
            loss.backward()
            optimizer.step()

            print(f"[Epoch {epoch+1}] Step {i+1}, Loss = {loss.item():.4f}")


In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

# ---------------------------
# 1) 假資料集 (ToyDataset) - 一次回傳「一天」資料
# ---------------------------
class ToySingleDayDataset:
    """
    每個索引 i 代表某一天。
    features[i] shape = (n_i, dim_feat)
    label[i] shape = (1,)   (回歸目標)
    """
    def __init__(self, num_days=10, dim_feat=7, max_contracts=8):
        self.data = []
        self.labels = []
        for _ in range(num_days):
            n_contracts = torch.randint(1, max_contracts+1, (1,)).item()
            feats = torch.randn(n_contracts, dim_feat)
            label = torch.randn(1)  # 假裝要預測的漲跌幅
            self.data.append(feats)
            self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# ---------------------------
# 2) Set Transformer 模組 (無 Mask 版本)
# ---------------------------

class MAB(nn.Module):
    """
    MAB(Q, K) = Multi-Head Attention Block
    這裡不實作 mask，因為一次只處理一個 day，不需要 padding。
    """
    def __init__(self, dim_Q, dim_K, dim_out, num_heads, ln=True):
        super().__init__()
        self.num_heads = num_heads
        self.dim_out = dim_out

        self.W_Q = nn.Linear(dim_Q, dim_out)
        self.W_K = nn.Linear(dim_K, dim_out)
        self.W_V = nn.Linear(dim_K, dim_out)

        self.fc = nn.Sequential(
            nn.Linear(dim_out, dim_out),
            nn.ReLU(),
            nn.Linear(dim_out, dim_out),
        )
        self.ln1 = nn.LayerNorm(dim_out) if ln else nn.Identity()
        self.ln2 = nn.LayerNorm(dim_out) if ln else nn.Identity()

    def forward(self, Q, K):
        """
        Q, K shape = (1, Nq, d_in), (1, Nk, d_in)
        這裡因為一次一天，所以 batch_size=1
        但也可以寫成 (batch, Nq, d_in) (batch, Nk, d_in) 只是 batch=1
        """
        B, Nq, _ = Q.shape
        _, Nk, _ = K.shape

        # 線性投影
        Q_ = self.W_Q(Q)  # (1, Nq, dim_out)
        K_ = self.W_K(K)  # (1, Nk, dim_out)
        V_ = self.W_V(K)  # (1, Nk, dim_out)

        d = self.dim_out
        d_head = d // self.num_heads
        # 拆多頭
        Q_ = Q_.view(B, Nq, self.num_heads, d_head).transpose(1, 2)  # (1, h, Nq, d_head)
        K_ = K_.view(B, Nk, self.num_heads, d_head).transpose(1, 2)
        V_ = V_.view(B, Nk, self.num_heads, d_head).transpose(1, 2)

        # 注意力 scores: (1, h, Nq, Nk)
        scores = torch.matmul(Q_, K_.transpose(-2, -1)) / (d_head**0.5)
        attn = torch.softmax(scores, dim=-1)
        H = torch.matmul(attn, V_)  # (1, h, Nq, d_head)

        # 拼回 (1, Nq, dim_out)
        H = H.transpose(1, 2).contiguous().view(B, Nq, d)

        # 殘差 + LayerNorm
        H = self.ln1(H + Q)
        # 前饋
        H2 = self.fc(H)
        H = self.ln2(H + H2)
        return H

class SAB(nn.Module):
    """ Self-Attention Block: SAB(X) = MAB(X, X) """
    def __init__(self, dim_in, dim_out, num_heads=4, ln=True):
        super().__init__()
        self.mab = MAB(dim_in, dim_in, dim_out, num_heads=num_heads, ln=ln)

    def forward(self, X):
        # X shape = (1, N, dim_in)
        return self.mab(X, X)

class PMA(nn.Module):
    """
    Pooling by Multihead Attention:
    PMA(S, X) = MAB(S, X)
    num_seeds=1 -> 取得整個集合的一個向量表示
    """
    def __init__(self, dim_in, num_heads=4, num_seeds=1, ln=True):
        super().__init__()
        self.num_seeds = num_seeds
        self.dim_in = dim_in
        self.S = nn.Parameter(torch.Tensor(num_seeds, dim_in))
        nn.init.xavier_uniform_(self.S)
        self.mab = MAB(dim_in, dim_in, dim_in, num_heads=num_heads, ln=ln)

    def forward(self, X):
        # X shape = (1, N, dim_in)
        # S shape = (num_seeds, dim_in) -> (1, num_seeds, dim_in)
        B = X.shape[0]  # 一般情況 batch=1
        S = self.S.unsqueeze(0).expand(B, self.num_seeds, self.dim_in)
        H = self.mab(S, X)  # (1, num_seeds, dim_in)
        return H

class SetTransformer(nn.Module):
    """
    結合 SAB + PMA，最後用 Linear 做回歸
    """
    def __init__(self, dim_input=7, hidden_dim=128, num_heads=4):
        super().__init__()
        self.sab1 = SAB(dim_input, hidden_dim, num_heads=num_heads, ln=True)
        self.sab2 = SAB(hidden_dim, hidden_dim, num_heads=num_heads, ln=True)
        self.pma = PMA(hidden_dim, num_heads=num_heads, num_seeds=1, ln=True)
        self.fc = nn.Linear(hidden_dim, 1)  # 輸出 1 維(回歸)

    def forward(self, X):
        """
        X shape = (1, N, dim_input)
        """
        H = self.sab1(X)        # (1, N, hidden_dim)

        print(H.shape)
        H = self.sab2(H)        # (1, N, hidden_dim)
        H = self.pma(H)         # (1, 1, hidden_dim)
        H = H.squeeze(1)        # (1, hidden_dim)
        out = self.fc(H)        # (1, 1)
        return out

# ---------------------------
# 3) 訓練流程：一次一天
# ---------------------------
def train_single_day():
    # 準備資料集: 20 天, 特徵維度=7, 每天合約數量 <= 8
    dataset = ToySingleDayDataset(num_days=20, dim_feat=7, max_contracts=8)
    
    model = SetTransformer(dim_input=7, hidden_dim=128, num_heads=4)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    
    EPOCHS = 3
    for epoch in range(EPOCHS):
        # 每個 epoch 都把 20 天跑過一次
        # (也可以在這裡打亂天數順序 shuffle)
        day_indices = list(range(len(dataset)))
        random.shuffle(day_indices)
        
        total_loss = 0.0
        for i in day_indices:
            # 取出單日資料
            feats, label = dataset[i]
            print(feats.shape, label.shape)
            # feats shape = (n_i, dim_feat) -> 轉成 (1, n_i, dim_feat)
            feats = feats.unsqueeze(0)
            # label shape = (1,) -> 轉成 (1, 1)
            label = label.view(1, 1)

            optimizer.zero_grad()
            preds = model(feats)  # (1, 1)
            loss = criterion(preds, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        
        avg_loss = total_loss / len(dataset)
        print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

if __name__ == "__main__":
    train_single_day()


torch.Size([1, 7]) torch.Size([1])


RuntimeError: The size of tensor a (128) must match the size of tensor b (7) at non-singleton dimension 2