## 整體想法與邏輯

因為週期貨沒有量，所以平倉方式稍微複雜一些

ROLL
進場：
1.	週選合成 + 月期貨
2.	價差單 (週期+月期)

出場：
1.	週選合成 + 月期貨 (出場相較被動，是因為剛好有相反方向的進場單，沒辦法強迫用此方式，因為用選擇權不一定組的到
2.	價差合約(月期+週期)


ROLL整理來說會是價差合約，理論上時間不會超越一天，但不一定會馬上平倉


月的話是這樣：
進場：月選 + 月期貨
放置到期日結算


## ROLL策略匹配與處理

1. 先匹配 Call + Put（確保履約價相同，時間差 ≤ 5 分鐘） 
2. 優先匹配月期貨（優先配對 PCP 組合，確保套利穩定性） 
3. 處理未匹配的月期貨 → 搭配週期貨（確保期貨部位都能被利用） 
4. 串接進出場策略（依據流動性與交易時間決定最適合的出場條件）

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import islice
from datetime import datetime, timedelta
import re
import os
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.gridspec import GridSpec
from matplotlib.ticker import MultipleLocator
from matplotlib.ticker import FuncFormatter
import matplotlib.gridspec as gridspec
from matplotlib.font_manager import FontProperties as font
import matplotlib
matplotlib.rc('font', family='Microsoft JhengHei') # 在圖表上顯示中文
import matplotlib.ticker as mtick
from collections import Counter

In [2]:
# 輸入PCP數據
df_2024 = pd.read_csv('D:/CODE/SC_SP_delay_monthly策略/data/PCP20240101_20241231.csv', encoding = 'utf-8', low_memory = False)
df_2025 = pd.read_csv('D:/CODE/SC_SP_delay_monthly策略/data/PCP20250101_20250501.csv', encoding = 'utf-8', low_memory = False)
df_pcp = pd.concat([df_2024, df_2025], ignore_index=True)

In [3]:
def convert_time_format(df):
    """ 
    轉換時間格式，將 TradeTime 分拆為 Date 和 Time 
    """
    trade_d = []
    trade_t = []
    
    for t in df['TradeTime']:
        dt = datetime.strptime(str(t), "%Y/%m/%d %H:%M")  # 修正解析格式
        trade_d.append(dt.strftime("%Y%m%d"))  # 提取日期，格式 YYYYMMDD
        trade_t.append(dt.strftime("%H%M"))  # 提取時間，格式 HHMM（只到分鐘，沒有秒）

    df['Date'] = trade_d  # 新增 Date 欄位
    df['Time'] = trade_t  # 新增 Time 欄位
    
    return df

df_pcp = convert_time_format(df_pcp)
df_pcp = df_pcp.drop(columns=["Unnamed: 0"])
df_pcp["IAccountID"] = df_pcp["IAccountID"].str.strip()  # 去除前後空格
df_pcp["IAccountID"] = df_pcp["IAccountID"].str.replace("\u200b", "")  # 移除隱藏字符

# 指篩選ROLL數據
df_pcp = df_pcp[df_pcp["IAccountID"] == "ROLL"]
df_pcp["TradeTime"] = pd.to_datetime(df_pcp["TradeTime"])  # 確保 TradeTime 轉換成datetime型式

# 將PCP數據分類，分成Call, Put, 期貨 (月、週)
df_opt = df_pcp[df_pcp["Type"] == "opt"]  # 選擇權
df_mxf = df_pcp[df_pcp["Type"] == "mxf"]  # 期貨

# Call商品英文代號 A~L
# Put商品英文代號 M~X
# 期貨選擇小台指期，代號為MXF，區分為月期貨、週期貨
df_opt["OptionType"] = df_opt["CommodityId"].str[-2].apply(lambda x: "Call" if x in "ABCDEFGHIJKL" else ("Put" if x in "MNOPQRSTUVWX" else "Unknown"))
df_mxf["FuturesType"] = df_mxf["CommodityId"].apply(lambda x: "Monthly" if x.startswith("MXF") else "Weekly")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_opt["OptionType"] = df_opt["CommodityId"].str[-2].apply(lambda x: "Call" if x in "ABCDEFGHIJKL" else ("Put" if x in "MNOPQRSTUVWX" else "Unknown"))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mxf["FuturesType"] = df_mxf["CommodityId"].apply(lambda x: "Monthly" if x.startswith("MXF") else "Weekly")


In [4]:
df_mxf

Unnamed: 0,IAccountID,BS,CommodityId,TradeDate,TradeTime,Qty,Price,Type,Fee,Date,Time,FuturesType
4,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly
5,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly
6,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly
7,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17898.0,mxf,31.0,20240102,0901,Monthly
12,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.0,20240102,0901,Weekly
...,...,...,...,...,...,...,...,...,...,...,...,...
183990,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20417.0,mxf,33.0,20250501,0500,Monthly
183991,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20412.0,mxf,33.0,20250501,0500,Monthly
183998,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20414.0,mxf,33.0,20250501,0500,Monthly
183999,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20413.0,mxf,33.0,20250501,0500,Monthly


In [5]:
df_opt

Unnamed: 0,IAccountID,BS,CommodityId,TradeDate,TradeTime,Qty,Price,Type,Fee,Date,Time,OptionType
0,ROLL,B,TX117600A4,2024/1/2,2024-01-02 09:01:00,1,323.0,opt,27.000000,20240102,0901,Call
1,ROLL,B,TX117450A4,2024/1/2,2024-01-02 09:01:00,1,472.0,opt,35.000000,20240102,0901,Call
2,ROLL,S,TX118250M4,2024/1/2,2024-01-02 09:01:00,1,329.0,opt,27.000000,20240102,0901,Put
3,ROLL,S,TX118300M4,2024/1/2,2024-01-02 09:01:00,1,379.0,opt,30.000000,20240102,0901,Put
8,ROLL,S,TX117600M4,2024/1/2,2024-01-02 09:01:00,1,1.3,opt,11.000000,20240102,0901,Put
...,...,...,...,...,...,...,...,...,...,...,...,...
183996,ROLL,S,TX120900Q5,2025/5/1,2025-05-01 05:00:00,1,520.0,opt,36.666667,20250501,0500,Put
183997,ROLL,S,TX120900Q5,2025/5/1,2025-05-01 05:00:00,1,520.0,opt,36.666667,20250501,0500,Put
184001,ROLL,B,TX120900E5,2025/5/1,2025-05-01 05:00:00,1,39.0,opt,13.000000,20250501,0500,Call
184002,ROLL,B,TX120900E5,2025/5/1,2025-05-01 05:00:00,1,40.0,opt,13.000000,20250501,0500,Call


#### 先匹配 Call + Put（確保履約價相同，時間差 ≤ 5 分鐘） 
主要透過履約價、交易時間、買賣方向及選擇權類型來去做匹配

In [7]:
# 舉例來說，選擇權商品代號為TX117600A4，取出其中的17600
def get_strike_price(commodity_id):
    """ 從 CommodityId 提取履約價，僅保留最後 5 位數 """
    return int(commodity_id[-7:-2]) if commodity_id and len(commodity_id) >= 6 else None

def match_call_put(df_opt, max_time_diff=5):
    """ 以 Call 為基準，匹配履約價相同、交易時間最接近且 `BS` 方向相反的 Put """

    # 確保 TradeTime 為 datetime
    df_opt = df_opt.copy()
    df_opt["TradeTime"] = pd.to_datetime(df_opt["TradeTime"])

    # 提取履約價
    df_opt["StrikePrice"] = df_opt["CommodityId"].apply(get_strike_price)

    # 拆分 Call 與 Put
    df_call = df_opt[df_opt["OptionType"] == "Call"].copy()  # Call 
    df_put = df_opt[df_opt["OptionType"] == "Put"].copy()  # Put 

    # 為 Put 添加唯一交易索引
    df_put["TradeIndex"] = df_put.reset_index().index  
    df_put["Used"] = False  # 標記 Put 是否已被匹配

    pcp_list = []
    for _, call_row in df_call.iterrows():
        call_time = call_row["TradeTime"]
        strike_price = call_row["StrikePrice"]
        call_bs = call_row["BS"]  

        # 找履約價相同的 Put，且 BS 必須與 Call 相反，且未被使用 
        put_candidates = df_put[(df_put["StrikePrice"] == strike_price) & (df_put["BS"] != call_bs) & (df_put["Used"] == False)].copy()

        # 計算時間差並找最接近的 Put
        if not put_candidates.empty:
            put_candidates["TimeDiff"] = abs(put_candidates["TradeTime"] - call_time)
            put_row = put_candidates.loc[put_candidates["TimeDiff"].idxmin()]

            # 標記該Put已被使用
            df_put.loc[df_put["TradeIndex"] == put_row["TradeIndex"], "Used"] = True  

            # 儲存匹配結果
            pcp_list.append({
                "StrikePrice": strike_price,
                "CallTime": call_time, "CallPrice": call_row["Price"], "CallID": call_row["CommodityId"], "CallFee": call_row["Fee"], "CallBS": call_bs,
                "PutTime": put_row["TradeTime"], "PutPrice": put_row["Price"], "PutID": put_row["CommodityId"], "PutFee": put_row["Fee"], "PutBS": put_row["BS"],
                "TradeIndex_Put": put_row["TradeIndex"]  # 追蹤 Put的唯一識別碼
            })

    return pd.DataFrame(pcp_list)

# 使用
df_opt = match_call_put(df_opt)
df_opt.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\opt.csv',index = False, encoding = 'utf-8-sig')
df_opt

Unnamed: 0,StrikePrice,CallTime,CallPrice,CallID,CallFee,CallBS,PutTime,PutPrice,PutID,PutFee,PutBS,TradeIndex_Put
0,17600,2024-01-02 09:01:00,323.0,TX117600A4,27.0,B,2024-01-02 09:01:00,1.3,TX117600M4,11.000000,S,2
1,17450,2024-01-02 09:01:00,472.0,TX117450A4,35.0,B,2024-01-02 09:01:00,0.5,TX117450M4,11.000000,S,3
2,18250,2024-01-02 09:01:00,0.9,TX118250A4,11.0,B,2024-01-02 09:01:00,329.0,TX118250M4,27.000000,S,0
3,18300,2024-01-02 09:01:00,0.6,TX118300A4,11.0,B,2024-01-02 09:01:00,379.0,TX118300M4,30.000000,S,1
4,17500,2024-01-02 10:36:00,314.0,TX117500A4,27.0,S,2024-01-02 10:36:00,1.3,TX117500M4,11.000000,B,4
...,...,...,...,...,...,...,...,...,...,...,...,...
13110,20900,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,13155
13111,20900,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,13156
13112,20900,2025-05-01 05:00:00,39.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,13157
13113,20900,2025-05-01 05:00:00,40.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,13158


將期貨分成月期與週期，然後同樣給予他們唯一之索引 - TradeIndex

In [8]:
df_fut_monthly = df_mxf[df_mxf["FuturesType"] == "Monthly"]
df_fut_monthly["TradeIndex"] = df_fut_monthly.reset_index().index
df_fut_monthly

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fut_monthly["TradeIndex"] = df_fut_monthly.reset_index().index


Unnamed: 0,IAccountID,BS,CommodityId,TradeDate,TradeTime,Qty,Price,Type,Fee,Date,Time,FuturesType,TradeIndex
4,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly,0
5,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly,1
6,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17899.0,mxf,31.0,20240102,0901,Monthly,2
7,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17898.0,mxf,31.0,20240102,0901,Monthly,3
13,ROLL,S,MXFA4,2024/1/2,2024-01-02 09:01:00,1,17910.0,mxf,31.0,20240102,0901,Monthly,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
183990,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20417.0,mxf,33.0,20250501,0500,Monthly,21131
183991,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20412.0,mxf,33.0,20250501,0500,Monthly,21132
183998,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20414.0,mxf,33.0,20250501,0500,Monthly,21133
183999,ROLL,S,MXFE5,2025/5/1,2025-05-01 05:00:00,1,20413.0,mxf,33.0,20250501,0500,Monthly,21134


In [9]:
df_fut_weekly = df_mxf[df_mxf["FuturesType"] == "Weekly"]
df_fut_weekly["TradeIndex"] = df_fut_weekly.reset_index().index
df_fut_weekly

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fut_weekly["TradeIndex"] = df_fut_weekly.reset_index().index


Unnamed: 0,IAccountID,BS,CommodityId,TradeDate,TradeTime,Qty,Price,Type,Fee,Date,Time,FuturesType,TradeIndex
12,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.0,20240102,0901,Weekly,0
14,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.0,20240102,0901,Weekly,1
16,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.5,20240102,0901,Weekly,2
17,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.5,20240102,0901,Weekly,3
20,ROLL,B,MX1A4,2024/1/2,2024-01-02 09:01:00,1,17932.0,mxf,31.0,20240102,0901,Weekly,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
183940,ROLL,S,MX1E5,2025/5/1,2025-05-01 04:26:00,1,20287.0,mxf,33.0,20250501,0426,Weekly,8082
183942,ROLL,S,MX1E5,2025/5/1,2025-05-01 04:27:00,1,20287.0,mxf,33.0,20250501,0427,Weekly,8083
183965,ROLL,S,MX1E5,2025/5/1,2025-05-01 04:55:00,1,20400.0,mxf,33.0,20250501,0455,Weekly,8084
183976,ROLL,B,MX1E5,2025/5/1,2025-05-01 04:57:00,1,20419.0,mxf,33.0,20250501,0457,Weekly,8085


### 兩種進出場策略組合
1. 先用 Call + Put 選擇權組合匹配交易時間最接近的 Monthly Futures (設定5分鐘內)

2. 然後用未匹配的 Monthly Futures 去匹配交易時間最接近的 Weekly Futures

Put Call Parity策略
1. Buy Future + Short Call + Buy Put 
2. Short Future + Buy Call + Short Put

In [10]:
# 確保 Monthly Futures 只能匹配一次
df_fut_monthly["Used"] = False 

matched_pcp_list = []  # 儲存匹配結果

for _, opt_row in df_opt.iterrows():
    opt_time = opt_row["CallTime"]  # 選擇權交易時間
    call_bs = opt_row["CallBS"]  # 取得 Call 的買賣方向

    # 尋找交易時間最接近且尚未使用，並且 BS 不一致的 Monthly Futures (主要是看Call, 期貨BS不一致)
    fut_candidates = df_fut_monthly[(df_fut_monthly["Used"] == False) & (df_fut_monthly["BS"] != call_bs)].copy()
    fut_candidates["TimeDiff"] = abs(fut_candidates["TradeTime"] - opt_time)

    if fut_candidates.empty:
        continue

    fut_row = fut_candidates.loc[fut_candidates["TimeDiff"].idxmin()]  # 找最接近的 Monthly Futures

    if fut_row["TimeDiff"] > timedelta(minutes=5):
        continue  # 時間差過大則忽略匹配

    # 標記該 Monthly Futures 已被使用
    df_fut_monthly.loc[df_fut_monthly["TradeIndex"] == fut_row["TradeIndex"], "Used"] = True

    # 儲存匹配結果
    matched_pcp_list.append({
        "TradeIndex_Options": opt_row.get("TradeIndex", None), "StrikePrice": opt_row["StrikePrice"],
        "CallTime": opt_row["CallTime"], "CallPrice": opt_row["CallPrice"], "CallID": opt_row["CallID"],
        "CallFee": opt_row["CallFee"], "CallBS": opt_row["CallBS"], 
        "PutTime": opt_row["PutTime"], "PutPrice": opt_row["PutPrice"], "PutID": opt_row["PutID"],
        "PutFee": opt_row["PutFee"], "PutBS": opt_row["PutBS"], 
        "TradeIndex_FutMonthly": fut_row["TradeIndex"], "TradeDate": fut_row["TradeDate"],
        "FutMonthlyTime": fut_row["TradeTime"], "FutMonthlyPrice": fut_row["Price"], "FutMonthlyID": fut_row["CommodityId"],
        "FutMonthlyFee": fut_row["Fee"], "FutMonthlyBS": fut_row["BS"], 
        "TimeDiff_Options_Futures": fut_row["TimeDiff"]  
    })

df_pcp_matched = pd.DataFrame(matched_pcp_list)

# 檢查是否仍有重複使用
print("檢查 `TradeIndex_FutMonthly` 是否被多次使用：")
print(df_pcp_matched["TradeIndex_FutMonthly"].value_counts().head(10))  # 顯示前 10 筆重複次數


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fut_monthly["Used"] = False


檢查 `TradeIndex_FutMonthly` 是否被多次使用：
TradeIndex_FutMonthly
0        1
14446    1
14428    1
14429    1
14430    1
14431    1
14432    1
14433    1
14434    1
14435    1
Name: count, dtype: int64


In [11]:
df_pcp_matched

Unnamed: 0,TradeIndex_Options,StrikePrice,CallTime,CallPrice,CallID,CallFee,CallBS,PutTime,PutPrice,PutID,PutFee,PutBS,TradeIndex_FutMonthly,TradeDate,FutMonthlyTime,FutMonthlyPrice,FutMonthlyID,FutMonthlyFee,FutMonthlyBS,TimeDiff_Options_Futures
0,,17600,2024-01-02 09:01:00,323.0,TX117600A4,27.0,B,2024-01-02 09:01:00,1.3,TX117600M4,11.000000,S,0,2024/1/2,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,0 days
1,,17450,2024-01-02 09:01:00,472.0,TX117450A4,35.0,B,2024-01-02 09:01:00,0.5,TX117450M4,11.000000,S,1,2024/1/2,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,0 days
2,,18250,2024-01-02 09:01:00,0.9,TX118250A4,11.0,B,2024-01-02 09:01:00,329.0,TX118250M4,27.000000,S,2,2024/1/2,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,0 days
3,,18300,2024-01-02 09:01:00,0.6,TX118300A4,11.0,B,2024-01-02 09:01:00,379.0,TX118300M4,30.000000,S,3,2024/1/2,2024-01-02 09:01:00,17898.0,MXFA4,31.0,S,0 days
4,,17500,2024-01-02 10:36:00,314.0,TX117500A4,27.0,S,2024-01-02 10:36:00,1.3,TX117500M4,11.000000,B,31,2024/1/2,2024-01-02 10:36:00,17778.0,MXFA4,31.0,B,0 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12931,,20900,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,21131,2025/5/1,2025-05-01 05:00:00,20417.0,MXFE5,33.0,S,0 days
12932,,20900,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,21132,2025/5/1,2025-05-01 05:00:00,20412.0,MXFE5,33.0,S,0 days
12933,,20900,2025-05-01 05:00:00,39.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,21133,2025/5/1,2025-05-01 05:00:00,20414.0,MXFE5,33.0,S,0 days
12934,,20900,2025-05-01 05:00:00,40.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,21134,2025/5/1,2025-05-01 05:00:00,20413.0,MXFE5,33.0,S,0 days


用剩餘的月期去匹配週期，設定為一買一賣

In [12]:
matched_weekly_list = []  # 儲存匹配結果
df_fut_weekly["Used"] = False  # 初始化 Weekly Futures 狀態

# 取得未匹配的 Monthly Futures
remaining_fut_monthly = df_fut_monthly[df_fut_monthly["Used"] == False].copy()
remaining_fut_monthly["Used"] = False  # 確保 Monthly Futures 也只能匹配一次

for _, fut_monthly_row in remaining_fut_monthly.iterrows():
    fut_monthly_time = fut_monthly_row["TradeTime"]
    fut_monthly_bs = fut_monthly_row["BS"]  # 獲取 Monthly Futures 的交易方向

    # *尋找交易時間最接近，且 BS 方向相反的 Weekly Futures
    weekly_candidates = df_fut_weekly[(df_fut_weekly["Used"] == False) & (df_fut_weekly["BS"] != fut_monthly_bs)].copy()
    weekly_candidates["TimeDiff"] = abs(weekly_candidates["TradeTime"] - fut_monthly_time)

    if weekly_candidates.empty:
        continue

    weekly_row = weekly_candidates.loc[weekly_candidates["TimeDiff"].idxmin()]  # 找最接近的 Weekly Futures

    if weekly_row["TimeDiff"] > timedelta(minutes=5):
        continue  # 時間差過大則忽略匹配

    # 標記該 Monthly Futures 和 Weekly Futures 已被使用
    remaining_fut_monthly.loc[remaining_fut_monthly["TradeIndex"] == fut_monthly_row["TradeIndex"], "Used"] = True
    df_fut_weekly.loc[df_fut_weekly["TradeIndex"] == weekly_row["TradeIndex"], "Used"] = True

    # 儲存匹配結果
    matched_weekly_list.append({
        "TradeIndex_FutMonthly": fut_monthly_row["TradeIndex"],  
        "FutMonthlyTime": fut_monthly_time, "FutMonthlyPrice": fut_monthly_row["Price"], "FutMonthlyID": fut_monthly_row["CommodityId"],
        "FutMonthlyFee": fut_monthly_row["Fee"], "FutMonthlyBS": fut_monthly_bs, 
        "TradeIndex_FutWeekly": weekly_row["TradeIndex"],  "TradeDate": fut_monthly_row["TradeDate"],
        "WeeklyFutTime": weekly_row["TradeTime"], "WeeklyFutPrice": weekly_row["Price"], "WeeklyFutID": weekly_row["CommodityId"],
        "WeeklyFutFee": weekly_row["Fee"], "WeeklyFutBS": weekly_row["BS"], 
        "TimeDiff_Weekly": weekly_row["TimeDiff"]
    })

df_matched_weekly = pd.DataFrame(matched_weekly_list)

# 檢查匹配結果
print("`Monthly Futures` 與 `Weekly Futures` 匹配結果：")
print(df_matched_weekly.head())
print("匹配成功數量:", len(df_matched_weekly))

# 確認BS方向是否相反
df_valid_bs = df_matched_weekly.dropna(subset=["FutMonthlyBS", "WeeklyFutBS"])
df_mismatch = df_valid_bs[df_valid_bs["FutMonthlyBS"] == df_valid_bs["WeeklyFutBS"]]  # 找出 BS相同的異常情況

if df_mismatch.empty:
    print("所有 `Monthly Futures` 與 `Weekly Futures` 交易方向皆相反！")
else:
    print("發現 `Monthly Futures` 與 `Weekly Futures` `BS` 相同的異常匹配！")
    print(df_mismatch)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fut_weekly["Used"] = False  # 初始化 Weekly Futures 狀態


`Monthly Futures` 與 `Weekly Futures` 匹配結果：
   TradeIndex_FutMonthly      FutMonthlyTime  FutMonthlyPrice FutMonthlyID  \
0                      4 2024-01-02 09:01:00          17910.0        MXFA4   
1                      5 2024-01-02 09:01:00          17910.0        MXFA4   
2                      6 2024-01-02 09:01:00          17910.0        MXFA4   
3                      7 2024-01-02 09:01:00          17910.0        MXFA4   
4                      8 2024-01-02 09:01:00          17910.0        MXFA4   

   FutMonthlyFee FutMonthlyBS  TradeIndex_FutWeekly TradeDate  \
0           31.0            S                     0  2024/1/2   
1           31.0            S                     1  2024/1/2   
2           31.5            S                     2  2024/1/2   
3           31.5            S                     3  2024/1/2   
4           31.0            S                     4  2024/1/2   

        WeeklyFutTime  WeeklyFutPrice WeeklyFutID  WeeklyFutFee WeeklyFutBS  \
0 2024-01-02 09:01

In [13]:
df_matched_weekly

Unnamed: 0,TradeIndex_FutMonthly,FutMonthlyTime,FutMonthlyPrice,FutMonthlyID,FutMonthlyFee,FutMonthlyBS,TradeIndex_FutWeekly,TradeDate,WeeklyFutTime,WeeklyFutPrice,WeeklyFutID,WeeklyFutFee,WeeklyFutBS,TimeDiff_Weekly
0,4,2024-01-02 09:01:00,17910.0,MXFA4,31.0,S,0,2024/1/2,2024-01-02 09:01:00,17932.0,MX1A4,31.0,B,0 days
1,5,2024-01-02 09:01:00,17910.0,MXFA4,31.0,S,1,2024/1/2,2024-01-02 09:01:00,17932.0,MX1A4,31.0,B,0 days
2,6,2024-01-02 09:01:00,17910.0,MXFA4,31.5,S,2,2024/1/2,2024-01-02 09:01:00,17932.0,MX1A4,31.5,B,0 days
3,7,2024-01-02 09:01:00,17910.0,MXFA4,31.5,S,3,2024/1/2,2024-01-02 09:01:00,17932.0,MX1A4,31.5,B,0 days
4,8,2024-01-02 09:01:00,17910.0,MXFA4,31.0,S,4,2024/1/2,2024-01-02 09:01:00,17932.0,MX1A4,31.0,B,0 days
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8023,21122,2025-05-01 04:26:00,20270.0,MXFE5,33.0,B,8082,2025/5/1,2025-05-01 04:26:00,20287.0,MX1E5,33.0,S,0 days
8024,21123,2025-05-01 04:27:00,20270.0,MXFE5,33.0,B,8083,2025/5/1,2025-05-01 04:27:00,20287.0,MX1E5,33.0,S,0 days
8025,21126,2025-05-01 04:55:00,20365.0,MXFE5,33.0,B,8084,2025/5/1,2025-05-01 04:55:00,20400.0,MX1E5,33.0,S,0 days
8026,21128,2025-05-01 04:57:00,20406.0,MXFE5,33.0,S,8085,2025/5/1,2025-05-01 04:57:00,20419.0,MX1E5,33.0,B,0 days


### 將df_pcp_matched與df_matched_weekly數據合併，主要以月期貨來合併

In [14]:
# 標記來源類別
df_pcp_matched["SourceType"] = "Option-Monthly"
df_matched_weekly["SourceType"] = "Monthly-Weekly"

# 以 TradeIndex_FutMonthly 為基準進行合
df_combined = pd.concat([df_pcp_matched, df_matched_weekly], axis=0, ignore_index=True)

# 確認合併結果
print(" 合併後數據概覽：")
print(df_combined.head())
print(f"合併後數據總量: {len(df_combined)}")
print(df_combined["SourceType"].value_counts())  # 確保來源標記正確

 合併後數據概覽：
  TradeIndex_Options  StrikePrice            CallTime  CallPrice      CallID  \
0               None      17600.0 2024-01-02 09:01:00      323.0  TX117600A4   
1               None      17450.0 2024-01-02 09:01:00      472.0  TX117450A4   
2               None      18250.0 2024-01-02 09:01:00        0.9  TX118250A4   
3               None      18300.0 2024-01-02 09:01:00        0.6  TX118300A4   
4               None      17500.0 2024-01-02 10:36:00      314.0  TX117500A4   

   CallFee CallBS             PutTime  PutPrice       PutID  ...  \
0     27.0      B 2024-01-02 09:01:00       1.3  TX117600M4  ...   
1     35.0      B 2024-01-02 09:01:00       0.5  TX117450M4  ...   
2     11.0      B 2024-01-02 09:01:00     329.0  TX118250M4  ...   
3     11.0      B 2024-01-02 09:01:00     379.0  TX118300M4  ...   
4     27.0      S 2024-01-02 10:36:00       1.3  TX117500M4  ...   

   FutMonthlyBS TimeDiff_Options_Futures      SourceType TradeIndex_FutWeekly  \
0             S    

In [15]:
# 選擇要保留的欄位
columns_order = ["TradeIndex_FutMonthly", "TradeDate", "SourceType",
                 "FutMonthlyTime", "FutMonthlyPrice", "FutMonthlyID", "FutMonthlyFee", "FutMonthlyBS",
                 "StrikePrice", "CallTime", "CallPrice", "CallID", "CallFee", "CallBS",
                 "PutTime", "PutPrice", "PutID", "PutFee", "PutBS",
                 "WeeklyFutTime", "WeeklyFutPrice", "WeeklyFutID", "WeeklyFutFee", "WeeklyFutBS"]

# 重新排序 DataFrame
df_combined = df_combined[columns_order]
df_combined = df_combined.sort_values(by="TradeIndex_FutMonthly").reset_index(drop=True)

df_combined


Unnamed: 0,TradeIndex_FutMonthly,TradeDate,SourceType,FutMonthlyTime,FutMonthlyPrice,FutMonthlyID,FutMonthlyFee,FutMonthlyBS,StrikePrice,CallTime,...,PutTime,PutPrice,PutID,PutFee,PutBS,WeeklyFutTime,WeeklyFutPrice,WeeklyFutID,WeeklyFutFee,WeeklyFutBS
0,0,2024/1/2,Option-Monthly,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,17600.0,2024-01-02 09:01:00,...,2024-01-02 09:01:00,1.3,TX117600M4,11.000000,S,NaT,,,,
1,1,2024/1/2,Option-Monthly,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,17450.0,2024-01-02 09:01:00,...,2024-01-02 09:01:00,0.5,TX117450M4,11.000000,S,NaT,,,,
2,2,2024/1/2,Option-Monthly,2024-01-02 09:01:00,17899.0,MXFA4,31.0,S,18250.0,2024-01-02 09:01:00,...,2024-01-02 09:01:00,329.0,TX118250M4,27.000000,S,NaT,,,,
3,3,2024/1/2,Option-Monthly,2024-01-02 09:01:00,17898.0,MXFA4,31.0,S,18300.0,2024-01-02 09:01:00,...,2024-01-02 09:01:00,379.0,TX118300M4,30.000000,S,NaT,,,,
4,4,2024/1/2,Monthly-Weekly,2024-01-02 09:01:00,17910.0,MXFA4,31.0,S,,NaT,...,NaT,,,,,2024-01-02 09:01:00,17932.0,MX1A4,31.0,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20959,21131,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20417.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,...,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,NaT,,,,
20960,21132,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20412.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,...,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S,NaT,,,,
20961,21133,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20414.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,...,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,NaT,,,,
20962,21134,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20413.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,...,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S,NaT,,,,


In [16]:
df_combined.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\PCP.csv',index = False, encoding = 'utf-8-sig')

## 進場數據與機制設定

從目前整理好的ROLL數據中，挑出有包含選擇權策略的 (週選+月期)，全部都當作進場策略，回測4:00~5:00出現PCP數據時，多去買option之績效

設定Future, Call, Put 的時間都需在4~5點之間

In [17]:
df = pd.read_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\PCP.csv', encoding='utf-8-sig')

# 設定時間範圍
start_time = pd.to_datetime("04:00:00").time()
end_time = pd.to_datetime("05:00:00").time()

# 轉換時間格式
df["FutMonthlyTime"] = pd.to_datetime(df["FutMonthlyTime"], errors="coerce")
df["CallTime"] = pd.to_datetime(df["CallTime"], errors="coerce")
df["PutTime"] = pd.to_datetime(df["PutTime"], errors="coerce")

# 篩選符合條件的數據
df = df[
    (df["SourceType"] == "Option-Monthly") &
    (df["FutMonthlyTime"].dt.time >= start_time) & (df["FutMonthlyTime"].dt.time <= end_time) &
    (df["CallTime"].dt.time >= start_time) & (df["CallTime"].dt.time <= end_time) &
    (df["PutTime"].dt.time >= start_time) & (df["PutTime"].dt.time <= end_time)
]

# 排除不需要的欄位
columns_to_remove = ["WeeklyFutTime", "WeeklyFutPrice", "WeeklyFutID", "WeeklyFutFee", "WeeklyFutBS"]
df = df.drop(columns=columns_to_remove, errors="ignore")  # 避免 KeyError

# 顯示篩選後的數據數量
print(f"📌 篩選後剩餘的數據筆數：{df.shape[0]}")
df


📌 篩選後剩餘的數據筆數：1005


Unnamed: 0,TradeIndex_FutMonthly,TradeDate,SourceType,FutMonthlyTime,FutMonthlyPrice,FutMonthlyID,FutMonthlyFee,FutMonthlyBS,StrikePrice,CallTime,CallPrice,CallID,CallFee,CallBS,PutTime,PutPrice,PutID,PutFee,PutBS
664,666,2024/2/1,Option-Monthly,2024-02-01 04:05:00,17917.0,MXFB4,31.0,B,18600.0,2024-02-01 04:05:00,7.0,TX118600B4,11.0,S,2024-02-01 04:05:00,690.0,TX118600N4,45.000000,B
665,667,2024/2/1,Option-Monthly,2024-02-01 04:10:00,17910.0,MXFB4,31.0,B,18450.0,2024-02-01 04:10:00,14.5,TX118450B4,12.0,S,2024-02-01 04:10:00,550.0,TX118450N4,39.000000,B
666,668,2024/2/1,Option-Monthly,2024-02-01 04:10:00,17913.0,MXFB4,31.0,B,18450.0,2024-02-01 04:10:00,14.5,TX118450B4,12.0,S,2024-02-01 04:10:00,550.0,TX118450N4,39.000000,B
781,783,2024/2/23,Option-Monthly,2024-02-23 04:52:00,18994.0,MXFC4,32.0,S,18500.0,2024-02-23 04:52:00,500.0,TX418500B4,36.0,B,2024-02-23 04:52:00,6.4,TX418500N4,11.000000,S
1290,1292,2024/3/11,Option-Monthly,2024-03-09 04:46:00,19744.0,MXFC4,33.0,S,18850.0,2024-03-09 04:46:00,885.0,TX218850C4,55.0,B,2024-03-09 04:46:00,3.3,TX218850O4,11.000000,S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20959,21131,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20417.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S
20960,21132,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20412.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,38.5,TX120900E5,13.0,B,2025-05-01 05:00:00,515.0,TX120900Q5,36.333333,S
20961,21133,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20414.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,39.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S
20962,21134,2025/5/1,Option-Monthly,2025-05-01 05:00:00,20413.0,MXFE5,33.0,S,20900.0,2025-05-01 05:00:00,40.0,TX120900E5,13.0,B,2025-05-01 05:00:00,520.0,TX120900Q5,36.666667,S


設定 ArbitrageStrategy 為腳3之策略，且只抓取選擇權腳3為SC、SP之策略

In [18]:
def analyze_pcp_with_bs(df_pcp):
    """
    進一步分析 PCP 數據，包含理論與實際價格比較以及交易腳判斷。
    """
    df_pcp = df_pcp.copy()
    strategy_type = []
    for _, row in df_pcp.iterrows():
        C = row['CallPrice']    # Call 價格
        P = row['PutPrice']     # Put 價格
        FutureBS = row['FutMonthlyBS']  # 買賣方向 (B/S)
        
        # 判斷策略類型
        if FutureBS == "B":
            # Buy Future + Short Call + Long Put
            if C > P:
                strategy_type.append("BP") # Short Call 腳1, Long Put 腳3
            else:
                strategy_type.append("SC")   # Short Call 腳3, Long Put 腳1

        else:
            # Sell Future + Long Call + Short Put
            if C > P:
                strategy_type.append("SP")   # Long Call 腳1, Short Put 腳3
            else:
                strategy_type.append("BC")   # Long Call 腳3, Short Put 腳1
        
    
    df_pcp["ArbitrageStrategy"] = strategy_type
    
    # 轉成 DataFrame
    df_results = pd.DataFrame(df_pcp)
    return df_results


df_option_pcp = analyze_pcp_with_bs(df)

# 排除BC、BP資料，只觀察SC、SP資料
df_option_pcp = df_option_pcp[~df_option_pcp["ArbitrageStrategy"].isin(["BC", "BP"])]
df_option_pcp.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\PCP_option.csv',index = False, encoding = 'utf-8-sig')
df_option_pcp

Unnamed: 0,TradeIndex_FutMonthly,TradeDate,SourceType,FutMonthlyTime,FutMonthlyPrice,FutMonthlyID,FutMonthlyFee,FutMonthlyBS,StrikePrice,CallTime,CallPrice,CallID,CallFee,CallBS,PutTime,PutPrice,PutID,PutFee,PutBS,ArbitrageStrategy
664,666,2024/2/1,Option-Monthly,2024-02-01 04:05:00,17917.0,MXFB4,31.0,B,18600.0,2024-02-01 04:05:00,7.0,TX118600B4,11.0,S,2024-02-01 04:05:00,690.0,TX118600N4,45.0,B,SC
665,667,2024/2/1,Option-Monthly,2024-02-01 04:10:00,17910.0,MXFB4,31.0,B,18450.0,2024-02-01 04:10:00,14.5,TX118450B4,12.0,S,2024-02-01 04:10:00,550.0,TX118450N4,39.0,B,SC
666,668,2024/2/1,Option-Monthly,2024-02-01 04:10:00,17913.0,MXFB4,31.0,B,18450.0,2024-02-01 04:10:00,14.5,TX118450B4,12.0,S,2024-02-01 04:10:00,550.0,TX118450N4,39.0,B,SC
781,783,2024/2/23,Option-Monthly,2024-02-23 04:52:00,18994.0,MXFC4,32.0,S,18500.0,2024-02-23 04:52:00,500.0,TX418500B4,36.0,B,2024-02-23 04:52:00,6.4,TX418500N4,11.0,S,SP
1290,1292,2024/3/11,Option-Monthly,2024-03-09 04:46:00,19744.0,MXFC4,33.0,S,18850.0,2024-03-09 04:46:00,885.0,TX218850C4,55.0,B,2024-03-09 04:46:00,3.3,TX218850O4,11.0,S,SP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20948,21120,2025/5/1,Option-Monthly,2025-05-01 04:23:00,20328.0,MXFE5,33.0,S,19600.0,2025-05-01 04:23:00,800.0,TX119600E5,51.0,B,2025-05-01 04:23:00,67.0,TX119600Q5,14.0,S,SP
20949,21121,2025/5/1,Option-Monthly,2025-05-01 04:23:00,20330.0,MXFE5,33.0,S,19600.0,2025-05-01 04:23:00,800.0,TX119600E5,51.0,B,2025-05-01 04:23:00,65.0,TX119600Q5,14.0,S,SP
20952,21124,2025/5/1,Option-Monthly,2025-05-01 04:55:00,20397.0,MXFE5,33.0,S,19600.0,2025-05-01 04:55:00,855.0,TX119600E5,54.0,B,2025-05-01 04:55:00,54.0,TX119600Q5,14.0,S,SP
20953,21125,2025/5/1,Option-Monthly,2025-05-01 04:55:00,20380.0,MXFE5,33.0,B,21000.0,2025-05-01 04:55:00,23.5,TX121000E5,12.0,S,2025-05-01 04:55:00,645.0,TX121000Q5,43.0,B,SC


重新製作一個df_new，儲存重要資訊，並透過 ArbitrageStrategy 來合併部分資料
(將SC、SP策略合併，轉換成用Category來區分C or P)

In [19]:
# 建立新的 DataFrame
df_new = df_option_pcp.copy()

# 根據 ArbitrageStrategy 來選擇相應的欄位
df_new["Time"] = df_new.apply(lambda x: x["CallTime"] if x["ArbitrageStrategy"] == "SC" else x["PutTime"], axis=1)
df_new["Price"] = df_new.apply(lambda x: x["CallPrice"] if x["ArbitrageStrategy"] == "SC" else x["PutPrice"], axis=1)
df_new["ID"] = df_new.apply(lambda x: x["CallID"] if x["ArbitrageStrategy"] == "SC" else x["PutID"], axis=1)
df_new["Fee"] = df_new.apply(lambda x: x["CallFee"] if x["ArbitrageStrategy"] == "SC" else x["PutFee"], axis=1)
df_new["StrikePrice"] = df_new["StrikePrice"]  # 履約價保持不變
df_new["Category"] = df_new.apply(lambda x: "C" if x["ArbitrageStrategy"] == "SC" else "P", axis=1)
df_new["FutPrice"] = df_new["FutMonthlyPrice"]

# 選擇需要的欄位並重新排序
columns_order = ["Time", "Category", "Price", "ID", "Fee", "StrikePrice", "FutPrice"]
df_new = df_new[columns_order]

# 轉換為標準日期格式
df_new["Date"] = pd.to_datetime(df_new["Time"]).dt.strftime("%Y-%m-%d")

# 觀察數據中出現哪些日期
unique_dates = df_new["Date"].unique()
unique_dates 

array(['2024-02-01', '2024-02-23', '2024-03-09', '2024-03-13',
       '2024-04-17', '2024-06-26', '2024-07-24', '2024-07-30',
       '2024-08-01', '2024-08-02', '2024-08-03', '2024-08-07',
       '2024-08-08', '2024-08-09', '2024-08-10', '2024-08-29',
       '2024-08-31', '2024-09-04', '2024-09-07', '2024-09-12',
       '2024-09-17', '2024-09-19', '2024-09-20', '2024-09-24',
       '2024-09-27', '2024-10-02', '2024-10-09', '2024-10-12',
       '2024-10-18', '2024-10-19', '2024-10-22', '2024-10-30',
       '2024-10-31', '2024-11-06', '2024-11-12', '2024-11-13',
       '2024-11-20', '2024-11-21', '2024-11-23', '2024-12-04',
       '2024-12-06', '2024-12-10', '2024-12-19', '2024-12-20',
       '2024-12-26', '2024-12-28', '2025-01-08', '2025-01-15',
       '2025-01-18', '2025-01-21', '2025-02-04', '2025-02-05',
       '2025-02-08', '2025-02-22', '2025-02-25', '2025-02-27',
       '2025-02-28', '2025-03-04', '2025-03-05', '2025-03-06',
       '2025-03-08', '2025-03-11', '2025-03-12', '2025-

透過選擇權ID生成其到期月份 (週期)，比如說從TX118600B4轉換成202402W1

In [None]:
# 月份對應編碼
call_code = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"]
put_code = ["M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X"]

# 函數：根據 `ID` 解析年月與交易類型
def parse_product_code(id_value):
    if pd.isna(id_value) or len(id_value) < 3:
        return None  # 確保數據有效

    year_code = id_value[-1]  # 取得最後一碼（年分）
    month_code = id_value[-2]  # 倒數第二碼（月份編碼）
    type_code = id_value[2]   # 第三碼（選擇權類別），判斷是月選還是週選

    # 解析年分
    year_map = {"4": "2024", "5": "2025"}  # 可以擴展其他年份
    year = year_map.get(year_code, None)
    if not year:
        return None  # 未知年份則返回 `None`

    # 解析月份
    if month_code in call_code:
        month = f"{call_code.index(month_code) + 1:02d}"  # 轉換為 "01" ~ "12"
    elif month_code in put_code:
        month = f"{put_code.index(month_code) + 1:02d}"
    else:
        return None  # 未知月份則返回 `None`

    # 解析選擇權類型
    if type_code in ["1", "2", "4", "5"]:
        return f"{year}{month}W{type_code}"  # 週選（加上 `W+數字`）
    elif type_code == "O":
        return f"{year}{month}"  # 月選（不加 `W` 標記）

    return None  # 未知類型

# 應用函數
df_new["ProductCode"] = df_new["ID"].apply(parse_product_code)

In [22]:
df_new

Unnamed: 0,Time,Category,Price,ID,Fee,StrikePrice,FutPrice,Date,ProductCode
664,2024-02-01 04:05:00,C,7.0,TX118600B4,11.0,18600.0,17917.0,2024-02-01,202402W1
665,2024-02-01 04:10:00,C,14.5,TX118450B4,12.0,18450.0,17910.0,2024-02-01,202402W1
666,2024-02-01 04:10:00,C,14.5,TX118450B4,12.0,18450.0,17913.0,2024-02-01,202402W1
781,2024-02-23 04:52:00,P,6.4,TX418500N4,11.0,18500.0,18994.0,2024-02-23,202402W4
1290,2024-03-09 04:46:00,P,3.3,TX218850O4,11.0,18850.0,19744.0,2024-03-09,202403W2
...,...,...,...,...,...,...,...,...,...
20948,2025-05-01 04:23:00,P,67.0,TX119600Q5,14.0,19600.0,20328.0,2025-05-01,202505W1
20949,2025-05-01 04:23:00,P,65.0,TX119600Q5,14.0,19600.0,20330.0,2025-05-01,202505W1
20952,2025-05-01 04:55:00,P,54.0,TX119600Q5,14.0,19600.0,20397.0,2025-05-01,202505W1
20953,2025-05-01 04:55:00,C,23.5,TX121000E5,12.0,21000.0,20380.0,2025-05-01,202505W1


目前Greek缺少4/23以後之資料，故暫時刪除

In [23]:
# **刪除 `Date` 在 2025-04-24 之後的數據**
df_new = df_new[df_new["Date"] <= "2025-04-23"]

In [24]:
df_new.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\PCP_option.csv',index = False, encoding = 'utf-8-sig')

### 找結算日之結算價，計算到期日之損益

In [25]:
df_settlement = pd.read_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\Settlement_Prcie.csv', encoding='utf-8-sig') 

# **轉換 `SettlementDate` 格式**
df_settlement["SettlementDate"] = pd.to_datetime(df_settlement["最後結算日"], format="%Y/%m/%d").dt.strftime("%Y-%m-%d")
df_settlement

Unnamed: 0,最後結算日,契約月份,臺指選擇權,SettlementDate
0,2025/5/21,202505,21848,2025-05-21
1,2025/5/14,202505W2,21751,2025-05-14
2,2025/5/7,202505W1,20466,2025-05-07
3,2025/4/30,202504W5,20211,2025-04-30
4,2025/4/23,202504W4,19583,2025-04-23
...,...,...,...,...
67,2024/1/31,202401W5,17896,2024-01-31
68,2024/1/24,202401W4,17873,2024-01-24
69,2024/1/17,202401,17168,2024-01-17
70,2024/1/10,202401W2,17481,2024-01-10


透過df_new中的ProductCode去抓取相應在df_settlement中的契約月份做mapping
1. 新增第一個欄位為SettlementPrice，抓取df_settlement中的臺指選擇權
2. 新增第二個欄位為SettlementDate，抓取df_settlement中的SettlementDate

In [None]:
df_new_0 = df_new
# 合併 df_new 與 df_settlement
df_new_0 = df_new_0.merge(
    df_settlement[["契約月份", "臺指選擇權", "SettlementDate"]], 
    left_on="ProductCode", 
    right_on="契約月份", 
    how="left"
)

# 重命名欄位
df_new_0.rename(columns={"臺指選擇權": "SettlementPrice"}, inplace=True)

# 移除 `契約月份` 欄位
df_new_0.drop(columns=["契約月份"], inplace=True)
# 轉換 `Time` 為 `HHMMSS` 整數格式
df_new_0["DateTime"] = pd.to_datetime(df_new_0["Time"]).dt.strftime("%H%M%S").astype(int)

# 檢查結果
print("📌 `SettlementPrice` 和 `SettlementDate` 匹配完成：")
print(df_new_0.head())
print(f"✅ 更新後數據量: {len(df_new_0)}")

df_new_0.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\df_new.csv',index = False, encoding = 'utf-8-sig')
df_new_0

📌 `SettlementPrice` 和 `SettlementDate` 匹配完成：
                 Time Category  Price          ID   Fee  StrikePrice  \
0 2024-02-01 04:05:00        C    7.0  TX118600B4  11.0      18600.0   
1 2024-02-01 04:10:00        C   14.5  TX118450B4  12.0      18450.0   
2 2024-02-01 04:10:00        C   14.5  TX118450B4  12.0      18450.0   
3 2024-02-23 04:52:00        P    6.4  TX418500N4  11.0      18500.0   
4 2024-03-09 04:46:00        P    3.3  TX218850O4  11.0      18850.0   

   FutPrice        Date ProductCode  SettlementPrice SettlementDate  DateTime  
0   17917.0  2024-02-01    202402W1            18632     2024-02-15     40500  
1   17910.0  2024-02-01    202402W1            18632     2024-02-15     41000  
2   17913.0  2024-02-01    202402W1            18632     2024-02-15     41000  
3   18994.0  2024-02-23    202402W4            18993     2024-02-29     45200  
4   19744.0  2024-03-09    202403W2            19931     2024-03-13     44600  
✅ 更新後數據量: 594


Unnamed: 0,Time,Category,Price,ID,Fee,StrikePrice,FutPrice,Date,ProductCode,SettlementPrice,SettlementDate,DateTime
0,2024-02-01 04:05:00,C,7.0,TX118600B4,11.0,18600.0,17917.0,2024-02-01,202402W1,18632,2024-02-15,40500
1,2024-02-01 04:10:00,C,14.5,TX118450B4,12.0,18450.0,17910.0,2024-02-01,202402W1,18632,2024-02-15,41000
2,2024-02-01 04:10:00,C,14.5,TX118450B4,12.0,18450.0,17913.0,2024-02-01,202402W1,18632,2024-02-15,41000
3,2024-02-23 04:52:00,P,6.4,TX418500N4,11.0,18500.0,18994.0,2024-02-23,202402W4,18993,2024-02-29,45200
4,2024-03-09 04:46:00,P,3.3,TX218850O4,11.0,18850.0,19744.0,2024-03-09,202403W2,19931,2024-03-13,44600
...,...,...,...,...,...,...,...,...,...,...,...,...
589,2025-04-03 04:48:00,C,51.0,TX221450D5,14.0,21450.0,20884.0,2025-04-03,202504W2,17430,2025-04-09,44800
590,2025-04-03 04:48:00,C,33.0,TX221450D5,13.0,21450.0,20868.0,2025-04-03,202504W2,17430,2025-04-09,44800
591,2025-04-03 04:49:00,C,62.0,TX221600D5,14.0,21600.0,20827.0,2025-04-03,202504W2,17430,2025-04-09,44900
592,2025-04-18 04:15:00,C,92.0,TX419900D5,16.0,19900.0,19322.0,2025-04-18,202504W4,19583,2025-04-23,41500


## 市場上選擇權資料
抓出市場上之選擇權資料做預處理，合併每日之tick數據並篩選合適之時間段

這部分比較麻煩，且因為每日的tick資料都不小，所以要先針對個別數據限制範圍(4~5點)，最後在合併

以下分開來針對2024、2025年之資料撰寫函式

In [None]:
def load_tick_data_2024(tick_folder, files, start_time, end_time):
    """ 讀取PCP資料+每日日內tick資料，先篩選時間範圍後再合併 """
    df_list = []

    for f in files:
        file_path = f"{tick_folder}/2024_{f[:-4]}.csv"

        # 逐個讀取檔案並進行時間篩選
        df = pd.read_csv(file_path, encoding='cp950', low_memory=False)
        df_filtered = df[(df["成交時間"] >= start_time) & (df["成交時間"] <= end_time) & (df["IV"] != 9999)] # 去除呈現9999之異常值

        df_list.append(df_filtered)  # 只存篩選後的數據

    combined_df = pd.concat(df_list, ignore_index=True)  # 合併 DataFrame
    return combined_df

def convert_buy_sell(df):
    """ 將買賣權別欄位轉換為 'C' (買權) 或 'P' (賣權) """
    df = df.copy()
    df["買賣權別"] = df["買賣權別"].replace({1: "C", 0: "P"})
    return df

def option_tick_data_2024(tick_folder, files, start_time, end_time):
    """
    讀取 Tick 數據，先篩選時間範圍後合併，再進行買賣權別轉換
    """
    df_tick = load_tick_data_2024(tick_folder, files, start_time, end_time)
    df_tick = convert_buy_sell(df_tick)
    return df_tick


In [37]:
def load_tick_data_2025(tick_folder, files, start_time, end_time):
    """ 讀取PCP資料+每日日內tick資料，先篩選時間範圍後再合併 """
    df_list = []

    for f in files:
        file_path = f"{tick_folder}/2025_{f[:-4]}.csv"

        # **逐個讀取檔案並進行時間篩選**
        df = pd.read_csv(file_path, encoding='cp950', low_memory=False)
        df_filtered = df[(df["成交時間"] >= start_time) & (df["成交時間"] <= end_time) & (df["IV"] != 9999)]

        df_list.append(df_filtered)  # 只存篩選後的數據

    combined_df = pd.concat(df_list, ignore_index=True)  # 合併 DataFrame
    return combined_df

def convert_buy_sell(df):
    """ 將買賣權別欄位轉換為 'C' (買權) 或 'P' (賣權) """
    df = df.copy()
    df["買賣權別"] = df["買賣權別"].replace({1: "C", 0: "P"})
    return df

def option_tick_data_2025(tick_folder, files, start_time, end_time):
    """
    讀取 Tick 數據，先篩選時間範圍後合併，再進行買賣權別轉換
    """
    df_tick = load_tick_data_2025(tick_folder, files, start_time, end_time)
    df_tick = convert_buy_sell(df_tick)
    return df_tick


這部分比較麻煩，需要自行手動打逐月的數據

In [38]:
tick_folder_2024 = r'D:/Greek/2024'
files1 = ['01_02.csv', '01_03.csv','01_04.csv', '01_05.csv', '01_08.csv',
         '01_09.csv', '01_10.csv','01_11.csv', '01_12.csv', '01_15.csv',
         '01_16.csv', '01_17.csv','01_18.csv' ,'01_19.csv' ,'01_22.csv',
         '01_23.csv','01_24.csv','01_25.csv','01_26.csv','01_29.csv',
         '01_30.csv','01_31.csv']

files2 = ['02_01.csv', '02_02.csv','02_05.csv', '02_15.csv', '02_16.csv',
         '02_19.csv', '02_20.csv','02_21.csv', '02_22.csv', '02_23.csv',
         '02_26.csv', '02_27.csv','02_29.csv']

files3 = ['03_01.csv', '03_04.csv','03_05.csv', '03_06.csv', '03_07.csv',
         '03_08.csv', '03_11.csv','03_12.csv', '03_13.csv', '03_14.csv',
         '03_15.csv', '03_18.csv','03_19.csv', '03_20.csv', '03_21.csv', 
         '03_22.csv', '03_25.csv','03_26.csv', '03_27.csv', '03_28.csv',
         '03_29.csv']

files4 = ['04_01.csv', '04_02.csv','04_03.csv', '04_08.csv', '04_09.csv',
         '04_10.csv', '04_11.csv','04_12.csv', '04_15.csv', '04_16.csv',
         '04_17.csv', '04_18.csv','04_19.csv', '04_22.csv', '04_23.csv', 
         '04_24.csv', '04_25.csv','04_26.csv', '04_29.csv', '04_30.csv']

files5 = ['05_02.csv', '05_03.csv','05_06.csv', '05_07.csv', '05_08.csv',
         '05_09.csv', '05_10.csv','05_13.csv', '05_14.csv', '05_15.csv',
         '05_16.csv', '05_17.csv','05_20.csv', '05_21.csv', '05_22.csv', 
         '05_23.csv', '05_24.csv','05_27.csv', '05_28.csv', '05_29.csv',
         '05_30.csv', '05_31.csv']

files6 = ['06_03.csv', '06_04.csv','06_05.csv', '06_06.csv', '06_07.csv',
         '06_11.csv', '06_12.csv','06_13.csv', '06_14.csv',
         '06_17.csv', '06_18.csv','06_19.csv', '06_20.csv', '06_21.csv', 
         '06_24.csv', '06_25.csv','06_26.csv', '06_27.csv', '06_28.csv']

files7 = ['07_01.csv', '07_02.csv', '07_03.csv', '07_04.csv', '07_05.csv',
         '07_08.csv', '07_09.csv', '07_10.csv', '07_11.csv', '07_12.csv',
         '07_15.csv', '07_16.csv', '07_17.csv', '07_18.csv', '07_19.csv',
         '07_22.csv', '07_23.csv', '07_26.csv', '07_29.csv', '07_30.csv', '07_31.csv']

files8 = ['08_01.csv', '08_02.csv',
         '08_05.csv', '08_06.csv', '08_07.csv', '08_08.csv', '08_09.csv',
         '08_12.csv', '08_13.csv', '08_14.csv', '08_15.csv', '08_16.csv',
         '08_19.csv', '08_20.csv', '08_21.csv', '08_22.csv', '08_23.csv',
         '08_26.csv', '08_27.csv', '08_28.csv', '08_29.csv', '08_30.csv']

files9 = ['09_02.csv', '09_03.csv', '09_04.csv', '09_05.csv', '09_06.csv',
         '09_09.csv', '09_10.csv', '09_11.csv', '09_12.csv', '09_13.csv',
         '09_16.csv', '09_18.csv', '09_19.csv', '09_20.csv',
         '09_23.csv', '09_24.csv', '09_25.csv', '09_26.csv', '09_27.csv','09_30.csv',
]

files10 = ['10_01.csv', '10_04.csv',
         '10_07.csv', '10_08.csv', '10_09.csv', '10_11.csv',
         '10_14.csv', '10_15.csv', '10_16.csv', '10_17.csv', '10_18.csv',
         '10_21.csv', '10_22.csv', '10_23.csv', '10_24.csv', '10_25.csv',
         '10_28.csv', '10_29.csv', '10_30.csv'
]

files11 = ['11_01.csv', '11_04.csv', '11_05.csv', '11_06.csv', '11_07.csv',
         '11_08.csv', '11_11.csv', '11_12.csv', '11_13.csv', '11_14.csv',
         '11_15.csv', '11_18.csv', '11_19.csv', '11_20.csv', '11_21.csv',
         '11_22.csv', '11_25.csv', '11_26.csv', '11_27.csv', '11_28.csv',
         '11_29.csv'
]

files12 = ['12_02.csv', '12_03.csv', '12_04.csv', '12_05.csv', '12_06.csv',
         '12_09.csv', '12_10.csv', '12_11.csv', '12_12.csv', '12_13.csv',
         '12_16.csv', '12_17.csv', '12_18.csv', '12_19.csv', '12_20.csv',
         '12_23.csv', '12_24.csv', '12_25.csv', '12_26.csv', '12_27.csv',
         '12_30.csv', '12_31.csv'
]


tick_folder_2025 = r'D:/Greek/2025'
files13 = ['01_02.csv', '01_03.csv', '01_06.csv', '01_07.csv', '01_08.csv',
         '01_09.csv', '01_10.csv', '01_13.csv', '01_14.csv', '01_15.csv',
         '01_16.csv', '01_17.csv', '01_20.csv', '01_21.csv', '01_22.csv'
]

files14 = ['02_03.csv', '02_04.csv', '02_05.csv', '02_06.csv', '02_07.csv',
         '02_10.csv', '02_11.csv', '02_12.csv', '02_13.csv', '02_14.csv',
         '02_17.csv', '02_18.csv', '02_19.csv', '02_20.csv', '02_21.csv',
         '02_24.csv', '02_25.csv', '02_26.csv', '02_27.csv',
]

files15 = ['03_03.csv', '03_04.csv', '03_05.csv', '03_06.csv', '03_07.csv',
         '03_10.csv', '03_11.csv', '03_12.csv', '03_13.csv', '03_14.csv',
         '03_17.csv', '03_18.csv', '03_19.csv', '03_20.csv', '03_21.csv',
         '03_24.csv', '03_25.csv', '03_26.csv', '03_27.csv', '03_28.csv',
         '03_31.csv'
]

files16 = ['04_01.csv', '04_02.csv', '04_07.csv', '04_08.csv', '04_09.csv',
         '04_10.csv', '04_11.csv', '04_14.csv', '04_15.csv', '04_16.csv',
         '04_17.csv', '04_18.csv', '04_21.csv', '04_22.csv', '04_23.csv',
]

In [39]:
# 8:45~8:55 市場開盤數據
start_time = 84500
end_time = 85500

# 4:00~5:00 市場夜盤數據
start_time2 = 40000  
end_time2 = 50000  

files_2024 = files1 + files2 + files3 + files4 + files5 + files6 + files7 + files8 + files9 + files10 + files11 + files12 
df_tick_2024 = option_tick_data_2024(tick_folder_2024, files_2024, start_time, end_time)
df_tick_2024.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2024_0845_0855.csv',index = False, encoding = 'utf-8-sig')
df_tick_2024_1 = option_tick_data_2024(tick_folder_2024, files_2024, start_time2, end_time2)
df_tick_2024_1.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2024_0400_0500.csv',index = False, encoding = 'utf-8-sig')

files_2025 = files13 + files14 + files15 + files16
df_tick_2025 = option_tick_data_2025(tick_folder_2025, files_2025, start_time, end_time)
df_tick_2025.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2025_0845_0855.csv',index = False, encoding = 'utf-8-sig')
df_tick_2025 = option_tick_data_2025(tick_folder_2025, files_2025, start_time2, end_time2)
df_tick_2025.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2025_0400_0500.csv',index = False, encoding = 'utf-8-sig')

In [40]:
# **合併數據**
df_tick = pd.concat([df_tick_2024, df_tick_2025], axis=0, ignore_index=True)
df_tick.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\df_tick.csv',index = False, encoding = 'utf-8-sig')
df_tick

Unnamed: 0,成交日期,履約價格,到期月份(週別),買賣權別,成交時間,成交價格,成交數量(B or S),剩餘到期時間(秒),當下指數,IV,delta,gamma,vega,theta,rho
0,20240102,16900.0,202401,P,84500.0,17.5,1.0,1313999.0,17841.12782,0.175103,-0.060176,0.000187,4.348210,-2.552475,0.454324
1,20240102,17800.0,202401,P,84500.0,193.0,1.0,1313999.0,17841.12782,0.146865,-0.454260,0.000741,14.433095,-7.343764,3.454969
2,20240102,17000.0,202401,P,84500.0,22.0,120.0,1313999.0,17841.12782,0.169003,-0.075349,0.000231,5.173430,-2.936218,0.568908
3,20240102,16500.0,202401,P,84500.0,8.1,1.0,1313999.0,17841.12782,0.202836,-0.027132,0.000085,2.279272,-1.542189,0.204933
4,20240102,17300.0,202401,P,84500.0,48.0,1.0,1313999.0,17841.12782,0.154292,-0.154930,0.000424,8.675381,-4.527715,1.170927
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5005825,20250423,20300.0,202504W5,C,45946.5,13.5,1.0,636313.0,19141.00000,0.247322,0.049846,0.000153,2.797302,-4.739438,0.189726
5005826,20250423,19000.0,202504W5,C,45954.0,380.0,1.0,636305.0,19141.00000,0.281743,0.584366,0.000509,10.603404,-20.770564,2.179455
5005827,20250423,19400.0,202504W5,C,45954.0,165.0,1.0,636305.0,19141.00000,0.252216,0.363939,0.000548,10.209732,-17.789944,1.371809
5005828,20250423,19000.0,202504W5,C,45954.5,380.0,1.0,636305.0,19141.00000,0.281743,0.584366,0.000509,10.603404,-20.770564,2.179455


#### 4點到5點選擇權市場數據 (策略2、3需要)

In [41]:
# **讀取 CSV 檔案**
df_tick_2024_1 = pd.read_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2024_0400_0500.csv', encoding='utf-8-sig')
df_tick_2025_1 = pd.read_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\tick_2025_0400_0500.csv', encoding='utf-8-sig')

# **合併數據**
df_tick_1 = pd.concat([df_tick_2024_1, df_tick_2025_1], axis=0, ignore_index=True)
df_tick_1 = df_tick_1[(df_tick_1["成交時間"] >= 40000) & (df_tick_1["成交時間"] <= 50000)]
# **轉換 `df_tick` 的成交日期格式**
df_tick_1["TradeDate"] = pd.to_datetime(df_tick_1["成交日期"].astype(str), format="%Y%m%d").dt.strftime("%Y-%m-%d")
df_tick_1.to_csv(r'D:\CODE\SC_SP_delay_weekly策略\data\df_tick_1.csv',index = False, encoding = 'utf-8-sig')
df_tick_1

Unnamed: 0,成交日期,履約價格,到期月份(週別),買賣權別,成交時間,成交價格,成交數量(B or S),剩餘到期時間(秒),當下指數,IV,delta,gamma,vega,theta,rho,TradeDate
0,20231230,18100.0,202401,C,40000.0,104.0,4.0,1590299.0,17828.012195,0.132429,0.320505,0.000675,14.326361,-5.407154,2.826597,2023-12-30
1,20231230,18100.0,202401,C,40000.2,104.0,4.0,1590299.0,17828.012195,0.132429,0.320505,0.000675,14.326361,-5.407154,2.826597,2023-12-30
2,20231230,18100.0,202401,C,40000.4,104.0,4.0,1590299.0,17828.012195,0.132429,0.320505,0.000675,14.326361,-5.407154,2.826597,2023-12-30
3,20231230,18100.0,202401,C,40000.6,104.0,4.0,1590299.0,17828.012195,0.132429,0.320505,0.000675,14.326361,-5.407154,2.826597,2023-12-30
4,20231230,17800.0,202401,C,40001.0,229.0,1.0,1590298.0,17828.500000,0.134548,0.538079,0.000737,15.899242,-6.234061,4.718128,2023-12-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1700949,20250423,20300.0,202504W5,C,45946.5,13.5,1.0,636313.0,19141.000000,0.247322,0.049846,0.000153,2.797302,-4.739438,0.189726,2025-04-23
1700950,20250423,19000.0,202504W5,C,45954.0,380.0,1.0,636305.0,19141.000000,0.281743,0.584366,0.000509,10.603404,-20.770564,2.179455,2025-04-23
1700951,20250423,19400.0,202504W5,C,45954.0,165.0,1.0,636305.0,19141.000000,0.252216,0.363939,0.000548,10.209732,-17.789944,1.371809,2025-04-23
1700952,20250423,19000.0,202504W5,C,45954.5,380.0,1.0,636305.0,19141.000000,0.281743,0.584366,0.000509,10.603404,-20.770564,2.179455,2025-04-23
