In [10]:
# IVV & SPY association rule analysis

# Install the required package (mlxtend supplies apriori / association_rules imported below)
!pip install mlxtend --quiet

# Load packages
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
import seaborn as sns


In [11]:
# Connect Google Drive; it is mounted at /content/drive, which the CSV paths below rely on.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# Read the IVV and SPY price CSVs from the mounted Drive folder, in that order.
ivv_df, spy_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/金融大數據應用/期末報告/IVV.csv',
        '/content/drive/MyDrive/金融大數據應用/期末報告/SPY.csv',
    )
)

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/金融大數據應用/期末報告/IVV.csv'

In [None]:
# Parse the Date column of both frames into pandas datetimes.
ivv_df['Date'] = pd.to_datetime(ivv_df['Date'])
spy_df['Date'] = pd.to_datetime(spy_df['Date'])

# Keep only the date and the closing price, giving each ETF its own close column name.
# The rename happens BEFORE the column selection so this cell can be re-run safely:
# on a second run 'Close' no longer exists, the rename is a no-op, and the
# already-renamed 'IVV_Close' / 'SPY_Close' columns are still selected instead of
# raising KeyError: 'Close'.
ivv_df = ivv_df.rename(columns={'Close': 'IVV_Close'})[['Date', 'IVV_Close']]
spy_df = spy_df.rename(columns={'Close': 'SPY_Close'})[['Date', 'SPY_Close']]

# Inner join keeps only dates present in both files; rows are then ordered by date.
merged_df = pd.merge(ivv_df, spy_df, on='Date', how='inner').sort_values('Date')
merged_df

In [None]:
# Print each frame's column labels to confirm what it currently contains.
for _label, _frame in (("IVV columns:", ivv_df), ("SPY columns:", spy_df)):
    print(_label, _frame.columns)


In [None]:
def _first_close_column(columns, source_name):
    """Return the first label in `columns` whose text contains 'close', ignoring case.

    Args:
        columns: iterable of column labels (e.g. ``DataFrame.columns``).
        source_name: short name of the data set, used only in the error message.

    Returns:
        The first matching label, unchanged.

    Raises:
        KeyError: if no label contains 'close'. The message lists the labels that
            were searched, which is easier to act on than a bare IndexError.
    """
    for label in columns:
        # str() lets non-string labels (e.g. integer column names) be skipped
        # instead of failing on .lower().
        if 'close' in str(label).lower():
            return label
    raise KeyError(
        f"{source_name}: no column containing 'close' found in {list(columns)}"
    )


# Locate the closing-price column of each frame (tolerates upper/lower-case differences).
ivv_close_col = _first_close_column(ivv_df.columns, 'IVV')
spy_close_col = _first_close_column(spy_df.columns, 'SPY')

In [None]:
# Convert the Date column of each frame to datetime values (in place).
for _frame in (ivv_df, spy_df):
    _frame['Date'] = pd.to_datetime(_frame['Date'])

# Reduce each frame to Date plus its detected close column, renamed per ETF.
ivv_df = ivv_df.loc[:, ['Date', ivv_close_col]].rename(columns={ivv_close_col: 'IVV_Close'})
spy_df = spy_df.loc[:, ['Date', spy_close_col]].rename(columns={spy_close_col: 'SPY_Close'})

# Keep the dates common to both frames and order the result chronologically.
merged_df = ivv_df.merge(spy_df, how='inner', on='Date').sort_values(by='Date')


In [None]:
# Flag rows whose close is strictly higher than the previous row's close.
# The first row has no predecessor (its difference is NaN), so it evaluates to False.
merged_df['IVV_Up'] = merged_df['IVV_Close'].diff().gt(0)
merged_df['SPY_Up'] = merged_df['SPY_Close'].diff().gt(0)


In [None]:
# Short moving averages (3 and 7 rows) of each close series.
# Each entry is (target column, source column, window length).
for _target, _source, _window in (
    ('IVV_MA3', 'IVV_Close', 3),
    ('IVV_MA7', 'IVV_Close', 7),
    ('SPY_MA3', 'SPY_Close', 3),
    ('SPY_MA7', 'SPY_Close', 7),
):
    merged_df[_target] = merged_df[_source].rolling(window=_window).mean()

In [None]:
# Flag rows where the close is strictly above its 3- / 7-row moving average.
# Each entry is (flag column, price column, average column); rows where the
# average is still NaN compare as False.
for _flag, _price, _average in (
    ('IVV_MA3_Up', 'IVV_Close', 'IVV_MA3'),
    ('IVV_MA7_Up', 'IVV_Close', 'IVV_MA7'),
    ('SPY_MA3_Up', 'SPY_Close', 'SPY_MA3'),
    ('SPY_MA7_Up', 'SPY_Close', 'SPY_MA7'),
):
    merged_df[_flag] = merged_df[_price].gt(merged_df[_average])

In [None]:
# Longer moving averages: 30 rows and 252 rows (the "yearly" line) per ETF.
_ivv_close = merged_df['IVV_Close']
_spy_close = merged_df['SPY_Close']

merged_df['IVV_MA30'] = _ivv_close.rolling(30).mean()
merged_df['IVV_MA252'] = _ivv_close.rolling(252).mean()
merged_df['SPY_MA30'] = _spy_close.rolling(30).mean()
merged_df['SPY_MA252'] = _spy_close.rolling(252).mean()

In [None]:
# Boolean columns: True where the moving average lies strictly below the close,
# i.e. the close is above its 30- / 252-row average. NaN averages compare as False.
merged_df['IVV_MA30_Up'] = merged_df['IVV_MA30'].lt(merged_df['IVV_Close'])
merged_df['IVV_MA252_Up'] = merged_df['IVV_MA252'].lt(merged_df['IVV_Close'])
merged_df['SPY_MA30_Up'] = merged_df['SPY_MA30'].lt(merged_df['SPY_Close'])
merged_df['SPY_MA252_Up'] = merged_df['SPY_MA252'].lt(merged_df['SPY_Close'])


In [None]:
# Indicator columns that act as the "items" of each daily transaction.
binary_columns = [
    'IVV_Up', 'SPY_Up',
    'IVV_MA3_Up', 'IVV_MA7_Up', 'IVV_MA30_Up', 'IVV_MA252_Up',
    'SPY_MA3_Up', 'SPY_MA7_Up', 'SPY_MA30_Up', 'SPY_MA252_Up'
]

# Transaction table: Date followed by every indicator column cast to bool.
# astype returns a new frame, so merged_df itself is left untouched.
transaction_df = merged_df[['Date', *binary_columns]].astype(
    {flag: bool for flag in binary_columns}
)


In [None]:
# Display the transaction table (Date plus the boolean indicator columns) for inspection.
transaction_df

In [None]:
# Skip the first 252 rows (the 252-row moving average is not fully formed there),
# then remove Date so only the boolean indicator columns are passed to apriori.
data_for_apriori = transaction_df.iloc[252:].drop(columns=['Date'])

In [None]:
# Mine frequent itemsets (support of at least 0.1), reported with column names as items.
frequent_itemsets = apriori(
    data_for_apriori,
    use_colnames=True,
    min_support=0.1,
)

# Derive association rules whose confidence is at least 0.6.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.6,
    metric="confidence",
)

In [None]:
# Recompute the rule set with the lift filter FIRST, then derive the display table
# from it. Previously rules_df was sliced before `rules` was reassigned, so the
# lift filter never reached the printed table or the later scatter plot.
# NOTE(review): this replaces the confidence >= 0.6 rule set from the previous
# cell with a lift >= 1.0 rule set — confirm that is the intended selection.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)

# Columns shown to the reader and reused by the plotting cell.
rules_df = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

# "Top 10" is ranked by lift (strongest association first); rules_df itself keeps
# its original row order.
print("Top 10 關聯規則：")
print(rules_df.sort_values('lift', ascending=False).head(10))

In [None]:
# Install a Chinese-capable font package (Noto CJK) so Chinese plot labels can render
!apt-get -qq install fonts-noto-cjk


In [None]:
import matplotlib.pyplot as plt
import matplotlib
# Set matplotlib's default font family to a CJK font.
# NOTE(review): a later cell sets 'font.family' again to 'Noto Sans CJK TC',
# so this value only applies until that cell runs.
plt.rcParams['font.family'] = 'Noto Sans CJK JP'


In [None]:
import matplotlib.font_manager as fm


In [None]:
# Install the Noto CJK font package (command output is discarded)
!apt-get -y install fonts-noto-cjk > /dev/null

# Re-import the matplotlib modules before changing the font settings
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import matplotlib as mpl

mpl.rcParams['font.family'] = 'Noto Sans CJK TC'
mpl.rcParams['axes.unicode_minus'] = False  # avoid garbled minus signs


In [None]:
# Import packages
import matplotlib.pyplot as plt
# matplotlib.pyplot is used for drawing the figures
import matplotlib.font_manager as fm
# matplotlib.font_manager is used to load fonts;
# it is the module that loads/manages external font files (the key part here)
# Point at the Noto font through its full file path
font_path = "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc"
# This is the font file that will be used to render Chinese text.
my_font = fm.FontProperties(fname=font_path)

# | Part              | Description                                                      |
# |-------------------|------------------------------------------------------------------|
# | font_path         | Absolute path of the font file (where the Colab install puts it) |
# | FontProperties()  | Builds a font object that selects which font to use              |
# | fname=font_path   | Tells it that the font file lives at font_path                   |
# Result: my_font is a font setting backed by the Noto Sans CJK file at the /usr/... path above.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

# Colour normaliser spanning exactly the observed support range, so the colour bar
# runs from the smallest to the largest support among the rules.
norm = colors.Normalize(vmin=rules_df['support'].min(), vmax=rules_df['support'].max())

# Built-in viridis colormap.
cmap = cm.viridis

# Scatter of confidence (x) against lift (y), coloured by support.
# `norm` and `cmap` are now actually handed to the scatter call; previously they
# were computed but ignored (the call used its own defaults), and a per-point
# colour array was built that nothing consumed — that dead array is removed.
fig, ax = plt.subplots(figsize=(9, 6))
scatter = ax.scatter(
    rules_df['confidence'],        # x axis: confidence
    rules_df['lift'],              # y axis: lift
    c=rules_df['support'],         # point colour follows the support value
    s=30,                          # marker size
    cmap=cmap,
    norm=norm,
    alpha=0.8,                     # slight transparency for overlapping points
    edgecolors='k',                # black marker outline
)

# Colour bar mapping colours back to support values.
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('支持度（Support）', fontproperties=my_font)  # my_font renders the Chinese label

# Title and axis labels.
ax.set_title("Confidence vs Lift with Support as Color", fontproperties=my_font)
ax.set_xlabel("信賴度（Confidence）", fontproperties=my_font)
ax.set_ylabel("提升度（Lift）", fontproperties=my_font)

ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
def _share_both_above_ma(frame, window):
    """Share of rows where both IVV and SPY close above their `window`-row moving average.

    Args:
        frame: DataFrame with 'IVV_Close', 'SPY_Close', f'IVV_MA{window}' and
            f'SPY_MA{window}' columns.
        window: moving-average length used in the column names.

    Returns:
        Fraction in [0, 1], computed only over rows where both moving averages
        exist. NaN if no row has both averages.
    """
    ivv_ma = frame[f'IVV_MA{window}']
    spy_ma = frame[f'SPY_MA{window}']
    # Warm-up rows have NaN averages; a comparison against NaN is always False, so
    # counting them would drag the ratio down (badly for the 252-row average).
    has_both_averages = ivv_ma.notna() & spy_ma.notna()
    both_above = (frame['IVV_Close'] > ivv_ma) & (frame['SPY_Close'] > spy_ma)
    return both_above[has_both_averages].mean()


# Share of days on which both ETFs close above the given moving average.
above_ma3 = _share_both_above_ma(merged_df, 3)
above_ma7 = _share_both_above_ma(merged_df, 7)
above_ma30 = _share_both_above_ma(merged_df, 30)
above_ma252 = _share_both_above_ma(merged_df, 252)

# Legacy alias: this value was previously stored under the misleading name
# above_ma10 although it has always been the 30-row figure.
above_ma10 = above_ma30

# Print the results (the third line used to say "10" while reporting the 30-row ratio).
print(f"同時站上 3 日均線的比例：{above_ma3:.2%}")
print(f"同時站上 7 日均線的比例：{above_ma7:.2%}")
print(f"同時站上 30 日均線的比例：{above_ma30:.2%}")
print(f"同時站上 252 日均線的比例：{above_ma252:.2%}")


In [None]:
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Font object for the Chinese labels, loaded straight from the Noto CJK font file.
my_font = FontProperties(fname='/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc')

# Working copy of merged_df with same-day percentage returns appended.
df = merged_df.assign(
    IVV_Return=merged_df['IVV_Close'].pct_change(),
    SPY_Return=merged_df['SPY_Close'].pct_change(),
)

# SPY's cumulative return from the current row to 1..10 rows ahead.
for lag in range(1, 11):
    df[f'SPY_Return_{lag}D'] = df['SPY_Close'].shift(-lag).div(df['SPY_Close']).sub(1)

# Restrict to rows on which IVV closed higher than the previous row.
ivv_up_df = df.loc[df['IVV_Up'].eq(1)]

# One histogram per horizon, laid out as a 2 x 5 grid filled row by row.
hist_fig, hist_axes = plt.subplots(2, 5, figsize=(20, 8))

for lag, panel in enumerate(hist_axes.flat, start=1):
    data = ivv_up_df[f'SPY_Return_{lag}D'].dropna()
    average = data.mean()
    panel.hist(data, bins=30, alpha=0.7, color='skyblue', edgecolor='black')
    # Dashed red line marks the mean return for this horizon.
    panel.axvline(average, color='red', linestyle='dashed', linewidth=1.5, label=f'平均：{average:.2%}')
    panel.set_title(f'IVV 上漲後第 {lag} 天\nSPY 報酬', fontproperties=my_font, fontsize=12)
    panel.set_xlabel('報酬率', fontproperties=my_font)
    panel.set_ylabel('次數', fontproperties=my_font)
    panel.legend(prop=my_font)

plt.tight_layout()
plt.show()

In [None]:
# List merged_df's column labels to confirm which derived columns exist at this point.
print(merged_df.columns)

In [None]:
import matplotlib.pyplot as plt

# Create a 2 x 2 grid of axes with a figure-level title.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16, 10))
fig.suptitle('SPY vs IVV Moving Averages', fontsize=16)

# Plotting helper (draws onto a single subplot)
def plot_ma_subplot(ax, window):
    """Draw the SPY and IVV closes with their `window`-row moving averages on `ax`.

    Reads the module-level `merged_df`, which must provide 'Date', 'SPY_Close',
    'IVV_Close', f'SPY_MA{window}' and f'IVV_MA{window}'.

    Args:
        ax: axes object to draw on.
        window: moving-average length; selects the MA columns and appears in labels.
    """
    dates = merged_df['Date']

    # (column to plot, line keyword arguments), in drawing order.
    line_specs = (
        ('SPY_Close', {'label': 'SPY Close', 'alpha': 1, 'ls': '--'}),
        (f'SPY_MA{window}', {'label': f'SPY {window}-day MA', 'linewidth': 3, 'ls': '--'}),
        ('IVV_Close', {'label': 'IVV Close', 'alpha': 0.3}),
        (f'IVV_MA{window}', {'label': f'IVV {window}-day MA', 'linewidth': 2}),
    )
    for column, line_style in line_specs:
        ax.plot(dates, merged_df[column], **line_style)

    ax.set_title(f'{window}-Day MA')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.grid(True)
    ax.legend()


# Pair each window length with one panel of the grid, in flattened (row-major) order.
windows = [3, 7, 30, 252]
axes = axs.flatten()

for panel, window in zip(axes, windows):
    plot_ma_subplot(panel, window)

# Reserve the top 4% of the figure so the figure title is not covered.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

# Relies on a `df` from an earlier cell that carries 'IVV_Return' and 'SPY_Return'.
max_lag = 10  # largest lead, in rows, to evaluate
correlations = []
spy_returns = df['SPY_Return']

for lag in range(max_lag + 1):
    # shift(lag) lines up IVV's return from `lag` rows earlier with SPY's return
    # on the current row, i.e. IVV leading SPY by `lag` rows.
    lagged_ivv = df['IVV_Return'].shift(lag)
    # Pearson correlation needs complete pairs, so drop rows missing either value.
    usable = lagged_ivv.notna() & spy_returns.notna()
    coefficient, _p_value = pearsonr(lagged_ivv[usable], spy_returns[usable])
    correlations.append(coefficient)

# Plot the correlation as a function of the lead.
lags = range(max_lag + 1)
plt.figure(figsize=(10, 5))
plt.plot(lags, correlations, marker='o')
plt.title('IVV 領先 SPY 的延遲相關分析', fontproperties=my_font)
plt.xlabel('延遲天數 (IVV 領先)', fontproperties=my_font)
plt.ylabel('相關係數', fontproperties=my_font)
plt.grid(True)
plt.show()

# Lead with the highest (signed) correlation; lag 0 is part of the candidates.
best_lag = np.argmax(correlations)
print(f"IVV 領先 SPY 的最佳延遲天數是：{best_lag} 天，相關係數為：{correlations[best_lag]:.4f}")

In [None]:
# Rebuild df as a fresh copy of merged_df with the daily percentage change of
# each close appended (IVV first, then SPY).
df = merged_df.assign(
    IVV_Return=merged_df['IVV_Close'].pct_change(),
    SPY_Return=merged_df['SPY_Close'].pct_change(),
)
