In [None]:
# 共通事前處理
# 隱藏不必要的警告
import warnings
warnings.filterwarnings('ignore')

# 匯入必要的函式庫
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import font_manager as fm

# 將字型新增到 matplotlib
fm.fontManager.addfont('../TaipeiSansTCBeta-Regular.ttf')

# from pathlib import Path
# 取得當前檔案的路徑
# 假設你在一個目錄中執行，字體在上一層，使用 Path.cwd()
# font_path = Path.cwd().parent / 'TaipeiSansTCBeta-Regular.ttf'
# 將 Path 物件轉換為字串傳入 addfont
# fm.fontManager.addfont(str(font_path))

# 用來顯示資料框的函式
from IPython.display import display

# 調整顯示選項
# NumPy 的浮點數表示精度
np.set_printoptions(suppress=True, precision=4)
# pandas 中的浮點數表示精度
pd.options.display.float_format = '{:.4f}'.format
# 顯示資料框中的所有項目
pd.set_option("display.max_columns",None)
# 指定圖形的預設字體大小
plt.rcParams["font.size"] = 14
# 指定圖形的預設字型
plt.rcParams['font.family'] = 'Taipei Sans TC Beta'
# 隨機種子
random_seed = 2277

In [10]:
#載入資料、確認資料
df = pd.read_csv('stockData.csv')
column = ['指數代碼', '日期', '開盤價', '最高價', '最低價', '收盤價', '調整後收盤價', '交易量']
df.columns = column

display(df.head())
display(df.tail())

Unnamed: 0,指數代碼,日期,開盤價,最高價,最低價,收盤價,調整後收盤價,交易量
0,NYA,1965-12-31,528.69,528.69,528.69,528.69,528.69,0.0
1,NYA,1966-01-03,527.21,527.21,527.21,527.21,527.21,0.0
2,NYA,1966-01-04,527.84,527.84,527.84,527.84,527.84,0.0
3,NYA,1966-01-05,531.12,531.12,531.12,531.12,531.12,0.0
4,NYA,1966-01-06,532.07,532.07,532.07,532.07,532.07,0.0


Unnamed: 0,指數代碼,日期,開盤價,最高價,最低價,收盤價,調整後收盤價,交易量
112452,N100,2021-05-27,1241.12,1251.91,1241.12,1247.0699,1247.0699,379696400.0
112453,N100,2021-05-28,1249.47,1259.21,1249.03,1256.6,1256.6,160773400.0
112454,N100,2021-05-31,1256.08,1258.88,1248.14,1248.9301,1248.9301,91173700.0
112455,N100,2021-06-01,1254.61,1265.66,1254.61,1258.58,1258.58,155179900.0
112456,N100,2021-06-02,1258.49,1263.71,1258.24,1263.62,1263.62,148465000.0


In [None]:
# 確認遺失值、欲處理資料
missing_rows = df[df.isnull().any(axis=1)]
# 查看有遺失值的列(最後5列)
display(missing_rows.tail())
# 向前填充遺失值，同df.fillnu(method='ffill')
df_filled = df.ffill()
#df_filled.isnull().sum()


Unnamed: 0,指數代碼,日期,開盤價,最高價,最低價,收盤價,調整後收盤價,交易量
108316,N100,2005-03-28,,,,,,
108511,N100,2005-12-26,,,,,,
110051,N100,2012-01-02,,,,,,
110643,N100,2014-05-01,,,,,,
112090,N100,2019-12-25,,,,,,


In [None]:
# 欲處理資料
# 要使用Porphet需提取ds、y
df2 = df_filled.copy()
df2 = df2[['日期', '調整後收盤價']]
df2.columns = ['ds', 'y']
display(df2.head())

Unnamed: 0,ds,y
0,1965-12-31,528.69
1,1966-01-03,527.21
2,1966-01-04,527.84
3,1966-01-05,531.12
4,1966-01-06,532.07


In [22]:
# 分割(訓練train、驗證validation、測試test)
# 日期分割
train_end_date = pd.to_datetime('2005-12-31')
validation_end_date = pd.to_datetime('2015-12-31')

# 把2005-12-31(含)前的資料指派給train_df2
train_df2 = df2[df2['ds'] <= train_end_date]
# 把2015-12-31(含)前的資料指派給validation_df2
validation_df2 = df2[(df2['ds'] <= validation_end_date) & (df2['ds'] > train_end_date)] #注意pd中的and要用'&'代替，且是優先運算因子
# 把2015-12-31(不含)後的資料指派給test_df2
test_df2 = df2[df2['ds'] > validation_end_date]

display(train_df2.tail())
display(validation_df2.head())
display(validation_df2.tail())
display(test_df2.head())

Unnamed: 0,ds,y
108511,2005-12-26,815.3
108512,2005-12-27,817.45
108513,2005-12-28,815.5
108514,2005-12-29,818.37
108515,2005-12-30,810.35


Unnamed: 0,ds,y
10070,2006-01-03,7912.4102
10071,2006-01-04,7962.9399
10072,2006-01-05,7944.0601
10073,2006-01-06,8031.6602
10074,2006-01-09,8053.4199


Unnamed: 0,ds,y
111066,2015-12-23,910.94
111067,2015-12-24,910.01
111068,2015-12-28,902.45
111069,2015-12-29,918.11
111070,2015-12-30,914.11


Unnamed: 0,ds,y
12587,2016-01-04,10001.5596
12588,2016-01-05,10028.0596
12589,2016-01-06,9868.2598
12590,2016-01-07,9650.4199
12591,2016-01-08,9528.7695


In [31]:
#選擇演算法
from prophet import Prophet
model = Prophet(
    yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False,
    growth='linear', seasonality_mode='multiplicative', changepoint_prior_scale=0.5
)
model.fit(train_df2) # 訓練模型
print('模型已訓練完成')

22:26:15 - cmdstanpy - INFO - Chain [1] start processing
22:26:58 - cmdstanpy - INFO - Chain [1] done processing


模型已訓練完成


In [32]:
#使用驗證集來進行未來預測，Prophet 只需要 'ds' 欄位來預測
futuer_validation = validation_df2[['ds']].copy()
pred_validation = model.predict(futuer_validation) # 預測模型
display(pred_validation.head())

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2006-01-02,3337.1568,-969.8915,12355.9819,3337.1568,3337.1568,0.7525,0.7525,0.7525,0.7489,0.7489,0.7489,0.0036,0.0036,0.0036,0.0,0.0,0.0,5848.3102
1,2006-01-02,3337.1568,-726.9511,12249.5763,3337.1568,3337.1568,0.7525,0.7525,0.7525,0.7489,0.7489,0.7489,0.0036,0.0036,0.0036,0.0,0.0,0.0,5848.3102
2,2006-01-02,3337.1568,-1216.4884,12448.3443,3337.1568,3337.1568,0.7525,0.7525,0.7525,0.7489,0.7489,0.7489,0.0036,0.0036,0.0036,0.0,0.0,0.0,5848.3102
3,2006-01-02,3337.1568,-1062.5968,12701.185,3337.1568,3337.1568,0.7525,0.7525,0.7525,0.7489,0.7489,0.7489,0.0036,0.0036,0.0036,0.0,0.0,0.0,5848.3102
4,2006-01-02,3337.1568,-765.3775,13028.0413,3337.1568,3337.1568,0.7525,0.7525,0.7525,0.7489,0.7489,0.7489,0.0036,0.0036,0.0036,0.0,0.0,0.0,5848.3102


In [33]:
# 評估測試集結果 (最終性能報告)
df_validation_results = validation_df2.merge(
    pred_validation[['ds', 'yhat', 'yhat_lower', 'yhat_upper']],
    on='ds',
    how='left'
).rename(columns={'y': 'y_true'})

display(df_validation_results.head())


Unnamed: 0,ds,y_true,yhat,yhat_lower,yhat_upper
0,2006-01-03,7912.4102,5781.3492,-882.1839,12661.5108
1,2006-01-03,7912.4102,5781.3492,-1135.8576,12358.6446
2,2006-01-03,7912.4102,5781.3492,-993.4023,12741.5062
3,2006-01-03,7912.4102,5781.3492,-1268.302,12846.5322
4,2006-01-03,7912.4102,5781.3492,-1296.3603,12659.0873


In [34]:
# 準備計算R2值(決定係數)、RMSE(均方值誤差)
from sklearn.metrics import mean_squared_error, r2_score
y_true = df_validation_results['y_true']
y_pred = df_validation_results['yhat']

r2score = r2_score(y_true, y_pred)
print(f'驗證集 R2_score: {r2score:.4f}')
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print(f'驗證集 RMSE: {rmse:.4f}')

驗證集 R2_score: 0.0131
驗證集 RMSE: 8651.8427
