In [40]:
# 1. Import thư viện
from VNSfinance import history
import pandas as pd
import ta
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

In [41]:
# 2. Load dữ liệu cổ phiếu
df = history(symbol='PLX', start='2020-01-01', end=pd.Timestamp.today().strftime('%Y-%m-%d'), interval='ONE_DAY')
df.rename(columns={'time': 'ds', 'close': 'y'}, inplace=True)
df['ds'] = pd.to_datetime(df['ds'])
df.sort_values('ds', inplace=True)

In [42]:
df.head()

Unnamed: 0,symbol,ds,open,high,low,y,volume
678,PLX,2020-01-02,44.9321,45.73302,44.37145,45.25247,380760
679,PLX,2020-01-03,45.33256,46.53395,45.33256,45.73302,586600
680,PLX,2020-01-06,46.45386,46.45386,45.73302,45.89321,497990
681,PLX,2020-01-07,46.0534,46.0534,45.17237,45.25247,290780
682,PLX,2020-01-08,45.65293,45.65293,44.53163,44.61172,237220


In [43]:
# 3. Tính chỉ báo kỹ thuật
df['ema20'] = ta.trend.EMAIndicator(df['y'], 20).ema_indicator()
macd = ta.trend.MACD(df['y'])
df['macd'] = macd.macd()
df['macd_signal'] = macd.macd_signal()
df['adx'] = ta.trend.ADXIndicator(df['high'], df['low'], df['y']).adx()
df['psar'] = ta.trend.PSARIndicator(df['high'], df['low'], df['y']).psar()

df['rsi'] = ta.momentum.RSIIndicator(df['y']).rsi()
df['roc'] = ta.momentum.ROCIndicator(df['y']).roc()
df['cci'] = ta.trend.CCIIndicator(df['high'], df['low'], df['y']).cci()
df['williams_r'] = ta.momentum.WilliamsRIndicator(df['high'], df['low'], df['y']).williams_r()
df['stoch'] = ta.momentum.StochasticOscillator(df['high'], df['low'], df['y']).stoch()


df['obv'] = ta.volume.OnBalanceVolumeIndicator(df['y'], df['volume']).on_balance_volume()
df['mfi'] = ta.volume.MFIIndicator(df['high'], df['low'], df['y'], df['volume']).money_flow_index()
df['cmf'] = ta.volume.ChaikinMoneyFlowIndicator(df['high'], df['low'], df['y'], df['volume']).chaikin_money_flow()

df['atr'] = ta.volatility.AverageTrueRange(df['high'], df['low'], df['y']).average_true_range()
bb = ta.volatility.BollingerBands(df['y'])
df['bollinger_h'] = bb.bollinger_hband()
df['bollinger_l'] = bb.bollinger_lband()
dc = ta.volatility.DonchianChannel(df['high'], df['low'], df['y'])
df['donchian_h'] = dc.donchian_channel_hband()
df['donchian_l'] = dc.donchian_channel_lband()


In [44]:
# 4. Loại bỏ các dòng NaN do tính chỉ báo
df.dropna(inplace=True)

In [45]:
from sklearn.preprocessing import StandardScaler

# Chọn các cột chỉ báo để scale
features = ['ema20','psar','rsi','obv','bollinger_h','bollinger_l','atr','macd','macd_signal']

# Scale các chỉ báo
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

In [46]:
# 5. Chia tập train/test theo tỷ lệ 80:20
split_index = int(len(df) * 0.8)
train_df = df.iloc[:split_index]
test_df = df.iloc[split_index:]

In [47]:
from sklearn.preprocessing import MinMaxScaler

def prepare_scaled_data(df, top_features):
    # Chỉ scale các chỉ báo ngoại sinh
    scaler = MinMaxScaler()
    scaled_features = scaler.fit_transform(df[top_features])
    scaled_df = df[['y']].copy()
    for i, col in enumerate(top_features):
        scaled_df[col] = scaled_features[:, i]
    
    scaled_df['ds'] = df.index
    scaled_df = scaled_df[['ds', 'y'] + top_features]
    return scaled_df, scaler


In [48]:
from prophet.diagnostics import cross_validation, performance_metrics
from prophet import Prophet
from itertools import product
import random

# Define search space
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.05, 0.1, 0.5],
    'seasonality_prior_scale': [1.0, 5.0, 10.0, 20.0],
    'seasonality_mode': ['additive', 'multiplicative']
}

# Create a list of all possible combinations
all_params = [dict(zip(param_grid, v)) for v in product(*param_grid.values())]
random.shuffle(all_params)  # Optional: randomize to speed up

features = ['rsi', 'macd', 'macd_signal', 'ema20', 'roc', 'williams_r', 'cci']
best_params = None
best_rmse = float('inf')

print("🔍 Tuning hyperparameters...")

for params in all_params[:15]:  # kiểm tra 15 cấu hình đầu tiên
    model = Prophet(
        changepoint_prior_scale=params['changepoint_prior_scale'],
        seasonality_prior_scale=params['seasonality_prior_scale'],
        seasonality_mode=params['seasonality_mode'],
        daily_seasonality=False
    )
    
    for reg in features:
        model.add_regressor(reg)

    model.fit(train_df[['ds', 'y'] + features])

    # Cross-validation trên tập train
    df_cv = cross_validation(model, horizon='30 days', initial='365 days', period='90 days', parallel="processes")
    df_p = performance_metrics(df_cv, rolling_window=1)

    rmse = df_p['rmse'].values[0]

    print(f"Params: {params}, RMSE: {rmse:.2f}")
    
    if rmse < best_rmse:
        best_rmse = rmse
        best_params = params

print("🎯 Best Params Found:", best_params)


13:06:21 - cmdstanpy - INFO - Chain [1] start processing


🔍 Tuning hyperparameters...


13:06:21 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 1.0, 'seasonality_mode': 'additive'}, RMSE: 0.99


13:06:29 - cmdstanpy - INFO - Chain [1] start processing
13:06:30 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 5.0, 'seasonality_mode': 'additive'}, RMSE: 0.99


13:06:36 - cmdstanpy - INFO - Chain [1] start processing
13:06:36 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 10.0, 'seasonality_mode': 'multiplicative'}, RMSE: 0.58


13:06:43 - cmdstanpy - INFO - Chain [1] start processing
13:06:44 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 5.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.15


13:06:53 - cmdstanpy - INFO - Chain [1] start processing
13:06:53 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 1.0, 'seasonality_mode': 'multiplicative'}, RMSE: 0.58


13:07:00 - cmdstanpy - INFO - Chain [1] start processing
13:07:00 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 20.0, 'seasonality_mode': 'additive'}, RMSE: 0.70


13:07:05 - cmdstanpy - INFO - Chain [1] start processing
13:07:06 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 1.0, 'seasonality_mode': 'additive'}, RMSE: 0.60


13:07:12 - cmdstanpy - INFO - Chain [1] start processing
13:07:13 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 5.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.05


13:07:22 - cmdstanpy - INFO - Chain [1] start processing
13:07:24 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 20.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.13


13:07:32 - cmdstanpy - INFO - Chain [1] start processing
13:07:33 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.5, 'seasonality_prior_scale': 20.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.16


13:07:43 - cmdstanpy - INFO - Chain [1] start processing
13:07:43 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10.0, 'seasonality_mode': 'additive'}, RMSE: 0.71


13:07:48 - cmdstanpy - INFO - Chain [1] start processing
13:07:48 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 20.0, 'seasonality_mode': 'additive'}, RMSE: 0.60


13:07:54 - cmdstanpy - INFO - Chain [1] start processing
13:07:55 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.1, 'seasonality_prior_scale': 5.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.13


13:08:06 - cmdstanpy - INFO - Chain [1] start processing
13:08:06 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 10.0, 'seasonality_mode': 'multiplicative'}, RMSE: 1.03


13:08:15 - cmdstanpy - INFO - Chain [1] start processing
13:08:15 - cmdstanpy - INFO - Chain [1] done processing
Seasonality has period of 365.25 days which is larger than initial window. Consider increasing initial.


Params: {'changepoint_prior_scale': 0.05, 'seasonality_prior_scale': 10.0, 'seasonality_mode': 'additive'}, RMSE: 0.99
🎯 Best Params Found: {'changepoint_prior_scale': 0.001, 'seasonality_prior_scale': 10.0, 'seasonality_mode': 'multiplicative'}


In [49]:
from prophet import Prophet

model = Prophet(
    changepoint_prior_scale=best_params['changepoint_prior_scale'],
    seasonality_prior_scale=best_params['seasonality_prior_scale'],
    seasonality_mode=best_params['seasonality_mode'],
    daily_seasonality=False
)


In [50]:
for reg in features:
    model.add_regressor(reg)

In [51]:
# Huấn luyện mô hình
model.fit(train_df[['ds', 'y'] + features])

13:08:21 - cmdstanpy - INFO - Chain [1] start processing
13:08:21 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1d5378d94c0>

In [52]:
# Dự báo trên tập test
future = test_df[['ds'] + features]
forecast = model.predict(future)

In [53]:
# Gộp kết quả để đánh giá
result = test_df[['ds', 'y']].copy()
result['yhat'] = forecast['yhat'].values

In [54]:
# Đánh giá mô hình
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

mae = mean_absolute_error(result['y'], result['yhat'])
rmse = np.sqrt(mean_squared_error(result['y'], result['yhat']))
print(f"✅ MAE: {mae:.2f}, RMSE: {rmse:.2f}")

✅ MAE: 0.41, RMSE: 0.52
