In [12]:
# Import thư viện
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from datetime import datetime
import warnings
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import train_test_split, TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, VotingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [13]:
# Load data
stock_data = pd.read_csv('./data/XRPUSDT20182024new.csv')

# Tiền xử lý
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data.drop(columns=['Change %'], errors='ignore', inplace=True)
stock_data = stock_data.sort_values('Date').reset_index(drop=True)

In [14]:
stock_data

Unnamed: 0,Date,Price,Open,High,Low,Vol
0,2018-05-26,0.6088,0.6056,0.6247,0.5989,99110000.0
1,2018-05-27,0.6046,0.6088,0.6119,0.5988,95150000.0
2,2018-05-28,0.5476,0.6047,0.6075,0.5460,158340000.0
3,2018-05-29,0.5994,0.5476,0.6088,0.5444,155950000.0
4,2018-05-30,0.5938,0.5994,0.6255,0.5786,132830000.0
...,...,...,...,...,...,...
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0
2188,2024-05-23,0.5288,0.5269,0.5456,0.5091,948100000.0


In [15]:
def create_advanced_features(data):
    """
    Tạo các features kỹ thuật cho dự đoán giá cổ phiếu
    """
    feature_data = data.copy()
    
    # ==================== PRICE-BASED FEATURES ====================
    # Returns = % thay đổi giá
    # Đo lợi suất ngắn hạn
    # Thể hiện giá tăng hay giảm và mạnh hay yếu
    feature_data['Return_1d'] = feature_data['Price'].pct_change(1) * 100
    feature_data['Return_3d'] = feature_data['Price'].pct_change(3) * 100
    feature_data['Return_7d'] = feature_data['Price'].pct_change(7) * 100
    
    # Price ranges
    # HL_Range, HL_Range_Pct: độ giao động trong ngày, phản ánh sự giằng co mua bán -> Range lớn -> thị trường biến động mạnh dùng để đo risk/volatility
    feature_data['HL_Range'] = feature_data['High'] - feature_data['Low']
    feature_data['HL_Range_Pct'] = (feature_data['HL_Range'] / feature_data['Low']) * 100
    # OC_Range, OC_Range_Pct: giá bị đẩy mạnh hay bị bán mạnh trong ngày, giúp model nhận biết được ngày breakout/ panic
    feature_data['OC_Range'] = abs(feature_data['Open'] - feature_data['Price'])
    feature_data['OC_Range_Pct'] = (feature_data['OC_Range'] / feature_data['Open']) * 100
    
    # Price position: giá đóng cửa nằm ở đâu trong biên độ ngày: gần HIGH -> phe mua thắng , gần LOW -> phe bán thắng: Cô đọng tâm lý thị trường vào 1 số từ 0 -> 1 
    # Panic là cố gắng bán bằng mọi giá sống sót khi mà giá chứng khoáng giảm
    # Short Sell mượn cổ phiếu sau đó  bán rồi mua lại và trả lại cho công ty
    feature_data['Price_Position'] = (feature_data['Price'] - feature_data['Low']) / feature_data['HL_Range']
    
    # ==================== MOVING AVERAGES ====================
    # SMA: Trung bình giá k ngày : đại diện cho xu hướng ngắn/trung/dài hạn: Gía thường hồi về trung bình -> ML học được mối quan hệ: giá đang quá cao hay quá thấp hơn so với trend
    for period in [5, 7, 10, 14, 20, 30]:
        feature_data[f'SMA_{period}'] = feature_data['Price'].rolling(window=period).mean()
        feature_data[f'Price_to_SMA_{period}'] = (feature_data['Price'] / feature_data[f'SMA_{period}'] - 1) * 100
    
    # EMA phản ứng nhanh hơn SMA: thị trường tài chính thay đổi nhanh EMA giúp model bắt trend sớm
    for period in [5, 10, 20]:
        feature_data[f'EMA_{period}'] = feature_data['Price'].ewm(span=period, adjust=False).mean()
        feature_data[f'Price_to_EMA_{period}'] = (feature_data['Price'] / feature_data[f'EMA_{period}'] - 1) * 100
    
    # ==================== BOLLINGER BANDS ====================
    for period in [10, 20]:
        bb_middle = feature_data['Price'].rolling(window=period).mean()
        bb_std = feature_data['Price'].rolling(window=period).std()
        
        # biên trên/ dưới của giá bình thường
        feature_data[f'BB_Upper_{period}'] = bb_middle + (2 * bb_std)
        feature_data[f'BB_Lower_{period}'] = bb_middle - (2 * bb_std)
        # độ biến động của thị trường -> width lướn -> thị trường bất ổn
        feature_data[f'BB_Width_{period}'] = feature_data[f'BB_Upper_{period}'] - feature_data[f'BB_Lower_{period}']
        # giá đang ở đâu trong band: kết hợp giá + volatility
        feature_data[f'BB_Position_{period}'] = (feature_data['Price'] - feature_data[f'BB_Lower_{period}']) / feature_data[f'BB_Width_{period}']
    
    # ==================== MOMENTUM INDICATORS ====================
    # RSI: đó sức mạnh tăng/ giảm RSI >70: quá mua <30: quá bán rút gọn hành vi giá phức tạp thành 1 chỉ số
    for period in [7, 14]:
        price_delta = feature_data['Price'].diff()
        gain = (price_delta.where(price_delta > 0, 0)).rolling(window=period).mean()
        loss = (-price_delta.where(price_delta < 0, 0)).rolling(window=period).mean()
        rs_ratio = gain / loss
        feature_data[f'RSI_{period}'] = 100 - (100 / (1 + rs_ratio))
    
    # Stochastic : gái hiện tại so với min/max quá khứ -> giá đang ở đỉnh hay đáy cục bộ 
    for period in [14]:
        low_min = feature_data['Low'].rolling(window=period).min()
        high_max = feature_data['High'].rolling(window=period).max()
        feature_data[f'Stoch_{period}'] = 100 * (feature_data['Price'] - low_min) / (high_max - low_min)
    
    # ROC: tốc độ thay đổi giá: Moementum mạnh -> khả năng tiếp diễn xu hướng
    for period in [5, 10]:
        feature_data[f'ROC_{period}'] = ((feature_data['Price'] - feature_data['Price'].shift(period)) / feature_data['Price'].shift(period)) * 100
    
    # ==================== MACD ====================
    # So sánh xu hướng ngắn hạn và dài hạn
    ema_12 = feature_data['Price'].ewm(span=12, adjust=False).mean()
    ema_26 = feature_data['Price'].ewm(span=26, adjust=False).mean()
    feature_data['MACD'] = ema_12 - ema_26
    feature_data['MACD_Signal'] = feature_data['MACD'].ewm(span=9, adjust=False).mean()
    feature_data['MACD_Histogram'] = feature_data['MACD'] - feature_data['MACD_Signal']
    
    # ==================== VOLATILITY ====================
    for period in [5, 10, 20]:
        # Độ rủi ro ngắn hạn
        feature_data[f'Volatility_{period}'] = feature_data['Return_1d'].rolling(window=period).std()
        # Dao động trung bình thực tế
        # giá tăng 1% trong thị trường yên tĩnh khác với trong thị trường hỗn loạn
        feature_data[f'ATR_{period}'] = feature_data['HL_Range'].rolling(window=period).mean()
    
    # ==================== VOLUME FEATURES ====================
    # dòng tiền đột biến
    feature_data['Vol_Change'] = feature_data['Vol'].pct_change() * 100
    
    for period in [5, 10, 20]:
        feature_data[f'Vol_SMA_{period}'] = feature_data['Vol'].rolling(window=period).mean()
        # có tiền lớn vào hay không
        feature_data[f'Vol_Ratio_{period}'] = feature_data['Vol'] / feature_data[f'Vol_SMA_{period}']
    # giá tăng có được ủng hổ bởi volume không
    feature_data['Vol_Price_Corr_20'] = feature_data['Vol'].rolling(window=20).corr(feature_data['Price'])
    
    # ==================== LAG FEATURES ====================
    # TRÍ NHỚ QUÁ KHỨ
    for lag in [1, 2, 3, 5, 7]:
        # thị trường có tính phụ thuộc thời gian
        feature_data[f'Price_Lag_{lag}'] = feature_data['Price'].shift(lag)
        feature_data[f'Return_Lag_{lag}'] = feature_data['Return_1d'].shift(lag)
        feature_data[f'Vol_Lag_{lag}'] = feature_data['Vol'].shift(lag)
    
    # ==================== STATISTICAL FEATURES ====================
    # Mô tả phân phối giá trong một cửa sổ thời gian
    for period in [7, 14, 30]:
        feature_data[f'Price_Mean_{period}'] = feature_data['Price'].rolling(window=period).mean()
        feature_data[f'Price_Std_{period}'] = feature_data['Price'].rolling(window=period).std()
        feature_data[f'Price_Min_{period}'] = feature_data['Price'].rolling(window=period).min()
        feature_data[f'Price_Max_{period}'] = feature_data['Price'].rolling(window=period).max()
        feature_data[f'Price_Range_{period}'] = feature_data[f'Price_Max_{period}'] - feature_data[f'Price_Min_{period}']
    
    # ==================== TIME FEATURES ====================
    feature_data['Day_of_Week'] = feature_data['Date'].dt.dayofweek
    feature_data['Day_of_Month'] = feature_data['Date'].dt.day
    feature_data['Month'] = feature_data['Date'].dt.month
    feature_data['Quarter'] = feature_data['Date'].dt.quarter
    feature_data['Year'] = feature_data['Date'].dt.year
    
    # ==================== TARGET VARIABLE ====================
    # nhãn
    feature_data['Target_Price'] = feature_data['Price'].shift(-1)
    feature_data['Target_Return'] = ((feature_data['Target_Price'] - feature_data['Price']) / feature_data['Price']) * 100
    
    return feature_data

# Tạo features
feature_engineered_data = create_advanced_features(stock_data)

In [16]:
feature_engineered_data.drop(columns=['Target_Return'], errors='ignore', inplace=True)
feature_engineered_data

Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Std_30,Price_Min_30,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price
0,2018-05-26,0.6088,0.6056,0.6247,0.5989,99110000.0,,,,0.0258,...,,,,,5,26,5,2,2018,0.6046
1,2018-05-27,0.6046,0.6088,0.6119,0.5988,95150000.0,-0.689882,,,0.0131,...,,,,,6,27,5,2,2018,0.5476
2,2018-05-28,0.5476,0.6047,0.6075,0.5460,158340000.0,-9.427721,,,0.0615,...,,,,,0,28,5,2,2018,0.5994
3,2018-05-29,0.5994,0.5476,0.6088,0.5444,155950000.0,9.459459,-1.544021,,0.0644,...,,,,,1,29,5,2,2018,0.5938
4,2018-05-30,0.5938,0.5994,0.6255,0.5786,132830000.0,-0.934268,-1.786305,,0.0469,...,,,,,2,30,5,2,2018,0.6110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.013577,0.4993,0.5562,0.0569,0,20,5,2,2024,0.5372
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.440000,0.0244,...,0.013898,0.4993,0.5562,0.0569,1,21,5,2,2024,0.5268
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.502890,0.0146,...,0.012268,0.4993,0.5445,0.0452,2,22,5,2,2024,0.5288
2188,2024-05-23,0.5288,0.5269,0.5456,0.5091,948100000.0,0.379651,-1.655198,2.560124,0.0365,...,0.011496,0.4993,0.5400,0.0407,3,23,5,2,2024,0.5358


In [17]:
feature_engineered_data

Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Std_30,Price_Min_30,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price
0,2018-05-26,0.6088,0.6056,0.6247,0.5989,99110000.0,,,,0.0258,...,,,,,5,26,5,2,2018,0.6046
1,2018-05-27,0.6046,0.6088,0.6119,0.5988,95150000.0,-0.689882,,,0.0131,...,,,,,6,27,5,2,2018,0.5476
2,2018-05-28,0.5476,0.6047,0.6075,0.5460,158340000.0,-9.427721,,,0.0615,...,,,,,0,28,5,2,2018,0.5994
3,2018-05-29,0.5994,0.5476,0.6088,0.5444,155950000.0,9.459459,-1.544021,,0.0644,...,,,,,1,29,5,2,2018,0.5938
4,2018-05-30,0.5938,0.5994,0.6255,0.5786,132830000.0,-0.934268,-1.786305,,0.0469,...,,,,,2,30,5,2,2018,0.6110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.013577,0.4993,0.5562,0.0569,0,20,5,2,2024,0.5372
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.440000,0.0244,...,0.013898,0.4993,0.5562,0.0569,1,21,5,2,2024,0.5268
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.502890,0.0146,...,0.012268,0.4993,0.5445,0.0452,2,22,5,2,2024,0.5288
2188,2024-05-23,0.5288,0.5269,0.5456,0.5091,948100000.0,0.379651,-1.655198,2.560124,0.0365,...,0.011496,0.4993,0.5400,0.0407,3,23,5,2,2024,0.5358


In [18]:
# Kiểm tra feature_engineered_data
if 'feature_engineered_data' not in globals():
    raise ValueError("feature_engineered_data chưa được tạo. Chạy cell tạo features trước.")

# Loại bỏ NaN
cleaned_data = feature_engineered_data.dropna()

cleaned_data

Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Std_30,Price_Min_30,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price
29,2018-06-24,0.4745,0.4903,0.4953,0.4406,172040000.0,-3.123724,-11.158959,-9.705043,0.0547,...,0.062960,0.4745,0.6880,0.2135,6,24,6,2,2018,0.4808
30,2018-06-25,0.4808,0.4745,0.4914,0.4726,96460000.0,1.327713,-0.311010,-10.681776,0.0188,...,0.065589,0.4745,0.6880,0.2135,0,25,6,2,2018,0.4530
31,2018-06-26,0.4530,0.4809,0.4819,0.4517,71510000.0,-5.782030,-7.513271,-17.501366,0.0302,...,0.069432,0.4530,0.6880,0.2350,1,26,6,2,2018,0.4719
32,2018-06-27,0.4719,0.4530,0.4760,0.4486,101670000.0,4.172185,-0.547945,-12.594925,0.0274,...,0.071830,0.4530,0.6880,0.2350,2,27,6,2,2018,0.4479
33,2018-06-28,0.4479,0.4716,0.4743,0.4456,141220000.0,-5.085823,-6.842762,-16.139300,0.0287,...,0.075185,0.4479,0.6880,0.2401,3,28,6,2,2018,0.4553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2184,2024-05-19,0.5097,0.5213,0.5234,0.5071,209990000.0,-2.206447,-1.144298,2.082916,0.0163,...,0.013283,0.4993,0.5562,0.0569,6,19,5,2,2024,0.5377
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.013577,0.4993,0.5562,0.0569,0,20,5,2,2024,0.5372
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.440000,0.0244,...,0.013898,0.4993,0.5562,0.0569,1,21,5,2,2024,0.5268
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.502890,0.0146,...,0.012268,0.4993,0.5445,0.0452,2,22,5,2,2024,0.5288


In [19]:
# Kiểm tra dữ liệu sau khi dropna
if len(cleaned_data) == 0:
    raise ValueError(f"cleaned_data rỗng sau dropna(). feature_engineered_data có {len(feature_engineered_data)} dòng, nhưng tất cả đều có NaN.")

# Chọn features (loại bỏ Date, Target columns)
exclude_cols = ['Date', 'Target_Price', 'Target_Return']
feature_columns = [col for col in cleaned_data.columns if col not in exclude_cols]

# Prepare features and target
features = cleaned_data[feature_columns]
target = cleaned_data['Target_Price']

# Kiểm tra features và target
if len(features) == 0:
    raise ValueError(f"features rỗng. cleaned_data có {len(cleaned_data)} dòng.")
if len(target) == 0:
    raise ValueError(f"target rỗng.")

# Time-based split (không shuffle để giữ tính thời gian)
split_idx = int(len(features) * 0.5)
if split_idx == 0:
    raise ValueError(f"split_idx = 0. features chỉ có {len(features)} dòng, cần ít nhất 2 dòng để split.")

train_features, test_features = features[:split_idx], features[split_idx:]
train_target, test_target = target[:split_idx], target[split_idx:]

# Kiểm tra train và test sets
if len(train_features) == 0:
    raise ValueError(f"train_features rỗng. split_idx={split_idx}, features có {len(features)} dòng.")
if len(test_features) == 0:
    raise ValueError(f"test_features rỗng. split_idx={split_idx}, features có {len(features)} dòng.")

# Get date ranges for train and test
train_dates = cleaned_data.loc[train_features.index, 'Date']
test_dates = cleaned_data.loc[test_features.index, 'Date']

# Scaling
feature_scaler = RobustScaler()
scaled_train_features = feature_scaler.fit_transform(train_features)
scaled_test_features = feature_scaler.transform(test_features)

# Convert back to DataFrame
scaled_train_features = pd.DataFrame(scaled_train_features, columns=feature_columns, index=train_features.index)
scaled_test_features = pd.DataFrame(scaled_test_features, columns=feature_columns, index=test_features.index)

In [20]:
num_rows_with_nan = scaled_train_features.isna().any(axis=1).sum()
num_rows_with_nan

np.int64(0)

In [21]:
cleaned_data

Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Std_30,Price_Min_30,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price
29,2018-06-24,0.4745,0.4903,0.4953,0.4406,172040000.0,-3.123724,-11.158959,-9.705043,0.0547,...,0.062960,0.4745,0.6880,0.2135,6,24,6,2,2018,0.4808
30,2018-06-25,0.4808,0.4745,0.4914,0.4726,96460000.0,1.327713,-0.311010,-10.681776,0.0188,...,0.065589,0.4745,0.6880,0.2135,0,25,6,2,2018,0.4530
31,2018-06-26,0.4530,0.4809,0.4819,0.4517,71510000.0,-5.782030,-7.513271,-17.501366,0.0302,...,0.069432,0.4530,0.6880,0.2350,1,26,6,2,2018,0.4719
32,2018-06-27,0.4719,0.4530,0.4760,0.4486,101670000.0,4.172185,-0.547945,-12.594925,0.0274,...,0.071830,0.4530,0.6880,0.2350,2,27,6,2,2018,0.4479
33,2018-06-28,0.4479,0.4716,0.4743,0.4456,141220000.0,-5.085823,-6.842762,-16.139300,0.0287,...,0.075185,0.4479,0.6880,0.2401,3,28,6,2,2018,0.4553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2184,2024-05-19,0.5097,0.5213,0.5234,0.5071,209990000.0,-2.206447,-1.144298,2.082916,0.0163,...,0.013283,0.4993,0.5562,0.0569,6,19,5,2,2024,0.5377
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.013577,0.4993,0.5562,0.0569,0,20,5,2,2024,0.5372
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.440000,0.0244,...,0.013898,0.4993,0.5562,0.0569,1,21,5,2,2024,0.5268
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.502890,0.0146,...,0.012268,0.4993,0.5445,0.0452,2,22,5,2,2024,0.5288


In [22]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit
import numpy as np

scaled_train_features = scaled_train_features.dropna()
layer1_model = RandomForestRegressor(
    n_estimators=500,
    max_depth=8,
    min_samples_leaf=20,
    random_state=42,
    n_jobs=-1
)

layer1_model.fit(scaled_train_features, train_target)

cleaned_data.loc[train_features.index, 'RF_Pred_Tomorrow'] = layer1_model.predict(scaled_train_features)
cleaned_data.loc[test_features.index, 'RF_Pred_Tomorrow'] = layer1_model.predict(scaled_test_features)

# RF dự đoán Close[t+1] tại ngày t
# → dùng cho ngày t+1
cleaned_data['RF_Pred_Today'] = cleaned_data['RF_Pred_Tomorrow'].shift(1)

cleaned_data.tail()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_data.loc[train_features.index, 'RF_Pred_Tomorrow'] = layer1_model.predict(scaled_train_features)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_data['RF_Pred_Today'] = cleaned_data['RF_Pred_Tomorrow'].shift(1)


Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price,RF_Pred_Tomorrow,RF_Pred_Today
2184,2024-05-19,0.5097,0.5213,0.5234,0.5071,209990000.0,-2.206447,-1.144298,2.082916,0.0163,...,0.5562,0.0569,6,19,5,2,2024,0.5377,0.518655,0.523093
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.5562,0.0569,0,20,5,2,2024,0.5372,0.528885,0.518655
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.44,0.0244,...,0.5562,0.0569,1,21,5,2,2024,0.5268,0.528955,0.528885
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.50289,0.0146,...,0.5445,0.0452,2,22,5,2,2024,0.5288,0.527936,0.528955
2188,2024-05-23,0.5288,0.5269,0.5456,0.5091,948100000.0,0.379651,-1.655198,2.560124,0.0365,...,0.54,0.0407,3,23,5,2,2024,0.5358,0.527864,0.527936


In [23]:
new_row = {
    'Date': pd.to_datetime('2024-05-24'),
    'Price': 0.5358,
    'Open':  0.5288,
    'High':  0.5372,
    'Low':   0.5169,
    'Vol':   513880000
}

stock_data = pd.concat(
    [stock_data, pd.DataFrame([new_row])],
    ignore_index=True
)
feature_data = create_advanced_features(stock_data)

In [25]:
feature_data = feature_data.drop(feature_data.index[-1])

In [26]:
feature_data

Unnamed: 0,Date,Price,Open,High,Low,Vol,Return_1d,Return_3d,Return_7d,HL_Range,...,Price_Min_30,Price_Max_30,Price_Range_30,Day_of_Week,Day_of_Month,Month,Quarter,Year,Target_Price,Target_Return
0,2018-05-26,0.6088,0.6056,0.6247,0.5989,99110000.0,,,,0.0258,...,,,,5,26,5,2,2018,0.6046,-0.689882
1,2018-05-27,0.6046,0.6088,0.6119,0.5988,95150000.0,-0.689882,,,0.0131,...,,,,6,27,5,2,2018,0.5476,-9.427721
2,2018-05-28,0.5476,0.6047,0.6075,0.5460,158340000.0,-9.427721,,,0.0615,...,,,,0,28,5,2,2018,0.5994,9.459459
3,2018-05-29,0.5994,0.5476,0.6088,0.5444,155950000.0,9.459459,-1.544021,,0.0644,...,,,,1,29,5,2,2018,0.5938,-0.934268
4,2018-05-30,0.5938,0.5994,0.6255,0.5786,132830000.0,-0.934268,-1.786305,,0.0469,...,,,,2,30,5,2,2018,0.6110,2.896598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2185,2024-05-20,0.5377,0.5096,0.5379,0.5067,672450000.0,5.493428,2.732136,6.433096,0.0312,...,0.4993,0.5562,0.0569,0,20,5,2,2024,0.5372,-0.092989
2186,2024-05-21,0.5372,0.5377,0.5565,0.5321,812410000.0,-0.092989,3.069839,7.440000,0.0244,...,0.4993,0.5562,0.0569,1,21,5,2,2024,0.5268,-1.935964
2187,2024-05-22,0.5268,0.5372,0.5382,0.5236,509460000.0,-1.935964,3.354915,1.502890,0.0146,...,0.4993,0.5445,0.0452,2,22,5,2,2024,0.5288,0.379651
2188,2024-05-23,0.5288,0.5269,0.5456,0.5091,948100000.0,0.379651,-1.655198,2.560124,0.0365,...,0.4993,0.5400,0.0407,3,23,5,2,2024,0.5358,1.323752


In [27]:
latest_row = feature_data.iloc[-1]
latest_row = latest_row.drop('Target_Return')
latest_row

Date            2024-05-24 00:00:00
Price                        0.5358
Open                         0.5288
High                         0.5372
Low                          0.5169
                       ...         
Day_of_Month                     24
Month                             5
Quarter                           2
Year                           2024
Target_Price                 0.5358
Name: 2189, Length: 98, dtype: object

In [28]:
X_future = latest_row[feature_columns].to_frame().T
X_future_scaled = feature_scaler.transform(X_future)

pred_price_27_5 = layer1_model.predict(X_future_scaled)[0]

print(f"📌 Dự đoán giá cho ngày 25/05/2024: {pred_price_27_5:.4f}")


📌 Dự đoán giá cho ngày 25/05/2024: 0.5290


