In [1]:
# Block 1 — Login & Lấy dữ liệu tất cả HOSE/HNX/UPCOM
import pandas as pd
from FiinQuantX import FiinSession, BarDataUpdate

# --- Login ---
# Credentials must never be stored in the notebook: they are read from the
# environment (FIINQUANT_USERNAME / FIINQUANT_PASSWORD) and, when a variable is
# not set, requested interactively (the password prompt does not echo).
import os
from getpass import getpass

username = os.environ.get("FIINQUANT_USERNAME") or input("FiinQuant username: ")
password = os.environ.get("FIINQUANT_PASSWORD") or getpass("FiinQuant password: ")

client = FiinSession(
    username=username,
    password=password
).login()

# --- Ticker universe: the list returned for the "VNINDEX" code (HOSE) ---
tickers_hose = list(client.TickerList(ticker="VNINDEX"))
print(f"Số mã HOSE: {len(tickers_hose)}")

# --- Full daily history in a single request (can be heavy; batch if needed) ---
history_request = dict(
    realtime=False,
    tickers=tickers_hose,
    fields=['open','high','low','close','volume','bu','sd','fs','fn'],
    adjusted=True,
    by="1d",
    from_date="2023-01-01",   # backtest window starts in 2023
)
event_history = client.Fetch_Trading_Data(**history_request)

df_all = event_history.get_data()
print("History ban đầu:", df_all.head())

# --- Callback realtime ---
def onDataUpdate(data: BarDataUpdate):
    """Realtime callback: fold one bar update into the global ``df_all``.

    The update is converted with ``data.to_dataFrame()`` and appended to
    ``df_all``. Rows are then de-duplicated on the bar key
    ``(ticker, timestamp)``, keeping the most recent row, so a bar that is
    re-sent with changed values replaces the stored one instead of piling up
    next to it (whole-row de-duplication only removes byte-identical rows).

    Parameters
    ----------
    data : BarDataUpdate
        Update object delivered by the FiinQuantX realtime stream.
    """
    global df_all
    df_update = data.to_dataFrame()
    combined = pd.concat([df_all, df_update], ignore_index=True)

    # Use the bar key when both frames expose it; otherwise fall back to the
    # original whole-row comparison rather than failing inside the callback.
    bar_key = ["ticker", "timestamp"]
    if all(col in combined.columns for col in bar_key):
        combined = combined.drop_duplicates(subset=bar_key, keep="last")
    else:
        combined = combined.drop_duplicates()
    df_all = combined.reset_index(drop=True)

    print("Realtime update:")
    print(df_update.head())

# --- Realtime subscription that continues the historical series ---
# Same tickers/fields as the history request; every update is handed to
# onDataUpdate.
realtime_request = dict(
    realtime=True,
    tickers=tickers_hose,
    fields=['open','high','low','close','volume','bu','sd','fs','fn'],
    adjusted=True,
    by="1d",
    period=1,
    callback=onDataUpdate,
)
event_realtime = client.Fetch_Trading_Data(**realtime_request)


Số mã HOSE: 415
Fetching data, it may take a while. Please wait...
History ban đầu:   ticker         timestamp      open      high       low     close     volume  \
0    AAA  2023-01-03 00:00  6539.643  6866.145  6539.643  6866.145  1543984.0   
1    AAA  2023-01-04 00:00  6866.145  7000.587  6827.733  6827.733  1302505.0   
2    AAA  2023-01-05 00:00  6866.145  6904.557  6808.527  6885.351   980473.0   
3    AAA  2023-01-06 00:00  6885.351  6990.984  6818.130  6856.542  1431699.0   
4    AAA  2023-01-09 00:00  6914.160  6962.175  6760.512  6789.321  1121385.0   

         bu        sd           fs           fn  
0  938600.0  504700.0   40579000.0  899404000.0  
1  462900.0  780600.0  151639000.0   36850000.0  
2  487200.0  473700.0  343911000.0  -59103000.0  
3  564300.0  828300.0  345999000.0 -294312000.0  
4  414000.0  631800.0  514557000.0 -483197000.0  


In [2]:
# Block 2 — Lấy dữ liệu FA theo quý (HOSE only)

def fetch_fa_quarterly(ticker, latest_year=2025, n_periods=32):
    """Fetch consolidated quarterly ratio records for one ticker.

    Calls ``client.FundamentalAnalysis().get_ratios`` and turns the returned
    list into a DataFrame with two extra columns: ``ticker`` and
    ``ReportDate`` (parsed to datetime when the API provides it, ``NaT``
    otherwise so later concatenation sees a uniform schema).

    Returns an empty DataFrame when the API returns nothing usable or when any
    exception is raised (the error is printed, not re-raised).
    """
    try:
        raw = client.FundamentalAnalysis().get_ratios(
            tickers=[ticker],
            TimeFilter="Quarterly",
            LatestYear=latest_year,
            NumberOfPeriod=n_periods,
            Consolidated=True
        )

        # Nothing returned, or not the expected list payload: skip this ticker.
        if not raw or not isinstance(raw, list):
            return pd.DataFrame()

        frame = pd.DataFrame(raw)
        if frame.empty:
            return pd.DataFrame()

        frame["ticker"] = ticker
        has_report_date = "ReportDate" in frame.columns
        # Without ReportDate, create a null column so concat does not break.
        frame["ReportDate"] = pd.to_datetime(frame["ReportDate"]) if has_report_date else pd.NaT
        return frame

    except Exception as e:
        print(f"⚠️ Lỗi khi lấy FA cho {ticker}: {e}")
        return pd.DataFrame()


# --- Fetch FA for every HOSE ticker (list from Block 1); keep non-empty results ---
fetched = [
    (symbol, fetch_fa_quarterly(symbol, latest_year=2025, n_periods=32))
    for symbol in tickers_hose
]
with_data = [(symbol, frame) for symbol, frame in fetched if not frame.empty]

valid_tickers = [symbol for symbol, _ in with_data]
fa_list = [frame for _, frame in with_data]

# --- Stack the per-ticker frames into one table ---
fa_data = pd.concat(fa_list, ignore_index=True) if fa_list else pd.DataFrame()

print(f"Số mã HOSE ban đầu: {len(tickers_hose)}")
print(f"Số mã có dữ liệu FA: {len(valid_tickers)}")
print("FA Data sample:")
print(fa_data.head())


⚠️ Lỗi khi lấy FA cho FUETPVND: 'FUETPVND'
Số mã HOSE ban đầu: 415
Số mã có dữ liệu FA: 392
FA Data sample:
   organizationId ticker  year  quarter  \
0          894364    CCC  2023        4   
1          894364    CCC  2024        1   
2          894364    CCC  2024        2   
3          894364    CCC  2024        3   
4          894364    CCC  2024        4   

                                              ratios ReportDate  
0  {'SolvencyRatio': {'DebtToEquityRatio': 1.5102...        NaT  
1  {'SolvencyRatio': {'DebtToEquityRatio': 0.7722...        NaT  
2  {'SolvencyRatio': {'DebtToEquityRatio': 0.7357...        NaT  
3  {'SolvencyRatio': {'DebtToEquityRatio': 0.7914...        NaT  
4  {'SolvencyRatio': {'DebtToEquityRatio': 0.6437...        NaT  


In [3]:
# Block 3 — Chuẩn hoá FA + Merge với giá (HOSE only, dựa theo Block 2)

import pandas as pd

# --- Fundamental ratios to pull out of the nested `ratios` payload ---
fa_fields = [
    "DebtToEquityRatio",
    "EBITMargin",
    "ROA",
    "ROE",
    "ROIC",
    "BasicEPS",
    "PriceToBook",
    "PriceToEarning",
    "NetRevenueGrowthYoY",
    "GrossProfitGrowthYoY",
]

# --- Hàm nổ ratios ---
def explode_ratios(df, fa_fields):
    """Flatten the nested ``ratios`` column into one column per requested field.

    Each input row yields one record with ``ticker``, ``fa_year``,
    ``fa_quarter`` (year/quarter cast to int) followed by every name in
    ``fa_fields``. A field is searched in every dict-valued section of the
    row's ``ratios`` dict; when several sections contain it, the last one
    wins. Fields that are not found, and all fields of rows whose ``ratios``
    is not a dict, are ``None``.
    """
    def _lookup(ratios, field):
        # Scan every section without stopping, so a later match overrides an
        # earlier one.
        found = None
        if isinstance(ratios, dict):
            for section in ratios.values():
                if isinstance(section, dict) and field in section:
                    found = section[field]
        return found

    rows = []
    for _, row in df.iterrows():
        ratios = row.get("ratios", {})
        record = {
            "ticker": row["ticker"],
            "fa_year": int(row["year"]),
            "fa_quarter": int(row["quarter"]),
        }
        for field in fa_fields:
            record[field] = _lookup(ratios, field)
        rows.append(record)
    return pd.DataFrame(rows)

fa_keys = ["ticker", "fa_year", "fa_quarter"]

# --- FA: flatten the ratios, then keep a single (last) row per ticker-quarter ---
fa_clean = (
    explode_ratios(fa_data, fa_fields)
    .sort_values(fa_keys)
    .drop_duplicates(subset=fa_keys, keep="last")
)

# --- Prices: tickers that have FA data, parsed timestamps, sorted by time ---
df_price = df_all.loc[df_all["ticker"].isin(valid_tickers)].copy()
df_price["timestamp"] = pd.to_datetime(df_price["timestamp"])
df_price = df_price.sort_values(["ticker", "timestamp"])

# Join key = the quarter BEFORE the bar's own quarter.
# NOTE(review): this treats the previous quarter's figures as known from the
# first day of the next quarter — confirm against actual publication dates.
prev_quarter = df_price["timestamp"].dt.to_period("Q") - 1
df_price = df_price.assign(
    fa_year=prev_quarter.dt.year.astype(int),
    fa_quarter=prev_quarter.dt.quarter.astype(int),
)

# --- Left-join FA onto prices, then forward-fill FA gaps within each ticker ---
df_merged = (
    df_price.merge(fa_clean, on=fa_keys, how="left")
            .sort_values(["ticker", "timestamp"])
)
df_merged[fa_fields] = df_merged.groupby("ticker")[fa_fields].ffill()

print("Sample merged:")
print(df_merged.head())
print("Số mã merge thành công:", df_merged["ticker"].nunique())




Sample merged:
  ticker  timestamp      open      high       low     close     volume  \
0    AAA 2023-01-03  6539.643  6866.145  6539.643  6866.145  1543984.0   
1    AAA 2023-01-04  6866.145  7000.587  6827.733  6827.733  1302505.0   
2    AAA 2023-01-05  6866.145  6904.557  6808.527  6885.351   980473.0   
3    AAA 2023-01-06  6885.351  6990.984  6818.130  6856.542  1431699.0   
4    AAA 2023-01-09  6914.160  6962.175  6760.512  6789.321  1121385.0   

         bu        sd           fs  ...  DebtToEquityRatio  EBITMargin  \
0  938600.0  504700.0   40579000.0  ...           0.507521   -0.049731   
1  462900.0  780600.0  151639000.0  ...           0.507521   -0.049731   
2  487200.0  473700.0  343911000.0  ...           0.507521   -0.049731   
3  564300.0  828300.0  345999000.0  ...           0.507521   -0.049731   
4  414000.0  631800.0  514557000.0  ...           0.507521   -0.049731   

        ROA       ROE      ROIC    BasicEPS  PriceToBook  PriceToEarning  \
0  0.014669  0.0295

In [4]:
import gc
# Drop the raw history frame to free kernel memory; df_price and df_merged
# were already derived from it in Block 3.
# NOTE(review): onDataUpdate declares `global df_all`; if the realtime callback
# runs after this cell the name no longer exists — confirm this is intended.
del df_all
gc.collect()


0

In [5]:
# Block 4 — Tính các chỉ số TA (trên df_merged từ Block 3)

import pandas as pd

# --- Create the indicator helper from the logged-in session ---
fi = client.FiinIndicator()

# --- Hàm tính TA theo từng ticker ---
def add_ta_indicators(df):
    """Return a time-sorted copy of ``df`` with technical-indicator columns added.

    Intended to be applied per ticker (see the groupby below). All indicators
    are computed by the module-level ``fi`` helper from the ``close``,
    ``high``, ``low`` and ``volume`` columns. Added columns, in order:
    ema_5/20/50, macd, macd_signal, macd_diff, rsi, bollinger_hband,
    bollinger_lband, atr, obv, vwap.
    """
    out = df.sort_values("timestamp").copy()
    close, high, low, volume = out['close'], out['high'], out['low'], out['volume']

    # Exponential moving averages
    for span in (5, 20, 50):
        out[f'ema_{span}'] = fi.ema(close, window=span)

    # MACD line, signal line and their difference (12/26/9)
    out['macd']        = fi.macd(close, window_fast=12, window_slow=26)
    out['macd_signal'] = fi.macd_signal(close, window_fast=12, window_slow=26, window_sign=9)
    out['macd_diff']   = fi.macd_diff(close, window_fast=12, window_slow=26, window_sign=9)

    # RSI (14)
    out['rsi'] = fi.rsi(close, window=14)

    # Bollinger bands (20, 2 deviations)
    for band, band_fn in (('bollinger_hband', fi.bollinger_hband),
                          ('bollinger_lband', fi.bollinger_lband)):
        out[band] = band_fn(close, window=20, window_dev=2)

    # ATR (14), OBV, VWAP (14)
    out['atr']  = fi.atr(high, low, close, window=14)
    out['obv']  = fi.obv(close, volume)
    out['vwap'] = fi.vwap(high, low, close, volume, window=14)

    return out

# --- Apply to all of df_merged, one ticker group at a time ---
df_with_ta = df_merged.groupby("ticker", group_keys=False).apply(add_ta_indicators)

print("Sample with TA:")
print(df_with_ta.head())
print("Shape sau khi thêm TA:", df_with_ta.shape)


Sample with TA:
  ticker  timestamp      open      high       low     close     volume  \
0    AAA 2023-01-03  6539.643  6866.145  6539.643  6866.145  1543984.0   
1    AAA 2023-01-04  6866.145  7000.587  6827.733  6827.733  1302505.0   
2    AAA 2023-01-05  6866.145  6904.557  6808.527  6885.351   980473.0   
3    AAA 2023-01-06  6885.351  6990.984  6818.130  6856.542  1431699.0   
4    AAA 2023-01-09  6914.160  6962.175  6760.512  6789.321  1121385.0   

         bu        sd           fs  ...  ema_50  macd  macd_signal  macd_diff  \
0  938600.0  504700.0   40579000.0  ...     NaN   NaN          NaN        NaN   
1  462900.0  780600.0  151639000.0  ...     NaN   NaN          NaN        NaN   
2  487200.0  473700.0  343911000.0  ...     NaN   NaN          NaN        NaN   
3  564300.0  828300.0  345999000.0  ...     NaN   NaN          NaN        NaN   
4  414000.0  631800.0  514557000.0  ...     NaN   NaN          NaN        NaN   

   rsi  bollinger_hband  bollinger_lband  atr       

In [6]:
import gc
# df_with_ta (Block 4) now carries everything from df_merged; release the latter.
del df_merged
gc.collect()

31

In [7]:
# Block 5 — Feature engineering & scaling

import numpy as np

# --- Column names of the fundamental (FA) and technical (TA) features ---
fa_features = [
    "DebtToEquityRatio",
    "EBITMargin",
    "ROA",
    "ROE",
    "ROIC",
    "BasicEPS",
    "PriceToBook",
    "PriceToEarning",
    "NetRevenueGrowthYoY",
    "GrossProfitGrowthYoY",
]

ta_features = (
    ["ema_5", "ema_20", "ema_50"]
    + ["macd", "macd_signal", "macd_diff"]
    + ["rsi", "bollinger_hband", "bollinger_lband"]
    + ["atr", "obv", "vwap"]
)

# --- Chuẩn hoá FA: cross-section min-max scaling theo ngày ---
def scale_fa_minmax(df):
    """Min-max scale every column in ``fa_features`` within the given frame.

    Applied per timestamp group below, so each value is rescaled to [0, 1]
    relative to the other rows of the same day. A column whose min/max are
    not finite, or whose max does not exceed its min, is set to NaN for the
    whole group. Returns a scaled copy; ``df`` itself is not modified.
    """
    scaled = df.copy()
    for col in fa_features:
        values = df[col].astype(float)
        lo = values.min()
        hi = values.max()
        usable = np.isfinite(lo) and np.isfinite(hi) and hi > lo
        scaled[col] = (values - lo) / (hi - lo) if usable else np.nan
    return scaled

# Cross-sectional scaling: scale_fa_minmax runs once per timestamp group.
df_scaled_fa = df_with_ta.groupby("timestamp", group_keys=False).apply(scale_fa_minmax)

# --- Chuẩn hoá TA: rolling z-score theo từng ticker ---
def zscore_rolling(series, window=60):
    """Rolling z-score: (value - rolling mean) / rolling std over ``window`` rows.

    The first ``window - 1`` positions are NaN because the rolling statistics
    are not yet defined there.
    """
    rolling = series.rolling(window)
    centered = series - rolling.mean()
    return centered / rolling.std()

def _with_ta_zscores(ticker_frame):
    """Add a 60-row rolling z-score column (``<name>_z``) for every TA feature."""
    z_columns = {}
    for col in ta_features:
        z_columns[f"{col}_z"] = zscore_rolling(ticker_frame[col], 60)
    return ticker_frame.assign(**z_columns)

df_scaled = df_scaled_fa.groupby("ticker", group_keys=False).apply(_with_ta_zscores)

# --- Keep identifiers, scaled FA and z-scored TA; raw TA columns are dropped ---
z_names = [f"{col}_z" for col in ta_features]
keep_cols = ["ticker", "timestamp", *fa_features, *z_names]
df_features = df_scaled.loc[:, keep_cols].dropna().reset_index(drop=True)

print("Sample features:")
print(df_features.head())
print("Shape sau khi scaling & dropna:", df_features.shape)


Sample features:
  ticker  timestamp  DebtToEquityRatio  EBITMargin       ROA       ROE  \
0    AAA 2023-06-14           0.559115    0.978956  0.368731  0.415728   
1    AAA 2023-06-15           0.559115    0.978956  0.368731  0.415728   
2    AAA 2023-06-16           0.559115    0.978956  0.368731  0.415728   
3    AAA 2023-06-19           0.559115    0.978956  0.368731  0.415728   
4    AAA 2023-06-20           0.559115    0.978956  0.368731  0.415728   

     ROIC  BasicEPS  PriceToBook  PriceToEarning  ...  ema_50_z    macd_z  \
0  0.7533  0.158046      0.37764        0.537353  ...  1.799046 -0.008320   
1  0.7533  0.158046      0.37764        0.537353  ...  1.761009 -0.314808   
2  0.7533  0.158046      0.37764        0.537353  ...  1.701399 -0.891565   
3  0.7533  0.158046      0.37764        0.537353  ...  1.651219 -1.278916   
4  0.7533  0.158046      0.37764        0.537353  ...  1.609327 -1.509982   

   macd_signal_z  macd_diff_z     rsi_z  bollinger_hband_z  bollinger_lband

In [8]:
# Only df_features is needed from here on; release the intermediate frames.
del df_with_ta, df_scaled, df_scaled_fa
gc.collect()


0

In [10]:
# Block 6 — Dimensionality reduction & Clustering (t-SNE + DBSCAN)

from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

# --- Feature columns used for clustering: scaled FA ratios + z-scored TA ---
feature_cols = [
    "DebtToEquityRatio","EBITMargin","ROA","ROE","ROIC",
    "BasicEPS","PriceToBook","PriceToEarning",
    "NetRevenueGrowthYoY","GrossProfitGrowthYoY"
] + [c for c in df_features.columns if c.endswith("_z")]

# --- Month key: one clustering snapshot per calendar month ---
df_features["month"] = df_features["timestamp"].dt.to_period("M")

MIN_ROWS_PER_MONTH = 10   # months with fewer rows are skipped
TSNE_PERPLEXITY = 30      # upper bound; capped per month below

cluster_results = []

# Each row of a monthly group is one (ticker, day) observation.
# NOTE(review): DBSCAN is fitted independently per month, so the same label
# value in two different months does not denote the same group — confirm that
# downstream code grouping by `cluster` across months is intended.
for (month, g) in df_features.groupby("month"):
    n_rows = len(g)
    if n_rows < MIN_ROWS_PER_MONTH:
        continue

    X = g[feature_cols].values

    # t-SNE requires perplexity < n_samples; cap it so small months
    # (10..30 rows) are embedded instead of raising.
    perplexity = min(TSNE_PERPLEXITY, n_rows - 1)

    # --- t-SNE: project to 2-D ---
    tsne = TSNE(n_components=2, perplexity=perplexity, learning_rate="auto",
                init="random", random_state=42)
    X_emb = tsne.fit_transform(X)

    # --- DBSCAN on the embedding (-1 = noise) ---
    db = DBSCAN(eps=0.5, min_samples=5).fit(X_emb)
    labels = db.labels_

    temp = g[["ticker","timestamp"]].copy()
    temp["cluster"] = labels
    temp["tsne_x"] = X_emb[:,0]
    temp["tsne_y"] = X_emb[:,1]
    temp["month"]  = str(month)

    cluster_results.append(temp)

# pd.concat fails with an opaque message on an empty list; fail clearly instead.
if not cluster_results:
    raise ValueError(
        f"No month has at least {MIN_ROWS_PER_MONTH} feature rows; nothing to cluster."
    )

df_clusters = pd.concat(cluster_results, ignore_index=True)

print("Cluster sample:")
print(df_clusters.head())
print("Số cụm mỗi tháng:")
print(df_clusters.groupby("month")["cluster"].nunique())


Cluster sample:
  ticker  timestamp  cluster     tsne_x     tsne_y    month
0    AAA 2023-06-14       -1  34.417667 -21.641058  2023-06
1    AAA 2023-06-15       -1  34.361568 -21.848602  2023-06
2    AAA 2023-06-16       -1  35.918636 -42.906532  2023-06
3    AAA 2023-06-19       -1  35.490566 -42.649422  2023-06
4    AAA 2023-06-20       -1  34.553066 -42.459969  2023-06
Số cụm mỗi tháng:
month
2023-06     16
2023-07     65
2023-08     80
2023-09     39
2023-10     82
2023-11     55
2023-12     84
2024-01     83
2024-02     33
2024-03     66
2024-04     44
2024-05     67
2024-06     92
2024-07     88
2024-08     65
2024-09     88
2024-10    107
2024-11     77
2024-12     71
2025-01     48
2025-02     70
2025-03     92
2025-04     51
2025-05     76
2025-06     96
2025-07    109
2025-08     72
Name: cluster, dtype: int64


In [11]:
#block 7
import numpy as np
import pandas as pd
import os, gc, json

LOOKBACK = 64   # number of consecutive rows (trading days) per sample window
DATA_DIR = "./tensors/"
os.makedirs(DATA_DIR, exist_ok=True)

# --- Feature columns: everything in df_features except identifiers/labels ---
feature_cols = [c for c in df_features.columns if c not in ["ticker","timestamp","cluster","month"]]

tensor_index = []

# --- One tensor file (plus a missing-data mask) per DBSCAN cluster id ---
for c_id, g in df_clusters.groupby("cluster"):
    if c_id == -1:   # DBSCAN noise: skip
        continue

    tickers = sorted(g["ticker"].unique())
    g_feat = df_features[df_features["ticker"].isin(tickers)]

    # pivot(values=[...]) returns columns ordered (feature, ticker). The
    # reshape to (T, N, F) below needs ticker-major order, so the levels are
    # swapped and the columns explicitly reordered to (ticker, feature).
    # Merely assigning a new MultiIndex would relabel the columns without
    # moving the data and scramble tickers with features.
    pivoted = g_feat.pivot(index="timestamp", columns="ticker", values=feature_cols)
    pivoted = pivoted.swaplevel(0, 1, axis=1).reindex(
        columns=pd.MultiIndex.from_product([tickers, feature_cols])
    )

    # Mask: True/1 where an observation exists, 0 where it was missing.
    mask_df = ~pivoted.isna()

    # Fill gaps so the array is dense (the mask keeps the missing-data info).
    # NOTE(review): bfill copies later values into earlier rows — confirm the
    # consumer honours the mask to avoid look-ahead.
    pivoted_filled = pivoted.ffill().bfill()

    T, N, F = len(pivoted_filled.index), len(tickers), len(feature_cols)

    X = pivoted_filled.values.reshape(T, N, F)
    M = mask_df.values.reshape(T, N, F).astype(int)

    # Window i covers rows [i-LOOKBACK, i) and is labelled with the date of row i.
    if T > LOOKBACK:
        X_arr = np.stack([X[i-LOOKBACK:i] for i in range(LOOKBACK, T)])
        M_arr = np.stack([M[i-LOOKBACK:i] for i in range(LOOKBACK, T)])

        # Save files
        tensor_file = f"cluster_{c_id}_tensor.npy"
        mask_file   = f"cluster_{c_id}_mask.npy"
        np.save(os.path.join(DATA_DIR, tensor_file), X_arr)
        np.save(os.path.join(DATA_DIR, mask_file), M_arr)

        tensor_index.append({
            "cluster": int(c_id),
            "tickers": tickers,
            "dates": [str(d) for d in pivoted_filled.index[LOOKBACK:]],
            "tensor_file": tensor_file,
            "mask_file": mask_file
        })

        print(f"Cluster {c_id}: tensor {X_arr.shape}, mask {M_arr.shape} saved.")
        del X_arr, M_arr

    # Free RAM before the next cluster
    del g_feat, pivoted, pivoted_filled, mask_df, X, M
    gc.collect()

# --- Save metadata ---
with open(os.path.join(DATA_DIR, "tensor_index.json"), "w") as f:
    json.dump(tensor_index, f, indent=2)

print("✅ Done Block 7: tensors + masks saved for all clusters.")

# df_features is no longer needed after Block 7; drop it to save RAM
del df_features
gc.collect()


Cluster 0: tensor (490, 64, 21, 22), mask (490, 64, 21, 22) saved.
Cluster 1: tensor (490, 64, 14, 22), mask (490, 64, 14, 22) saved.
Cluster 2: tensor (490, 64, 15, 22), mask (490, 64, 15, 22) saved.
Cluster 3: tensor (490, 64, 15, 22), mask (490, 64, 15, 22) saved.
Cluster 4: tensor (490, 64, 22, 22), mask (490, 64, 22, 22) saved.
Cluster 5: tensor (490, 64, 23, 22), mask (490, 64, 23, 22) saved.
Cluster 6: tensor (490, 64, 23, 22), mask (490, 64, 23, 22) saved.
Cluster 7: tensor (490, 64, 26, 22), mask (490, 64, 26, 22) saved.
Cluster 8: tensor (490, 64, 20, 22), mask (490, 64, 20, 22) saved.
Cluster 9: tensor (490, 64, 21, 22), mask (490, 64, 21, 22) saved.
Cluster 10: tensor (490, 64, 23, 22), mask (490, 64, 23, 22) saved.
Cluster 11: tensor (490, 64, 23, 22), mask (490, 64, 23, 22) saved.
Cluster 12: tensor (490, 64, 29, 22), mask (490, 64, 29, 22) saved.
Cluster 13: tensor (490, 64, 21, 22), mask (490, 64, 21, 22) saved.
Cluster 14: tensor (490, 64, 24, 22), mask (490, 64, 24, 2

33

In [None]:
# Block 7.5 — Chuẩn bị dữ liệu backtest cho reward thật
import gc

# Keep only what the reward computation needs: the close price per
# (ticker, timestamp). df_price is the FA-filtered price frame built in Block 3.
# The timestamp column is (re)parsed to datetime so later joins line up.
df_backtest = df_price[["ticker", "timestamp", "close"]].assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"])
)

print("✅ Done Block 7.5: df_backtest sẵn sàng cho reward.")
print("Kích thước df_backtest:", df_backtest.shape)
print("Tickers unique:", df_backtest["ticker"].nunique())

# Release the wide price frame; only df_backtest is used from here on.
del df_price
gc.collect()


⚠️ Không lấy được benchmarks: FiinSession.Fetch_Trading_Data() missing 2 required positional arguments: 'tickers' and 'fields'
✅ Done Block 7.5: df_backtest sẵn sàng cho reward.
Kích thước df_backtest: (256151, 3)
Tickers unique: 391


1052

In [None]:
# Block 8 — A3C multi-stock per-cluster
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import json, os, gc

# Directory holding the .npy tensors and tensor_index.json.
DATA_DIR = "./tensors/"
# Use the GPU when torch reports one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Load the per-cluster metadata list from tensor_index.json ---
with open(os.path.join(DATA_DIR, "tensor_index.json"), "r") as f:
    tensor_index = json.load(f)

# --- Actor-Critic network ---
class A3CClusterNet(nn.Module):
    """LSTM encoder with two linear heads: a 3-way policy and a scalar value.

    ``forward`` takes ``x`` of shape (batch, T, F) and returns
    ``(policy_logits, value)`` with shapes (batch, 3) and (batch, 1). The
    three logits correspond to the actions mapped to -1 / 0 / +1 downstream.
    """

    def __init__(self, n_features, hidden_size=64):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden_size, batch_first=True)
        self.actor_head = nn.Linear(in_features=hidden_size, out_features=3)
        self.critic_head = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        # Only the LSTM output at the final time step feeds both heads.
        sequence_out = self.lstm(x)[0]
        last_step = sequence_out[:, -1, :]
        policy_logits = self.actor_head(last_step)
        state_value = self.critic_head(last_step)
        return policy_logits, state_value

# --- Loss function ---
def a3c_loss(policy_logits, values, actions, rewards, gamma=0.99, entropy_beta=0.01):
    """Combined actor-critic loss: actor + 0.5 * critic - entropy_beta * entropy.

    * advantage = rewards - values (last dim of ``values`` squeezed)
    * critic term = mean squared advantage
    * actor term = -mean(log pi(action) * advantage), advantage detached
    * entropy = mean policy entropy, subtracted as an exploration bonus

    ``gamma`` is accepted for signature compatibility but is not used by the
    computation.
    """
    log_pi = torch.log_softmax(policy_logits, dim=-1)
    pi = torch.softmax(policy_logits, dim=-1)

    advantage = rewards - values.squeeze(-1)
    value_term = advantage.pow(2).mean()

    # Log-probability of the action actually taken for each sample.
    taken_log_pi = log_pi.gather(1, actions.unsqueeze(1)).squeeze(1)
    policy_term = -(taken_log_pi * advantage.detach()).mean()

    entropy = -(pi * log_pi).sum(-1).mean()

    return policy_term + 0.5 * value_term - entropy_beta * entropy

# --- Train loop (per cluster) ---
def train_cluster(cluster_meta, epochs=3, lr=1e-3):
    """Train one actor-critic model on a cluster's windows and emit signals.

    Parameters
    ----------
    cluster_meta : dict
        One entry of ``tensor_index`` with keys ``cluster``, ``tickers``,
        ``dates`` and ``tensor_file``.
    epochs : int
        Number of full-batch gradient steps.
    lr : float
        Adam learning rate.

    Returns
    -------
    pandas.DataFrame
        One row per (date, ticker) with columns ``date``, ``ticker`` and
        ``signal`` in {-1, 0, 1} (argmax of the trained policy minus 1).

    Notes
    -----
    * The stored tensor is (B, T, N, F). The ticker axis is moved next to the
      batch axis before flattening so each LSTM sequence holds ONE ticker's T
      time steps; a plain reshape would interleave time steps and tickers.
    * Rewards depend on the action: ``(action - 1) * log-return`` of the
      window's label date, with actions sampled from the current policy at
      every epoch. A reward that ignores the action gives the policy nothing
      to learn from.
    * The mask file is not loaded because training never reads it.
    """
    tickers = cluster_meta["tickers"]
    dates = cluster_meta["dates"]

    X = np.load(os.path.join(DATA_DIR, cluster_meta["tensor_file"]))

    B, T, N, F = X.shape
    print(f"Training cluster {cluster_meta['cluster']} | X={X.shape}, tickers={len(tickers)}")

    model = A3CClusterNet(n_features=F).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # (B, T, N, F) -> (B, N, T, F) -> (B*N, T, F); flat index = window * N + ticker
    X_tensor = torch.tensor(
        np.ascontiguousarray(X.transpose(0, 2, 1, 3)).reshape(B * N, T, F),
        dtype=torch.float32,
    ).to(device)
    del X

    # Per-ticker log-return on each label date. The dates come back from JSON
    # as strings, so they are parsed before aligning with the datetime index.
    date_index = pd.to_datetime(dates)
    returns = []
    for tk in tickers:
        px = df_backtest[df_backtest["ticker"] == tk].set_index("timestamp")["close"]
        px = px.reindex(date_index).ffill().bfill()
        r = np.log(px.values[1:] / px.values[:-1])
        returns.append(np.concatenate([[0], r]))  # first date has no prior price
    returns = np.array(returns, dtype=float).T  # shape (B, N)
    # A ticker without usable prices yields NaN/inf; treat that as zero return
    # so a single gap cannot turn the whole loss into NaN.
    returns = np.nan_to_num(returns, nan=0.0, posinf=0.0, neginf=0.0)
    returns_tensor = torch.tensor(returns.reshape(B * N), dtype=torch.float32).to(device)

    for ep in range(epochs):
        logits, values = model(X_tensor)

        # Sample positions from the current policy (0/1/2 -> short/flat/long)
        # and score them with the realised return.
        actions_tensor = torch.distributions.Categorical(logits=logits.detach()).sample()
        rewards_tensor = (actions_tensor.float() - 1.0) * returns_tensor

        loss = a3c_loss(logits, values, actions_tensor, rewards_tensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"  Epoch {ep+1}/{epochs}, Loss={loss.item():.4f}")

    # --- Inference: greedy action per (window, ticker) ---
    with torch.no_grad():
        logits, _ = model(X_tensor)
        actions = torch.argmax(logits, dim=-1).cpu().numpy()

    # Back to (B, N), matching the flattening order used above
    actions = actions.reshape(B, N)

    # Signal table; action index maps 0 -> -1, 1 -> 0, 2 -> 1
    signals = [
        {"date": d, "ticker": tk, "signal": int(actions[i, j] - 1)}
        for i, d in enumerate(dates)
        for j, tk in enumerate(tickers)
    ]

    return pd.DataFrame(signals)

# --- Train every cluster in turn and stack the resulting signal tables ---
per_cluster_signals = []
for cluster_meta in tensor_index:
    per_cluster_signals.append(train_cluster(cluster_meta, epochs=3))
    gc.collect()   # release the tensors of the cluster just trained

a3c_signals = pd.concat(per_cluster_signals, ignore_index=True)
print("✅ Done Block 8: a3c_signals ready:", a3c_signals.shape)


In [None]:
# Block 9 — DDPG cluster-level allocation (reward thật)
import torch, torch.nn as nn, torch.optim as optim
import numpy as np, pandas as pd, gc

# Use the GPU when torch reports one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Actor & Critic ---
class Actor(nn.Module):
    """Policy network: state -> allocation weights.

    A single hidden layer of 64 ReLU units followed by a softmax over the
    last dimension, so each output row is non-negative and sums to 1.
    """

    def __init__(self, s_dim, a_dim):
        super().__init__()
        layers = [
            nn.Linear(s_dim, 64),
            nn.ReLU(),
            nn.Linear(64, a_dim),
            nn.Softmax(-1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

class Critic(nn.Module):
    """Q-network: (state, action) -> scalar value.

    State and action are concatenated on the last dimension and passed
    through one hidden layer of 64 ReLU units to a single linear output.
    """

    def __init__(self, s_dim, a_dim):
        super().__init__()
        layers = [
            nn.Linear(s_dim + a_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, s, a):
        state_action = torch.cat([s, a], -1)
        return self.net(state_action)

# --- Replay buffer ---
class Buffer:
    """Fixed-capacity FIFO replay memory of transition tuples.

    ``push`` stores its positional arguments as one tuple and evicts the
    oldest entry once ``cap`` entries are held. ``sample`` draws ``bz``
    distinct entries at random and returns an iterator of numpy arrays, one
    per tuple field.
    """

    def __init__(self, cap=2000):
        self.buf = []
        self.cap = cap

    def push(self, *x):
        if len(self.buf) >= self.cap:
            del self.buf[0]   # drop the oldest transition
        self.buf.append(x)

    def sample(self, bz):
        # Sampling without replacement: requires bz <= len(self).
        picks = np.random.choice(len(self.buf), bz, False)
        batch = [self.buf[i] for i in picks]
        return map(np.array, zip(*batch))

    def __len__(self):
        return len(self.buf)

# --- Init ---
# Clusters to allocate over: every DBSCAN label except noise (-1).
clusters = [c for c in df_clusters["cluster"].unique() if c!=-1]
nC = len(clusters)
feat_per_cluster = 5   # state features per cluster (see get_cluster_state)
s_dim = feat_per_cluster*nC
a_dim = nC

# Online networks, then target networks initialised as exact copies.
actor = Actor(s_dim, a_dim).to(device)
critic = Critic(s_dim, a_dim).to(device)
t_actor = Actor(s_dim, a_dim).to(device)
t_critic = Critic(s_dim, a_dim).to(device)
t_actor.load_state_dict(actor.state_dict())
t_critic.load_state_dict(critic.state_dict())

optA = optim.Adam(actor.parameters(),1e-3)
optC = optim.Adam(critic.parameters(),1e-3)

buf = Buffer()
γ = 0.99   # discount factor
τ = 0.01   # soft-update rate for the target networks

# --- Update ---
def update():
    """Run one DDPG optimisation step on a random batch of 64 transitions.

    Does nothing until the replay buffer holds at least 64 entries. Order of
    operations: critic regression towards ``r + γ * Q_target(s2, π_target(s2))``,
    actor ascent on ``Q(s, π(s))``, then a soft (Polyak) update of both target
    networks with rate ``τ``.
    """
    if len(buf) < 64:
        return

    def _as_tensor(array):
        return torch.tensor(array, dtype=torch.float32).to(device)

    s, a, r, s2 = buf.sample(64)
    s, a, r, s2 = _as_tensor(s), _as_tensor(a), _as_tensor(r[:, None]), _as_tensor(s2)

    # Critic step
    q = critic(s, a)
    q_t = r + γ * t_critic(s2, t_actor(s2)).detach()
    lC = nn.MSELoss()(q, q_t)
    optC.zero_grad()
    lC.backward()
    optC.step()

    # Actor step
    lA = -critic(s, actor(s)).mean()
    optA.zero_grad()
    lA.backward()
    optA.step()

    # Soft target updates: target <- τ * online + (1 - τ) * target
    for target_net, online_net in ((t_actor, actor), (t_critic, critic)):
        for tgt, src in zip(target_net.parameters(), online_net.parameters()):
            tgt.data.copy_(τ*src + (1-τ)*tgt)

# --- Helper: extract cluster features ---
def get_cluster_state(date, prev_date):
    """Build the DDPG state vector for ``date``: 5 features per cluster.

    For every cluster in ``clusters`` (in order) the features are:
    mean A3C signal, share of long (+1) signals, share of short (-1) signals,
    mean log-return and std of log-returns of the cluster's tickers between
    ``prev_date`` and ``date``.

    Missing data never produces NaN in the state: a cluster without signals
    contributes zeros for the signal features; missing prices contribute zero
    return/volatility; a cluster with a single priced ticker gets volatility 0
    (the sample std of one value is undefined).

    Parameters
    ----------
    date, prev_date : str or datetime-like
        Current and previous trading date. ``date`` is compared as-is with
        ``a3c_signals.date``; both are parsed to Timestamps for price lookup.

    Returns
    -------
    numpy.ndarray
        Float array of length ``5 * len(clusters)``.
    """
    date_ts, prev_ts = pd.Timestamp(date), pd.Timestamp(prev_date)

    # Only these two days of prices are needed; select them once instead of
    # pivoting the full history for every cluster.
    two_days = df_backtest[df_backtest.timestamp.isin([prev_ts, date_ts])]
    day_signals = a3c_signals[a3c_signals.date == date]

    feats = []
    for c in clusters:
        tickers = df_clusters[df_clusters.cluster==c].ticker.unique()

        # A3C signals of the cluster's tickers on `date`
        sigs = day_signals[day_signals.ticker.isin(tickers)]["signal"]
        if len(sigs):
            mean_sig = float(sigs.mean())
            long_r = float((sigs == 1).mean())
            short_r = float((sigs == -1).mean())
        else:
            mean_sig, long_r, short_r = 0.0, 0.0, 0.0

        # Cross-sectional log-returns prev_date -> date
        px = two_days[two_days.ticker.isin(tickers)].pivot(
            index="timestamp", columns="ticker", values="close"
        )
        ret, vol = 0.0, 0.0
        if date_ts in px.index and prev_ts in px.index:
            r = np.log(px.loc[date_ts] / px.loc[prev_ts])
            r = r.replace([np.inf, -np.inf], np.nan).dropna()
            if len(r):
                ret = float(r.mean())
            if len(r) > 1:
                vol = float(r.std())

        feats.extend([mean_sig, long_r, short_r, ret, vol])
    return np.array(feats, dtype=float)

# --- Train loop ---
# Cluster membership, defined exactly as in get_cluster_state: every ticker
# that ever received label c.
cluster_members = {
    c: set(df_clusters.loc[df_clusters["cluster"] == c, "ticker"].unique())
    for c in clusters
}

dates = sorted(a3c_signals["date"].unique())
rec = []
next_state = None   # state of the following day, carried over to avoid recomputation

# NOTE(review): the state for `day` contains the prev->day return, and the
# reward is computed from that same return, so the agent sees the outcome it is
# rewarded for. Confirm whether the state should be lagged by one day.
for t in range(1, len(dates)):
    day, prev = dates[t], dates[t-1]

    s = next_state if next_state is not None else get_cluster_state(day, prev)
    # The next state must describe the FOLLOWING day; the last day has none.
    has_next = t + 1 < len(dates)
    next_state = get_cluster_state(dates[t+1], day) if has_next else None

    s_t = torch.tensor(s).float().unsqueeze(0).to(device)

    # Action: allocation weights over clusters (reshape keeps 1-D even if nC == 1)
    a = actor(s_t).cpu().detach().numpy().reshape(-1)

    # Reward: weighted mean signal P&L (log-returns) per cluster
    px = df_backtest[df_backtest.timestamp.isin([prev,day])].pivot(index="timestamp",columns="ticker",values="close")
    if px.shape[0]<2: continue
    # Exactly two rows, sorted by timestamp: row 0 = prev, row 1 = day.
    ret = np.log(px.iloc[1] / px.iloc[0])

    merged = a3c_signals[a3c_signals.date==day].merge(
        ret.rename("ret").reset_index(), on="ticker", how="left"
    )
    merged["pnl"] = merged["signal"]*merged["ret"].fillna(0)

    # Average P&L per cluster by ticker membership. Grouping by
    # df_clusters["cluster"] would pair rows by positional index, which has no
    # relation to the tickers in `merged`.
    cl_ret = np.array(
        [merged.loc[merged["ticker"].isin(cluster_members[c]), "pnl"].mean() for c in clusters],
        dtype=float,
    )
    cl_ret = np.nan_to_num(cl_ret, nan=0.0, posinf=0.0, neginf=0.0)

    reward = float(np.dot(a, cl_ret))

    # Store (s, a, r, s') only when a genuine next state exists.
    if has_next:
        buf.push(s,a,reward,next_state); update()
    rec.append({"date":day, **{f"w{c}":a[i] for i,c in enumerate(clusters)}})

ddpg_weights = pd.DataFrame(rec)
print("✅ Done Block 9:", ddpg_weights.shape)


In [None]:
# Block 10 — Portfolio Construction & Backtest
import pandas as pd
import numpy as np
import gc
from FiinQuantX import FiinSession

INIT_CAPITAL = 10000
TCOST = 0.0005  # cost rate (0.05%) deducted from equity on every traded day

# --- Normalise all date columns to datetime so joins/comparisons line up ---
a3c_signals["date"] = pd.to_datetime(a3c_signals["date"])
df_backtest["date"] = pd.to_datetime(df_backtest["timestamp"])
ddpg_weights["date"] = pd.to_datetime(ddpg_weights["date"])
df_clusters["timestamp"] = pd.to_datetime(df_clusters["timestamp"])

# (ticker, date) -> cluster label
cluster_map = df_clusters[["ticker", "timestamp", "cluster"]].rename(columns={"timestamp":"date"})

# --- Benchmark ---
# Reuses the authenticated `client` session created in Block 1; logging in
# again with placeholder credentials would fail and clobber the session.
benchmarks = client.Fetch_Trading_Data(
    realtime=False,
    tickers=["VNINDEX"],
    fields=["close"],
    adjusted=True,
    by="1d",
    from_date=a3c_signals["date"].min().strftime("%Y-%m-%d")
).get_data().reset_index()

# get_data() frames expose the bar time as "timestamp" (see Block 1 output);
# fall back to "date" only if that is what the frame actually carries.
bench_time_col = "timestamp" if "timestamp" in benchmarks.columns else "date"
benchmarks["date"] = pd.to_datetime(benchmarks[bench_time_col])
benchmarks = benchmarks[["date", "close"]].rename(columns={"close":"benchmark_close"})
benchmarks["benchmark_return"] = benchmarks["benchmark_close"].pct_change().fillna(0)

# --- Daily simple returns, computed once for all tickers ---
# return(d) = close(d) / last available close strictly before d - 1
close_wide = (
    df_backtest.drop_duplicates(subset=["date", "ticker"], keep="last")
               .pivot(index="date", columns="ticker", values="close")
               .sort_index()
)
daily_returns = close_wide / close_wide.ffill().shift(1) - 1

# --- Backtest ---
equity = INIT_CAPITAL
portfolio_history = []
positions = {}  # ticker -> capital allocated (signed) on the current day

for d in sorted(a3c_signals["date"].unique()):
    d = pd.Timestamp(d)

    # A3C signals for the day
    sig_today = a3c_signals[a3c_signals["date"] == d]
    if sig_today.empty:
        portfolio_history.append({"date": d, "equity": equity})
        continue

    # Attach the cluster label of each (date, ticker)
    sig_today = sig_today.merge(cluster_map, on=["date","ticker"], how="left")

    # Cluster weights from DDPG; equal weights over labelled, non-noise
    # clusters when DDPG produced no row for this day.
    w_today = ddpg_weights[ddpg_weights["date"] == d]
    if w_today.empty:
        day_clusters = [c for c in sig_today["cluster"].dropna().unique() if c != -1]
        cluster_weights = {c: 1/len(day_clusters) for c in day_clusters}
    else:
        cluster_weights = {int(c.split("w")[-1]): w_today.iloc[0][c] for c in w_today.columns if c.startswith("w")}

    # Split each cluster's weight equally across its tickers; the signal gives the sign.
    positions = {}
    target_weights = {}
    for clus, g in sig_today.groupby("cluster"):
        if clus not in cluster_weights:
            continue
        w_c = cluster_weights[clus] / max(len(g), 1)
        for _, row in g.iterrows():
            if row["signal"] != 0:
                target_weights[row["ticker"]] = w_c * row["signal"]
                positions[row["ticker"]] = equity * w_c * row["signal"]

    # Portfolio return of the day; tickers without a usable return are skipped
    rets_today = daily_returns.loc[d] if d in daily_returns.index else pd.Series(dtype=float)
    daily_ret = 0.0
    for tk, weight in target_weights.items():
        r = rets_today.get(tk, np.nan)
        if pd.notna(r):
            daily_ret += weight * r

    # NOTE(review): the cost is charged in full every day regardless of
    # turnover — confirm this is the intended cost model.
    equity = equity * (1 + daily_ret - TCOST)
    portfolio_history.append({"date": d, "equity": equity})

# --- Final result ---
df_trading = pd.DataFrame(portfolio_history)
df_trading = df_trading.merge(benchmarks, on="date", how="left")

print("✅ Done Block 10: Backtest finished")
print("Final equity:", equity)
print("Benchmark final:", df_trading["benchmark_close"].iloc[-1] / df_trading["benchmark_close"].iloc[0])


In [None]:
# Block 11 — Walk-Forward Validation
import pandas as pd
import numpy as np

# --- Input từ Block 10 ---
# df_trading: equity curve + benchmark
# a3c_signals: tín hiệu
# ddpg_weights: phân bổ vốn
# df_backtest: giá đóng cửa

# --- Date-based split of the equity curve ---
train_end = "2024-06-06"
val_end   = "2024-12-31"
test_end  = df_trading["date"].max().strftime("%Y-%m-%d")

splits = {
    "train": (df_trading["date"] <= train_end),
    "val":   (df_trading["date"] > train_end) & (df_trading["date"] <= val_end),
    "test":  (df_trading["date"] > val_end)
}

results = {}
for name, mask in splits.items():
    # Local names are used so the scalar `equity` from Block 10 is not overwritten.
    equity_slice = df_trading.loc[mask, "equity"]
    date_slice   = df_trading.loc[mask, "date"]
    bench        = df_trading.loc[mask, "benchmark_close"].dropna()

    if equity_slice.empty:
        continue

    # CAGR — elapsed time comes from the date column; the frame's index is a
    # plain integer range and carries no calendar information.
    years = (date_slice.iloc[-1] - date_slice.iloc[0]).days / 365
    cagr = (equity_slice.iloc[-1] / equity_slice.iloc[0])**(1/years) - 1 if years > 0 else 0

    # Volatility (annualised, 252 trading days)
    daily_ret = equity_slice.pct_change().dropna()
    vol = daily_ret.std() * np.sqrt(252)

    # Sharpe ratio (risk-free rate taken as 0)
    sharpe = daily_ret.mean() / daily_ret.std() * np.sqrt(252) if daily_ret.std() > 0 else 0

    # Max drawdown
    roll_max = equity_slice.cummax()
    dd = (equity_slice / roll_max - 1).min()

    # Benchmark return over the split (dates without a benchmark close are ignored)
    bench_ret = bench.iloc[-1] / bench.iloc[0] - 1 if len(bench) > 1 else 0

    results[name] = {
        "CAGR": round(cagr, 4),
        "Volatility": round(vol, 4),
        "Sharpe": round(sharpe, 2),
        "MaxDD": round(dd, 4),
        "BenchmarkRet": round(bench_ret, 4)
    }

# --- Print results ---
print("✅ Done Block 11: Walk-forward validation results")
for k, v in results.items():
    print(f"{k.upper()} → {v}")


In [None]:
# Block 12 — Evaluation, Risk Controls & Reporting
import matplotlib.pyplot as plt

# --- Input: df_trading từ Block 10, results từ Block 11 ---
# df_trading có: ["date","equity","benchmark_close","signal","weight","pnl"]

# Inputs: df_trading from Block 10 with columns
# ["date", "equity", "benchmark_close", "benchmark_return"]; this cell adds
# "roll_max" and "drawdown".

# --- Equity curve vs benchmark ---
# The benchmark is rebased to INIT_CAPITAL (Block 10) from its first available
# close, so both curves start from the same capital.
bench_close = df_trading["benchmark_close"]
bench_base = bench_close.dropna().iloc[0] if bench_close.notna().any() else np.nan

plt.figure(figsize=(12,6))
plt.plot(df_trading["date"], df_trading["equity"], label="Strategy", linewidth=2)
plt.plot(df_trading["date"],
         bench_close / bench_base * INIT_CAPITAL,
         label="Benchmark (VNINDEX)", linestyle="--")
plt.title("Equity Curve vs Benchmark")
plt.xlabel("Date")
plt.ylabel("Portfolio Value")
plt.legend()
plt.grid(True)
plt.show()

# --- Drawdowns: distance of equity from its running maximum ---
df_trading["roll_max"] = df_trading["equity"].cummax()
df_trading["drawdown"] = df_trading["equity"]/df_trading["roll_max"] - 1

plt.figure(figsize=(12,4))
plt.fill_between(df_trading["date"], df_trading["drawdown"], color="red", alpha=0.3)
plt.title("Drawdown over time")
plt.xlabel("Date")
plt.ylabel("Drawdown")
plt.grid(True)
plt.show()

# --- Histogram of daily returns ---
daily_ret = df_trading["equity"].pct_change().dropna()
plt.figure(figsize=(8,4))
plt.hist(daily_ret, bins=50, alpha=0.7)
plt.title("Distribution of Daily Returns")
plt.xlabel("Daily Return")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()

# --- Summary stats ---
# Same zero-volatility guard as Block 11, so a flat curve reports 0 rather than inf/NaN.
ret_std = daily_ret.std()
sharpe_total = daily_ret.mean()/ret_std*np.sqrt(252) if ret_std > 0 else 0

print("✅ Done Block 12: Evaluation Report")
print("Final Equity:", round(df_trading['equity'].iloc[-1],2))
print("Total Return:", round(df_trading['equity'].iloc[-1]/df_trading['equity'].iloc[0]-1,4))
print("Sharpe:", round(sharpe_total,2))
print("Max Drawdown:", round(df_trading['drawdown'].min(),4))
