In [1]:
# Block 1 — Log in and fetch daily history for all HOSE tickers (HNX/UPCOM are not fetched here)
import os
from getpass import getpass

import pandas as pd
from FiinQuantX import FiinSession, BarDataUpdate

# --- Login ---
# Credentials must never be committed with the notebook: read them from the
# environment and fall back to an interactive prompt when they are not set.
username = os.environ.get("FIINQUANT_USERNAME") or input("FiinQuant username: ")
password = os.environ.get("FIINQUANT_PASSWORD") or getpass("FiinQuant password: ")

client = FiinSession(
    username=username,
    password=password
).login()

# --- Ticker list (only the VNINDEX / HOSE universe is requested) ---
tickers_hose  = list(client.TickerList(ticker="VNINDEX"))     # HOSE
print(f"Số mã HOSE: {len(tickers_hose)}")

# --- Full daily history (can be heavy; fetch in batches if needed) ---
event_history = client.Fetch_Trading_Data(
    realtime=False,
    tickers=tickers_hose,
    fields=['open','high','low','close','volume','bu','sd','fs','fn'],
    adjusted=True,
    by="1d",
    from_date="2022-01-01"   # backtest window starts in 2022
)

# df_all is the global price table that the realtime callback keeps extending.
df_all = event_history.get_data()
print("History ban đầu:", df_all.head())

# --- Callback realtime ---
def onDataUpdate(data: BarDataUpdate):
    """Realtime callback: fold the newest bar(s) into the global ``df_all``.

    A bar that is still forming arrives repeatedly with changing values, so
    rows are de-duplicated on the (ticker, timestamp) key and the most recent
    version is kept. A full-row ``drop_duplicates()`` would keep every
    revision of the same bar and leave several rows per ticker/day.

    Args:
        data: update object exposing ``to_dataFrame()``.
            # assumes the frame has the same columns as the historical
            # df_all (including ``ticker`` and ``timestamp``) — TODO confirm
    """
    global df_all
    df_update = data.to_dataFrame()
    combined = pd.concat([df_all, df_update])

    key_cols = [col for col in ("ticker", "timestamp") if col in combined.columns]
    if len(key_cols) == 2:
        # Later rows come from the update, so keep="last" keeps the freshest bar.
        combined = combined.drop_duplicates(subset=key_cols, keep="last")
    else:
        # Key columns unavailable: fall back to exact-row de-duplication.
        combined = combined.drop_duplicates()

    df_all = combined
    print("Realtime update:")
    print(df_update.head())

# --- Subscribe to realtime bars that continue the historical data ---
# Same tickers and fields as the historical request; every update is handed to
# onDataUpdate, which appends it to the global df_all.
# NOTE(review): this cell only creates the event object and never starts or
# stops it explicitly — confirm against the FiinQuantX docs whether
# event_realtime.get_data() / .stop() must be called for the callback to fire.
event_realtime = client.Fetch_Trading_Data(
    realtime=True,
    tickers=tickers_hose,
    fields=['open','high','low','close','volume','bu','sd','fs','fn'], 
    adjusted=True,
    by="1d",
    period=1,
    callback=onDataUpdate
)


Số mã HOSE: 415
Fetching data, it may take a while. Please wait...
History ban đầu:   ticker         timestamp       open       high        low      close  \
0    AAA  2022-01-04 00:00  19302.030  20550.420  19302.030  19878.210   
1    AAA  2022-01-05 00:00  19974.240  20934.540  19830.195  20118.285   
2    AAA  2022-01-06 00:00  20070.270  21510.720  19974.240  21510.720   
3    AAA  2022-01-07 00:00  22038.885  22230.945  21654.765  21894.840   
4    AAA  2022-01-10 00:00  22086.900  22471.020  20406.375  20406.375   

       volume         bu         sd            fs            fn  
0   7354500.0  3727500.0  3474700.0  3.871515e+09  1.077420e+09  
1   7187400.0  3248400.0  3816200.0  2.180410e+09 -1.785885e+09  
2  12375600.0  5193100.0  6236500.0  2.383965e+09  1.195269e+10  
3   7009900.0        0.0        0.0  5.174500e+09  3.160220e+09  
4  11396800.0  5208000.0  5167700.0  9.241110e+09 -9.088565e+09  


In [2]:
# Block 2 — Lấy dữ liệu FA theo quý (HOSE only)

def fetch_fa_quarterly(ticker, latest_year=2025, n_periods=32):
    """Fetch quarterly consolidated ratios for one ticker as a DataFrame.

    Args:
        ticker: symbol to query.
        latest_year: passed to the service as ``LatestYear``.
        n_periods: passed to the service as ``NumberOfPeriod``.

    Returns:
        One row per returned record with a ``ticker`` column and a
        ``ReportDate`` column (parsed when present, all-NaT otherwise).
        An empty DataFrame when the service returns nothing usable or when
        any exception is raised (the error is printed, not re-raised).
    """
    try:
        analysis = client.FundamentalAnalysis()
        raw_records = analysis.get_ratios(
            tickers=[ticker],
            TimeFilter="Quarterly",
            LatestYear=latest_year,
            NumberOfPeriod=n_periods,
            Consolidated=True,
        )

        # Only a non-empty list is a usable payload.
        if not (raw_records and isinstance(raw_records, list)):
            return pd.DataFrame()

        frame = pd.DataFrame(raw_records)
        if frame.empty:
            return pd.DataFrame()

        frame["ticker"] = ticker
        # Always expose a ReportDate column so later concatenation has a
        # consistent schema, even when the payload carries no such field.
        has_report_date = "ReportDate" in frame.columns
        frame["ReportDate"] = (
            pd.to_datetime(frame["ReportDate"]) if has_report_date else pd.NaT
        )
        return frame

    except Exception as e:
        print(f"⚠️ Lỗi khi lấy FA cho {ticker}: {e}")
        return pd.DataFrame()


# --- Keep only the tickers for which FA data came back ---
fa_list = []
valid_tickers = []

for t in tickers_hose:   # HOSE list from Block 1
    df_fa = fetch_fa_quarterly(t, latest_year=2025, n_periods=32)
    if df_fa.empty:
        continue
    fa_list.append(df_fa)
    valid_tickers.append(t)

# --- Stack the per-ticker frames (empty frame when nothing was fetched) ---
fa_data = pd.concat(fa_list, ignore_index=True) if fa_list else pd.DataFrame()

print(f"Số mã HOSE ban đầu: {len(tickers_hose)}")
print(f"Số mã có dữ liệu FA: {len(valid_tickers)}")
print("FA Data sample:")
print(fa_data.head())


⚠️ Lỗi khi lấy FA cho FUETPVND: 'FUETPVND'
Số mã HOSE ban đầu: 415
Số mã có dữ liệu FA: 392
FA Data sample:
   organizationId ticker  year  quarter  \
0          894364    CCC  2023        4   
1          894364    CCC  2024        1   
2          894364    CCC  2024        2   
3          894364    CCC  2024        3   
4          894364    CCC  2024        4   

                                              ratios ReportDate  
0  {'SolvencyRatio': {'DebtToEquityRatio': 1.5102...        NaT  
1  {'SolvencyRatio': {'DebtToEquityRatio': 0.7722...        NaT  
2  {'SolvencyRatio': {'DebtToEquityRatio': 0.7357...        NaT  
3  {'SolvencyRatio': {'DebtToEquityRatio': 0.7914...        NaT  
4  {'SolvencyRatio': {'DebtToEquityRatio': 0.6437...        NaT  


In [3]:
# Block 3 — Chuẩn hoá FA + Merge với giá (HOSE only, dựa theo Block 2)

import pandas as pd

# --- FA ratio names to pull out of the nested `ratios` payload ---
fa_fields = [
    "DebtToEquityRatio","EBITMargin","ROA","ROE","ROIC",
    "BasicEPS","PriceToBook","PriceToEarning",
    "NetRevenueGrowthYoY","GrossProfitGrowthYoY"
]

# --- Hàm nổ ratios ---
def explode_ratios(df, fa_fields):
    """Flatten the nested ``ratios`` dicts into one column per requested field.

    Args:
        df: frame with ``ticker``, ``year``, ``quarter`` and (optionally)
            ``ratios`` columns; ``ratios`` maps section name -> {field: value}.
        fa_fields: field names to extract.

    Returns:
        DataFrame with ``ticker``, ``fa_year``, ``fa_quarter`` followed by one
        column per entry of ``fa_fields``. A field is None when no section
        holds it or when ``ratios`` is not a dict; when several sections hold
        the same field, the last section wins.
    """
    def _lookup(sections, field):
        # Scan every section; a later match overrides an earlier one.
        found = None
        for block in sections.values():
            if isinstance(block, dict) and field in block:
                found = block[field]
        return found

    flattened = []
    for _, rec in df.iterrows():
        entry = {
            "ticker": rec["ticker"],
            "fa_year": int(rec["year"]),
            "fa_quarter": int(rec["quarter"]),
        }
        nested = rec.get("ratios", {})
        if isinstance(nested, dict):
            entry.update({name: _lookup(nested, name) for name in fa_fields})
        else:
            # Non-dict payload (e.g. None/NaN): every field is missing.
            entry.update(dict.fromkeys(fa_fields))
        flattened.append(entry)
    return pd.DataFrame(flattened)

# --- Flatten FA and keep a single (last) record per ticker/quarter ---
fa_key_cols = ["ticker", "fa_year", "fa_quarter"]
fa_clean = (
    explode_ratios(fa_data, fa_fields)
    .sort_values(fa_key_cols)
    .drop_duplicates(subset=fa_key_cols, keep="last")
)

# --- Prices: tickers with FA only, parsed timestamps, chronological per ticker ---
has_fa = df_all["ticker"].isin(valid_tickers)
df_price = (
    df_all[has_fa]
    .assign(timestamp=lambda d: pd.to_datetime(d["timestamp"]))
    .sort_values(["ticker", "timestamp"])
)

# Join key = the quarter BEFORE the one each price bar falls in.
# NOTE(review): this treats a quarter's ratios as known from the first day of
# the next quarter; if reports are published later than that, this introduces
# look-ahead — confirm against actual publication dates.
prev_quarter = df_price["timestamp"].dt.to_period("Q") - 1
df_price["fa_year"] = prev_quarter.dt.year.astype(int)
df_price["fa_quarter"] = prev_quarter.dt.quarter.astype(int)

# --- Left-join prices with FA, then forward-fill FA gaps within each ticker ---
df_merged = (
    df_price
    .merge(fa_clean, on=fa_key_cols, how="left")
    .sort_values(["ticker", "timestamp"])
)
df_merged[fa_fields] = df_merged.groupby("ticker")[fa_fields].ffill()

print("Sample merged:")
print(df_merged.head())
print("Số mã merge thành công:", df_merged["ticker"].nunique())




Sample merged:
  ticker  timestamp       open       high        low      close      volume  \
0    AAA 2022-01-04  19302.030  20550.420  19302.030  19878.210   7354500.0   
1    AAA 2022-01-05  19974.240  20934.540  19830.195  20118.285   7187400.0   
2    AAA 2022-01-06  20070.270  21510.720  19974.240  21510.720  12375600.0   
3    AAA 2022-01-07  22038.885  22230.945  21654.765  21894.840   7009900.0   
4    AAA 2022-01-10  22086.900  22471.020  20406.375  20406.375  11396800.0   

          bu         sd            fs  ...  DebtToEquityRatio  EBITMargin  \
0  3727500.0  3474700.0  3.871515e+09  ...           0.613528    0.016455   
1  3248400.0  3816200.0  2.180410e+09  ...           0.613528    0.016455   
2  5193100.0  6236500.0  2.383965e+09  ...           0.613528    0.016455   
3        0.0        0.0  5.174500e+09  ...           0.613528    0.016455   
4  5208000.0  5167700.0  9.241110e+09  ...           0.613528    0.016455   

        ROA      ROE      ROIC    BasicEPS  Pri

In [4]:
# Block 4 — Tính các chỉ số TA (trên df_merged từ Block 3)

import pandas as pd

# --- Create the indicator helper that add_ta_indicators calls as `fi` ---
fi = client.FiinIndicator()

# --- Hàm tính TA theo từng ticker ---
def add_ta_indicators(df):
    """Return a timestamp-sorted copy of ``df`` with TA indicator columns added.

    Intended to receive the rows of a single ticker. Every indicator is
    computed by the module-level ``fi`` helper from the ``close`` / ``high`` /
    ``low`` / ``volume`` columns; the input frame is not modified.
    """
    ordered = df.sort_values("timestamp").copy()

    macd_base = {"window_fast": 12, "window_slow": 26}
    macd_full = {**macd_base, "window_sign": 9}
    band_args = {"window": 20, "window_dev": 2}

    # (output column, how to compute it) — evaluated in this exact order so
    # the resulting column order is stable.
    indicator_specs = (
        # EMA
        ('ema_5',  lambda d: fi.ema(d['close'], window=5)),
        ('ema_20', lambda d: fi.ema(d['close'], window=20)),
        ('ema_50', lambda d: fi.ema(d['close'], window=50)),
        # MACD
        ('macd',        lambda d: fi.macd(d['close'], **macd_base)),
        ('macd_signal', lambda d: fi.macd_signal(d['close'], **macd_full)),
        ('macd_diff',   lambda d: fi.macd_diff(d['close'], **macd_full)),
        # RSI
        ('rsi', lambda d: fi.rsi(d['close'], window=14)),
        # Bollinger Bands
        ('bollinger_hband', lambda d: fi.bollinger_hband(d['close'], **band_args)),
        ('bollinger_lband', lambda d: fi.bollinger_lband(d['close'], **band_args)),
        # ATR
        ('atr', lambda d: fi.atr(d['high'], d['low'], d['close'], window=14)),
        # OBV
        ('obv', lambda d: fi.obv(d['close'], d['volume'])),
        # VWAP
        ('vwap', lambda d: fi.vwap(d['high'], d['low'], d['close'], d['volume'], window=14)),
    )
    for column, compute in indicator_specs:
        ordered[column] = compute(ordered)

    return ordered

# --- Apply to every ticker in df_merged ---
# The groups are iterated explicitly instead of using groupby(...).apply(...):
# applying a function to frames that still contain the grouping column is
# deprecated in pandas >= 2.2, and later versions exclude that column, which
# would silently remove `ticker` from the result.
ta_frames = [add_ta_indicators(ticker_rows) for _, ticker_rows in df_merged.groupby("ticker")]
# Nothing to compute on an empty input: keep the (empty) merged frame as-is.
df_with_ta = pd.concat(ta_frames) if ta_frames else df_merged.copy()

print("Sample with TA:")
print(df_with_ta.head())
print("Shape sau khi thêm TA:", df_with_ta.shape)


Sample with TA:
  ticker  timestamp       open       high        low      close      volume  \
0    AAA 2022-01-04  19302.030  20550.420  19302.030  19878.210   7354500.0   
1    AAA 2022-01-05  19974.240  20934.540  19830.195  20118.285   7187400.0   
2    AAA 2022-01-06  20070.270  21510.720  19974.240  21510.720  12375600.0   
3    AAA 2022-01-07  22038.885  22230.945  21654.765  21894.840   7009900.0   
4    AAA 2022-01-10  22086.900  22471.020  20406.375  20406.375  11396800.0   

          bu         sd            fs  ...  ema_50  macd  macd_signal  \
0  3727500.0  3474700.0  3.871515e+09  ...     NaN   NaN          NaN   
1  3248400.0  3816200.0  2.180410e+09  ...     NaN   NaN          NaN   
2  5193100.0  6236500.0  2.383965e+09  ...     NaN   NaN          NaN   
3        0.0        0.0  5.174500e+09  ...     NaN   NaN          NaN   
4  5208000.0  5167700.0  9.241110e+09  ...     NaN   NaN          NaN   

   macd_diff  rsi  bollinger_hband  bollinger_lband  atr         obv  

In [5]:
# Block 5 — Feature engineering & scaling

import numpy as np

# --- FA and TA column names used by the scaling steps below ---
# fa_features is read as a global by scale_fa_minmax; ta_features lists the
# indicator columns that get a rolling z-score.
fa_features = [
    "DebtToEquityRatio","EBITMargin","ROA","ROE","ROIC",
    "BasicEPS","PriceToBook","PriceToEarning",
    "NetRevenueGrowthYoY","GrossProfitGrowthYoY"
]

ta_features = [
    "ema_5","ema_20","ema_50","macd","macd_signal","macd_diff",
    "rsi","bollinger_hband","bollinger_lband","atr","obv","vwap"
]

# --- Chuẩn hoá FA: cross-section min-max scaling theo ngày ---
def scale_fa_minmax(df):
    """Min-max scale every column listed in the global ``fa_features`` to [0, 1].

    Scaling uses the min/max of the rows passed in (the caller passes one
    trading day at a time). A column whose min or max is not finite, or whose
    values are all equal, becomes NaN. Returns a scaled copy; ``df`` itself is
    left unchanged.
    """
    result = df.copy()
    for name in fa_features:
        column = df[name].astype(float)
        lowest = column.min()
        highest = column.max()
        scalable = np.isfinite(lowest) and np.isfinite(highest) and highest > lowest
        # No usable range -> the feature carries no cross-sectional information.
        result[name] = (column - lowest) / (highest - lowest) if scalable else np.nan
    return result

# Scale each trading day's cross-section separately. The groups are iterated
# explicitly because groupby(...).apply(...) on frames that include the
# grouping column is deprecated in pandas >= 2.2 and later versions drop that
# column (here: `timestamp`) from what the function receives and returns.
daily_frames = [scale_fa_minmax(day_rows) for _, day_rows in df_with_ta.groupby("timestamp")]
df_scaled_fa = (
    # Restore the per-ticker chronological row order used by the rolling step.
    pd.concat(daily_frames).sort_values(["ticker", "timestamp"], kind="stable")
    if daily_frames
    else df_with_ta.copy()
)

# --- Chuẩn hoá TA: rolling z-score theo từng ticker ---
def zscore_rolling(series, window=60):
    """Rolling z-score: (value - rolling mean) / rolling std over ``window`` rows.

    The first ``window - 1`` positions are NaN because the rolling window is
    not yet full.
    """
    trailing = series.rolling(window)
    deviation = series - trailing.mean()
    return deviation / trailing.std()

# Rolling z-score of every TA column, computed per ticker in row order.
# groupby(...)[cols].transform(...) is used instead of groupby(...).apply(...)
# so the grouping column never has to pass through the applied function
# (deprecated in pandas >= 2.2, dropped by later versions).
ta_z = df_scaled_fa.groupby("ticker")[ta_features].transform(
    lambda s: zscore_rolling(s, 60)
)
# transform() returns rows in the same order as its input, so the values can
# be attached positionally.
df_scaled = df_scaled_fa.assign(
    **{f"{col}_z": ta_z[col].to_numpy() for col in ta_features}
)

# --- Drop the raw TA columns, keep the z-scored versions ---
keep_cols = ["ticker","timestamp"] + fa_features + [f"{col}_z" for col in ta_features]
df_features = (
    df_scaled[keep_cols]
    # A zero rolling std can yield +/-inf, which dropna() would keep and
    # which t-SNE rejects; treat it as missing.
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .reset_index(drop=True)
)

print("Sample features:")
print(df_features.head())
print("Shape sau khi scaling & dropna:", df_features.shape)


Sample features:
  ticker  timestamp  DebtToEquityRatio  EBITMargin       ROA       ROE  \
0    AAA 2022-06-15           0.864203    0.997114  0.368342  0.987969   
1    AAA 2022-06-16           0.864203    0.997114  0.368342  0.987969   
2    AAA 2022-06-17           0.864203    0.997114  0.368342  0.987969   
3    AAA 2022-06-20           0.864203    0.997114  0.368342  0.987969   
4    AAA 2022-06-21           0.864203    0.997114  0.368342  0.987969   

       ROIC  BasicEPS  PriceToBook  PriceToEarning  ...  ema_50_z    macd_z  \
0  0.935557  0.144912     0.024681         0.19159  ... -1.537094  0.524295   
1  0.935557  0.144912     0.024681         0.19159  ... -1.516392  0.494055   
2  0.935557  0.144912     0.024681         0.19159  ... -1.511681  0.369820   
3  0.935557  0.144912     0.024681         0.19159  ... -1.519197  0.188197   
4  0.935557  0.144912     0.024681         0.19159  ... -1.510095  0.161472   

   macd_signal_z  macd_diff_z     rsi_z  bollinger_hband_z  bol

In [6]:
# Block 6 — Dimensionality reduction & Clustering (t-SNE + DBSCAN)

from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

# --- Feature columns used for clustering: the FA ratios plus every *_z column ---
feature_cols = [
    "DebtToEquityRatio","EBITMargin","ROA","ROE","ROIC",
    "BasicEPS","PriceToBook","PriceToEarning",
    "NetRevenueGrowthYoY","GrossProfitGrowthYoY"
] + [c for c in df_features.columns if c.endswith("_z")]

# --- Month column: one clustering run per calendar month ---
df_features["month"] = df_features["timestamp"].dt.to_period("M")

MIN_ROWS_PER_MONTH = 10   # months with fewer rows are skipped
TSNE_PERPLEXITY = 30      # upper bound; lowered for small months (see below)

cluster_results = []

for (month, g) in df_features.groupby("month"):
    # Each row of g is one (ticker, day) observation, not one stock.
    if len(g) < MIN_ROWS_PER_MONTH:
        continue

    X = g[feature_cols].values

    # --- t-SNE down to 2D ---
    # scikit-learn requires perplexity < n_samples, so a month with between
    # MIN_ROWS_PER_MONTH and TSNE_PERPLEXITY rows would raise with a fixed 30.
    perplexity = min(TSNE_PERPLEXITY, len(g) - 1)
    tsne = TSNE(n_components=2, perplexity=perplexity, learning_rate="auto", init="random", random_state=42)
    X_emb = tsne.fit_transform(X)

    # --- DBSCAN clustering on the embedding (label -1 = noise) ---
    # DBSCAN is fitted separately for every month, so label k in one month is
    # unrelated to label k in another month.
    db = DBSCAN(eps=0.5, min_samples=5).fit(X_emb)
    labels = db.labels_

    temp = g[["ticker","timestamp"]].copy()
    temp["cluster"] = labels
    temp["tsne_x"] = X_emb[:,0]
    temp["tsne_y"] = X_emb[:,1]
    temp["month"]  = str(month)

    cluster_results.append(temp)

if cluster_results:
    df_clusters = pd.concat(cluster_results, ignore_index=True)
else:
    # Every month was skipped: keep the expected schema instead of letting
    # pd.concat([]) raise.
    df_clusters = pd.DataFrame(
        columns=["ticker", "timestamp", "cluster", "tsne_x", "tsne_y", "month"]
    )

print("Cluster sample:")
print(df_clusters.head())
print("Số cụm mỗi tháng:")
# Note: this count includes the noise label -1 when it occurs.
print(df_clusters.groupby("month")["cluster"].nunique())


  File "c:\Users\USER\AppData\Local\Programs\Python\Python313\Lib\site-packages\joblib\externals\loky\backend\context.py", line 255, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Cluster sample:
  ticker  timestamp  cluster     tsne_x     tsne_y    month
0    AAA 2022-06-15       -1 -17.608253 -20.577604  2022-06
1    AAA 2022-06-16       -1 -17.507252 -20.379070  2022-06
2    AAA 2022-06-17       -1 -15.360531 -20.337601  2022-06
3    AAA 2022-06-20       -1  -4.166707   1.878922  2022-06
4    AAA 2022-06-21       -1 -18.837034 -15.488154  2022-06
Số cụm mỗi tháng:
month
2022-06     13
2022-07     69
2022-08     69
2022-09     49
2022-10     54
2022-11     56
2022-12     64
2023-01     45
2023-02     64
2023-03    139
2023-04     68
2023-05     84
2023-06     73
2023-07     76
2023-08     76
2023-09     45
2023-10     77
2023-11     60
2023-12     89
2024-01     95
2024-02     34
2024-03     64
2024-04     51
2024-05     67
2024-06     96
2024-07     97
2024-08     69
2024-09     94
2024-10    116
2024-11     91
2024-12     86
2025-01     57
2025-02     76
2025-03     90
2025-04     48
2025-05     77
2025-06    109
2025-07    106
2025-08     76
Name: cluster, 

In [7]:
# Block 7 — Tạo tensor dữ liệu cho A3C (per-cluster) kèm mask matrix

import numpy as np

LOOKBACK = 30  # number of trailing days in every window
tensors, masks = {}, {}

# --- One tensor per cluster label ---
# NOTE(review): df_clusters labels come from separate monthly DBSCAN fits, so
# grouping by the bare label pools unrelated monthly clusters — confirm that
# this is intended.
for c_id, g in df_clusters.groupby("cluster"):
    if c_id == -1:   # DBSCAN noise is skipped
        continue

    tickers = g["ticker"].unique()
    g_feat = df_features[df_features["ticker"].isin(tickers)].copy()

    # Pivot to a wide frame indexed by timestamp. With a list of values,
    # pivot() returns MultiIndex columns ordered (feature, ticker), i.e.
    # feature-major.
    pivoted = g_feat.pivot(index="timestamp", columns="ticker", values=feature_cols)

    # Re-order the columns to ticker-major (ticker, feature). Reshaping the
    # feature-major layout directly to (T, N, F) would scramble the ticker and
    # feature axes.
    ticker_order = sorted(pivoted.columns.get_level_values(1).unique())
    ticker_major = pd.MultiIndex.from_product([ticker_order, feature_cols])
    pivoted = pivoted.swaplevel(0, 1, axis=1).reindex(columns=ticker_major)

    # Mask: True where a real observation exists, False where it was NaN
    mask_df = ~pivoted.isna()

    # Fill NaN so the array is dense (values feed X; the mask keeps the truth)
    pivoted_filled = pivoted.ffill().bfill()

    T = len(pivoted_filled.index)
    N = len(ticker_order)
    F = len(feature_cols)

    # Ticker-major columns reshape cleanly to (T, N, F)
    X = pivoted_filled.values.reshape(T, N, F)
    M = mask_df.values.reshape(T, N, F).astype(int)

    # Rolling windows of shape (LOOKBACK, N, F); window i covers rows [i-LOOKBACK, i)
    cluster_tensors, cluster_masks = [], []
    for i in range(LOOKBACK, T):
        cluster_tensors.append(X[i-LOOKBACK:i])
        cluster_masks.append(M[i-LOOKBACK:i])

    if cluster_tensors:
        tensors[c_id] = np.array(cluster_tensors)
        masks[c_id]   = np.array(cluster_masks)

        print(f"Cluster {c_id}: tensor {tensors[c_id].shape}, mask {masks[c_id].shape}")


Cluster 0: tensor (773, 30, 22, 22), mask (773, 30, 22, 22)
Cluster 1: tensor (773, 30, 23, 22), mask (773, 30, 23, 22)
Cluster 2: tensor (773, 30, 20, 22), mask (773, 30, 20, 22)
Cluster 3: tensor (773, 30, 22, 22), mask (773, 30, 22, 22)
Cluster 4: tensor (773, 30, 31, 22), mask (773, 30, 31, 22)
Cluster 5: tensor (773, 30, 25, 22), mask (773, 30, 25, 22)
Cluster 6: tensor (773, 30, 33, 22), mask (773, 30, 33, 22)
Cluster 7: tensor (773, 30, 31, 22), mask (773, 30, 31, 22)
Cluster 8: tensor (773, 30, 30, 22), mask (773, 30, 30, 22)
Cluster 9: tensor (773, 30, 30, 22), mask (773, 30, 30, 22)
Cluster 10: tensor (773, 30, 34, 22), mask (773, 30, 34, 22)
Cluster 11: tensor (773, 30, 35, 22), mask (773, 30, 35, 22)
Cluster 12: tensor (773, 30, 34, 22), mask (773, 30, 34, 22)
Cluster 13: tensor (773, 30, 33, 22), mask (773, 30, 33, 22)
Cluster 14: tensor (773, 30, 32, 22), mask (773, 30, 32, 22)
Cluster 15: tensor (773, 30, 36, 22), mask (773, 30, 36, 22)
Cluster 16: tensor (773, 30, 38, 2

In [8]:
# Block 8 — A3C multi-stock per-cluster (theo nghiên cứu)

import torch
import torch.nn as nn
import torch.optim as optim

# --- Actor-Critic network ---
class A3CClusterNet(nn.Module):
    """Actor-critic network that scores every stock of a cluster independently.

    A single LSTM (shared by all stocks) reads each stock's feature sequence;
    its last output feeds an actor head (3 action logits) and a critic head
    (1 value).

    Args:
        n_stocks: number of stocks in the cluster (stored, not used by forward).
        n_features: size of the per-step feature vector.
        hidden_size: LSTM hidden size.
    """

    def __init__(self, n_stocks, n_features, hidden_size=64):
        super().__init__()
        self.n_stocks = n_stocks
        self.n_features = n_features
        self.hidden_size = hidden_size

        # LSTM over the time axis (T steps)
        self.lstm = nn.LSTM(input_size=n_features,
                            hidden_size=hidden_size,
                            batch_first=True)

        # Actor: per-stock policy (3 actions: -1, 0, 1)
        self.actor_head = nn.Linear(hidden_size, 3)

        # Critic: per-stock value
        self.critic_head = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Compute per-stock action logits and values.

        Args:
            x: tensor of shape (batch, T, N, F).

        Returns:
            (policy_logits, values) with shapes (batch, N, 3) and (batch, N).
        """
        B, T, N, F = x.shape

        # Bring the stock axis next to the batch axis BEFORE flattening, so
        # each LSTM sequence holds the T steps of exactly one stock. A plain
        # view(B * N, T, F) on the (B, T, N, F) layout would interleave time
        # steps and stocks. reshape() also copes with non-contiguous input.
        x = x.permute(0, 2, 1, 3).reshape(B * N, T, F)

        lstm_out, _ = self.lstm(x)   # (B*N, T, H)
        h = lstm_out[:, -1, :]       # output at the last time step, (B*N, H)

        # Actor logits
        policy_logits = self.actor_head(h)    # (B*N, 3)
        policy_logits = policy_logits.view(B, N, 3)

        # Critic values
        values = self.critic_head(h)          # (B*N, 1)
        values = values.view(B, N)

        return policy_logits, values


# --- Loss function ---
def a3c_loss(policy_logits, values, actions, rewards, gamma=0.99, entropy_beta=0.01):
    """Combined actor-critic loss with an entropy bonus.

    Args:
        policy_logits: (B, N, 3) action logits.
        values:        (B, N) critic estimates.
        actions:       (B, N) chosen action indices in {0, 1, 2}
                       (i.e. -1/0/1 shifted by one); any integer dtype.
        rewards:       (B, N) rewards, used directly as the return.
        gamma:         accepted for interface compatibility; currently unused
                       because no multi-step return is computed.
        entropy_beta:  weight of the entropy bonus.

    Returns:
        Scalar tensor: actor_loss + 0.5 * critic_loss - entropy_beta * entropy.
    """
    B, N, _ = policy_logits.shape

    # --- Critic loss ---
    returns = rewards  # one-step: the reward is the return (could be extended to TD(lambda))
    advantage = returns - values

    critic_loss = advantage.pow(2).mean()

    # --- Actor loss ---
    log_probs = torch.log_softmax(policy_logits, dim=-1)
    # gather() needs an int64 index; reshape() also accepts non-contiguous input.
    act_idx = actions.reshape(B, N, 1).long()
    chosen_log_probs = log_probs.gather(-1, act_idx).squeeze(-1)

    # The advantage is detached so the policy gradient does not flow into the critic.
    actor_loss = -(chosen_log_probs * advantage.detach()).mean()

    # --- Entropy (exploration bonus) ---
    entropy = -(torch.softmax(policy_logits, dim=-1) * log_probs).sum(-1).mean()

    total_loss = actor_loss + 0.5 * critic_loss - entropy_beta * entropy
    return total_loss


# --- Ví dụ huấn luyện 1 bước ---
def train_step(model, optimizer, batch_x, batch_actions, batch_rewards):
    """Run one optimisation step and return the scalar loss value.

    Forward pass -> a3c_loss -> clear old gradients -> backward -> optimizer
    step. The returned float is the loss computed before the update.
    """
    logits, state_values = model(batch_x)
    objective = a3c_loss(logits, state_values, batch_actions, batch_rewards)

    optimizer.zero_grad()
    objective.backward()
    optimizer.step()

    loss_value = objective.item()
    return loss_value


In [9]:
#Block 9 — DDPG cho phân bổ vốn theo cụm
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal

# ------------------------------
# Env: Cluster Allocation
# ------------------------------
class ClusterAllocEnv(gym.Env):
    """Environment that allocates capital across clusters, one timestamp per step.

    The observation is an (n_clusters, n_features) matrix of per-cluster
    feature means at the current timestamp; the action is a non-negative
    weight per cluster, normalised to sum to one.

    Note: ``reset`` returns only the observation and ``step`` returns the
    4-tuple ``(obs, reward, done, info)``; callers in this notebook rely on
    exactly these shapes.

    Args:
        df_clusters: frame with ``timestamp`` and ``cluster`` columns plus the
            columns named in ``cluster_features``; ``return`` and ``ROE``
            columns are used when present (zeros otherwise).
        cluster_features: column names averaged per cluster to form the state.
        transaction_cost: cost per unit of turnover.
        lam: weight of the FA score term in the reward.
    """

    def __init__(self, df_clusters, cluster_features, transaction_cost=0.0005, lam=0.1):
        super().__init__()
        self.df = df_clusters
        self.cluster_features = cluster_features
        self.transaction_cost = transaction_cost
        self.lam = lam

        self.cluster_ids = sorted(self.df["cluster"].unique())
        self.n_clusters = len(self.cluster_ids)
        self.t_steps = sorted(self.df["timestamp"].unique())
        self.current_step = 0
        # Weights held before the current step; all-zero means "fully in cash".
        self.prev_weights = np.zeros(self.n_clusters, dtype=np.float32)

        # Action = capital share per cluster in [0, 1], continuous
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(self.n_clusters,), dtype=np.float32)
        # State = per-cluster feature vector
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(self.n_clusters, len(cluster_features)),
                                                dtype=np.float32)

    def reset(self):
        """Rewind to the first timestamp, clear held weights, return the state."""
        self.current_step = 0
        self.prev_weights = np.zeros(self.n_clusters, dtype=np.float32)
        return self._get_state()

    def step(self, action):
        """Apply an allocation and advance one timestamp.

        Returns:
            (next_state, reward, done, info); ``next_state`` is all zeros once
            the episode is done.
        """
        # Normalise the action so the weights sum to 1
        w = np.clip(action, 0, 1)
        w = w / (w.sum() + 1e-8)

        # Returns and FA score at the current timestamp
        state = self._get_state()
        returns = self._get_returns()
        fa_score = self._get_fa_score()

        # Turnover = how much the allocation changed since the previous step.
        # (np.diff(w) would instead measure differences between neighbouring
        # clusters within one allocation, which is unrelated to trading cost.)
        turnover = np.sum(np.abs(w - self.prev_weights))

        # Reward = w·returns - cost * turnover + lam * w·fa_score
        reward = np.dot(w, returns) - self.transaction_cost * turnover + self.lam * np.dot(w, fa_score)

        self.prev_weights = w
        self.current_step += 1
        done = self.current_step >= len(self.t_steps) - 1
        next_state = self._get_state() if not done else np.zeros_like(state)

        return next_state, reward, done, {}

    def _rows_at_current_step(self):
        """Rows of ``self.df`` whose timestamp equals the current step's timestamp."""
        t = self.t_steps[self.current_step]
        return self.df[self.df["timestamp"] == t]

    def _cluster_mean(self, column):
        """Per-cluster mean of ``column`` now; 0.0 for a missing column or empty cluster."""
        g = self._rows_at_current_step()
        means = []
        for c in self.cluster_ids:
            sub = g[g["cluster"] == c]
            if column in sub.columns and not sub.empty:
                means.append(sub[column].mean())
            else:
                means.append(0.0)
        return np.array(means, dtype=np.float32)

    def _get_state(self):
        """(n_clusters, n_features) matrix of feature means; zeros for empty clusters."""
        g = self._rows_at_current_step()
        state = []
        for c in self.cluster_ids:
            sub = g[g["cluster"] == c]
            if sub.empty:
                state.append(np.zeros(len(self.cluster_features)))
            else:
                state.append(sub[self.cluster_features].mean().values)
        return np.array(state, dtype=np.float32)

    def _get_returns(self):
        """Mean of the ``return`` column per cluster at the current timestamp."""
        return self._cluster_mean("return")

    def _get_fa_score(self):
        """Mean of the ``ROE`` column per cluster at the current timestamp."""
        return self._cluster_mean("ROE")

# ------------------------------
# DDPG Agent
# ------------------------------
class Actor(nn.Module):
    """Maps the flattened cluster state to allocation weights that sum to 1.

    Args:
        n_clusters: number of clusters (= number of output weights).
        hidden: width of the hidden layer.
        n_features: features per cluster in the state; the default of 10
            matches the previously hard-coded assumption.
    """

    def __init__(self, n_clusters, hidden=64, n_features=10):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(n_clusters * n_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_clusters),
            nn.Softmax(dim=-1)   # guarantees the weights sum to 1
        )

    def forward(self, x):
        """x: (batch, n_clusters, n_features) -> (batch, n_clusters) weights."""
        # reshape() also accepts non-contiguous input, unlike view().
        return self.fc(x.reshape(x.size(0), -1))

class Critic(nn.Module):
    """Q-network: scores a (state, action) pair with a single value.

    Args:
        n_clusters: number of clusters (= length of the action vector).
        hidden: width of the hidden layer.
        n_features: features per cluster in the state; the default of 10
            matches the previously hard-coded assumption.
    """

    def __init__(self, n_clusters, hidden=64, n_features=10):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(n_clusters * n_features + n_clusters, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1)
        )

    def forward(self, state, action):
        """state: (batch, n_clusters, n_features), action: (batch, n_clusters) -> (batch, 1)."""
        # reshape() also accepts non-contiguous input, unlike view().
        x = torch.cat([state.reshape(state.size(0), -1), action], dim=-1)
        return self.fc(x)

# ------------------------------
# Training skeleton
# ------------------------------
def ddpg_train(env, actor, critic, episodes=10):
    """Minimal online actor-critic loop (training skeleton).

    For every environment step: act with the current actor, regress the
    critic onto the immediate reward (no bootstrapped target, no replay
    buffer), then update the actor to maximise the critic's score of its own
    action. Prints the total reward of each episode.

    Args:
        env: object whose ``reset()`` returns a state array and whose
            ``step(action)`` returns ``(next_state, reward, done, info)``.
        actor: module mapping a batched state to an action.
        critic: module mapping (batched state, batched action) to a value.
        episodes: number of episodes to run.
    """
    def _as_batch(array):
        # float32 tensor with a leading batch dimension of 1
        return torch.tensor(array, dtype=torch.float32).unsqueeze(0)

    optimizerA = optim.Adam(actor.parameters(), lr=1e-3)
    optimizerC = optim.Adam(critic.parameters(), lr=1e-3)

    for ep in range(episodes):
        state = _as_batch(env.reset())
        total_reward = 0
        done = False

        while not done:
            # Act without tracking gradients; the env expects a numpy vector.
            with torch.no_grad():
                action = actor(state).squeeze(0).numpy()
            observed, reward, done, _ = env.step(action)

            upcoming_state = _as_batch(observed)
            target = torch.tensor([reward], dtype=torch.float32)

            # Critic update: squared error against the immediate reward
            q_val = critic(state, _as_batch(action))
            lossC = torch.mean((q_val - target.detach()) ** 2)

            optimizerC.zero_grad()
            lossC.backward()
            optimizerC.step()

            # Actor update: maximise Q of the actor's own (differentiable) action
            lossA = -critic(state, actor(state)).mean()

            optimizerA.zero_grad()
            lossA.backward()
            optimizerA.step()

            total_reward += reward
            state = upcoming_state

        print(f"Episode {ep}, total reward {total_reward:.4f}")


In [10]:
# Block X — Build models_a3c cho tất cả clusters (sau Block 7 + Block 8)

# One freshly initialised (untrained) A3CClusterNet per cluster tensor.
models_a3c = {}

for cid, windows in tensors.items():
    # windows has 4 axes: (n_windows, lookback, n_stocks, n_features)
    _, _, stock_count, feature_count = windows.shape
    models_a3c[cid] = A3CClusterNet(n_stocks=stock_count, n_features=feature_count)

print(f"✅ Đã build {len(models_a3c)} models A3C cho {len(tensors)} clusters")


✅ Đã build 138 models A3C cho 138 clusters


In [1]:
# Block Export A3C Signals — Safe Memory Version

import torch
import pandas as pd
import gc

# Đảm bảo model A3C cho từng cluster đã có trong models_a3c
# models_a3c[cid] = A3CClusterNet(...)

# This cell depends on objects created by earlier cells; fail with an
# actionable message when it is run in a fresh kernel.
if "tensors" not in globals() or "models_a3c" not in globals():
    raise NameError(
        "`tensors` / `models_a3c` are not defined — run Block 7, Block 8 and "
        "Block X in this kernel before exporting signals."
    )

INFER_BATCH = 256   # windows per forward pass; lower it if memory is tight

all_signals = []

for cid, X_all in tensors.items():  # from Block 7
    if cid not in models_a3c:
        print(f"⚠️ Chưa có model cho cluster {cid}, bỏ qua.")
        continue

    model = models_a3c[cid]
    model.eval()

    # X_all: (n_windows, LOOKBACK, N, F)
    T, L, N, F = X_all.shape

    with torch.no_grad():
        # Batched inference: each window is scored independently by the model,
        # so batching yields the same per-window signals as one-at-a-time
        # calls without a forward pass (and a full gc run) per window.
        for start in range(0, T, INFER_BATCH):
            X_batch = torch.tensor(X_all[start:start + INFER_BATCH], dtype=torch.float32)  # (b, L, N, F)

            # Forward
            policy_logits, _ = model(X_batch)
            probs = torch.softmax(policy_logits, dim=-1)  # (b, N, 3)

            # Greedy action per stock, mapped [0,1,2] -> [-1,0,1]
            batch_actions = torch.argmax(probs, dim=-1).cpu().numpy() - 1  # (b, N)

            for offset, stock_actions in enumerate(batch_actions):
                all_signals.append({
                    "cluster": cid,
                    "t_index": start + offset,
                    "signals": stock_actions.tolist()
                })

            del X_batch, policy_logits, probs, batch_actions

    # One collection per cluster is enough to release the batch buffers.
    gc.collect()

df_signals_a3c = pd.DataFrame(all_signals)
print("✅ Export xong A3C signals, shape:", df_signals_a3c.shape)


NameError: name 'tensors' is not defined

In [None]:
import numpy as np
import pandas as pd

# --- Tham số ---
INIT_CAPITAL = 1_000_000_000   # 1 billion VND
TRANSACTION_COST = 0.0005      # 0.05% per trade, applied on both buys and sells

def backtest(df_clusters, signals_a3c, weights_ddpg, df_price):
    """Long-only backtest driven by per-stock signals and per-cluster weights.

    For every day except the last one in ``df_clusters``:
      1. sell (at the close) every held ticker whose signal is -1;
      2. for every ticker with signal +1, buy whole shares with an equal
         split of its cluster's allocation among that cluster's long signals;
      3. record NAV = cash + holdings valued at the close.

    Args:
        df_clusters: frame with ``ticker``, ``timestamp``, ``cluster`` columns.
        signals_a3c: ``{day: {ticker: signal}}`` with signal in {-1, 0, 1};
            ``day`` keys must match the values of ``df_clusters["timestamp"]``.
        weights_ddpg: ``{day: {cluster: weight}}``.
        df_price: frame with ``ticker``, ``timestamp``, ``close`` columns;
            ``timestamp`` may be strings or datetimes (parsed internally,
            the input frame is not modified).

    Returns:
        DataFrame with ``date`` and ``nav`` columns, one row per simulated day.
    """
    # Build the price lookup once instead of scanning the whole price frame
    # for every (ticker, day) query. Timestamps are parsed so that string
    # dates (as in the raw price table) match datetime days; without this no
    # price is ever found and no trade happens.
    price_table = df_price[["ticker", "timestamp", "close"]].copy()
    price_table["timestamp"] = pd.to_datetime(price_table["timestamp"])
    price_table = (
        price_table.dropna(subset=["close"])
        .drop_duplicates(subset=["ticker", "timestamp"], keep="first")
    )
    close_lookup = {
        (tk, ts): float(px)
        for tk, ts, px in zip(
            price_table["ticker"], price_table["timestamp"], price_table["close"]
        )
    }

    capital = INIT_CAPITAL
    portfolio_value = []
    positions = {}    # {ticker: number of shares held}
    last_close = {}   # {ticker: most recent close seen while held}

    all_days = sorted(df_clusters["timestamp"].unique())

    # The final day is not simulated (same range as the original loop).
    for day in all_days[:-1]:
        day_ts = pd.Timestamp(day)

        # --- 1. Signals and weights for the day ---
        signals_today = signals_a3c.get(day, {})
        weights_today = weights_ddpg.get(day, {})

        # --- 2. Ticker -> cluster mapping for the day ---
        today_clusters = df_clusters[df_clusters["timestamp"] == day][["ticker", "cluster"]]
        cluster_map = dict(zip(today_clusters["ticker"], today_clusters["cluster"]))

        # --- 3. Cash allocated to each cluster (based on cash before today's sells) ---
        cluster_alloc = {c: capital * w for c, w in weights_today.items()}

        # --- 4a. Close positions with signal -1 at today's close ---
        for ticker, sig in signals_today.items():
            if sig == -1 and ticker in positions:
                price = close_lookup.get((ticker, day_ts))
                if price is not None:
                    capital += positions.pop(ticker) * price * (1 - TRANSACTION_COST)

        # Number of long signals per cluster, counted once per day.
        long_counts = {}
        for ticker, sig in signals_today.items():
            if sig == 1 and ticker in cluster_map:
                cluster = cluster_map[ticker]
                long_counts[cluster] = long_counts.get(cluster, 0) + 1

        # --- 4b. Open / add to long positions with signal +1 ---
        for ticker, sig in signals_today.items():
            if sig != 1:
                continue
            cluster = cluster_map.get(ticker)
            if cluster is None:
                continue
            price = close_lookup.get((ticker, day_ts))
            if price is None or price <= 0:
                continue
            n_long = long_counts.get(cluster, 0)
            if n_long == 0:
                continue

            # The cluster's cash is split equally among its long signals.
            alloc = cluster_alloc.get(cluster, 0) / n_long
            n_shares = alloc // price
            if n_shares > 0:
                cost = n_shares * price * (1 + TRANSACTION_COST)
                if capital >= cost:
                    capital -= cost
                    positions[ticker] = positions.get(ticker, 0) + n_shares

        # --- 5. End-of-day NAV ---
        value = capital
        for ticker, qty in positions.items():
            price = close_lookup.get((ticker, day_ts))
            if price is None:
                # No bar today: value the holding at its last known close
                # instead of silently counting it as zero.
                price = last_close.get(ticker)
            else:
                last_close[ticker] = price
            if price is not None:
                value += qty * price
        portfolio_value.append({"date": day, "nav": value})

    df_nav = pd.DataFrame(portfolio_value, columns=["date", "nav"])
    return df_nav


# --- Ví dụ chạy thử ---
# df_nav = backtest(df_clusters, signals_a3c, weights_ddpg, df_all)
# print(df_nav.head())
