In [8]:
!git clone https://github.com/CryAndRRich/visenet.git

fatal: destination path 'visenet' already exists and is not an empty directory.


In [9]:
!pip install -r visenet/requirements.txt



In [10]:
import torch
print(torch.__version__)

2.3.1+cu121


In [11]:
import sys
sys.path.append("/content/visenet")

In [12]:
import pandas as pd

def preprocess_top30(df, top_n=30):
    """
    Chuẩn hóa data sao cho mỗi ngày có đúng top_n tickers.
    Nếu ngày nào thiếu thì fill từ ngày gần nhất (trước hoặc sau).

    df: DataFrame có cột ['ticker', 'timestamp', ... feature_cols ...]
    feature_cols: list tên các cột feature (open, high, low, close, ...).
    top_n: số lượng tickers cần giữ lại mỗi ngày.
    """
    result = []
    all_dates = sorted(df['timestamp'].unique())

    for i, date in enumerate(all_dates):
        top_df = df[df['timestamp'] == date]

        # Nếu đủ top_n thì ok
        if len(top_df) == top_n:
            result.append(top_df)
        else:
            # Cần fill thêm
            missing = top_n - len(top_df)
            # Tìm ngày gần nhất có data đủ
            j = i - 1
            filled = []
            while j >= 0 and len(filled) < missing:
                prev_day = result[j]  # đã được chuẩn hóa từ trước
                # lấy ticker chưa có trong ngày hiện tại
                candidates = prev_day[~prev_day['ticker'].isin(top_df['ticker'])]
                needed = candidates.head(missing - len(filled))
                filled.append(needed)
                j -= 1
            # nếu vẫn chưa đủ thì lấy từ ngày sau
            if len(filled) < missing:
                k = i + 1
                while k < len(all_dates) and len(filled) < missing:
                    next_day = df[df['timestamp'] == all_dates[k]].nlargest(top_n, 'vol')
                    candidates = next_day[~next_day['ticker'].isin(top_df['ticker'])]
                    needed = candidates.head(missing - len(filled))
                    filled.append(needed)
                    k += 1
            # gộp lại
            filled_df = pd.concat(filled) if filled else pd.DataFrame(columns=top_df.columns)
            final_day = pd.concat([top_df, filled_df]).head(top_n)
            final_day['timestamp'] = date  # đảm bảo timestamp đúng
            result.append(final_day)

    df_out = pd.concat(result).sort_values(['timestamp', 'ticker']).reset_index(drop=True)
    df_out.to_csv("visenet/data/output/top_30_stocks_after_train_processed", index=False)
    return df_out

file_path = "visenet/data/output/top_30_stocks_after_train.csv"
df = pd.read_csv(file_path)
data_fixed = preprocess_top30(df, top_n=30)


  return datetime.utcnow().replace(tzinfo=utc)


In [13]:
import pandas as pd
import numpy as np
import time

from stable_baselines3 import A2C, PPO, TD3
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

from env.EnvMultipleStock_train import StockEnvTrain
from env.EnvMultipleStock_validation import StockEnvValidation
from env.EnvMultipleStock_trade import StockEnvTrade
from config import config
from preprocess.data_split import data_split

def train_A2C(env_train, model_name, timesteps=25000):
    """Train A2C model"""
    start = time.time()
    model = A2C("MlpPolicy", env_train, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()
    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    print("Training time (A2C): ", (end - start) / 60, " minutes")
    return model


def train_TD3(env_train, model_name, timesteps=10000):
    """Train TD3 model (thay cho DDPG)"""
    n_actions = env_train.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
    )

    start = time.time()
    model = TD3("MlpPolicy", env_train, action_noise=action_noise, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()
    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    print("Training time (TD3): ", (end - start) / 60, " minutes")
    return model


def train_PPO(env_train, model_name, timesteps=50000):
    """Train PPO model"""
    start = time.time()
    model = PPO("MlpPolicy", env_train, ent_coef=0.005, verbose=0)
    model.learn(total_timesteps=timesteps)
    end = time.time()
    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    print("Training time (PPO): ", (end - start) / 60, " minutes")
    return model

def DRL_prediction(df, model, name, last_state, iter_num,
                   unique_trade_date, rebalance_window,
                   turbulence_threshold, initial):
    """Make predictions based on trained model"""

    trade_data = data_split(df,
                            start=unique_trade_date[iter_num - rebalance_window],
                            end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    obs_trade = env_trade.reset()

    for i in range(len(trade_data.index.unique())):
        action, _ = model.predict(obs_trade)
        obs_trade, _, _, _ = env_trade.step(action)
        if i == (len(trade_data.index.unique()) - 2):
            last_state = env_trade.envs[0].state

    if isinstance(last_state, (list, np.ndarray)):
        df_last_state = pd.DataFrame([last_state])   # 1 row
    else:
        df_last_state = pd.DataFrame({"last_state": [last_state]})  # scalar -> bọc lại

    df_last_state.to_csv(f"results/last_state_{name}_{iter_num}.csv", index=False)

    return last_state


def DRL_validation(model, test_data, test_env, test_obs) -> None:
    """Validation loop"""
    for i in range(len(test_data.index.unique())):
        action, _ = model.predict(test_obs)
        test_obs, _, _, _ = test_env.step(action)


def get_validation_sharpe(iteration):
    """Calculate Sharpe ratio from validation results"""
    df_total_value = pd.read_csv(
        f"results/account_value_validation_{iteration}.csv", index_col=0
    )
    df_total_value.columns = ["account_value_train"]
    df_total_value["daily_return"] = df_total_value.pct_change(1)
    sharpe = (4 ** 0.5) * df_total_value["daily_return"].mean() / \
             df_total_value["daily_return"].std()
    return sharpe

def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window) -> None:
    """Ensemble Strategy combining PPO, A2C and TD3"""
    print("============Start Ensemble Strategy============")
    last_state_ensemble = []

    ppo_sharpe_list = []
    td3_sharpe_list = []
    a2c_sharpe_list = []
    model_use = []

    insample_turbulence = df[(df.timestamp < 20240101) & (df.timestamp >= 20181009)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["timestamp"])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")

        if i - rebalance_window - validation_window == 0:
            initial = True
        else:
            initial = False

        # set turbulence threshold
        end_date_index = df.index[df["timestamp"] ==
                                  unique_trade_date[i - rebalance_window - validation_window]].to_list()[-1]
        print(df.index[df["timestamp"] ==
                                  unique_trade_date[i - rebalance_window - validation_window]].to_list())
        end_date_index = int(end_date_index)
        start_date_index = end_date_index - validation_window * 30 + 1
        historical_turbulence = df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=["timestamp"])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            turbulence_threshold = insample_turbulence_threshold
        else:
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
        print("turbulence_threshold: ", turbulence_threshold)

        # training env
        train = data_split(df, start=20181009,
                           end=unique_trade_date[i - rebalance_window - validation_window])
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        # validation env
        validation = data_split(df,
                                start=unique_trade_date[i - rebalance_window - validation_window],
                                end=unique_trade_date[i - rebalance_window])
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()

        # Train A2C
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, f"A2C_30k_dow_{i}", timesteps=30000)
        DRL_validation(model_a2c, validation, env_val, obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        # Train PPO
        print("======PPO Training========")
        model_ppo = train_PPO(env_train, f"PPO_100k_dow_{i}", timesteps=100000)
        DRL_validation(model_ppo, validation, env_val, obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        # Train TD3
        print("======TD3 Training========")
        model_td3 = train_TD3(env_train, f"TD3_10k_dow_{i}", timesteps=10000)
        DRL_validation(model_td3, validation, env_val, obs_val)
        sharpe_td3 = get_validation_sharpe(i)
        print("TD3 Sharpe Ratio: ", sharpe_td3)

        ppo_sharpe_list.append(sharpe_ppo)
        a2c_sharpe_list.append(sharpe_a2c)
        td3_sharpe_list.append(sharpe_td3)

        # model selection
        if (sharpe_ppo >= sharpe_a2c) and (sharpe_ppo >= sharpe_td3):
            model_ensemble = model_ppo
            model_use.append("PPO")
        elif (sharpe_a2c > sharpe_ppo) and (sharpe_a2c > sharpe_td3):
            model_ensemble = model_a2c
            model_use.append("A2C")
        else:
            model_ensemble = model_td3
            model_use.append("TD3")

        # Trading
        print("======Trading from: ", unique_trade_date[i - rebalance_window], "to ", unique_trade_date[i])
        last_state_ensemble = DRL_prediction(df, model_ensemble, "ensemble",
                                             last_state_ensemble, i,
                                             unique_trade_date,
                                             rebalance_window,
                                             turbulence_threshold,
                                             initial)

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")


In [7]:
import os

def run_model() -> None:
    """Train the model."""
    os.makedirs("results", exist_ok=True)

    data = data_fixed

    data['timestamp'] = data['timestamp'].astype(int)

    unique_trade_date = data[(data.timestamp > 20240101)&(data.timestamp <= 20250829)].timestamp.unique()
    print(unique_trade_date)

    rebalance_window = 63
    validation_window = 63

    ## Ensemble Strategy
    run_ensemble_strategy(df=data,
                          unique_trade_date=unique_trade_date,
                          rebalance_window=rebalance_window,
                          validation_window=validation_window)

    #_logger.info(f"saving model version: {_version}")

if __name__ == "__main__":
    run_model()




[20240102 20240103 20240104 20240105 20240108 20240109 20240110 20240111
 20240112 20240115 20240116 20240117 20240118 20240119 20240122 20240123
 20240124 20240125 20240126 20240129 20240130 20240131 20240201 20240202
 20240205 20240206 20240207 20240215 20240216 20240219 20240220 20240221
 20240222 20240223 20240226 20240227 20240228 20240229 20240301 20240304
 20240305 20240306 20240307 20240308 20240311 20240312 20240313 20240314
 20240315 20240318 20240319 20240320 20240321 20240322 20240325 20240326
 20240327 20240328 20240329 20240401 20240402 20240403 20240404 20240405
 20240408 20240409 20240410 20240411 20240412 20240415 20240416 20240417
 20240419 20240422 20240423 20240424 20240425 20240426 20240502 20240503
 20240506 20240507 20240508 20240509 20240510 20240513 20240514 20240515
 20240516 20240517 20240520 20240521 20240522 20240523 20240524 20240527
 20240528 20240529 20240530 20240531 20240603 20240604 20240605 20240606
 20240607 20240610 20240611 20240612 20240613 20240

  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.3994225104649862  minutes


  return datetime.utcnow().replace(tzinfo=utc)


A2C Sharpe Ratio:  0.3143443451496732


  return datetime.utcnow().replace(tzinfo=utc)


Training time (PPO):  4.293164483706156  minutes


  return datetime.utcnow().replace(tzinfo=utc)


PPO Sharpe Ratio:  0.10368742068369263


  return datetime.utcnow().replace(tzinfo=utc)


Training time (TD3):  6.540156702200572  minutes


  return datetime.utcnow().replace(tzinfo=utc)


TD3 Sharpe Ratio:  0.30245662066653434


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1000000
end_total_asset: 1033787.2472019703
total_reward: 33787.24720197031
total_cost:  2223.805102745839
total trades:  982
Sharpe:  0.11546593171039586
[41160, 41161, 41162, 41163, 41164, 41165, 41166, 41167, 41168, 41169, 41170, 41171, 41172, 41173, 41174, 41175, 41176, 41177, 41178, 41179, 41180, 41181, 41182, 41183, 41184, 41185, 41186, 41187, 41188, 41189]
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.4090196768442789  minutes


  return datetime.utcnow().replace(tzinfo=utc)


A2C Sharpe Ratio:  0.0006446320945584595


  return datetime.utcnow().replace(tzinfo=utc)


Training time (PPO):  4.240950612227122  minutes
PPO Sharpe Ratio:  -0.7249628103396681


  return datetime.utcnow().replace(tzinfo=utc)


Training time (TD3):  6.61533952554067  minutes
TD3 Sharpe Ratio:  0.10958998757895021


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1033787.2472019703
end_total_asset: 1021566.1541627331
total_reward: -12221.093039237196
total_cost:  0.0
total trades:  1054
Sharpe:  -0.019779899841304585
[43050, 43051, 43052, 43053, 43054, 43055, 43056, 43057, 43058, 43059, 43060, 43061, 43062, 43063, 43064, 43065, 43066, 43067, 43068, 43069, 43070, 43071, 43072, 43073, 43074, 43075, 43076, 43077, 43078, 43079]
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.3913861393928528  minutes
A2C Sharpe Ratio:  -0.18071774099223747


  return datetime.utcnow().replace(tzinfo=utc)


Training time (PPO):  4.255435029665629  minutes
PPO Sharpe Ratio:  -0.36837420871197923


  return datetime.utcnow().replace(tzinfo=utc)


Training time (TD3):  6.564013270537059  minutes
TD3 Sharpe Ratio:  -0.18391699077131762


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1021566.1541627331
end_total_asset: 993733.86460625
total_reward: -27832.289556483156
total_cost:  6358.1226
total trades:  1089
Sharpe:  -0.11026389480938649
[44940, 44941, 44942, 44943, 44944, 44945, 44946, 44947, 44948, 44949, 44950, 44951, 44952, 44953, 44954, 44955, 44956, 44957, 44958, 44959, 44960, 44961, 44962, 44963, 44964, 44965, 44966, 44967, 44968, 44969]
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.4046669324239096  minutes
A2C Sharpe Ratio:  0.06526623944064576


  return datetime.utcnow().replace(tzinfo=utc)


Training time (PPO):  4.290662590662638  minutes
PPO Sharpe Ratio:  -0.19538074796320443


  return datetime.utcnow().replace(tzinfo=utc)


Training time (TD3):  6.5640443682670595  minutes
TD3 Sharpe Ratio:  -0.031820265301161


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 993733.86460625
end_total_asset: 1044028.0657650001
total_reward: 50294.201158750104
total_cost:  143.11165
total trades:  780
Sharpe:  0.1324236092969825
[46830, 46831, 46832, 46833, 46834, 46835, 46836, 46837, 46838, 46839, 46840, 46841, 46842, 46843, 46844, 46845, 46846, 46847, 46848, 46849, 46850, 46851, 46852, 46853, 46854, 46855, 46856, 46857, 46858, 46859]
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.4058732271194458  minutes
A2C Sharpe Ratio:  0.5785673751699403


  return datetime.utcnow().replace(tzinfo=utc)


Training time (PPO):  4.34198944568634  minutes
PPO Sharpe Ratio:  -0.23532036989139835


  return datetime.utcnow().replace(tzinfo=utc)


Training time (TD3):  6.540973428885142  minutes
TD3 Sharpe Ratio:  -0.011581683593040798


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1044028.0657650001
end_total_asset: 1453218.69921875
total_reward: 409190.633454
total_cost:  3910.6892
total trades:  802
Sharpe:  0.4669219390395922
Ensemble Strategy took:  61.33148408333461  minutes


  return datetime.utcnow().replace(tzinfo=utc)


In [15]:
!zip -r results.zip results

  adding: results/ (stored 0%)
  adding: results/account_value_trade_ensemble_189.csv (deflated 60%)
  adding: results/account_value_validation_315.csv (deflated 73%)
  adding: results/account_value_trade_ensemble_378.csv (deflated 65%)
  adding: results/account_value_trade_ensemble_315.png (deflated 9%)
  adding: results/account_rewards_trade_ensemble_126.csv (deflated 48%)
  adding: results/account_rewards_trade_ensemble_189.csv (deflated 57%)
  adding: results/account_rewards_trade_ensemble_252.csv (deflated 49%)
  adding: results/account_value_validation_189.png (deflated 9%)
  adding: results/account_value_validation_378.csv (deflated 80%)
  adding: results/last_state_ensemble_189.csv (deflated 49%)
  adding: results/account_rewards_trade_ensemble_315.csv (deflated 56%)
  adding: results/account_value_train.csv (deflated 54%)
  adding: results/account_value_validation_252.csv (deflated 65%)
  adding: results/account_value_validation_315.png (deflated 8%)
  adding: results/account_

In [16]:
!zip -r trained_models.zip trained_models

  adding: trained_models/ (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/ (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/TD3_10k_dow_252.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/TD3_10k_dow_378.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/A2C_30k_dow_252.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/PPO_100k_dow_189.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/A2C_30k_dow_189.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/A2C_30k_dow_126.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/PPO_100k_dow_252.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/TD3_10k_dow_315.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/TD3_10k_dow_189.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/PPO_100k_dow_378.zip (stored 0%)
  adding: trained_models/2025-09-02 01:44:37.827718/PP