In [1]:
# Fetch the visenet project sources into ./visenet (later cells read code and data from there).
!git clone https://github.com/CryAndRRich/visenet.git

Cloning into 'visenet'...
remote: Enumerating objects: 151, done.[K
remote: Counting objects: 100% (111/111), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 151 (delta 54), reused 30 (delta 12), pack-reused 40 (from 1)[K
Receiving objects: 100% (151/151), 144.48 MiB | 31.26 MiB/s, done.
Resolving deltas: 100% (67/67), done.


In [2]:
# Install the dependencies listed in the cloned repository's requirements file.
!pip install -r visenet/requirements.txt

Collecting torch==2.3.1 (from -r visenet/requirements.txt (line 1))
  Downloading torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl.metadata (26 kB)
Collecting sb3-contrib (from -r visenet/requirements.txt (line 5))
  Downloading sb3_contrib-2.7.0-py3-none-any.whl.metadata (4.1 kB)
Collecting shimmy>=2.0 (from -r visenet/requirements.txt (line 6))
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Collecting stable-baselines3[extra] (from -r visenet/requirements.txt (line 4))
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.3.1->-r visenet/requirements.txt (line 1))
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.3.1->-r visenet/requirements.txt (line 1))
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.1

In [3]:
import sys
# Make the cloned repository importable so `env.*` and `config` resolve in later cells.
# NOTE(review): absolute path assumes the clone lives under /content — confirm for other hosts.
sys.path.append("/content/visenet")

In [11]:

import pandas as pd

def _pick_fill_rows(donor_day, used_tickers, n_needed):
    """Return up to ``n_needed`` rows of ``donor_day`` whose ticker is not in ``used_tickers`` (one row per ticker)."""
    fresh = donor_day[~donor_day['ticker'].isin(used_tickers)]
    return fresh.drop_duplicates(subset='ticker').head(n_needed)


def _donor_days(df, all_dates, normalized_days, i, top_n):
    """Yield candidate donor days for date index ``i``: earlier normalized days (nearest first), then later raw days."""
    yield from reversed(normalized_days)
    for later_date in all_dates[i + 1:]:
        yield df[df['timestamp'] == later_date].nlargest(top_n, 'vol')


def preprocess_top30(df, feature_cols, top_n=30,
                     output_path="visenet/data/output/top_30_stocks_after_train_processed"):
    """
    Normalize the data so that every day holds exactly ``top_n`` tickers.

    For each distinct ``timestamp`` the ``top_n`` rows with the largest ``vol``
    are kept. A day with fewer rows is padded with rows borrowed from the
    nearest earlier (already normalized) days and, if that is not enough, from
    later days. Borrowed rows keep their own feature values but get the
    current day's ``timestamp``.

    Parameters
    ----------
    df : DataFrame with at least the columns ``ticker``, ``timestamp`` and ``vol``.
        It is not modified.
    feature_cols : list of feature column names. Currently unused; kept so
        existing callers keep working.
    top_n : number of tickers to keep per day.
    output_path : where the result is written as CSV (no index). The default
        is the historical location, which has no ``.csv`` extension. Pass
        ``None`` to skip writing.

    Returns
    -------
    DataFrame sorted by ``timestamp`` then ``ticker`` with a fresh 0..n-1
    index. A day stays short only if all donor days together cannot supply
    enough distinct tickers.
    """
    all_dates = sorted(df['timestamp'].unique())
    normalized_days = []  # one entry per already-processed date, in date order

    for i, date in enumerate(all_dates):
        day_df = df[df['timestamp'] == date]

        # Keep the top_n tickers of the day, ranked by volume.
        top_df = day_df.nlargest(top_n, 'vol')
        missing = top_n - len(top_df)
        if missing == 0:
            normalized_days.append(top_df)
            continue

        # Count borrowed ROWS (not borrowed frames) and remember every ticker
        # already present, so no ticker is added twice to the same day.
        used_tickers = set(top_df['ticker'])
        filled = []
        n_filled = 0
        for donor in _donor_days(df, all_dates, normalized_days, i, top_n):
            if n_filled >= missing:
                break
            rows = _pick_fill_rows(donor, used_tickers, missing - n_filled)
            if rows.empty:
                continue
            filled.append(rows)
            used_tickers.update(rows['ticker'])
            n_filled += len(rows)

        # assign() returns a new frame, so the borrowed rows' source is untouched.
        final_day = pd.concat([top_df, *filled]).head(top_n).assign(timestamp=date)
        normalized_days.append(final_day)

    # pd.concat raises on an empty list; an empty input yields an empty result.
    df_out = pd.concat(normalized_days) if normalized_days else df.iloc[0:0]
    df_out = df_out.sort_values(['timestamp', 'ticker']).reset_index(drop=True)
    if output_path is not None:
        df_out.to_csv(output_path, index=False)
    return df_out
# Load the raw per-ticker table from the cloned repository.
file_path = "visenet/data/output/top_30_stocks_after_train.csv"
df = pd.read_csv(file_path)
# Feature column names handed to preprocess_top30.
feature_cols = ['open','high','low','close','vol','liq','rsi','macd','cci','adx','turbulence']
# `data_fixed` is the normalized table (30 tickers per day) that run_model() reads later.
data_fixed = preprocess_top30(df, feature_cols, top_n=30)

  return datetime.utcnow().replace(tzinfo=utc)


In [7]:
# ================================================================
# Common libraries
# ================================================================
import pandas as pd
import numpy as np
import time
import gym

# ================================================================
# RL models from stable-baselines3
# ================================================================
from stable_baselines3 import A2C, PPO, TD3, SAC
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

# ================================================================
# Customized env
# ================================================================
from env.EnvMultipleStock_train import StockEnvTrain
from env.EnvMultipleStock_validation import StockEnvValidation
from env.EnvMultipleStock_trade import StockEnvTrade
from config import config

def data_split(df, start, end):
    """Return the rows of ``df`` with ``start <= timestamp < end``.

    The slice is sorted by ``timestamp`` then ``ticker`` and re-indexed so that
    all rows of the same day share one integer label (0 for the first day,
    1 for the second, ...). ``df`` itself is left unchanged.
    """
    in_window = (df.timestamp >= start) & (df.timestamp < end)
    window = df[in_window].sort_values(['timestamp', 'ticker'], ignore_index=True)
    day_codes, _ = window.timestamp.factorize()
    window.index = day_codes
    return window

# ================================================================
# Training functions
# ================================================================
def train_A2C(env_train, model_name, timesteps=25000):
    """Fit an A2C agent on ``env_train`` and save it.

    The model is saved to ``config.TRAINED_MODEL_DIR/<model_name>``, the
    wall-clock time spent in ``learn`` is printed in minutes, and the trained
    model is returned.
    """
    agent = A2C("MlpPolicy", env_train, verbose=0)

    t0 = time.time()
    agent.learn(total_timesteps=timesteps)
    t1 = time.time()

    save_path = f"{config.TRAINED_MODEL_DIR}/{model_name}"
    agent.save(save_path)
    print("Training time (A2C): ", (t1 - t0) / 60, " minutes")
    return agent


def train_TD3(env_train, model_name, timesteps=10000):
    """Fit a TD3 agent (used in place of DDPG) on ``env_train`` and save it.

    Exploration uses Ornstein-Uhlenbeck action noise with zero mean and
    sigma 0.1 on every action dimension. The model is saved to
    ``config.TRAINED_MODEL_DIR/<model_name>``, the time spent in ``learn`` is
    printed in minutes, and the trained model is returned.
    """
    action_dim = env_train.action_space.shape[-1]
    ou_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(action_dim),
        sigma=np.full(action_dim, 0.1),
    )
    agent = TD3("MlpPolicy", env_train, action_noise=ou_noise, verbose=0)

    t0 = time.time()
    agent.learn(total_timesteps=timesteps)
    t1 = time.time()

    save_path = f"{config.TRAINED_MODEL_DIR}/{model_name}"
    agent.save(save_path)
    print("Training time (TD3): ", (t1 - t0) / 60, " minutes")
    return agent


def train_PPO(env_train, model_name, timesteps=50000):
    """Fit a PPO agent (entropy coefficient 0.005) on ``env_train`` and save it.

    The model is saved to ``config.TRAINED_MODEL_DIR/<model_name>``, the time
    spent in ``learn`` is printed in minutes, and the trained model is
    returned.
    """
    agent = PPO("MlpPolicy", env_train, ent_coef=0.005, verbose=0)

    t0 = time.time()
    agent.learn(total_timesteps=timesteps)
    t1 = time.time()

    save_path = f"{config.TRAINED_MODEL_DIR}/{model_name}"
    agent.save(save_path)
    print("Training time (PPO): ", (t1 - t0) / 60, " minutes")
    return agent

# ================================================================
# DRL prediction / validation
# ================================================================
def DRL_prediction(df, model, name, last_state, iter_num,
                   unique_trade_date, rebalance_window,
                   turbulence_threshold, initial):
    """Trade one rebalance window with a trained model and return the carried-over state.

    Parameters
    ----------
    df : full data table; sliced with ``data_split``.
    model : trained agent exposing ``predict(obs)``.
    name : label used in the trade environment and in the output file name.
    last_state : state handed to the trade environment as ``previous_state``.
    iter_num : index into ``unique_trade_date`` marking the END of the window.
    unique_trade_date : ordered sequence of trading dates.
    rebalance_window : window length, counted in entries of ``unique_trade_date``.
    turbulence_threshold, initial : forwarded unchanged to ``StockEnvTrade``.

    Returns
    -------
    The environment state captured on the second-to-last step of the window,
    or the incoming ``last_state`` if the window has fewer than two days.

    Side effect: writes ``results/last_state_<name>_<iter_num>.csv``.
    """
    # Trading window is [iter_num - rebalance_window, iter_num) in date order.
    trade_data = data_split(df,
                            start=unique_trade_date[iter_num - rebalance_window],
                            end=unique_trade_date[iter_num])
    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    obs_trade = env_trade.reset()

    # data_split labels every row of a day with the same index value, so the
    # number of distinct labels is the number of trading days. Computed once
    # instead of twice per step.
    n_days = len(trade_data.index.unique())
    for step in range(n_days):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        # Snapshot on the second-to-last step.
        # NOTE(review): presumably taken before the final step because the
        # vectorized env resets once the episode ends — confirm against StockEnvTrade.
        if step == (n_days - 2):
            last_state = env_trade.envs[0].state

    if isinstance(last_state, (list, np.ndarray)):
        df_last_state = pd.DataFrame([last_state])   # 1 row
    else:
        df_last_state = pd.DataFrame({"last_state": [last_state]})  # wrap a scalar in a one-cell frame

    df_last_state.to_csv(f"results/last_state_{name}_{iter_num}.csv", index=False)

    return last_state


def DRL_validation(model, test_data, test_env, test_obs) -> None:
    """Step ``test_env`` with ``model``'s actions, once per distinct index label of ``test_data``.

    Nothing is returned; any results are whatever the environment records
    while being stepped.
    """
    n_steps = len(test_data.index.unique())
    obs = test_obs
    for _step in range(n_steps):
        action, _model_state = model.predict(obs)
        obs, _rewards, _dones, _info = test_env.step(action)


def get_validation_sharpe(iteration):
    """Return the Sharpe-style score of a validation run.

    Reads ``results/account_value_validation_<iteration>.csv`` (first column
    is the index, the remaining single column is the account value), computes
    period-over-period returns, and returns ``sqrt(4) * mean / std`` of them.
    """
    account = pd.read_csv(
        f"results/account_value_validation_{iteration}.csv", index_col=0
    )
    account.columns = ["account_value_train"]
    account["daily_return"] = account.pct_change(1)

    returns = account["daily_return"]
    scale = 4 ** 0.5
    return scale * returns.mean() / returns.std()

# ================================================================
# Ensemble strategy
# ================================================================
def run_ensemble_strategy(df, unique_trade_date, rebalance_window, validation_window,
                          train_start=20181009, insample_end=20240101,
                          n_stocks=30) -> None:
    """Walk-forward ensemble: retrain A2C, PPO and TD3 each window, trade with the best.

    For every rebalance step the three agents are trained on all data from
    ``train_start`` up to the start of the validation window, scored by
    ``get_validation_sharpe`` on the validation window, and the best-scoring
    one trades the following ``rebalance_window`` dates.

    Parameters
    ----------
    df : data table with ``timestamp``, ``ticker`` and ``turbulence`` columns.
        Its index labels are used as row positions, so it must have a
        0..n-1 index.
    unique_trade_date : ordered sequence of dates to validate/trade on.
    rebalance_window : number of entries of ``unique_trade_date`` between retrains.
    validation_window : number of entries of ``unique_trade_date`` used for validation.
    train_start : first timestamp (inclusive) of the training / in-sample data.
    insample_end : timestamp (exclusive) ending the in-sample turbulence period.
    n_stocks : rows per trading day in ``df``; sizes the turbulence look-back.

    Side effects: prints progress, saves models through the ``train_*``
    helpers and writes result files through ``DRL_prediction``.
    """
    print("============Start Ensemble Strategy============")
    last_state_ensemble = []

    # One row per day, then the 90th percentile of in-sample turbulence.
    insample_turbulence = df[(df.timestamp < insample_end) & (df.timestamp >= train_start)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["timestamp"])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")

        # Window boundaries for this step: train | validation | trade.
        train_end_date = unique_trade_date[i - rebalance_window - validation_window]
        validation_end_date = unique_trade_date[i - rebalance_window]

        # True only on the first step of the walk.
        initial = (i - rebalance_window - validation_window == 0)

        # Turbulence threshold: look back validation_window days from the end
        # of training. The start is clamped at 0 because a negative iloc
        # start would silently count from the end of the frame.
        end_date_index = int(df.index[df["timestamp"] == train_end_date].to_list()[-1])
        start_date_index = max(0, end_date_index - validation_window * n_stocks + 1)
        historical_turbulence = df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=["timestamp"])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # Recent market is turbulent: use the stricter 90th-percentile cap.
            turbulence_threshold = insample_turbulence_threshold
        else:
            # Otherwise use the in-sample maximum (quantile 1).
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)
        print("turbulence_threshold: ", turbulence_threshold)

        # training env
        train = data_split(df, start=train_start, end=train_end_date)
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        # validation env (shared by all three agents)
        validation = data_split(df, start=train_end_date, end=validation_end_date)
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()

        # Train A2C
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, f"A2C_30k_dow_{i}", timesteps=30000)
        DRL_validation(model_a2c, validation, env_val, obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        # Train PPO
        print("======PPO Training========")
        model_ppo = train_PPO(env_train, f"PPO_100k_dow_{i}", timesteps=100000)
        DRL_validation(model_ppo, validation, env_val, obs_val)
        sharpe_ppo = get_validation_sharpe(i)
        print("PPO Sharpe Ratio: ", sharpe_ppo)

        # Train TD3
        print("======TD3 Training========")
        model_td3 = train_TD3(env_train, f"TD3_10k_dow_{i}", timesteps=10000)
        DRL_validation(model_td3, validation, env_val, obs_val)
        sharpe_td3 = get_validation_sharpe(i)
        print("TD3 Sharpe Ratio: ", sharpe_td3)

        # Model selection: PPO wins ties; A2C must be strictly best; TD3 is
        # the fallback (also taken when a score is NaN and comparisons fail).
        if (sharpe_ppo >= sharpe_a2c) and (sharpe_ppo >= sharpe_td3):
            model_ensemble = model_ppo
        elif (sharpe_a2c > sharpe_ppo) and (sharpe_a2c > sharpe_td3):
            model_ensemble = model_a2c
        else:
            model_ensemble = model_td3

        # Trading
        print("======Trading from: ", validation_end_date, "to ", unique_trade_date[i])
        last_state_ensemble = DRL_prediction(df, model_ensemble, "ensemble",
                                             last_state_ensemble, i,
                                             unique_trade_date,
                                             rebalance_window,
                                             turbulence_threshold,
                                             initial)

    end = time.time()
    print("Ensemble Strategy took: ", (end - start) / 60, " minutes")

In [9]:
def run_A2C_strategy(df, unique_trade_date, rebalance_window, validation_window,
                     train_start=20181009, insample_end=20240101,
                     n_stocks=30) -> None:
    """Walk-forward strategy using A2C only (no ensemble).

    For every rebalance step an A2C agent is trained on all data from
    ``train_start`` up to the start of the validation window, scored by
    ``get_validation_sharpe`` on the validation window, and then trades the
    following ``rebalance_window`` dates.

    Parameters
    ----------
    df : data table with ``timestamp``, ``ticker`` and ``turbulence`` columns.
        Its index labels are used as row positions, so it must have a
        0..n-1 index.
    unique_trade_date : ordered sequence of dates to validate/trade on.
    rebalance_window : number of entries of ``unique_trade_date`` between retrains.
    validation_window : number of entries of ``unique_trade_date`` used for validation.
    train_start : first timestamp (inclusive) of the training / in-sample data.
    insample_end : timestamp (exclusive) ending the in-sample turbulence period.
    n_stocks : rows per trading day in ``df``; sizes the turbulence look-back.

    Side effects: prints progress, saves models through ``train_A2C`` and
    writes result files through ``DRL_prediction``.
    """
    print("============Start A2C Strategy============")
    last_state = []

    # In-sample turbulence: one row per day, then its 90th percentile.
    insample_turbulence = df[(df.timestamp < insample_end) & (df.timestamp >= train_start)]
    insample_turbulence = insample_turbulence.drop_duplicates(subset=["timestamp"])
    insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)

    start = time.time()
    for i in range(rebalance_window + validation_window, len(unique_trade_date), rebalance_window):
        print("============================================")

        # Window boundaries for this step: train | validation | trade.
        train_end_date = unique_trade_date[i - rebalance_window - validation_window]
        validation_end_date = unique_trade_date[i - rebalance_window]

        # True only on the first step of the walk.
        initial = (i - rebalance_window - validation_window == 0)

        # Turbulence threshold: look back validation_window days from the end
        # of training. The start is clamped at 0 because a negative iloc
        # start would silently count from the end of the frame.
        end_date_index = int(df.index[df["timestamp"] == train_end_date].to_list()[-1])
        start_date_index = max(0, end_date_index - validation_window * n_stocks + 1)
        historical_turbulence = df.iloc[start_date_index:(end_date_index + 1), :]
        historical_turbulence = historical_turbulence.drop_duplicates(subset=["timestamp"])
        historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

        if historical_turbulence_mean > insample_turbulence_threshold:
            # Recent market is turbulent: use the stricter 90th-percentile cap.
            turbulence_threshold = insample_turbulence_threshold
        else:
            # Otherwise use the in-sample maximum (quantile 1).
            turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, 1)

        print("turbulence_threshold: ", turbulence_threshold)

        # training env
        train = data_split(df, start=train_start, end=train_end_date)
        env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

        # validation env
        validation = data_split(df, start=train_end_date, end=validation_end_date)
        env_val = DummyVecEnv([lambda: StockEnvValidation(validation,
                                                          turbulence_threshold=turbulence_threshold,
                                                          iteration=i)])
        obs_val = env_val.reset()

        # Train A2C
        print("======A2C Training========")
        model_a2c = train_A2C(env_train, f"A2C_30k_dow_{i}", timesteps=30000)
        DRL_validation(model_a2c, validation, env_val, obs_val)
        sharpe_a2c = get_validation_sharpe(i)
        print("A2C Sharpe Ratio: ", sharpe_a2c)

        # Trading
        print("======Trading from: ", validation_end_date, "to ", unique_trade_date[i])
        last_state = DRL_prediction(df, model_a2c, "A2C",
                                    last_state, i,
                                    unique_trade_date,
                                    rebalance_window,
                                    turbulence_threshold,
                                    initial)

    end = time.time()
    print("A2C Strategy took: ", (end - start) / 60, " minutes")


In [24]:
import os
import pandas as pd
def run_model() -> None:
    """Run the A2C walk-forward strategy on the preprocessed table ``data_fixed``.

    Creates the ``results`` directory if needed, selects the trade dates
    after 2024-01-01 up to and including 2025-08-29, prints them, and hands
    everything to ``run_A2C_strategy``.
    """
    os.makedirs("results", exist_ok=True)

    # Work on a copy with integer timestamps so the notebook-level
    # `data_fixed` is not modified and this cell can be re-run safely.
    data = data_fixed.assign(timestamp=data_fixed['timestamp'].astype(int))

    # Dates are YYYYMMDD integers. Validation and trading windows are carved
    # out of these dates by run_A2C_strategy.
    unique_trade_date = data[(data.timestamp > 20240101)&(data.timestamp <= 20250829)].timestamp.unique()
    print(unique_trade_date)

    # Both windows are counted in entries of unique_trade_date (trading days):
    # rebalance_window  - how many dates are traded before the model is retrained
    # validation_window - how many dates are used to validate the model before trading
    rebalance_window = 63
    validation_window = 63

    ## A2C-only strategy
    run_A2C_strategy(df=data,
                      unique_trade_date= unique_trade_date,
                      rebalance_window = rebalance_window,
                      validation_window=validation_window)


# Entry point: inside a notebook kernel __name__ is "__main__", so running this cell starts the full training run.
if __name__ == "__main__":
    run_model()


[20240102 20240103 20240104 20240105 20240108 20240109 20240110 20240111
 20240112 20240115 20240116 20240117 20240118 20240119 20240122 20240123
 20240124 20240125 20240126 20240129 20240130 20240131 20240201 20240202
 20240205 20240206 20240207 20240215 20240216 20240219 20240220 20240221
 20240222 20240223 20240226 20240227 20240228 20240229 20240301 20240304
 20240305 20240306 20240307 20240308 20240311 20240312 20240313 20240314
 20240315 20240318 20240319 20240320 20240321 20240322 20240325 20240326
 20240327 20240328 20240329 20240401 20240402 20240403 20240404 20240405
 20240408 20240409 20240410 20240411 20240412 20240415 20240416 20240417
 20240419 20240422 20240423 20240424 20240425 20240426 20240502 20240503
 20240506 20240507 20240508 20240509 20240510 20240513 20240514 20240515
 20240516 20240517 20240520 20240521 20240522 20240523 20240524 20240527
 20240528 20240529 20240530 20240531 20240603 20240604 20240605 20240606
 20240607 20240610 20240611 20240612 20240613 20240

  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.794075564543406  minutes


  return datetime.utcnow().replace(tzinfo=utc)


A2C Sharpe Ratio:  -0.06584488797829564


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1000000
end_total_asset: 1102120.96153125
total_reward: 102120.96153125004
total_cost:  10642.233
total trades:  940
Sharpe:  0.22555267891033573
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.789150853951772  minutes


  return datetime.utcnow().replace(tzinfo=utc)


A2C Sharpe Ratio:  0.1184227687094901
previous_total_asset: 1102120.96153125
end_total_asset: 1030812.0540021952
total_reward: -71308.90752905479
total_cost:  1087.0891
total trades:  780
Sharpe:  -0.18837885118193343


  return datetime.utcnow().replace(tzinfo=utc)


turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.8082391937573752  minutes
A2C Sharpe Ratio:  -0.18391699077131762


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1030812.0540021952
end_total_asset: 1050660.6934225
total_reward: 19848.639420304797
total_cost:  0.0
total trades:  990
Sharpe:  0.07715681430633639
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.7734170436859131  minutes


  return datetime.utcnow().replace(tzinfo=utc)


A2C Sharpe Ratio:  0.05498968516132469


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1050660.6934225
end_total_asset: 1037584.29864375
total_reward: -13076.394778750022
total_cost:  1041.2537
total trades:  890
Sharpe:  -0.020158898011099213
turbulence_threshold:  289.5317758225865


  return datetime.utcnow().replace(tzinfo=utc)
  return datetime.utcnow().replace(tzinfo=utc)


Training time (A2C):  1.8502389192581177  minutes
A2C Sharpe Ratio:  0.19008769523758193


  return datetime.utcnow().replace(tzinfo=utc)


previous_total_asset: 1037584.29864375
end_total_asset: 1377321.6324016093
total_reward: 339737.3337578593
total_cost:  2621.8647
total trades:  949
Sharpe:  0.4300333950571724
A2C Strategy took:  9.056378801663717  minutes


  return datetime.utcnow().replace(tzinfo=utc)
