In [1]:
%reload_ext autoreload

In [2]:
%autoreload 2

In [3]:
import warnings

In [4]:
warnings.filterwarnings('ignore')

In [5]:
import datetime
import glob
import mlflow
import os

import numpy as np
import pandas as pd

from pathlib import Path

In [6]:
import qlib

from qlib.data.dataset import DataHandlerLP
from qlib.constant import REG_CN, REG_US
from qlib.contrib.report import analysis_model, analysis_position
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord

In [7]:
from longcapital.data.dataset.processor import ChangeInstrument, DropInstrument, Fillna
from longcapital.utils.io import get_params_from_file, update_params_to_file, update_report_df
from longcapital.utils.time import get_diff_date
from longcapital.workflow.record_temp import SignalRecord


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [8]:
# Experiment name handed to `R.start(experiment_name=...)` in `train_model`.
EXP_NAME = "long-capital"

In [9]:
# records
# Locations of persisted parameters/reports, relative to the notebook's
# working directory. `MODEL_PARAMS_FILE` is read and updated by `train_model`;
# the other paths are not referenced in the visible part of this notebook.
STRATEGY_PARAMS_FILE = "../data/params/strategy.json"
MODEL_PARAMS_FILE = "../data/params/model.json"
PERFORMANCE_FILE = "../data/params/performance.json"
REPORT_DF_FOLDER = "../data/report_df"

In [10]:
# model
# Lookup tables keyed by the `loss_type` values accepted by `get_all_config`.
# Presumably the validation-metric name reported for each loss (the training
# log shows "l2" for "mse") — TODO confirm; not used in the visible cells.
MODEL_VALID_LOSS_KEY_DICT = {
    "mse": "l2",
    "mse_log": "l2",
    "binary": "binary_logloss",
    "lambdarank": "ndcg@5",
}
# Presumably the objective name handed to the model for each loss type —
# TODO confirm; not used in the visible cells.
MODEL_LOSS_NAME_DICT = {
    "mse": "mse",
    "mse_log": "mse",
    "binary": "binary",
    "lambdarank": "lambdarank"
}

# strategy
# Instrument pool -> benchmark index code. Read by `get_backtest_config` and
# `load_dataset`.
# NOTE(review): the name is a misspelling of "BENCHMARK_PARAMS"; it is kept
# as-is because other cells reference it under this spelling.
BECHMARK_PARAMS = {
    "csi300": "SH000300",
    "csi500": "SH000905",
    "csi800": "SH000906",
    # https://github.com/microsoft/qlib/issues/720
    "SP500": "^gspc",
    "NASDAQ100": "^ndx",
}

In [11]:
def get_last_date_from_calendar(region=REG_CN):
    """Return the last line of the region's qlib daily calendar file.

    The file is read directly in Python instead of shelling out to ``tail``,
    so the lookup is portable and never interpolates ``region`` into a shell
    command.

    Args:
        region: Region code used to build the path
            ``~/.qlib/qlib_data/{region}_data/calendars/day.txt``.

    Returns:
        The final line of the calendar file without its trailing newline, or
        an empty string when the file is empty.

    Raises:
        FileNotFoundError: If the calendar file does not exist (the previous
            shell-based version silently returned an empty string).
    """
    calendar_file = os.path.expanduser(
        f"~/.qlib/qlib_data/{region}_data/calendars/day.txt"
    )
    last_line = ""
    with open(calendar_file, "r") as f:
        # Keep only the most recent line; the file is never held in memory.
        for last_line in f:
            pass
    return last_line.rstrip("\n")


def get_date_config(region=REG_CN, pred_date=None):
    """Build the train/valid/test/backtest date ranges for ``region``.

    Args:
        region: Region key (``REG_CN`` or ``REG_US``); any other value raises
            ``KeyError``.
        pred_date: End date of the test segment. When ``None`` it is taken
            from ``get_last_date_from_calendar(region)``.

    Returns:
        A dict with "train", "valid", "test" and "backtest" entries, each a
        ``{"start": ..., "end": ...}`` dict. The backtest segment ends at
        ``get_diff_date(pred_date, -1)``.
    """
    if pred_date is None:
        pred_date = get_last_date_from_calendar(region)
    backtest_end = get_diff_date(pred_date, -1)

    def _segments():
        # Both regions currently share the same calendar split; a fresh dict
        # is built per region so the entries never alias each other.
        return {
            "train": {"start": "2006-01-01", "end": "2016-12-31"},
            "valid": {"start": "2017-01-01", "end": "2018-12-31"},
            "test": {"start": "2019-01-01", "end": pred_date},
            "backtest": {"start": "2019-01-01", "end": backtest_end},
        }

    per_region = {key: _segments() for key in (REG_CN, REG_US)}
    return per_region[region]


def get_backtest_config(region=REG_CN, instruments="csi300", deal_price="open"):
    """Return the benchmark and exchange settings used for backtesting.

    Args:
        region: Region key (``REG_CN`` or ``REG_US``); any other value raises
            ``KeyError``.
        instruments: Instrument pool name; must be a key of
            ``BECHMARK_PARAMS`` (``KeyError`` otherwise).
        deal_price: Value stored under ``exchange_kwargs["deal_price"]``.

    Returns:
        A dict with "benchmark" (the index code for ``instruments``) and
        "exchange_kwargs" (codes, frequency, trade unit, limit threshold,
        deal price and cost settings for the region).
    """
    benchmark = BECHMARK_PARAMS[instruments]

    def _entry(trade_unit, limit_threshold, open_cost, close_cost, min_cost):
        # Only the region-specific numbers vary; everything else is shared.
        return {
            "benchmark": benchmark,
            "exchange_kwargs": {
                "codes": instruments,
                "freq": "day",
                "trade_unit": trade_unit,
                "limit_threshold": limit_threshold,
                "deal_price": deal_price,
                "open_cost": open_cost,
                "close_cost": close_cost,
                "min_cost": min_cost,
            },
        }

    by_region = {
        REG_CN: _entry(
            trade_unit=100,
            limit_threshold=0.095,
            open_cost=0.0005,
            close_cost=0.0015,
            min_cost=5,
        ),
        # US costs estimated from moomoo sg
        REG_US: _entry(
            trade_unit=1,
            limit_threshold=None,
            open_cost=0.003,
            close_cost=0.005,
            min_cost=0,
        ),
    }
    return by_region[region]

In [12]:
def get_all_config(
    region=REG_CN,
    instruments="csi300",
    benchmark_feature=None,
    deal_price="open",
    days_ahead=4,
    loss_type="mse",
    label_norm="CSZScoreNorm",
    model_type="default",
    strategy_type="best",
    hold_thresh=2,
    date_config=None
):
    """Assemble the full experiment configuration.

    Args:
        region: Market region, ``REG_CN`` or ``REG_US``.
        instruments: Instrument pool, e.g. "csi300", "csi500", "csi800".
        benchmark_feature: One of ``None``, "raw", "diff", "both".
        deal_price: "open" or "close".
        days_ahead: Positive integer label horizon.
        loss_type: One of "mse", "mse_log", "binary", "lambdarank".
        label_norm: "CSZScoreNorm" or "CSRankNorm".
        model_type: "default" or "best".
        strategy_type: Strategy variant recorded in the configuration. It was
            previously ignored and always stored as "best"; the default keeps
            that value.
        hold_thresh: Positive integer holding threshold.
        date_config: Optional pre-built date configuration; when ``None`` it
            is produced by ``get_date_config(region=region)``.

    Returns:
        A dict holding the arguments above plus "date", "backtest" and three
        string keys built from the ``name=value`` pairs of the arguments:
        "dataset_key" (omits model_type, strategy_type and hold_thresh),
        "model_key" (omits strategy_type and hold_thresh) and "config_key"
        (every argument).
    """
    config = {
        # market
        # [REG_CN, REG_US]
        "region": region,
        # ["csi300", "csi500", "csi800", "csiall", "all"]
        "instruments": instruments,

        # feature
        # [None, "raw", "diff", "both"]
        "benchmark_feature": benchmark_feature,

        # label
        # ["open", "close"]
        "deal_price": deal_price,
        # [1,2,3,...]
        "days_ahead": days_ahead,
        # ["mse", "mse_log", "binary", "lambdarank"]
        "loss_type": loss_type,
        # ["CSZScoreNorm", "CSRankNorm"]
        "label_norm": label_norm,

        # model
        # ["default", "best"]
        "model_type": model_type,

        # strategy
        # honour the caller's choice instead of a hard-coded "best"
        "strategy_type": strategy_type,
        # [1,2,3,...]
        "hold_thresh": hold_thresh
    }

    def _make_key(excluded=()):
        # Keys are derived before "date"/"backtest" are attached, so they
        # only ever contain the scalar arguments above.
        return "-".join(f"{k}={v}" for k, v in config.items() if k not in excluded)

    dataset_key = _make_key(("model_type", "strategy_type", "hold_thresh"))
    model_key = _make_key(("strategy_type", "hold_thresh"))
    config_key = _make_key()

    if date_config is None:
        date_config = get_date_config(region=config["region"])
    config["date"] = date_config
    config["backtest"] = get_backtest_config(
        region=config["region"],
        instruments=config["instruments"],
        deal_price=config["deal_price"]
    )
    config.update({
        "dataset_key": dataset_key,
        "model_key": model_key,
        "config_key": config_key,
    })
    return config

In [13]:
def append_benchmark_to_pool(region, instrument, benchmark):
    """Make sure the benchmark's record is listed in an instrument pool file.

    Files live under ``~/.qlib/qlib_data/{region}_data/instruments`` (the
    same data root the rest of the notebook uses, rather than one user's
    absolute home path).

    Args:
        region: Region code used to build the data folder name.
        instrument: Pool name; ``{instrument}.txt`` is the file that is
            checked and appended to.
        benchmark: Benchmark code to look for (substring match per line).

    Returns:
        None. Does nothing when a line of the pool file already contains
        ``benchmark``; otherwise appends the matching line from ``all.txt``.

    Raises:
        ValueError: If ``all.txt`` has no line containing ``benchmark``.
            Previously an unrelated line (the last one read) was appended.
        FileNotFoundError: If either file is missing.
    """
    folder = os.path.expanduser(f"~/.qlib/qlib_data/{region}_data/instruments")
    pool_file = f"{folder}/{instrument}.txt"

    # already appended
    last_pool_line = ""
    with open(pool_file, "r") as f:
        for line in f:
            if benchmark in line:
                return
            last_pool_line = line

    # locate the benchmark's record in the full instrument list
    benchmark_line = None
    with open(f"{folder}/all.txt", "r") as f:
        for line in f:
            if benchmark in line:
                benchmark_line = line
                break
    if benchmark_line is None:
        raise ValueError(
            f"benchmark {benchmark!r} not found in {folder}/all.txt"
        )

    # append
    with open(pool_file, "a") as f:
        # Avoid gluing the new record onto an unterminated last line.
        if last_pool_line and not last_pool_line.endswith("\n"):
            f.write("\n")
        if not benchmark_line.endswith("\n"):
            benchmark_line += "\n"
        f.write(benchmark_line)

In [14]:
def load_dataset(config, label=None):
    """Build the Alpha158-based ``DatasetH`` described by ``config``.

    Args:
        config: Configuration dict as produced by ``get_all_config``; the
            keys read here are "benchmark_feature", "instruments", "region",
            "loss_type", "label_norm", "days_ahead" and "date".
        label: Optional label definition; forwarded to the handler only when
            truthy.

    Returns:
        The dataset instance with "train", "valid" and "test" segments.

    Side effects:
        When a benchmark feature is requested, the benchmark is added to the
        instrument pool file via ``append_benchmark_to_pool``. Feature
        columns containing non-finite values in the train or test frames are
        printed.
    """
    # processors
    with_benchmark = config["benchmark_feature"] in ("raw", "diff", "both")
    benchmark = BECHMARK_PARAMS[config["instruments"]]
    if with_benchmark:
        infer_processors = [
            {"class": "Fillna"},
            ChangeInstrument(
                instrument=benchmark,
                append_type=config["benchmark_feature"],
                fields_group="feature",
            ),
            DropInstrument(instruments=[benchmark]),
        ]
        append_benchmark_to_pool(config["region"], config["instruments"], benchmark)
    else:
        infer_processors = [
            {"class": "Fillna"},
            DropInstrument(instruments=[benchmark]),
        ]

    learn_processors = [{"class": "DropnaLabel"}]
    if config["loss_type"] != "lambdarank":
        # Label normalisation is skipped for the ranking loss.
        learn_processors.append(
            {"class": config["label_norm"], "kwargs": {"fields_group": "label"}}
        )

    # handler: the data window spans every segment
    dates = config["date"]
    segment_names = ("train", "valid", "test")
    handler_kwargs = {
        "start_time": min(dates[name]["start"] for name in segment_names),
        "end_time": max(dates[name]["end"] for name in segment_names),
        "fit_start_time": dates["train"]["start"],
        "fit_end_time": dates["train"]["end"],
        "instruments": config["instruments"],
        "feature": ([], []),
        "learn_processors": learn_processors,
        "infer_processors": infer_processors,
        "loss_type": config["loss_type"],
        "next_label_price_expr": "$open",
        "curr_label_price_expr": "$open",
        "days_ahead": config["days_ahead"],
        "include_volume": False,
    }
    if label:
        handler_kwargs["label"] = label
    handler = init_instance_by_config({
        "class": "Alpha158",
        "module_path": "longcapital.contrib.data.handler",
        "kwargs": handler_kwargs,
    })

    # dataset
    segments = {
        name: (dates[name]["start"], dates[name]["end"]) for name in segment_names
    }
    dataset = init_instance_by_config({
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {"handler": handler, "segments": segments},
    })

    # nan check
    def _print_non_finite_columns(frame):
        # A column is reported when any of its values is NaN or infinite.
        features = frame["feature"]
        has_non_finite = np.isfinite(features.values).mean(axis=0) != 1
        if has_non_finite.sum():
            print(features.columns[has_non_finite])

    train_frame = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
    test_frame = dataset.prepare("test", col_set=["feature"], data_key=DataHandlerLP.DK_I)
    _print_non_finite_columns(train_frame)
    _print_non_finite_columns(test_frame)

    return dataset

In [15]:
def train_model(dataset, config):
    """Fit an ``LGBModel`` on ``dataset`` inside a recorder run.

    Model parameters are looked up in ``MODEL_PARAMS_FILE`` under
    ``config["config_key"]``. When no entry exists, the "default" entry is
    used and also stored back under that key.

    Args:
        dataset: Dataset passed to ``model.fit`` and to ``SignalRecord``.
        config: Configuration dict; only "config_key" is read here.

    Returns:
        Tuple ``(model, rid)`` of the fitted model and the recorder id.
    """
    # Close any active mlflow run before a new recorder is started.
    mlflow.end_run()
    with R.start(experiment_name=EXP_NAME):
        params_key = config["config_key"]
        model_params = get_params_from_file(MODEL_PARAMS_FILE, params_key)
        if model_params is None:
            model_params = get_params_from_file(MODEL_PARAMS_FILE, "default")
            update_params_to_file(MODEL_PARAMS_FILE, params_key, model_params)

        model = init_instance_by_config({
            "class": "LGBModel",
            "module_path": "longcapital.contrib.model.gbdt",
            "kwargs": model_params
        })
        model.fit(dataset)
        R.save_objects(trained_model=model)

        recorder = R.get_recorder()
        rid = recorder.id  # save the record id

        # Inference and saving signal
        SignalRecord(
            model, dataset, recorder, neutralize=False, riskiest_features_num=50
        ).generate()

    return model, rid

In [16]:
def prepare_signal(days_aheads, signal_names, hold_thresh):
    """Train one model per horizon and collect the predictions as columns.

    For each ``(days_ahead, signal_name)`` pair a config is built, qlib is
    initialised, the dataset is loaded and a model is trained. The feature
    frames come from the first dataset only. Each model's predictions are
    inserted at column position 0 under ``("feature", signal_name)``, so the
    last pair ends up as the first column.

    Args:
        days_aheads: Label horizons, one per signal.
        signal_names: Column names for the predictions, paired with
            ``days_aheads`` by position.
        hold_thresh: Forwarded to ``get_all_config``.

    Returns:
        Tuple ``(signal, rid)``: the valid and test frames concatenated
        row-wise, and the recorder id of the last trained model.
    """
    valid_frame = None
    test_frame = None
    for horizon, column in zip(days_aheads, signal_names):
        cfg = get_all_config(days_ahead=horizon, hold_thresh=hold_thresh)
        region = cfg["region"]
        qlib.init(provider_uri=f"~/.qlib/qlib_data/{region}_data", region=region)
        dataset = load_dataset(cfg)
        model, rid = train_model(dataset, cfg)

        # Feature frames are materialised once, from the first dataset.
        if valid_frame is None:
            valid_frame = dataset.prepare("valid", col_set=["feature"], data_key=DataHandlerLP.DK_L)
        if test_frame is None:
            test_frame = dataset.prepare("test", col_set=["feature"], data_key=DataHandlerLP.DK_I)

        for frame, segment in ((valid_frame, "valid"), (test_frame, "test")):
            frame.insert(0, ("feature", column), model.predict(dataset, segment))

    return pd.concat([valid_frame, test_frame], axis=0), rid

In [17]:
# trading config
topk = 10
n_drop = 2
hold_thresh = 3
account = 100000000

In [18]:
# `prepare_signal` inserts each signal at column index 0, one after another,
# so the LAST entry of days_aheads / signal_names ends up as the FIRST column.
# That column is used by TopkDropoutStrategy as the default signal for ranking
# (see: https://github.com/microsoft/qlib/blob/main/qlib/contrib/strategy/signal_strategy.py#L147)
# and is also used to rank the features that form the obs/state space for RL training.
# (see: TradeStrategy.get_feature)
# The trailing `[-1:]` keeps only the last horizon/name pair (8 -> "signal").
days_aheads = [2, 3, 4, 5, 6, 7, 8][-1:]
signal_names = ["signal_2", "signal_3", "signal_4", "signal_5", "signal_6", "signal_7", "signal"][-1:]
signal_key = "signal"

# model config
# alpha158 + signals + position flag + unhold flag + [amount, weight, price, count_day]
# NOTE(review): the bracketed list names four extra fields but only 2 is
# added below — confirm which of the two is correct.
dim = 158 + len(signal_names) + 1 + 1 + 2
feature_buffer_size = 1
# number of stock candidates for ranking
stock_num = 20

In [19]:
# Configuration for the last horizon; its date and backtest settings are
# reused by the RL cells below.
config = get_all_config(days_ahead=days_aheads[-1], hold_thresh=hold_thresh)
# Trains one model per horizon (slow: loads data and fits LightGBM) and
# returns the combined valid+test signal frame plus the last recorder id.
signal, rid = prepare_signal(days_aheads, signal_names, hold_thresh)

[34473:MainThread](2023-03-18 07:24:08,315) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[34473:MainThread](2023-03-18 07:24:08,323) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[34473:MainThread](2023-03-18 07:24:08,324) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/Users/chenglong.chen/.qlib/qlib_data/cn_data')}
[34473:MainThread](2023-03-18 07:25:13,587) INFO - qlib.timer - [log.py:128] - Time cost: 65.256s | Loading data Done
[34473:MainThread](2023-03-18 07:25:19,446) INFO - qlib.timer - [log.py:128] - Time cost: 4.440s | Fillna Done
[34473:MainThread](2023-03-18 07:25:20,271) INFO - qlib.timer - [log.py:128] - Time cost: 0.824s | DropInstrument Done
[34473:MainThread](2023-03-18 07:25:21,273) INFO - qlib.timer - [log.py:128] - Time cost: 0.462s | DropnaLabel Done
[34473:MainThread](2023-03-18 07:25:28,888) INFO - qlib.timer - [log.py:128] - Time cost: 7.61

ModuleNotFoundError. CatBoostModel are skipped. (optional: maybe installing CatBoostModel can fix it.)



Please use `line_search_wolfe2` from the `scipy.optimize` namespace, the `scipy.optimize.linesearch` namespace is deprecated.


Please use `line_search_wolfe1` from the `scipy.optimize` namespace, the `scipy.optimize.linesearch` namespace is deprecated.



Training until validation scores don't improve for 50 rounds
[20]	train's l2: 0.961997	valid's l2: 0.986656
[40]	train's l2: 0.941246	valid's l2: 0.985369
[60]	train's l2: 0.924647	valid's l2: 0.986241
[80]	train's l2: 0.909259	valid's l2: 0.98719
Early stopping, best iteration is:
[36]	train's l2: 0.945509	valid's l2: 0.985177


[34473:MainThread](2023-03-18 07:26:06,413) INFO - qlib.workflow - [record_temp.py:196] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 3


'The following are prediction results of the LGBModel model.'
                          score
datetime   instrument          
2019-01-02 SH600000   -0.077803
           SH600004   -0.108537
           SH600009    0.248091
           SH600010   -0.010179
           SH600011    0.030957


[34473:MainThread](2023-03-18 07:26:08,164) INFO - qlib.timer - [log.py:128] - Time cost: 0.000s | waiting `async_log` Done


# RL

In [20]:
from qlib.contrib.evaluate import risk_analysis
from qlib.rl.trainer import Checkpoint, EarlyStopping, MetricsWriter, train, backtest
from qlib.rl.utils.log import CsvWriter


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


`np.typeDict` is a deprecated alias for `np.sctypeDict`.



In [21]:
from tianshou.data import Batch


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [43]:
from longcapital.rl.order_execution.reward import (
    ExcessReturnReward, 
    EpisodeInformationRatioReward, 
    ExecutionInformationRatioReward, 
    ExcessExecutionInformationRatioReward
)
from longcapital.rl.order_execution.state import TradeStrategyInitiateState
from longcapital.rl.order_execution.strategy import (
    TopkDropoutStrategy, 
    TopkDropoutSignalStrategy, 
    TopkDropoutSelectionStrategy,
    TopkDropoutDynamicStrategy,
    TopkStrategy, 
    WeightStrategy
)
from longcapital.rl.order_execution.simulator import TradeStrategySimulator
from longcapital.rl.order_execution.policy import continuous
from longcapital.rl.order_execution.policy import discrete


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



## Check Simulator

In [44]:
def check_simulator(trade_strategy, simulator):
    """Run a single policy step through ``simulator`` and print the result.

    The current simulator state is interpreted into an observation, passed
    to the strategy's policy, and the resulting action is applied with
    ``simulator.step``. The action and the reward computed on the new state
    by ``EpisodeInformationRatioReward`` are printed.

    Args:
        trade_strategy: Strategy providing ``state_interpreter``, ``policy``
            and ``action_interpreter``.
        simulator: Simulator exposing ``get_state`` and ``step``.
    """
    ir_reward = EpisodeInformationRatioReward(scale=1.)
    start_state = simulator.get_state()

    # Wrap the single observation the way the policy expects a batch.
    observation = trade_strategy.state_interpreter.interpret(start_state)
    batch = Batch([{"obs": observation, "info": {}}])
    policy_out = trade_strategy.policy(batch)

    action = trade_strategy.action_interpreter.interpret(start_state, policy_out.act)
    print(f"Action = {action}")

    simulator.step(action)
    reward_value = float(ir_reward.reward(simulator.get_state()))
    print(f"Reward = {reward_value:.6f}")

In [45]:
# Initial state used for RL training: spans the *validation* date segment,
# with the start shifted by 7 via `get_diff_date`.
initial_states_train = [
    TradeStrategyInitiateState(
        start_time=get_diff_date(config["date"]["valid"]["start"], 7), # to avoid start_time not tradable
        end_time=config["date"]["valid"]["end"],
        sample_date=False
    )
]
# Initial state used for RL validation: spans the backtest date segment.
initial_states_valid = [
    TradeStrategyInitiateState(
        start_time=config["date"]["backtest"]["start"],
        end_time=config["date"]["backtest"]["end"],
        sample_date=False
    )
]

In [46]:
# TopkDropoutStrategy with a discrete PPO policy. The signal column name
# comes from the shared `signal_key` setting rather than a repeated literal.
topk_dropout_strategy = TopkDropoutStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=topk,
    n_drop=n_drop,
    only_tradable=True,
    hold_thresh=hold_thresh,
    signal_key=signal_key,
    policy_cls=discrete.PPO,
    feature_buffer_size=feature_buffer_size
)

In [47]:
# Simulator for `topk_dropout_strategy`, started from the training initial
# state; benchmark and exchange settings come from `config["backtest"]`.
topk_dropout_simulator = TradeStrategySimulator(
    trade_strategy=topk_dropout_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:31:31,835) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [48]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(topk_dropout_strategy, topk_dropout_simulator)

Action = TopkDropoutStrategyAction(n_drop=6)
Reward = 0.000000



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [49]:
# TopkDropoutSignalStrategy with a continuous MetaPPO policy. The signal
# column name comes from the shared `signal_key` setting rather than a
# repeated literal.
topk_dropout_signal_strategy = TopkDropoutSignalStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=topk,
    n_drop=n_drop,
    only_tradable=True,
    hold_thresh=hold_thresh,
    signal_key=signal_key,
    policy_cls=continuous.MetaPPO,
    feature_buffer_size=feature_buffer_size
)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [50]:
# Simulator for `topk_dropout_signal_strategy`, started from the training
# initial state; benchmark and exchange settings come from `config["backtest"]`.
topk_dropout_signal_simulator = TradeStrategySimulator(
    trade_strategy=topk_dropout_signal_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:31:44,083) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [51]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(topk_dropout_signal_strategy, topk_dropout_signal_simulator)

Action = TopkDropoutSignalStrategyAction(signal=instrument
SH600519   -3.439271
SH600703    0.276011
SZ000568   -0.809705
SH600660   -0.339498
SZ000858   -0.442969
SZ002304   -0.049251
SH600196   -1.340284
SZ300070    1.281227
SZ000768   -0.137250
SH600023   -0.731666
SH600297    0.279189
SH601888   -2.166513
SZ002475   -1.043969
SH600383    0.398076
SZ002008    1.786606
SZ000063    0.308335
SH601333   -2.288858
SH600372   -0.872858
SH600688    0.350583
SH601939   -1.037247
Name: (feature, signal), dtype: float64)
Reward = 0.000000



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [52]:
# TopkDropoutSelectionStrategy with a discrete PPO policy. The signal column
# name comes from the shared `signal_key` setting rather than a repeated
# literal.
topk_dropout_selection_strategy = TopkDropoutSelectionStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=topk,
    n_drop=n_drop,
    only_tradable=True,
    hold_thresh=hold_thresh,
    signal_key=signal_key,
    policy_cls=discrete.PPO,
    feature_buffer_size=feature_buffer_size
)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [53]:
# Simulator for `topk_dropout_selection_strategy`, started from the training
# initial state; benchmark and exchange settings come from `config["backtest"]`.
# NOTE(review): "selction" is a typo for "selection"; the name is left
# unchanged because the following check cell refers to it.
topk_dropout_selction_simulator = TradeStrategySimulator(
    trade_strategy=topk_dropout_selection_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:31:59,690) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [54]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(topk_dropout_selection_strategy, topk_dropout_selction_simulator)

Action = TopkDropoutSignalStrategyAction(signal=instrument
SH600519    0.255932
SH600703    0.240300
SZ000568    0.235384
SH600660    0.230448
SZ000858    0.207171
SZ002304    0.203800
SH600196    0.190205
SZ300070    0.188669
SZ000768    0.188146
SH600023    0.174263
SH600297    0.173814
SH601888    0.171736
SZ002475    0.166362
SH600383    0.160947
SZ002008    0.160311
SZ000063    0.158970
SH601333    0.157508
SH600372    0.155727
SH600688    0.152897
SH601939    0.146187
Name: (feature, signal), dtype: float64)
Reward = 0.000000



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [55]:
# TopkDropoutDynamicStrategy with a continuous MetaPPO policy. The signal
# column name comes from the shared `signal_key` setting rather than a
# repeated literal.
topk_dropout_dynamic_strategy = TopkDropoutDynamicStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=topk,
    n_drop=n_drop,
    only_tradable=True,
    hold_thresh=hold_thresh,
    signal_key=signal_key,
    policy_cls=continuous.MetaPPO,
    feature_buffer_size=feature_buffer_size
)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [56]:
# Simulator for `topk_dropout_dynamic_strategy`, started from the training
# initial state; benchmark and exchange settings come from `config["backtest"]`.
topk_dropout_dynamic_simulator = TradeStrategySimulator(
    trade_strategy=topk_dropout_dynamic_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:32:13,460) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [57]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(topk_dropout_dynamic_strategy, topk_dropout_dynamic_simulator)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Action = TopkDropoutDynamicStrategyAction(signal=instrument
SH600519    1.0
SH600703    1.0
SZ000568    1.0
SH600660    1.0
SZ000858    0.0
SZ002304    1.0
SH600196    0.0
SZ300070    1.0
SZ000768    1.0
SH600023    0.0
SH600297    0.0
SH601888    1.0
SZ002475    0.0
SH600383    0.0
SZ002008    1.0
SZ000063    0.0
SH601333    0.0
SH600372    0.0
SH600688    1.0
SH601939    0.0
Name: (feature, signal), dtype: float64, topk=10, n_drop=0)
Reward = 0.000000


In [58]:
# TopkStrategy with a discrete MetaPPO policy. The signal column name comes
# from the shared `signal_key` setting rather than a repeated literal.
topk_strategy = TopkStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    signal_key=signal_key,
    policy_cls=discrete.MetaPPO,
    feature_buffer_size=feature_buffer_size
)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [59]:
# Simulator for `topk_strategy`, started from the training initial state;
# benchmark and exchange settings come from `config["backtest"]`.
topk_simulator = TradeStrategySimulator(
    trade_strategy=topk_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:32:29,561) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [60]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(topk_strategy, topk_simulator)

Action = WeightStrategyAction(target_weight_position={'SH600519': 0.125, 'SZ300070': 0.125, 'SZ002475': 0.125, 'SZ002008': 0.125, 'SZ000063': 0.125, 'SH601333': 0.125, 'SH600372': 0.125, 'SH600688': 0.125})
Reward = 0.000000



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [61]:
# WeightStrategy with a continuous MetaPPO policy. The signal column name
# comes from the shared `signal_key` setting rather than a repeated literal.
weight_strategy = WeightStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=topk,
    signal_key=signal_key,
    policy_cls=continuous.MetaPPO,
    feature_buffer_size=feature_buffer_size
)


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [62]:
# Simulator for `weight_strategy`, started from the training initial state;
# benchmark and exchange settings come from `config["backtest"]`.
weight_strategy_simulator = TradeStrategySimulator(
    trade_strategy=weight_strategy, 
    initial_state=initial_states_train[0], 
    account=account,
    benchmark=config["backtest"]["benchmark"],
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

[34473:MainThread](2023-03-18 07:32:43,420) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [63]:
# Smoke test: one policy step; prints the chosen action and the reward.
check_simulator(weight_strategy, weight_strategy_simulator)

Action = WeightStrategyAction(target_weight_position={'SZ300070': 0.1, 'SH600383': 0.1, 'SZ000063': 0.1, 'SH601333': 0.1, 'SZ000768': 0.1, 'SZ002304': 0.1, 'SH600023': 0.1, 'SZ002008': 0.1, 'SH600196': 0.1, 'SH600660': 0.1})



`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



Reward = 0.000000


## Train Policy

In [64]:
def train_trade_strategy(
    trade_strategy,
    max_iters=2,
    concurrency=1,
    episode_per_iter=1,
    patience=1000,
    update_kwargs=None,
):
    """Run a baseline backtest for ``trade_strategy`` and then train its policy.

    The function first calls ``backtest`` on ``initial_states_valid`` using the
    strategy's ``baseline_action_interpreter`` (results go to a ``CsvWriter``
    under ``<output_dir>/baseline``), then calls ``train`` on
    ``initial_states_train`` using the strategy's ``action_interpreter``.
    Checkpoints and metrics are written under ``<output_dir>/checkpoints`` and
    ``<output_dir>/metrics``. ``output_dir`` is
    ``../records/EpisodeInformationRatioReward/<trade_strategy>/<policy>/<timestamp>``
    with a minute-resolution timestamp.

    Besides its parameters, the function reads these notebook-level names:
    ``initial_states_train``, ``initial_states_valid``, ``account``, ``config``
    and the ``continuous`` policy module.

    Args:
        trade_strategy: Strategy object exposing ``policy``, ``policy_cls``,
            ``state_interpreter``, ``action_interpreter`` and
            ``baseline_action_interpreter``.
        max_iters: Passed to the trainer as ``max_iters``.
        concurrency: Passed to the trainer as ``concurrency``.
        episode_per_iter: Passed to the training vessel as ``episode_per_iter``;
            also scales ``start_episodes`` for the MetaDDPG / MetaTD3 / MetaSAC
            policy classes.
        patience: ``patience`` given to the ``EarlyStopping`` callback. The
            default of 1000 preserves the previously hard-coded value.
        update_kwargs: Optional dict passed to the vessel as ``update_kwargs``.
            Defaults to ``{"batch_size": 64, "repeat": 5}``, the previously
            hard-coded value. The dict is copied, so the caller's object is
            never shared with the vessel.

    Returns:
        None.
    """
    # Single source of truth for the policy classes that get exploration noise
    # and a warm-up of start episodes (previously duplicated in two closures).
    warmup_policy_classes = (continuous.MetaDDPG, continuous.MetaTD3, continuous.MetaSAC)
    needs_warmup = trade_strategy.policy_cls in warmup_policy_classes

    if update_kwargs is None:
        update_kwargs = {"batch_size": 64, "repeat": 5}
    else:
        update_kwargs = dict(update_kwargs)

    reward = EpisodeInformationRatioReward(scale=1.)
    val_reward = EpisodeInformationRatioReward(scale=1.)

    # Build the run directory once as a Path; sub-directories are derived from it.
    output_dir = Path(
        f'../records/EpisodeInformationRatioReward/{trade_strategy}/{trade_strategy.policy}/{datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")}'
    )

    trainer_kwargs = {
        "max_iters": max_iters,
        "finite_env_type": "dummy",
        "concurrency": concurrency,
        "val_every_n_iters": 1,
        "callbacks": [
            Checkpoint(
                dirpath=output_dir / "checkpoints",
                every_n_iters=1,
                save_latest="copy",
            ),
            EarlyStopping(
                monitor="reward",
                min_delta=0.0,
                patience=patience,
                restore_best_weights=True,
            ),
            MetricsWriter(
                dirpath=output_dir / "metrics",
            ),
        ],
    }
    vessel_kwargs = {
        "update_kwargs": update_kwargs,
        "episode_per_iter": episode_per_iter,
        "val_initial_states": initial_states_valid,
        "exploration_noise": needs_warmup,
        "start_episodes": 5 * episode_per_iter if needs_warmup else None,
    }

    def simulator_fn(initial_state):
        # Build a new simulator for each initial state handed over by the caller.
        return TradeStrategySimulator(
            trade_strategy=trade_strategy,
            initial_state=initial_state,
            account=account,
            benchmark=config["backtest"]["benchmark"],
            exchange_kwargs=config["backtest"]["exchange_kwargs"],
        )

    # baseline: backtest on the validation states with the baseline action
    # interpreter, before any training takes place.
    baseline_logger = CsvWriter(
        output_dir=output_dir / "baseline"
    )
    backtest(
        simulator_fn=simulator_fn,
        state_interpreter=trade_strategy.state_interpreter,
        action_interpreter=trade_strategy.baseline_action_interpreter,
        policy=trade_strategy.policy,
        reward=val_reward,
        initial_states=initial_states_valid,
        finite_env_type=trainer_kwargs["finite_env_type"],
        logger=[baseline_logger],
    )
    del baseline_logger

    # train
    train(
        simulator_fn=simulator_fn,
        state_interpreter=trade_strategy.state_interpreter,
        action_interpreter=trade_strategy.action_interpreter,
        policy=trade_strategy.policy,
        reward=reward,
        val_reward=val_reward,
        initial_states=initial_states_train,
        trainer_kwargs=trainer_kwargs,
        vessel_kwargs=vessel_kwargs,
    )


`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



In [65]:
# Disabled run, kept for reference (uncomment to train topk_dropout_strategy):
# train_trade_strategy(trade_strategy=topk_dropout_strategy, max_iters=1000, concurrency=1)

In [66]:
# Disabled run, kept for reference (uncomment to train topk_dropout_signal_strategy):
# train_trade_strategy(trade_strategy=topk_dropout_signal_strategy, max_iters=1000, concurrency=1)

In [67]:
# Disabled run, kept for reference (uncomment to train topk_dropout_selection_strategy):
# train_trade_strategy(trade_strategy=topk_dropout_selection_strategy, max_iters=1000, concurrency=1)

In [None]:
# Train topk_dropout_dynamic_strategy for up to 1000 iterations with concurrency 1.
train_trade_strategy(
    trade_strategy=topk_dropout_dynamic_strategy,
    max_iters=1000,
    concurrency=1,
)

[34473:MainThread](2023-03-18 07:32:55,567) INFO - qlib.rl.trainer.vessel - [vessel.py:166] - Testing initial states collection size: 1
[34473:MainThread](2023-03-18 07:32:55,569) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:32:56,857) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:33:41,356) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:33:41	Train iteration 1/1000
[34473:MainThread](2023-03-18 07:33:41,358) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:33:41,359) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:33:41,367) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:34:10,321) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:34:23,615) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:34:38,922) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] n/ep = 1
[34473:MainThread](2023-03-18 07:34:38,923) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] n/st = 483
[34473:MainThread](2023-03-18 07:34:38,925) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] rews = 2.43329750879226
[34473:MainThread](2023-03-18 07:34:38,926) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] lens = 483.0
[34473:MainThread](2023-03-18 07:34:38,927) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] idxs = 0.0
[34473:MainThread](2023-03-18 07:34:38,928) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] rew = 2.43329750879226
[34473:MainThread](2023-03-18 07:34:38,930) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] len = 483.0
[34473:MainThread](2023-03-18 07:34:38,931) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 1] rew_std = 0.0
[34473:MainThread](2023-03-18 07:34:38,932) INFO - qlib.rl.trainer.

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:35:25,751) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #0 current reward: 2.4333, best reward: 2.4333 in #0
[34473:MainThread](2023-03-18 07:35:25,790) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:35:25	Train iteration 2/1000
[34473:MainThread](2023-03-18 07:35:25,791) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:35:25,793) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:35:26,803) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:35:56,836) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:36:10,028) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:36:27,016) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] n/ep = 1
[34473:MainThread](2023-03-18 07:36:27,018) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] n/st = 483
[34473:MainThread](2023-03-18 07:36:27,019) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] rews = 3.0956133614666874
[34473:MainThread](2023-03-18 07:36:27,020) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] lens = 483.0
[34473:MainThread](2023-03-18 07:36:27,021) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] idxs = 0.0
[34473:MainThread](2023-03-18 07:36:27,022) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] rew = 3.0956133614666874
[34473:MainThread](2023-03-18 07:36:27,024) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] len = 483.0
[34473:MainThread](2023-03-18 07:36:27,025) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 2] rew_std = 0.0
[34473:MainThread](2023-03-18 07:36:27,026) INFO - qlib.rl.trai

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:37:13,342) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #1 current reward: 3.0956, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:37:13,381) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:37:13	Train iteration 3/1000
[34473:MainThread](2023-03-18 07:37:13,383) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:37:13,384) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:37:14,395) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:37:47,278) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:38:00,131) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:38:18,824) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] n/ep = 1
[34473:MainThread](2023-03-18 07:38:18,825) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] n/st = 483
[34473:MainThread](2023-03-18 07:38:18,827) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] rews = 2.7427622827101805
[34473:MainThread](2023-03-18 07:38:18,828) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] lens = 483.0
[34473:MainThread](2023-03-18 07:38:18,829) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] idxs = 0.0
[34473:MainThread](2023-03-18 07:38:18,830) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] rew = 2.7427622827101805
[34473:MainThread](2023-03-18 07:38:18,831) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] len = 483.0
[34473:MainThread](2023-03-18 07:38:18,832) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 3] rew_std = 0.0
[34473:MainThread](2023-03-18 07:38:18,834) INFO - qlib.rl.trai

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:39:05,443) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #2 current reward: 2.7428, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:39:05,471) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:39:05	Train iteration 4/1000
[34473:MainThread](2023-03-18 07:39:05,472) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:39:05,474) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:39:06,481) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:39:37,443) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:39:49,258) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:40:05,136) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] n/ep = 1
[34473:MainThread](2023-03-18 07:40:05,137) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] n/st = 483
[34473:MainThread](2023-03-18 07:40:05,138) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] rews = 2.5675596803842717
[34473:MainThread](2023-03-18 07:40:05,140) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] lens = 483.0
[34473:MainThread](2023-03-18 07:40:05,141) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] idxs = 0.0
[34473:MainThread](2023-03-18 07:40:05,142) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] rew = 2.5675596803842717
[34473:MainThread](2023-03-18 07:40:05,144) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] len = 483.0
[34473:MainThread](2023-03-18 07:40:05,145) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 4] rew_std = 0.0
[34473:MainThread](2023-03-18 07:40:05,146) INFO - qlib.rl.trai

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:40:52,860) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #3 current reward: 2.5676, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:40:52,890) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:40:52	Train iteration 5/1000
[34473:MainThread](2023-03-18 07:40:52,892) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:40:52,894) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:40:53,902) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:41:25,446) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:41:37,485) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:41:52,605) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] n/ep = 1
[34473:MainThread](2023-03-18 07:41:52,606) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] n/st = 483
[34473:MainThread](2023-03-18 07:41:52,607) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] rews = 2.7205613807821614
[34473:MainThread](2023-03-18 07:41:52,608) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] lens = 483.0
[34473:MainThread](2023-03-18 07:41:52,610) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] idxs = 0.0
[34473:MainThread](2023-03-18 07:41:52,611) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] rew = 2.7205613807821614
[34473:MainThread](2023-03-18 07:41:52,612) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] len = 483.0
[34473:MainThread](2023-03-18 07:41:52,613) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 5] rew_std = 0.0
[34473:MainThread](2023-03-18 07:41:52,614) INFO - qlib.rl.trai

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:42:41,926) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #4 current reward: 2.7206, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:42:41,952) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:42:41	Train iteration 6/1000
[34473:MainThread](2023-03-18 07:42:41,953) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:42:41,954) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:42:42,962) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:43:09,521) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:43:21,014) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:43:35,901) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] n/ep = 1
[34473:MainThread](2023-03-18 07:43:35,903) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] n/st = 483
[34473:MainThread](2023-03-18 07:43:35,904) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] rews = 2.2985557014137554
[34473:MainThread](2023-03-18 07:43:35,906) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] lens = 483.0
[34473:MainThread](2023-03-18 07:43:35,907) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] idxs = 0.0
[34473:MainThread](2023-03-18 07:43:35,909) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] rew = 2.2985557014137554
[34473:MainThread](2023-03-18 07:43:35,910) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] len = 483.0
[34473:MainThread](2023-03-18 07:43:35,912) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 6] rew_std = 0.0
[34473:MainThread](2023-03-18 07:43:35,913) INFO - qlib.rl.trai

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:44:22,825) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #5 current reward: 2.2986, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:44:22,863) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:44:22	Train iteration 7/1000
[34473:MainThread](2023-03-18 07:44:22,865) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:44:22,866) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:44:23,877) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:44:52,338) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:45:04,402) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:45:21,508) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] n/ep = 1
[34473:MainThread](2023-03-18 07:45:21,510) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] n/st = 483
[34473:MainThread](2023-03-18 07:45:21,511) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] rews = 2.796989705966456
[34473:MainThread](2023-03-18 07:45:21,512) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] lens = 483.0
[34473:MainThread](2023-03-18 07:45:21,514) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] idxs = 0.0
[34473:MainThread](2023-03-18 07:45:21,515) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] rew = 2.796989705966456
[34473:MainThread](2023-03-18 07:45:21,516) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] len = 483.0
[34473:MainThread](2023-03-18 07:45:21,517) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 7] rew_std = 0.0
[34473:MainThread](2023-03-18 07:45:21,518) INFO - qlib.rl.traine

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:46:09,415) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #6 current reward: 2.7970, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:46:09,451) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:46:09	Train iteration 8/1000
[34473:MainThread](2023-03-18 07:46:09,452) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:46:09,455) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:46:10,468) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:46:37,118) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:46:48,511) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:47:03,253) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] n/ep = 1
[34473:MainThread](2023-03-18 07:47:03,255) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] n/st = 483
[34473:MainThread](2023-03-18 07:47:03,256) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] rews = 2.243513239883214
[34473:MainThread](2023-03-18 07:47:03,257) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] lens = 483.0
[34473:MainThread](2023-03-18 07:47:03,258) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] idxs = 0.0
[34473:MainThread](2023-03-18 07:47:03,259) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] rew = 2.243513239883214
[34473:MainThread](2023-03-18 07:47:03,260) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] len = 483.0
[34473:MainThread](2023-03-18 07:47:03,261) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 8] rew_std = 0.0
[34473:MainThread](2023-03-18 07:47:03,263) INFO - qlib.rl.traine

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:47:46,538) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #7 current reward: 2.2435, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:47:46,575) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:47:46	Train iteration 9/1000
[34473:MainThread](2023-03-18 07:47:46,576) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:47:46,578) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:47:47,587) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:48:13,518) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:48:24,655) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:48:39,135) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] n/ep = 1
[34473:MainThread](2023-03-18 07:48:39,136) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] n/st = 483
[34473:MainThread](2023-03-18 07:48:39,137) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] rews = 2.146652057833966
[34473:MainThread](2023-03-18 07:48:39,138) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] lens = 483.0
[34473:MainThread](2023-03-18 07:48:39,140) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] idxs = 0.0
[34473:MainThread](2023-03-18 07:48:39,141) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] rew = 2.146652057833966
[34473:MainThread](2023-03-18 07:48:39,142) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] len = 483.0
[34473:MainThread](2023-03-18 07:48:39,143) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 9] rew_std = 0.0
[34473:MainThread](2023-03-18 07:48:39,144) INFO - qlib.rl.traine

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:49:22,298) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #8 current reward: 2.1467, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:49:22,332) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:49:22	Train iteration 10/1000
[34473:MainThread](2023-03-18 07:49:22,333) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:49:22,335) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:49:23,344) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:49:49,001) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:50:00,365) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:50:14,947) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] n/ep = 1
[34473:MainThread](2023-03-18 07:50:14,949) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] n/st = 483
[34473:MainThread](2023-03-18 07:50:14,950) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] rews = 2.907500001126218
[34473:MainThread](2023-03-18 07:50:14,951) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] lens = 483.0
[34473:MainThread](2023-03-18 07:50:14,952) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] idxs = 0.0
[34473:MainThread](2023-03-18 07:50:14,954) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] rew = 2.907500001126218
[34473:MainThread](2023-03-18 07:50:14,955) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] len = 483.0
[34473:MainThread](2023-03-18 07:50:14,956) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 10] rew_std = 0.0
[34473:MainThread](2023-03-18 07:50:14,957) INFO - qlib.r

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:50:59,501) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #9 current reward: 2.9075, best reward: 3.0956 in #1
[34473:MainThread](2023-03-18 07:50:59,531) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:50:59	Train iteration 11/1000
[34473:MainThread](2023-03-18 07:50:59,533) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:50:59,534) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:51:00,543) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:51:35,391) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:51:47,669) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:52:04,288) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] n/ep = 1
[34473:MainThread](2023-03-18 07:52:04,290) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] n/st = 483
[34473:MainThread](2023-03-18 07:52:04,291) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] rews = 3.176898321244443
[34473:MainThread](2023-03-18 07:52:04,293) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] lens = 483.0
[34473:MainThread](2023-03-18 07:52:04,294) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] idxs = 0.0
[34473:MainThread](2023-03-18 07:52:04,295) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] rew = 3.176898321244443
[34473:MainThread](2023-03-18 07:52:04,296) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] len = 483.0
[34473:MainThread](2023-03-18 07:52:04,297) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 11] rew_std = 0.0
[34473:MainThread](2023-03-18 07:52:04,299) INFO - qlib.r

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:52:51,343) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #10 current reward: 3.1769, best reward: 3.1769 in #10
[34473:MainThread](2023-03-18 07:52:51,379) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:52:51	Train iteration 12/1000
[34473:MainThread](2023-03-18 07:52:51,380) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:52:51,382) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:52:52,393) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:53:23,196) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:53:40,129) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:53:58,397) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] n/ep = 1
[34473:MainThread](2023-03-18 07:53:58,399) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] n/st = 483
[34473:MainThread](2023-03-18 07:53:58,400) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] rews = 2.5971870014882232
[34473:MainThread](2023-03-18 07:53:58,401) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] lens = 483.0
[34473:MainThread](2023-03-18 07:53:58,403) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] idxs = 0.0
[34473:MainThread](2023-03-18 07:53:58,404) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] rew = 2.5971870014882232
[34473:MainThread](2023-03-18 07:53:58,406) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] len = 483.0
[34473:MainThread](2023-03-18 07:53:58,407) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 12] rew_std = 0.0
[34473:MainThread](2023-03-18 07:53:58,409) INFO - qlib

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:54:50,178) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #11 current reward: 2.5972, best reward: 3.1769 in #10
[34473:MainThread](2023-03-18 07:54:50,224) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:54:50	Train iteration 13/1000
[34473:MainThread](2023-03-18 07:54:50,228) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:54:50,229) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:54:51,241) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:55:20,565) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:55:32,079) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:55:46,921) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] n/ep = 1
[34473:MainThread](2023-03-18 07:55:46,922) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] n/st = 483
[34473:MainThread](2023-03-18 07:55:46,923) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] rews = 3.2799291650855382
[34473:MainThread](2023-03-18 07:55:46,924) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] lens = 483.0
[34473:MainThread](2023-03-18 07:55:46,925) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] idxs = 0.0
[34473:MainThread](2023-03-18 07:55:46,926) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] rew = 3.2799291650855382
[34473:MainThread](2023-03-18 07:55:46,927) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] len = 483.0
[34473:MainThread](2023-03-18 07:55:46,928) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 13] rew_std = 0.0
[34473:MainThread](2023-03-18 07:55:46,929) INFO - qlib

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:56:30,750) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #12 current reward: 3.2799, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 07:56:30,787) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:56:30	Train iteration 14/1000
[34473:MainThread](2023-03-18 07:56:30,789) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:56:30,790) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:56:31,799) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:56:58,487) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:57:09,693) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:57:24,440) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] n/ep = 1
[34473:MainThread](2023-03-18 07:57:24,441) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] n/st = 483
[34473:MainThread](2023-03-18 07:57:24,443) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] rews = 3.248982119857062
[34473:MainThread](2023-03-18 07:57:24,444) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] lens = 483.0
[34473:MainThread](2023-03-18 07:57:24,445) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] idxs = 0.0
[34473:MainThread](2023-03-18 07:57:24,447) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] rew = 3.248982119857062
[34473:MainThread](2023-03-18 07:57:24,448) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] len = 483.0
[34473:MainThread](2023-03-18 07:57:24,449) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 14] rew_std = 0.0
[34473:MainThread](2023-03-18 07:57:24,450) INFO - qlib.r

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 07:58:12,963) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #13 current reward: 3.2490, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 07:58:13,000) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 07:58:12	Train iteration 15/1000
[34473:MainThread](2023-03-18 07:58:13,001) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 07:58:13,003) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 07:58:14,015) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:58:49,517) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 07:59:04,250) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 07:59:23,855) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] n/ep = 1
[34473:MainThread](2023-03-18 07:59:23,857) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] n/st = 483
[34473:MainThread](2023-03-18 07:59:23,858) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] rews = 2.228651933403118
[34473:MainThread](2023-03-18 07:59:23,860) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] lens = 483.0
[34473:MainThread](2023-03-18 07:59:23,861) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] idxs = 0.0
[34473:MainThread](2023-03-18 07:59:23,862) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] rew = 2.228651933403118
[34473:MainThread](2023-03-18 07:59:23,863) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] len = 483.0
[34473:MainThread](2023-03-18 07:59:23,865) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 15] rew_std = 0.0
[34473:MainThread](2023-03-18 07:59:23,866) INFO - qlib.r

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 08:00:13,653) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #14 current reward: 2.2287, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 08:00:13,691) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 08:00:13	Train iteration 16/1000
[34473:MainThread](2023-03-18 08:00:13,693) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 08:00:13,694) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 08:00:14,702) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:00:43,604) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 08:00:57,581) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:01:14,592) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] n/ep = 1
[34473:MainThread](2023-03-18 08:01:14,594) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] n/st = 483
[34473:MainThread](2023-03-18 08:01:14,595) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] rews = 1.9551147616920768
[34473:MainThread](2023-03-18 08:01:14,597) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] lens = 483.0
[34473:MainThread](2023-03-18 08:01:14,598) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] idxs = 0.0
[34473:MainThread](2023-03-18 08:01:14,599) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] rew = 1.9551147616920768
[34473:MainThread](2023-03-18 08:01:14,601) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] len = 483.0
[34473:MainThread](2023-03-18 08:01:14,602) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 16] rew_std = 0.0
[34473:MainThread](2023-03-18 08:01:14,603) INFO - qlib

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 08:02:10,572) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #15 current reward: 1.9551, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 08:02:10,607) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 08:02:10	Train iteration 17/1000
[34473:MainThread](2023-03-18 08:02:10,608) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 08:02:10,610) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 08:02:11,624) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:02:43,252) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 08:02:56,046) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:03:19,037) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] n/ep = 1
[34473:MainThread](2023-03-18 08:03:19,039) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] n/st = 483
[34473:MainThread](2023-03-18 08:03:19,040) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] rews = 2.1125519622310875
[34473:MainThread](2023-03-18 08:03:19,041) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] lens = 483.0
[34473:MainThread](2023-03-18 08:03:19,043) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] idxs = 0.0
[34473:MainThread](2023-03-18 08:03:19,044) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] rew = 2.1125519622310875
[34473:MainThread](2023-03-18 08:03:19,045) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] len = 483.0
[34473:MainThread](2023-03-18 08:03:19,046) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 17] rew_std = 0.0
[34473:MainThread](2023-03-18 08:03:19,047) INFO - qlib

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 08:04:08,900) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #16 current reward: 2.1126, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 08:04:08,939) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 08:04:08	Train iteration 18/1000
[34473:MainThread](2023-03-18 08:04:08,941) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 08:04:08,942) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 08:04:09,948) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:04:44,974) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 08:04:58,079) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31




start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:05:14,635) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] n/ep = 1
[34473:MainThread](2023-03-18 08:05:14,636) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] n/st = 483
[34473:MainThread](2023-03-18 08:05:14,638) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] rews = 2.660951320545316
[34473:MainThread](2023-03-18 08:05:14,639) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] lens = 483.0
[34473:MainThread](2023-03-18 08:05:14,641) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] idxs = 0.0
[34473:MainThread](2023-03-18 08:05:14,643) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] rew = 2.660951320545316
[34473:MainThread](2023-03-18 08:05:14,644) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] len = 483.0
[34473:MainThread](2023-03-18 08:05:14,645) INFO - qlib.rl.trainer.vessel - [vessel.py:84] - [Iter 18] rew_std = 0.0
[34473:MainThread](2023-03-18 08:05:14,647) INFO - qlib.r

start_time: 2019-01-01, end_time: 2023-03-15


[34473:MainThread](2023-03-18 08:06:03,057) INFO - qlib.rl.trainer.callbacks - [callbacks.py:161] - #17 current reward: 2.6610, best reward: 3.2799 in #12
[34473:MainThread](2023-03-18 08:06:03,094) INFO - qlib.rl.trainer.trainer - [trainer.py:211] - 
2023-03-18 08:06:03	Train iteration 19/1000
[34473:MainThread](2023-03-18 08:06:03,095) INFO - qlib.rl.trainer.vessel - [vessel.py:151] - Training initial states collection size: 1
[34473:MainThread](2023-03-18 08:06:03,097) INFO - qlib.rl.utils.data_queue - [data_queue.py:70] - Automatically set data queue maxsize to 12 to avoid overwhelming.
[34473:MainThread](2023-03-18 08:06:04,109) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


[34473:MainThread](2023-03-18 08:06:41,026) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange
[34473:MainThread](2023-03-18 08:06:55,482) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


start_time: 2017-01-08, end_time: 2018-12-31


In [None]:
# train_trade_strategy(trade_strategy=topk_strategy, max_iters=1, concurrency=1)

In [None]:
# train_trade_strategy(trade_strategy=weight_strategy, max_iters=1, concurrency=1)

## Backtest Policy

In [None]:
def get_port_analysis_config(config, strategy_config):
    """Assemble the executor / strategy / backtest config for a daily backtest.

    Args:
        config: Nested dict. ``config["date"]["backtest"]`` supplies ``start``
            and ``end``; ``config["backtest"]`` supplies ``benchmark`` and
            ``exchange_kwargs``.
        strategy_config: Strategy config dict, stored unchanged under
            ``"strategy"``.

    Returns:
        dict with the keys ``"executor"``, ``"strategy"`` and ``"backtest"``.

    Note:
        The ``"account"`` entry is read from the notebook-level ``account``
        variable at call time; it is not a parameter of this function.
    """
    executor_config = {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {"time_per_step": "day", "generate_portfolio_metrics": True},
    }

    backtest_window = config["date"]["backtest"]
    backtest_config = {
        "start_time": backtest_window["start"],
        "end_time": backtest_window["end"],
        "account": account,
        "benchmark": config["backtest"]["benchmark"],
        "exchange_kwargs": config["backtest"]["exchange_kwargs"],
    }

    return {
        "executor": executor_config,
        "strategy": strategy_config,
        "backtest": backtest_config,
    }

In [None]:
def get_topk_dropout_port_analysis_config(config):
    """Build the backtest config for the rule-based ``TopkDropoutStrategy``.

    The strategy kwargs are taken from the notebook-level variables
    ``signal``, ``topk``, ``n_drop`` and ``hold_thresh``; ``risk_degree`` and
    ``only_tradable`` are fixed at ``0.95`` and ``True``.

    Args:
        config: Workflow config forwarded to ``get_port_analysis_config``.

    Returns:
        Whatever ``get_port_analysis_config(config, strategy_config)`` returns.
    """
    strategy_kwargs = {
        "signal": signal,
        "topk": topk,
        "n_drop": n_drop,
        "risk_degree": 0.95,
        "only_tradable": True,
        "hold_thresh": hold_thresh,
    }
    return get_port_analysis_config(
        config,
        {
            "class": "TopkDropoutStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": strategy_kwargs,
        },
    )

In [None]:
def get_latest_checkpoint_path(strategy):
    """Return the path of ``latest.pth`` inside the newest training-run directory.

    Run directories are searched with the glob pattern
    ``../records/EpisodeInformationRatioReward/{strategy}/{strategy.policy}/*``
    (relative to the current working directory) and the lexicographically
    greatest match is used.  The resulting path is also printed.

    Args:
        strategy: Object whose ``str()`` and ``.policy`` attribute name the
            record sub-directories.

    Returns:
        str: ``./<run_dir>/checkpoints/latest.pth``.  The file itself is not
        checked for existence.

    Raises:
        FileNotFoundError: If no run directory matches the pattern (the
            original code failed here with a bare ``IndexError``).
    """
    pattern = f"../records/EpisodeInformationRatioReward/{strategy}/{strategy.policy}/*"
    run_dirs = glob.glob(pattern)
    if not run_dirs:
        raise FileNotFoundError(f"No training run directory matches {pattern!r}")
    # max() on the path strings picks the same entry as sorted(...)[-1].
    output_dir = max(run_dirs)
    checkpoint_path = f"./{output_dir}/checkpoints/latest.pth"
    print(checkpoint_path)
    return checkpoint_path


def get_best_checkpoint_path(strategy):
    """Return the checkpoint path of the best-validating epoch of the newest run.

    The newest run directory is the lexicographically greatest match of
    ``../records/EpisodeInformationRatioReward/{strategy}/{strategy.policy}/*``.
    Its ``metrics/validation_result.csv`` is read with the first column as
    index, and the row with the highest ``val/reward`` is selected.  The
    checkpoint number is that row's index label plus one, zero-padded to three
    digits.  The chosen path and its reward are printed.

    Rows whose ``val/reward`` is NaN are ignored.  Previously a NaN row could be
    picked as "best" because ``sort_values`` places NaN last by default.

    Args:
        strategy: Object whose ``str()`` and ``.policy`` attribute name the
            record sub-directories.

    Returns:
        str: ``./<run_dir>/checkpoints/<epoch:03d>.pth``.  The file itself is
        not checked for existence.

    Raises:
        FileNotFoundError: If no run directory matches the pattern.
        ValueError: If the validation file has no non-NaN ``val/reward``.
    """
    pattern = f"../records/EpisodeInformationRatioReward/{strategy}/{strategy.policy}/*"
    run_dirs = glob.glob(pattern)
    if not run_dirs:
        raise FileNotFoundError(f"No training run directory matches {pattern!r}")
    # max() on the path strings picks the same entry as sorted(...)[-1].
    output_dir = max(run_dirs)

    validation_file = f"{output_dir}/metrics/validation_result.csv"
    df_valid = pd.read_csv(validation_file, index_col=0)
    # Drop NaN rewards first: sort_values() puts them at the end, where
    # iloc[-1] would otherwise pick one up as the "best" epoch.
    scored = df_valid.dropna(subset=["val/reward"])
    if scored.empty:
        raise ValueError(f"No valid 'val/reward' values in {validation_file!r}")
    best = scored.sort_values("val/reward", ascending=True).iloc[-1]

    # The index label is shifted by one to obtain the checkpoint number.
    epoch = int(best.name) + 1
    checkpoint_path = f"./{output_dir}/checkpoints/{epoch:03d}.pth"
    print(checkpoint_path)
    print(best["val/reward"])
    return checkpoint_path

In [None]:
def get_topk_dropout_strategy_port_analysis_config(config, checkpoint_path=None):
    """Build the backtest config for the policy-driven ``TopkDropoutStrategy``.

    The strategy class is resolved from
    ``longcapital.rl.order_execution.strategy``.  Its kwargs come from the
    notebook-level variables ``signal``, ``dim``, ``stock_num``, ``topk`` and
    ``hold_thresh``, plus ``topk_dropout_strategy.policy_cls``.

    Args:
        config: Workflow config forwarded to ``get_port_analysis_config``.
        checkpoint_path: Policy checkpoint to load.  When ``None``, it is
            obtained from ``get_best_checkpoint_path(topk_dropout_strategy)``.

    Returns:
        Whatever ``get_port_analysis_config(config, strategy_config)`` returns.
    """
    resolved_checkpoint = (
        get_best_checkpoint_path(topk_dropout_strategy)
        if checkpoint_path is None
        else checkpoint_path
    )
    strategy_kwargs = {
        "signal": signal,
        "dim": dim,
        "stock_num": stock_num,
        "topk": topk,
        "risk_degree": 0.95,
        "only_tradable": True,
        "hold_thresh": hold_thresh,
        "signal_key": "signal",
        "policy_cls": topk_dropout_strategy.policy_cls,
        "checkpoint_path": resolved_checkpoint,
    }
    return get_port_analysis_config(
        config,
        {
            "class": "TopkDropoutStrategy",
            "module_path": "longcapital.rl.order_execution.strategy",
            "kwargs": strategy_kwargs,
        },
    )

In [None]:
def get_topk_dropout_signal_strategy_port_analysis_config(config, checkpoint_path=None):
    """Build the backtest config for ``TopkDropoutSignalStrategy``.

    The strategy class is resolved from
    ``longcapital.rl.order_execution.strategy``.  Its kwargs come from the
    notebook-level variables ``signal``, ``dim``, ``stock_num``, ``topk``,
    ``n_drop`` and ``hold_thresh``, plus
    ``topk_dropout_signal_strategy.policy_cls``.

    Args:
        config: Workflow config forwarded to ``get_port_analysis_config``.
        checkpoint_path: Policy checkpoint to load.  When ``None``, it is
            obtained from
            ``get_best_checkpoint_path(topk_dropout_signal_strategy)``.

    Returns:
        Whatever ``get_port_analysis_config(config, strategy_config)`` returns.
    """
    # Resolve the checkpoint before assembling the kwargs.
    policy_checkpoint = checkpoint_path
    if policy_checkpoint is None:
        policy_checkpoint = get_best_checkpoint_path(topk_dropout_signal_strategy)

    strategy_config = {
        "class": "TopkDropoutSignalStrategy",
        "module_path": "longcapital.rl.order_execution.strategy",
    }
    strategy_config["kwargs"] = {
        "signal": signal,
        "dim": dim,
        "stock_num": stock_num,
        "topk": topk,
        "n_drop": n_drop,
        "risk_degree": 0.95,
        "only_tradable": True,
        "hold_thresh": hold_thresh,
        "signal_key": "signal",
        "policy_cls": topk_dropout_signal_strategy.policy_cls,
        "checkpoint_path": policy_checkpoint,
    }
    return get_port_analysis_config(config, strategy_config)

In [None]:
def get_topk_strategy_port_analysis_config(config, checkpoint_path=None):
    """Build the backtest config for ``TopkStrategy``.

    The strategy class is resolved from
    ``longcapital.rl.order_execution.strategy``.  Its kwargs come from the
    notebook-level variables ``signal``, ``dim`` and ``stock_num``, plus
    ``topk_strategy.policy_cls``.

    Args:
        config: Workflow config forwarded to ``get_port_analysis_config``.
        checkpoint_path: Policy checkpoint to load.  When ``None``, it is
            obtained from ``get_best_checkpoint_path(topk_strategy)``.

    Returns:
        Whatever ``get_port_analysis_config(config, strategy_config)`` returns.
    """
    checkpoint_path = (
        checkpoint_path
        if checkpoint_path is not None
        else get_best_checkpoint_path(topk_strategy)
    )
    strategy_kwargs = {
        "signal": signal,
        "dim": dim,
        "stock_num": stock_num,
        "policy_cls": topk_strategy.policy_cls,
        "checkpoint_path": checkpoint_path,
    }
    strategy_config = {
        "class": "TopkStrategy",
        "module_path": "longcapital.rl.order_execution.strategy",
        "kwargs": strategy_kwargs,
    }
    return get_port_analysis_config(config, strategy_config)

In [None]:
def get_weight_strategy_port_analysis_config(config, checkpoint_path=None):
    """Build the backtest config for ``WeightStrategy``.

    The strategy class is resolved from
    ``longcapital.rl.order_execution.strategy``.  Its kwargs come from the
    notebook-level variables ``signal``, ``dim``, ``stock_num`` and ``topk``,
    plus ``weight_strategy.policy_cls``.

    Args:
        config: Workflow config forwarded to ``get_port_analysis_config``.
        checkpoint_path: Policy checkpoint to load.  When ``None``, it is
            obtained from ``get_best_checkpoint_path(weight_strategy)``.

    Returns:
        Whatever ``get_port_analysis_config(config, strategy_config)`` returns.
    """
    use_best_checkpoint = checkpoint_path is None
    if use_best_checkpoint:
        checkpoint_path = get_best_checkpoint_path(weight_strategy)

    return get_port_analysis_config(
        config,
        {
            "class": "WeightStrategy",
            "module_path": "longcapital.rl.order_execution.strategy",
            "kwargs": {
                "signal": signal,
                "dim": dim,
                "stock_num": stock_num,
                "topk": topk,
                "signal_key": "signal",
                "policy_cls": weight_strategy.policy_cls,
                "checkpoint_path": checkpoint_path,
            },
        },
    )

In [None]:
def run_trade_strategy_backtest(rid, port_analysis_config, start_time=None, end_time=None):
    """Run a portfolio-analysis backtest inside an existing recorder and show results.

    Resumes recorder ``rid`` of experiment ``EXP_NAME``, generates a
    ``PortAnaRecord`` from ``port_analysis_config`` (with
    ``skip_existing=False``), then loads the stored
    ``port_analysis_1day.pkl`` and ``report_normal_1day.pkl`` objects, prints
    the former and passes the latter to ``analysis_position.report_graph``.

    Args:
        rid: Recorder id to resume.
        port_analysis_config: Config dict as produced by
            ``get_port_analysis_config``.  It is never modified: when an
            override is given, it is applied to a copy.
        start_time: Optional override for ``["backtest"]["start_time"]``;
            ignored when falsy.
        end_time: Optional override for ``["backtest"]["end_time"]``;
            ignored when falsy.

    Returns:
        None.
    """
    # End any MLflow run that is still active before resuming the recorder.
    # NOTE(review): presumably avoids a clash with a run left open by an
    # earlier cell - confirm.
    mlflow.end_run()
    with R.start(experiment_name=EXP_NAME, recorder_id=rid, resume=True):
        overrides = {}
        if start_time:
            overrides["start_time"] = start_time
        if end_time:
            overrides["end_time"] = end_time
        if overrides:
            # Copy the top level and the "backtest" section so the caller's
            # dict is not mutated; all other nested objects stay shared.
            port_analysis_config = {
                **port_analysis_config,
                "backtest": {**port_analysis_config["backtest"], **overrides},
            }

        rec = R.get_recorder()
        par = PortAnaRecord(rec, port_analysis_config, skip_existing=False)
        par.generate()

        analysis_df = rec.load_object("portfolio_analysis/port_analysis_1day.pkl")
        report_normal_df = rec.load_object("portfolio_analysis/report_normal_1day.pkl")
        print(analysis_df)
        analysis_position.report_graph(report_normal_df)

In [None]:
# Backtest the TopkDropoutStrategy from qlib.contrib.strategy.signal_strategy,
# configured from the notebook globals, inside recorder `rid`.
# `rid` and `config` must already be defined by earlier cells.
run_trade_strategy_backtest(
    rid, 
    port_analysis_config=get_topk_dropout_port_analysis_config(
        config=config
    )
)

In [None]:
# run_trade_strategy_backtest(
#     rid, 
#     port_analysis_config=get_topk_dropout_strategy_port_analysis_config(
#         config,
#     )
# )

In [None]:
# Backtest the checkpointed TopkDropoutSignalStrategy policy inside recorder `rid`.
# No checkpoint_path is passed, so the config helper resolves it through
# get_best_checkpoint_path(topk_dropout_signal_strategy).
run_trade_strategy_backtest(
    rid, 
    port_analysis_config=get_topk_dropout_signal_strategy_port_analysis_config(
        config, 
    )
)

In [None]:
# run_trade_strategy_backtest(
#     rid, 
#     port_analysis_config=get_topk_strategy_port_analysis_config(
#         config, 
#     )
# )

In [None]:
# run_trade_strategy_backtest(
#     rid, 
#     port_analysis_config=get_weight_strategy_port_analysis_config(
#         config, 
#     )
# )

# Trade

In [None]:
# One initial state whose start and end are both the test-period end date from
# `config`, with date sampling switched off.
# NOTE(review): presumably this limits the simulator to a single trading day -
# confirm against TradeStrategyInitiateState.
initial_states_test = [
    TradeStrategyInitiateState(
        start_time=config["date"]["test"]["end"],
        end_time=config["date"]["test"]["end"],
        sample_date=False
    )
]

In [None]:
# Starting account handed to TradeStrategySimulator below: a cash balance plus
# one entry per instrument with `amount`, `price` and `count_day`.
# NOTE(review): values are hard-coded; presumably a snapshot of current holdings
# with count_day = days held so far - confirm and refresh before trading.
# Rebinding `account` here also changes what get_port_analysis_config returns
# afterwards, since that function reads this notebook-level name.
account = {
    "cash": 17392,
    "SH601985": {"amount": 4000, "price": 6.49, "count_day": 1},
    "SH601225": {"amount": 1200, "price": 19.34, "count_day": 6},
    "SH603833": {"amount": 200, "price": 129.69, "count_day": 4},
    "SH600188": {"amount": 700, "price": 32.46, "count_day": 6},
    "SZ002032": {"amount": 500, "price": 56.09, "count_day": 6},
    "SH603986": {"amount": 200, "price": 101.03, "count_day": 2},
}

In [None]:
# Rebuild the strategy for live trading, loading the best checkpoint.
# NOTE: the right-hand side reads `topk_dropout_signal_strategy` before this
# statement rebinds it, so an earlier cell must already have defined it; that
# existing object is what get_best_checkpoint_path uses to locate the run directory.
# NOTE(review): topk=10 and n_drop=2 are hard-coded here, whereas the backtest
# config helpers use the notebook-level `topk` / `n_drop` - confirm intended.
topk_dropout_signal_strategy = TopkDropoutSignalStrategy(
    signal=signal,
    dim=dim,
    stock_num=stock_num,
    topk=10,
    n_drop=2,
    only_tradable=False, # we don't know it before trading
    hold_thresh=hold_thresh,
    signal_key="signal",
    policy_cls=continuous.MetaPPO,
    feature_buffer_size=feature_buffer_size,
    checkpoint_path=get_best_checkpoint_path(topk_dropout_signal_strategy)
)

In [None]:
# Simulator built from the rebuilt strategy, the single test-end initial state
# and the hard-coded `account`; no benchmark is supplied.
# NOTE(review): presumably constructing the simulator is what attaches the
# account/exchange context to the strategy before trade() is called - confirm.
topk_dropout_signal_simulator = TradeStrategySimulator(
    trade_strategy=topk_dropout_signal_strategy, 
    initial_state=initial_states_test[0], 
    account=account,
    benchmark=None,
    exchange_kwargs=config["backtest"]["exchange_kwargs"]
)

In [None]:
# Ask the strategy for its trade decision.
# NOTE(review): presumably depends on the simulator cell above having been run - confirm.
decision = topk_dropout_signal_strategy.trade()

In [None]:
# Show the decision as this cell's output.
decision