In [1]:
import sys
sys.path.insert(0, "../..")

import qlib
import pandas as pd
from qlib.config import REG_CN
from qlib.utils import init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict


In [2]:
# use default data
provider_uri = "../../data/china_stock_qlib_adj"  # target_dir
qlib.init(provider_uri=provider_uri, region=REG_CN)

[24019:MainThread](2021-11-29 21:50:53,685) INFO - qlib.Initialization - [config.py:393] - default_conf: client.
[24019:MainThread](2021-11-29 21:50:53,691) INFO - qlib.Initialization - [__init__.py:57] - qlib successfully initialized based on client settings.
[24019:MainThread](2021-11-29 21:50:53,693) INFO - qlib.Initialization - [__init__.py:59] - data_path={'__DEFAULT_FREQ': PosixPath('/data3/xujianjin/qlib/data/china_stock_qlib_adj')}


In [3]:
hidden_sizes = [256, 256, 256, 256]

# train model

In [4]:
market = "csi300"
benchmark = "SH000300"

win_size = 10

###################################
# train model
###################################
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2020-08-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
    "infer_processors": [
        {"class" : "DropnaProcessor", "kwargs": {"fields_group": "feature"}},
        {"class" : "DropnaProcessor", "kwargs": {"fields_group": "label"}},
    ],
    "learn_processors": [
        {"class" : "DropnaProcessor", "kwargs": {"fields_group": "feature"}},
        {"class" : "DropnaProcessor", "kwargs": {"fields_group": "label"}},
    ],
    "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
    "window" : win_size,
    "process_type" : "independent"
}

task = {
    "model": {
        "class": "DNNModelPytorch",
        "module_path": "qlib.contrib.model.pytorch_nn",
        "kwargs": {
            "input_dim" : 4 * win_size,
            "output_dim" : 1,
            "layers" : hidden_sizes,
            "lr" : 0.001,
            "max_steps" : 300,
            "batch_size" : 1024,
            "early_stop_rounds" : 50,
            "eval_steps" : 20,
            "lr_decay" : 0.96,
            "lr_decay_steps" : 100,
            "optimizer" : "adam",
            "loss" : "mse",
            "GPU" : 9,
            "seed" : None,
            "weight_decay" : 1e-4
        },
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "CustomAlpha",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
}

# model initiaiton
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# start exp to train model
with R.start(experiment_name="train_model"):
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)
    rid = R.get_recorder().id


Please install necessary libs for CatBoostModel.


[24019:MainThread](2021-11-29 21:50:54,729) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:71] - DNN pytorch version...
[24019:MainThread](2021-11-29 21:50:54,996) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:88] - DNN parameters setting:
layers : [256, 256, 256, 256]
lr : 0.001
max_steps : 300
batch_size : 1024
early_stop_rounds : 50
eval_steps : 20
lr_decay : 0.96
lr_decay_steps : 100
optimizer : adam
loss_type : mse
eval_steps : 20
seed : None
device : cuda:9
use_GPU : True
weight_decay : 0.0001
[24019:MainThread](2021-11-29 21:50:55,006) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:132] - model:
Net(
  (dnn_layers): ModuleList(
    (0): Dropout(p=0.05, inplace=False)
    (1): Sequential(
      (0): Linear(in_features=40, out_features=256, bias=True)
      (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.1)
    )
    (2): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): Bat

Epoch    11: reducing learning rate of group 0 to 5.0000e-04.


[24019:MainThread](2021-11-29 21:51:18,983) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:240] - [Epoch 220]: train_loss nan, valid_loss nan
[24019:MainThread](2021-11-29 21:51:19,247) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:240] - [Epoch 240]: train_loss nan, valid_loss nan
[24019:MainThread](2021-11-29 21:51:19,510) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:240] - [Epoch 260]: train_loss nan, valid_loss nan
[24019:MainThread](2021-11-29 21:51:19,767) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:240] - [Epoch 280]: train_loss nan, valid_loss nan
[24019:MainThread](2021-11-29 21:51:20,018) INFO - qlib.DNNModelPytorch - [pytorch_nn.py:240] - [Epoch 299]: train_loss nan, valid_loss nan
[24019:MainThread](2021-11-29 21:51:20,025) INFO - qlib.timer - [log.py:113] - Time cost: 0.000s | waiting `async_log` Done


EOFError: Ran out of input

In [None]:
train_df, val_df = dataset.prepare(["train", "valid"])
print(train_df.shape, val_df.shape)

In [None]:
train_df.isna().values.shape

# prediction, backtest & analysis

In [None]:
###################################
# prediction, backtest & analysis
###################################
port_analysis_config = {
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        },
    },
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "model": model,
            "dataset": dataset,
            "topk": 50,
            "n_drop": 5,
        },
    },
    "backtest": {
        "start_time": "2017-01-01",
        "end_time": "2020-08-01",
        "account": 100000000,
        "benchmark": benchmark,
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}

# backtest and analysis
with R.start(experiment_name="backtest_analysis"):
    recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")

    # prediction
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()


In [None]:
par.portfolio_metric_dict

# analyze graphs

In [None]:
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
print(recorder)
pred_df = recorder.load_object("pred.pkl")
pred_df_dates = pred_df.index.get_level_values(level='datetime')
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")

## analysis position

In [None]:
pred_df.isna().sum()

### report

In [None]:
analysis_position.report_graph(report_normal_df)

### risk analysis

In [None]:
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

## analysis model

In [None]:
label_df = dataset.prepare("test", col_set="label")
label_df.columns = ['label']

### score IC

In [None]:
pred_label = pd.concat([label_df, pred_df], axis=1, sort=True).reindex(label_df.index)
analysis_position.score_ic_graph(pred_label)

### model performance

In [5]:
analysis_model.model_performance_graph(pred_label)

NameError: name 'analysis_model' is not defined