<a href="https://colab.research.google.com/github/microsoft/qlib/blob/main/examples/workflow_by_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#  Copyright (c) Microsoft Corporation.
#  Licensed under the MIT License.

In [None]:
from copy import deepcopy
import qlib
import pandas as pd
from qlib.constant import REG_US
from qlib.data import D
from qlib.data.dataset.handler import DataHandlerLP
from qlib.contrib.data.handler import Alpha158
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.contrib.report import analysis_model, analysis_position

In [None]:
# Experiment name under which the backtest/analysis run is recorded.
EXP_NAME = "nested"

# Instrument universe and benchmark symbol shared by the cells below.
MARKET, BENCHMARK = "nasdaq100", "QQQ"

## initialize qlib

In [None]:
# Use a custom, locally stored dataset.
# NOTE: the data has to be downloaded first:
#   python scripts/get_data.py qlib_data_us --target_dir /root/onethingai-tmp/.qlib/qlib_data/us_data
provider_uri = "/root/onethingai-tmp/.qlib/qlib_data/us_data"  # target_dir

# A missing dataset is only reported; initialisation is attempted regardless.
data_available = exists_qlib_data(provider_uri)
if not data_available:
    print(f"Qlib data is not found in {provider_uri}")

qlib.init(provider_uri=provider_uri, region=REG_US)

In [None]:
###################################
# initialize qlib
###################################
# Data directories: one for daily bars, one for 1-minute bars.
provider_uri_day = "/root/onethingai-tmp/.qlib/qlib_data/us_data"  # target_dir
provider_uri_1min = "/root/onethingai-tmp/.qlib/qlib_data/us_data_1min"

# A missing dataset is only reported; initialisation is attempted regardless.
if not (exists_qlib_data(provider_uri_day) and exists_qlib_data(provider_uri_1min)):
    print(f"Qlib data is not found in {provider_uri_day} or {provider_uri_1min}")

# Map each data frequency to the directory that provides it.
provider_uri_map = {"1min": provider_uri_1min, "day": provider_uri_day}

# Pass the region explicitly: both directories hold US data, and a qlib.init()
# call without `region` would not keep the REG_US setting of an earlier init.
qlib.init(
    provider_uri=provider_uri_map,
    region=REG_US,
    dataset_cache=None,
    expression_cache=None,
)

## data handler

In [None]:
# Default processors for the learning data: DropnaLabel followed by
# CSZScoreNorm applied to the "label" fields group.
_DEFAULT_LEARN_PROCESSORS = [
    {"class": "DropnaLabel"},
    {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
]


class Alpha159(Alpha158):
    """Alpha158 handler extended with one extra feature, ``$sentiment``.

    All constructor arguments are forwarded unchanged to ``Alpha158``; the
    only functional difference is the additional feature column added in
    :meth:`get_feature_config`.

    Parameters
    ----------
    infer_processors : list, optional
        Processors for inference data. ``None`` (the default) means an empty
        list; a fresh list is created per instance.
    learn_processors : list, optional
        Processors for learning data. ``None`` (the default) means a private
        deep copy of ``_DEFAULT_LEARN_PROCESSORS``, so the module-level
        default can never be modified through an instance.

    The remaining parameters (``instruments``, ``start_time``, ``end_time``,
    ``freq``, ``fit_start_time``, ``fit_end_time``, ``process_type``,
    ``filter_pipe``, ``inst_processors`` and ``**kwargs``) are passed straight
    through to the parent class.
    """

    def __init__(
        self,
        instruments="csi500",
        start_time=None,
        end_time=None,
        freq="day",
        infer_processors=None,
        learn_processors=None,
        fit_start_time=None,
        fit_end_time=None,
        process_type=DataHandlerLP.PTYPE_A,
        filter_pipe=None,
        inst_processors=None,
        **kwargs
    ):
        # Build the defaults per call instead of sharing mutable objects
        # between instances.
        if infer_processors is None:
            infer_processors = []
        if learn_processors is None:
            learn_processors = deepcopy(_DEFAULT_LEARN_PROCESSORS)

        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
            freq=freq,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            fit_start_time=fit_start_time,
            fit_end_time=fit_end_time,
            process_type=process_type,
            filter_pipe=filter_pipe,
            inst_processors=inst_processors,
            **kwargs
        )

    def get_feature_config(self):
        """Return ``(fields, names)`` of the parent plus the sentiment feature.

        The expression ``$sentiment`` is appended under the column name
        ``SENTIMENT_SCORE``. New lists are returned so the objects handed out
        by the parent implementation are left untouched.
        """
        parent_fields, parent_names = super().get_feature_config()
        fields = [*parent_fields, "$sentiment"]
        names = [*parent_names, "SENTIMENT_SCORE"]
        return fields, names
    
# Handler settings: overall data span, the fit_* window (identical to the
# training segment used for the dataset) and the instrument universe.
handler_kwargs = dict(
    start_time="2008-01-01",
    end_time="2024-06-13",
    fit_start_time="2008-01-01",
    fit_end_time="2014-12-31",
    instruments=MARKET,
)

handler = Alpha159(**handler_kwargs)

## train model

In [None]:
###################################
# train model
###################################
# Hyper-parameters handed to the LGBModel constructor.
lgb_params = {
    "loss": "mse",
    "colsample_bytree": 0.8879,
    "learning_rate": 0.0421,
    "subsample": 0.8789,
    "lambda_l1": 205.6999,
    "lambda_l2": 580.9768,
    "max_depth": 8,
    "num_leaves": 210,
    "num_threads": 20,
}
model_config = {
    "class": "LGBModel",
    "module_path": "qlib.contrib.model.gbdt",
    "kwargs": lgb_params,
}

# Date ranges of the train / valid / test splits.
data_segments = {
    "train": ("2008-01-01", "2014-12-31"),
    "valid": ("2015-01-01", "2016-12-31"),
    "test": ("2020-01-01", "2020-08-01"),
}
dataset_config = {
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {
        "handler": handler,
        "segments": data_segments,
    },
}

task = {
    "model": model_config,
    "dataset": dataset_config,
}

# Instantiate the model and the dataset from their configs.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# Fit inside a "train_model" run, store the fitted model as an artifact and
# remember the recorder id so the model can be loaded again later.
with R.start(experiment_name="train_model"):
    flat_params = flatten_dict(task)
    R.log_params(**flat_params)
    model.fit(dataset)
    R.save_objects(trained_model=model)
    rid = R.get_recorder().id

# prediction, backtest & analysis

In [None]:
###################################
# prediction, backtest & analysis
###################################
# Innermost level: SimulatorExecutor stepping every 5 minutes.
simulator_executor_config = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "5min",
        "generate_portfolio_metrics": True,
        "verbose": True,
        "indicator_config": {
            "show_indicator": True,
        },
    },
}

# Middle level: 30-minute steps, with TWAPStrategy as the inner strategy
# driving the 5-minute executor.
intraday_executor_config = {
    "class": "NestedExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {
        "time_per_step": "30min",
        "inner_executor": simulator_executor_config,
        "inner_strategy": {
            "class": "TWAPStrategy",
            "module_path": "qlib.contrib.strategy.rule_strategy",
        },
        "generate_portfolio_metrics": True,
        "indicator_config": {
            "show_indicator": True,
        },
    },
}

port_analysis_config = {
    # Outermost level: daily steps, with SBBStrategyEMA as the inner strategy
    # driving the 30-minute executor.
    "executor": {
        "class": "NestedExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "inner_executor": intraday_executor_config,
            "inner_strategy": {
                "class": "SBBStrategyEMA",
                "module_path": "qlib.contrib.strategy.rule_strategy",
                "kwargs": {
                    "instruments": MARKET,
                    "freq": "1min",
                },
            },
            "track_data": True,
            "generate_portfolio_metrics": True,
            "indicator_config": {
                "show_indicator": True,
            },
        },
    },
    "backtest": {
        "start_time": "2020-01-01",
        "end_time": "2020-08-01",
        "account": 100000000,
        # The key has to be the lowercase "benchmark": the entries of this
        # dict are forwarded as keyword arguments to qlib's backtest routine.
        "benchmark": BENCHMARK,
        "exchange_kwargs": {
            "freq": "1min",
            # NOTE(review): 0.095 looks like a price-limit threshold carried
            # over from another market's example - confirm it is intended
            # for the US data used here.
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}

# Top-level portfolio strategy, fed with the (model, dataset) signal.
strategy_config = {
    "class": "TopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": {
        "signal": (model, dataset),
        "topk": 50,
        "n_drop": 5,
    },
}
port_analysis_config["strategy"] = strategy_config

# backtest and analysis
with R.start(experiment_name=EXP_NAME, resume=True):
    # Reload the model stored by the training run.
    recorder = R.get_recorder(recorder_id=rid, experiment_name="train_model")
    model = recorder.load_object("trained_model")

    # prediction
    # From here on `recorder` is the recorder of the current run; its id is
    # kept so the results can be loaded again afterwards.
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(
        recorder,
        port_analysis_config,
        indicator_analysis_method="value_weighted",
    )
    par.generate()

# analyze graphs

In [None]:
# load recorder
# The backtest run was started under experiment EXP_NAME, so the recorder id
# has to be looked up in that same experiment.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name=EXP_NAME)

# load previous results (prediction scores and the daily portfolio artifacts)
pred_df = recorder.load_object("pred.pkl")
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")

## analysis position

### report

In [None]:
# Plot the report graph from the daily backtest report loaded from the recorder.
analysis_position.report_graph(report_normal_df)

### risk analysis

In [None]:
# Plot the risk analysis from the stored analysis result and the daily report.
analysis_position.risk_analysis_graph(analysis_df, report_normal_df)

## analysis model

In [None]:
# Fetch the label column set of the "test" segment and give its single
# column the plain name "label".
label_df = dataset.prepare("test", col_set="label")
label_df.columns = ["label"]

### score IC

In [None]:
# Put labels and predictions side by side, keep exactly the rows of the
# label frame, then plot the score IC.
label_and_pred = pd.concat([label_df, pred_df], axis=1, sort=True)
pred_label = label_and_pred.reindex(label_df.index)
analysis_position.score_ic_graph(pred_label)

### model performance

In [None]:
# Plot the model performance graphs from the combined label/prediction frame.
analysis_model.model_performance_graph(pred_label)