In [1]:
import pandas as pd
import numpy as np

In [3]:
# Show Evaluation Results
def metric_fn(preds, score='score'):
    preds = preds[~np.isnan(preds['label'])]
    precision = {}
    recall = {}
    temp = preds.groupby(level='datetime').apply(lambda x: x.sort_values(by=score, ascending=False))
    if len(temp.index[0]) > 2:
        temp = temp.reset_index(level=0).drop('datetime', axis=1)

    for k in [1, 3, 5, 10, 20, 30, 50, 100]:
        precision[k] = temp.groupby(level='datetime').apply(lambda x: (x.label[:k] > 0).sum() / k).mean()
        recall[k] = temp.groupby(level='datetime').apply(lambda x: (x.label[:k] > 0).sum() / (x.label > 0).sum()).mean()

    ic = preds.groupby(level='datetime').apply(lambda x: x.label.corr(x[score])).mean()
    rank_ic = preds.groupby(level='datetime').apply(lambda x: x.label.corr(x[score], method='spearman')).mean()
    icir = ic/preds.groupby(level='datetime').apply(lambda x: x.label.corr(x[score])).std()
    rank_icir = rank_ic/preds.groupby(level='datetime').apply(lambda x: x.label.corr(x[score], method='spearman')).std()

    return precision, recall, ic, rank_ic, icir, rank_icir

report = pd.DataFrame()
for name in ['doc2edga', 'dueefin', 'fr2kg', 'hidy', 'is', 'sht']:
    data = pd.read_pickle('output/'+name+'.pkl')
    temp = dict()
    temp['model'] = name
    precision, recall, ic, rank_ic, icir, rank_icir = metric_fn(data, score='pred_score')
    temp['P@5'] = precision[5]
    temp['P@10'] = precision[10]
    temp['P@20'] = precision[20]
    temp['IC'] = ic
    temp['ICIR'] = icir
    temp['RankIC'] = rank_ic
    temp['RankICIR'] = rank_icir
    report = report.append(temp, ignore_index=True)
report

  report = report.append(temp, ignore_index=True)
  report = report.append(temp, ignore_index=True)
  report = report.append(temp, ignore_index=True)
  report = report.append(temp, ignore_index=True)
  report = report.append(temp, ignore_index=True)
  report = report.append(temp, ignore_index=True)


Unnamed: 0,model,P@5,P@10,P@20,IC,ICIR,RankIC,RankICIR
0,doc2edga,0.546708,0.552263,0.55036,0.086019,0.72568,0.083341,0.706944
1,dueefin,0.553086,0.545988,0.54784,0.086672,0.739325,0.083954,0.718476
2,fr2kg,0.550823,0.554733,0.553601,0.088102,0.755876,0.085322,0.737719
3,hidy,0.560288,0.555144,0.556327,0.091892,0.797244,0.088764,0.77476
4,is,0.555144,0.549897,0.551235,0.086302,0.721037,0.083409,0.699932
5,sht,0.553909,0.545267,0.547994,0.087632,0.747951,0.08394,0.722328


## backtest result
## strategy: top 10, drop 5

In [29]:
from pprint import pprint
import qlib
import pandas as pd
from qlib.utils.time import Freq
from qlib.utils import flatten_dict
from qlib.backtest import backtest, executor
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy
import qlib.contrib.report as qcr

# change diffrent pkl file here to see the backtest result
data = pd.read_pickle('output/hidy.pkl')
data = data[['pred_score']]
data.columns=[['score']]
# init qlib
qlib.init(provider_uri="../qlib_data/cn_data")
# qlib.init(provider_uri="~/.qlib/qlib_data/us_data")
CSI300_BENCH = "SH000300"
# Benchmark is for calculating the excess return of your strategy.
# Its data format will be like **ONE normal instrument**.
# For example, you can query its data with the code below
# `D.features(["SH000300"], ["$close"], start_time='2010-01-01', end_time='2017-12-31', freq='day')`
# It is different from the argument `market`, which indicates a universe of stocks (e.g. **A SET** of stocks like csi300)
# For example, you can query all data from a stock market with the code below.
# ` D.features(D.instruments(market='csi300'), ["$close"], start_time='2010-01-01', end_time='2017-12-31', freq='day')`

FREQ = "day"
STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 5,
    # pred_score, pd.Series
    "signal": data,
}

EXECUTOR_CONFIG = {
    "time_per_step": "day",
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2019-01-01",
    "end_time": "2022-12-30",
    "account": 100000000,
    "benchmark": CSI300_BENCH,
    "exchange_kwargs": {
        "freq": FREQ,
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.00005,
        "close_cost": 0.0003,
        "min_cost": 5,
    },
}

# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
# backtest info
report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)

# analysis
analysis = dict()
analysis["excess_return_without_cost"] = risk_analysis(
    report_normal["return"] - report_normal["bench"], freq=analysis_freq
)
analysis["excess_return_with_cost"] = risk_analysis(
    report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
)

analysis_df = pd.concat(analysis)  # type: pd.DataFrame
# log metrics
analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
# print out results
# pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
# pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
# pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
# pprint(analysis["excess_return_without_cost"])
pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
pprint(analysis["excess_return_with_cost"])


[26712:MainThread](2023-08-20 12:04:20,430) INFO - qlib.Initialization - [config.py:415] - default_conf: client.
[26712:MainThread](2023-08-20 12:04:20,431) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[26712:MainThread](2023-08-20 12:04:20,432) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/Users/haowang/Desktop/project/stock_model/qlib_data/cn_data')}
[26712:MainThread](2023-08-20 12:04:20,461) INFO - qlib.backtest caller - [__init__.py:93] - Create new exchange


backtest loop:   0%|          | 0/972 [00:00<?, ?it/s]

  return np.nanmean(self.data)


'The following are analysis results of the excess return with cost(1day).'
                       risk
mean               0.000614
std                0.009577
annualized_return  0.146199
information_ratio  0.989568
max_drawdown      -0.174865
