## 0.导入包

In [None]:
from pprint import pprint
import qlib
import pandas as pd
from qlib.utils.time import Freq
from qlib.utils import flatten_dict
from qlib.contrib.evaluate import backtest_daily
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import TopkDropoutStrategy
from qlib.utils import init_instance_by_config
from qlib.backtest.high_performance_ds import  NumpyQuote,BaseQuote
from qlib.backtest.exchange import  Exchange

from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord, SigAnaRecord

from qlib.contrib.report import analysis_model, analysis_position

from qlib.backtest import backtest, executor,exchange

from qlib.backtest import high_performance_ds

## 1.初始化

In [None]:
# Initialise qlib against the local data directory used by every cell below.
qlib.init(provider_uri='/root/autodl-tmp/Stockformer/code_and_data/us_data_21-23')
MARKET = "all"  # not referenced in the visible cells; the handler config repeats the literal 'all'
BENCHMARK = "spx.gi"  # not referenced in the visible cells; the backtest configs repeat this literal
EXP_NAME = "tutorial_exp"  # experiment name passed to R.start / R.get_recorder
# CSI300_BENCH = "000300"

## 2.构造因子

In [None]:
# Data-handler arguments: overall data window, fit window and instrument universe.
handler_kwargs = dict(
    start_time="2021-01-04",
    end_time="2023-06-30",
    fit_start_time="2021-01-04",
    fit_end_time="2023-01-31",
    instruments="all",
)

# Factor-generation config: the Alpha158 handler, fed with the arguments above.
handler_conf = {
    "class": "Alpha158",
    "module_path": "qlib.contrib.data.handler",
}
handler_conf["kwargs"] = handler_kwargs

pprint(handler_conf)

In [None]:
hd = init_instance_by_config(handler_conf)

In [None]:
df = hd.fetch()
df.tail()

## 3.回测

In [None]:
# Dataset config: wraps the handler instance `hd` and splits the data window
# into train / valid / test segments by date.
dataset_conf = {
        "class": "DatasetH",
        # "class": "TSDatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

### 第一个模型开始

In [None]:
#### Model setup (CatBoost variant)
# NOTE: the LGBModel cell that follows rebinds `model`, so when both cells are
# run in order this CatBoost instance is replaced before training.

model = init_instance_by_config({
        "class": "CatBoostModel",
        "module_path": "qlib.contrib.model.catboost_model",
})

In [None]:
#### Model setup (LightGBM variant)
# Builds the LGBModel with explicit hyper-parameters and binds it to `model`,
# which the training cell below fits.

model = init_instance_by_config({
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
})

In [None]:
# Train the current `model` inside an experiment run and record its outputs.
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # keep the recorder id so the next cell can reopen this run

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# Reopen the run recorded by the training cell (via `rid`) and load the
# "pred.pkl" object stored in it; the last line displays it in the notebook.
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
pred_df = recorder.load_object("pred.pkl")
pred_df

### Stockformer

In [None]:
# Load the Stockformer predictions from CSV and rebuild the
# (datetime, instrument) MultiIndex used as the backtest signal.
my_pred_df = (
    pd.read_csv(
        '/root/autodl-tmp/Stockformer/output/US-Stock-pred-20230201-20230630-new.csv',
        index_col=[0, 1],
    )
    .reset_index()
    .rename(columns={'datatime': 'datetime'})  # normalise the misspelled header when present
)
my_pred_df['datetime'] = pd.to_datetime(my_pred_df['datetime'])
my_pred_df = my_pred_df.set_index(['datetime', 'instrument'])

In [None]:
my_pred_df

In [None]:
############## Stockformer backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": my_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# Build the strategy from the config defined in the previous cell.
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# Build the simulated executor.
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# Run the backtest; it returns portfolio metrics and trade indicators.
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
# Key into the metrics dict: the two parts returned by Freq.parse(FREQ), concatenated.
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# Stockformer report and positions. NOTE: a later cell reloads `my_report_normal`
# from CSV, which replaces this in-memory report.
my_report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)


In [None]:
my_report_normal

### LGBM

In [None]:
########## LGBM backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# Build the strategy from the config defined in the previous cell.
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# Build the simulated executor.
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# Run the backtest; it returns portfolio metrics and trade indicators.
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
# Key into the metrics dict: the two parts returned by Freq.parse(FREQ), concatenated.
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# LGBM report and positions (`positions_normal` is overwritten here).
# NOTE(review): no active cell in view copies lgbm_report_normal['return'] into
# my_report_normal (the only such assignment is commented out), while the final
# analysis cell reads my_report_normal["lgb_return"] - confirm the CSV has it.
lgbm_report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)


### Catboost

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "CatBoostModel",
        "module_path": "qlib.contrib.model.catboost_model",
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
catboost_pred_df = recorder.load_object("pred.pkl")
catboost_pred_df

In [None]:
############## CatBoost backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": catboost_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
catboost_report_normal, catboost_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
catboost_report_normal.head()

In [None]:
## Reload the cached comparison report so model columns can be merged into it.
## NOTE: this replaces the in-memory `my_report_normal` produced by the
## Stockformer backtest cell with whatever was last saved to disk.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/
# (the final report section reads from output/).
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal

In [None]:
my_report_normal['catboost_return'] = catboost_report_normal['return']

In [None]:
my_report_normal

### XGBoost

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "XGBModel",
        "module_path": "qlib.contrib.model.xgboost",
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
xgboost_pred_df = recorder.load_object("pred.pkl")
xgboost_pred_df

In [None]:
############## XGBoost backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": xgboost_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
xgboost_report_normal, xgboost_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
my_report_normal['xgboost_return'] = xgboost_report_normal['return']

In [None]:
my_report_normal

### highfreq gdbt

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "HFLGBModel",
        "module_path": "qlib.contrib.model.highfreq_gdbt_model",
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
HFLGB_pred_df = recorder.load_object("pred.pkl")
HFLGB_pred_df

In [None]:
############## HFLGB backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": HFLGB_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
HFLGB_report_normal, HFLGB_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
my_report_normal['HFLGB_return'] = HFLGB_report_normal['return']

In [None]:
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### Alstm

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        # "class": "TSDatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "ALSTM",
        "module_path": "qlib.contrib.model.pytorch_alstm",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
alstm_pred_df = recorder.load_object("pred.pkl")
alstm_pred_df

In [None]:
############## ALSTM backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": alstm_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
alstm_report_normal, alstm_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
alstm_report_normal['return']

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['alstm_return'] = alstm_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### LSTM

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "LSTM",
        "module_path": "qlib.contrib.model.pytorch_lstm",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
lstm_pred_df = recorder.load_object("pred.pkl")
lstm_pred_df

In [None]:
############## LSTM backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": lstm_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
lstm_report_normal, lstm_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['lstm_return'] = lstm_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### GRU

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        # "class": "TSDatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "GRU",
        "module_path": "qlib.contrib.model.pytorch_gru",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
gru_pred_df = recorder.load_object("pred.pkl")
gru_pred_df

In [None]:
############## GRU backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": gru_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
gru_report_normal, gru_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['gru_return'] = gru_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### GATS

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "GATs",
        "module_path": "qlib.contrib.model.pytorch_gats",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
gats_pred_df = recorder.load_object("pred.pkl")
gats_pred_df

In [None]:
############## GATs backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": gats_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
gats_report_normal, gats_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
gats_report_normal

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['gats_return'] = gats_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### TCN

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "TCN",
        "module_path": "qlib.contrib.model.pytorch_tcn",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
tcn_pred_df = recorder.load_object("pred.pkl")
tcn_pred_df

In [None]:
############## TCN backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": tcn_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
tcn_report_normal, tcn_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
tcn_report_normal

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['tcn_return'] = tcn_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### Localformer

In [None]:
# 数据集参数
dataset_conf = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": hd,
            "segments": {
                "train": ("2021-01-04", "2022-08-31"),
                "valid": ("2022-09-01", "2023-01-31"),
                "test": ("2023-02-01", "2023-06-30"),
            },
        },
}

In [None]:
dataset = init_instance_by_config(dataset_conf)

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "LocalformerModel",
        "module_path": "qlib.contrib.model.pytorch_localformer",
        "kwargs": {
            "d_feat": 158,
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
localformer_pred_df = recorder.load_object("pred.pkl")
localformer_pred_df

In [None]:
############## Localformer backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": localformer_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
localformer_report_normal, localformer_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['localformer_return'] = localformer_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

### IGMTF

In [None]:
# 模型训练

model = init_instance_by_config({
        "class": "IGMTF",
        "module_path": "qlib.contrib.model.pytorch_igmtf",
        "kwargs": {
            "d_feat": 158,
            "metric": "ic"
        }
})

In [None]:
# start exp to train model
with R.start(experiment_name=EXP_NAME):
    model.fit(dataset)
    R.save_objects(trained_model=model)

    rec = R.get_recorder()
    rid = rec.id # save the record id

    # Inference and saving signal
    sr = SignalRecord(model, dataset, rec)
    sr.generate()

In [None]:
# load recorder
recorder = R.get_recorder(recorder_id=rid, experiment_name=EXP_NAME)
igmtf_pred_df = recorder.load_object("pred.pkl")
igmtf_pred_df

In [None]:
############## IGMTF backtest configuration
FREQ = "day"

STRATEGY_CONFIG = {
    "topk": 10,
    "n_drop": 2,
    # pred_score, pd.Series
    "signal": igmtf_pred_df,  # prediction frame of the model under test
}

EXECUTOR_CONFIG = {
    "time_per_step": FREQ,
    "generate_portfolio_metrics": True,
}

backtest_config = {
    "start_time": "2023-02-01",
    "end_time": "2023-06-28",
    "account": 100000,
    # Reuse the BENCHMARK constant from the initialisation cell instead of
    # repeating the 'spx.gi' literal.
    "benchmark": BENCHMARK,
    "exchange_kwargs": {
        "freq": FREQ,
        # NOTE(review): 0.095 resembles an A-share price-limit setting; confirm for US data.
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.003,
        "min_cost": 5,
    },
}


In [None]:
# strategy object
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
# executor object
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
# backtest
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))

# backtest info
igmtf_report_normal, igmtf_positions_normal = portfolio_metric_dict.get(analysis_freq)

In [None]:
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')
# Parse the 'datetime' column into timestamps.
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])

# Use 'datetime' as the index.
my_report_normal.set_index('datetime', inplace=True)

In [None]:
my_report_normal['igmtf_return'] = igmtf_report_normal['return']
my_report_normal

In [None]:
## Persist the merged report so later cells can reload it.
# Keep the trailing slash: the file name is appended with '+', so without it the
# path resolves to '.../outputmy_report_normal.csv' instead of a file in output/.
outpath = '/root/autodl-tmp/Stockformer/output/'
### Leave the next line active whenever the report should be saved.
my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal

## 回测报告

In [None]:
## Output directory for the cached comparison report (note the trailing slash:
## file names are appended to it by plain string concatenation).
outpath = '/root/autodl-tmp/Stockformer/output/'
### Un-comment the next line to save the in-memory report.
# my_report_normal.to_csv(outpath+'my_report_normal.csv')

In [None]:
my_report_normal = pd.read_csv(outpath+'my_report_normal.csv')

In [None]:
# Parse the 'datetime' column into timestamps and promote it to the index.
my_report_normal = my_report_normal.assign(
    datetime=pd.to_datetime(my_report_normal['datetime'])
).set_index('datetime')


In [None]:
my_report_normal.columns

In [None]:
# Swap the labels of the 'return' and 'gru_return' columns in one pass.
# NOTE: not idempotent - running this cell a second time swaps them back.
my_report_normal.rename(
    columns={'gru_return': 'return', 'return': 'gru_return'},
    inplace=True,
)


In [None]:
my_report_normal

In [None]:
### 保存时不要注释
my_report_normal.to_csv(outpath+'my_report_normal_processed.csv')

In [None]:
import pandas as pd

# Take every column of my_report_normal whose label contains 'return' and
# relabel the plain 'return' column as 'Stockformer_return'.
returns_df = my_report_normal.filter(like='return').rename(
    columns={'return': 'Stockformer_return'}
)

# Column-wise totals, as a Series indexed by column label.
sums = returns_df.sum()

# Turn the Series into a two-column table: 'return_type' and 'sum'.
sums_df = sums.to_frame(name='sum').reset_index()
sums_df = sums_df.rename(columns={'index': 'return_type'})

# Print the totals.
print(sums_df)


In [None]:
## 给report_normal添加内容
# my_report_normal['lgb_return'] = lgbm_report_normal['return']
# report_normal['xgb_return'] = report_normal['return']
# report_normal['cat_return'] = report_normal['return']

In [None]:
# analysis = dict()
# # default frequency will be daily (i.e. "day")
# # analysis["excess_return_without_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"])
# # analysis["excess_return_with_cost"] = risk_analysis(report_normal["return"] - report_normal["bench"] - report_normal["cost"])

# analysis["hh_return_without_cost"] = risk_analysis(my_report_normal["return"])
# analysis["hh_return_with_cost"] = risk_analysis(my_report_normal["return"]- my_report_normal["cost"])

# analysis_df = pd.concat(analysis)  # type: pd.DataFrame
# pprint(analysis_df)

In [None]:
# Report column to analyse for each result-key prefix, in reporting order.
# Each result is stored under "<prefix>_return_without_cost".
_return_column_by_prefix = {
    "my": "return",
    "lgb": "lgb_return",
    "catboost": "catboost_return",
    "xgboost": "xgboost_return",
    "HFLGB": "HFLGB_return",
    "alstm": "alstm_return",
    "lstm": "lstm_return",
    "gru": "gru_return",
    "gats": "gats_return",
    "tcn": "tcn_return",
    "localformer": "localformer_return",
    "igmtf": "igmtf_return",
}

# Run risk_analysis on each return column of the report.
analysis = dict()
for _prefix, _column in _return_column_by_prefix.items():
    analysis[_prefix + "_return_without_cost"] = risk_analysis(my_report_normal[_column])


In [None]:
# Concatenate the per-series risk analysis results (the dict keys label each part)
# and pretty-print the combined table.
analysis_df = pd.concat(analysis)  # type: pd.DataFrame
pprint(analysis_df)

In [None]:
outpath = '/root/autodl-tmp/Stockformer/output/'
# Reload the processed report, parse its 'datetime' column into datetime
# values and use that column as the index.
my_report_normal = pd.read_csv(outpath+'my_report_normal_processed.csv')
my_report_normal['datetime'] = pd.to_datetime(my_report_normal['datetime'])
my_report_normal = my_report_normal.set_index('datetime')

In [None]:
# Plot the report with qlib's analysis_position.report_graph.
analysis_position.report_graph(my_report_normal)


In [None]:
# Display the index of the report.
my_report_normal.index

In [None]:
import pandas as pd

def calculate_annualized_returns(df, annualization_factor=252):
    """Annualize the mean daily return of every return column in *df*.

    A column is treated as a return series when its label is a string that
    contains ``"return"``. Labels that are not strings are ignored instead of
    raising ``TypeError`` on the substring test.

    Args:
        df: DataFrame that already holds daily return data, one column per
            model.
        annualization_factor: Number of periods per year by which the mean
            daily return is multiplied. Defaults to 252.

    Returns:
        dict mapping each selected column label to
        ``df[label].mean() * annualization_factor``, in column order.
    """
    return {
        column: df[column].mean() * annualization_factor
        for column in df.columns
        if isinstance(column, str) and 'return' in column
    }

# Compute the annualized return of each return column and print it as a percentage.
# The loop variable is deliberately not named `model`, so it cannot overwrite
# the notebook-level `model` object created in the training cells.
annualized_returns = calculate_annualized_returns(my_report_normal)
for return_name, annualized_value in annualized_returns.items():
    print(f"{return_name}: {annualized_value:.2%}")


## 其他图

In [None]:
## Export df and pred_df so they can be reloaded while the source is being modified.

outpath = '/root/autodl-tmp/Stockformer/output/'
# df.to_csv(outpath+'df.csv')
# pred_df.to_csv(outpath+'pred_df.csv')

In [None]:
import pandas as pd

# Load the full label table: the first CSV column becomes the index and the first row the header.
all_label = pd.read_csv(outpath+'LABEL0.csv', index_col=0, header=0)
# Parse the index as dates written in YYYY-MM-DD format.
all_label.index = pd.to_datetime(all_label.index, format='%Y-%m-%d')

In [None]:
# Load the prediction file and parse its 'datetime' column into datetime values.
my_pred = pd.read_csv(outpath+'US-Stock-pred-20230201-20230630.csv')
my_pred['datetime'] = pd.to_datetime(my_pred['datetime'])
# my_pred = my_pred.set_index(['datetime', 'instrument'])
# Display the predictions.
my_pred

In [None]:
# Load the label matrix; the file is read without a header row.
my_label = pd.read_csv(outpath+'my_label.csv', header=None)
# Use the index entries of all_label from '2023-02-01' to '2023-06-30' as the row index of my_label.
my_label.index = all_label.loc['2023-02-01':'2023-06-30'].index
# Use the column labels of all_label as the column labels of my_label.
my_label.columns = all_label.columns
# Print the result.
print(my_label)
# pre_df = pd.read_csv(outpath+'US-Stock-pred-20230201-20230630.csv')

In [None]:
# Use the index entries of all_label from '2023-02-01' to '2023-06-30' as the row index of my_label.
# NOTE(review): this cell reshapes my_label in place, so it is meant to run once
# on the freshly loaded wide table.
my_label.index = all_label.loc['2023-02-01':'2023-06-30'].index

# Use the column labels of all_label as the column labels of my_label.
my_label.columns = all_label.columns

# Reshape my_label from wide to long form: stack the columns, name the two
# index levels 'datetime' and 'instrument', and put the values in a 'label' column.
my_label = my_label.stack().rename_axis(['datetime', 'instrument']).reset_index(name='label')
my_label['datetime'] = pd.to_datetime(my_label['datetime'])
# my_label = my_label.set_index(['datetime', 'instrument'])
# Print the result.
print(my_label)

In [None]:
# Left-join the labels onto the predictions by ('datetime', 'instrument').
result_df = pd.merge(
    my_pred,
    my_label[['datetime', 'instrument', 'label']],
    on=['datetime', 'instrument'],
    how='left',
)
result_df['datetime'] = pd.to_datetime(result_df['datetime'])
# Index by ('datetime', 'instrument') and drop rows that received no label.
result_df = result_df.set_index(['datetime', 'instrument']).dropna(subset=['label'])
result_df

In [None]:
# Plot the model performance charts from the merged prediction/label frame
# with qlib's analysis_model.model_performance_graph.
analysis_model.model_performance_graph(result_df)