In [1]:
import torch
from torch import nn
import numpy as np
import sys
sys.path.append('/mnt/')
from finrl.models.network import *
from finrl.models.policy import Policy
from finrl.models.agent import DDPG
from finrl.data.generate import portfolio_data_generate
from finrl.models.constants import *
from finrl.models.utils import data_split
from finrl.models.env import StockPortfolioEnv
import json

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Read the shared keyword-argument bundles (environment / policy / agent /
# noise settings) from the project's JSON file and echo them for inspection.
path = '/mnt/finrl/models/kwargs.json'
with open(path, encoding='utf-8') as f:
    config = json.loads(f.read())
print(config)

{'env_trade_kwargs': {'stock_dim': None, 'hmax': 100, 'initial_amount': 1000000, 'num_stock_shares': None, 'buy_cost_pct': None, 'sell_cost_pct': None, 'reward_scaling': 0.0001, 'state_space': None, 'action_space': None, 'tech_indicator_list': ['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma'], 'turbulence_threshold': None, 'risk_indicator_col': 'turbulence', 'reward_aliase': 'asset_diff', 'cash_norm_factor': 1e-06, 'num_share_norm_factor': None, 'if_price_norm': None, 'if_indicator_norm': None, 'if_num_share_norm': None, 'max_price': None, 'min_price': None}, 'env_portfolio_kwargs': {'stock_dim': None, 'hmax': 100, 'initial_amount': 1000000, 'buy_cost_pct': None, 'sell_cost_pct': None, 'reward_scaling': 0.0001, 'state_space': None, 'action_space': None, 'tech_indicator_list': ['macd', 'rsi_30', 'cci_30', 'dx_30'], 'turbulence_threshold': None, 'lookback': 252, 'reward_aliase': 'asset_diff'}, 'policy_kwargs': {'cnn_activation': None, 'lstm_input

In [5]:
# Load the data

# Input locations
data_dir = '/mnt/finrl/data/csv/DOW_30/'
vix_data_dir = '/mnt/finrl/data/csv/^VIX/'
dji_dir = '/mnt/finrl/data/csv/DJI/DJI.csv'  # .csv

# Date boundaries of the train / validation / test periods
TRAIN_START_DATE, TRAIN_END_DATE = '2010-01-01', '2021-12-31'
VALIDATE_START_DATE, VALIDATE_END_DATE = '2022-01-01', '2022-12-31'
TEST_START_DATE, TEST_END_DATE = '2023-01-01', '2024-01-30'

# Technical indicators requested from the data generator
INDICATORS = [
    "macd",
    "rsi_30",
    "cci_30",
    "dx_30",
]

# One frame spanning the whole horizon (train start through test end);
# the individual periods are cut out of it below.
df = portfolio_data_generate(
    data_dir=data_dir,
    vix_data_dir=vix_data_dir,
    dji_dir=dji_dir,
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    tech_indicator_list=INDICATORS,
    use_technical_indicator=True,
    use_turbulence=True,
    use_vix=True,
    user_defined_feature=False,
)

# Split the data; the last frame is the union of the train and validation periods
df_train, df_validation, df_test, df_train_validation = (
    data_split(df, period_start, period_end)
    for period_start, period_end in (
        (TRAIN_START_DATE, TRAIN_END_DATE),
        (VALIDATE_START_DATE, VALIDATE_END_DATE),
        (TEST_START_DATE, TEST_END_DATE),
        (TRAIN_START_DATE, VALIDATE_END_DATE),
    )
)
df_train_validation = data_split(df,TRAIN_START_DATE,VALIDATE_END_DATE)


# 技术指标归一化。
if_indicator_norm = False
min_indicator,max_indicator = [],[]
if if_indicator_norm:
    print('正在进行技术指标归一化')
    tic_list = df_train_validation.tic.unique().tolist()
    min_indicator,max_indicator = [],[]

    for indicator in INDICATORS:
        df1 = df_train_validation[['tic',indicator]]
        for tic in tic_list:
            minimum = df1[df1.tic==tic][indicator].min()
            maximum = df1[df1.tic==tic][indicator].max()
            min_indicator.append(minimum)
            max_indicator.append(maximum)
    
    for i in range(df_train.shape[0]):
        tic_ = df_train.iloc[i]['tic']
        tic_index = tic_list.index(tic_)
        for indicator in INDICATORS:
            indicator_index = INDICATORS.index(indicator)
            indicator_column_index = df_train.columns.get_loc(indicator)
            df_train.iat[i,indicator_column_index] = (df_train.iat[i,indicator_column_index] - min_indicator[indicator_index*len(tic_list)+tic_index]) / (max_indicator[indicator_index*len(tic_list)+tic_index] - min_indicator[indicator_index*len(tic_list)+tic_index])

    for i in range(df_train_validation.shape[0]):
        tic_ = df_train_validation.iloc[i]['tic']
        tic_index = tic_list.index(tic_)
        for indicator in INDICATORS:
            indicator_index = INDICATORS.index(indicator)
            indicator_column_index = df_train_validation.columns.get_loc(indicator)
            df_train_validation.iat[i,indicator_column_index] = (df_train_validation.iat[i,indicator_column_index] - min_indicator[indicator_index*len(tic_list)+tic_index]) / (max_indicator[indicator_index*len(tic_list)+tic_index] - min_indicator[indicator_index*len(tic_list)+tic_index])

    for i in range(df_validation.shape[0]):
        tic_ = df_validation.iloc[i]['tic']
        tic_index = tic_list.index(tic_)
        for indicator in INDICATORS:
            indicator_index = INDICATORS.index(indicator)
            indicator_column_index = df_validation.columns.get_loc(indicator)
            df_validation.iat[i,indicator_column_index] = (df_validation.iat[i,indicator_column_index] - min_indicator[indicator_index*len(tic_list)+tic_index]) / (max_indicator[indicator_index*len(tic_list)+tic_index] - min_indicator[indicator_index*len(tic_list)+tic_index])

    for i in range(df_test.shape[0]):
        tic_ = df_test.iloc[i]['tic']
        tic_index = tic_list.index(tic_)
        for indicator in INDICATORS:
            indicator_index = INDICATORS.index(indicator)
            indicator_column_index = df_test.columns.get_loc(indicator)
            df_test.iat[i,indicator_column_index] = (df_test.iat[i,indicator_column_index] - min_indicator[indicator_index*len(tic_list)+tic_index]) / (max_indicator[indicator_index*len(tic_list)+tic_index] - min_indicator[indicator_index*len(tic_list)+tic_index])


# Build the environments
STOCK_DIM = len(df_train_validation.tic.unique())
ACTION_DIM = STOCK_DIM  # one action component per ticker (29 in the recorded run)
# Derived from INDICATORS rather than a second hard-coded copy of the list,
# so the observation size follows the indicators actually generated.
# NOTE(review): presumably a (STOCK_DIM + n_indicators) x STOCK_DIM observation,
# flattened — confirm against StockPortfolioEnv.
STATE_DIM = STOCK_DIM * (STOCK_DIM + len(INDICATORS))

# Fill in the run-specific entries of the kwargs loaded from the JSON config.
# This updates the dict stored inside `config` in place.
env_portfolio_kwargs = config['env_portfolio_kwargs']
env_portfolio_kwargs.update({
    'stock_dim':STOCK_DIM,
    'state_space':STOCK_DIM,
    'action_space':STOCK_DIM,
    'buy_cost_pct':[0.001]*ACTION_DIM,
    'sell_cost_pct':[0.001]*ACTION_DIM,
    'reward_aliase':'asset_diff',
    'reward_scaling':0.0001,
})

# One environment per data split, all sharing the same kwargs.
env_train = StockPortfolioEnv(df_train, **env_portfolio_kwargs)
env_train_validation = StockPortfolioEnv(df_train_validation, **env_portfolio_kwargs)
env_validation = StockPortfolioEnv(df_validation, **env_portfolio_kwargs)
env_test = StockPortfolioEnv(df_test, **env_portfolio_kwargs)

# Attach to each environment the date range of the split it was built from.
env_train_validation.DATE_START,env_train_validation.DATE_END = TRAIN_START_DATE,VALIDATE_END_DATE
env_train.DATE_START,env_train.DATE_END = TRAIN_START_DATE,TRAIN_END_DATE
env_validation.DATE_START,env_validation.DATE_END = VALIDATE_START_DATE,VALIDATE_END_DATE
env_test.DATE_START,env_test.DATE_END = TEST_START_DATE,TEST_END_DATE
print(env_portfolio_kwargs)

  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(temp_df)
  data_df = data_df.append(t

Shape of DataFrame:  (103943, 8)


  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_df = indicator_df.append(
  indicator_

Successfully added technical indicators
Shape of DataFrame:  (3542, 8)
Successfully added vix


  data_df = data_df.append(temp_df)


Successfully added turbulence index
{'stock_dim': 29, 'hmax': 100, 'initial_amount': 1000000, 'buy_cost_pct': [0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001], 'sell_cost_pct': [0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001], 'reward_scaling': 0.0001, 'state_space': 29, 'action_space': 29, 'tech_indicator_list': ['macd', 'rsi_30', 'cci_30', 'dx_30'], 'turbulence_threshold': None, 'lookback': 252, 'reward_aliase': 'asset_diff'}


In [10]:
# Assemble the DDPG agent configuration: start from the JSON defaults and
# override the run-specific entries. Each update() mutates the dict stored
# inside `config` in place.
policy_kwargs = config['policy_kwargs']
agent_kwargs = config['agent_kwargs']
ou_noise_kwargs = config['ou_noise_kwargs']

# Exploration-noise parameters (selected below via noise_aliase='ou');
# `mu` has one entry per action component.
ou_noise_kwargs.update(
    {
        'mu':np.array([0]*ACTION_DIM),
        'sigma':0.05,
        'theta':0.10,
        'dt':0.1,
    }
)

# Network / policy settings.
# NOTE(review): lstm_input_size=704 is a magic number whose derivation is not
# visible in this notebook — confirm it matches the network's feature size.
policy_kwargs.update({
    'cnn_activation':nn.ReLU,
    'lstm_input_size':704,
    'lstm_hidden_size':1024,
    'env_obs_dim':STATE_DIM,
    'action_dim':ACTION_DIM,
    'mlp_activation':nn.ReLU,
    'if_srl':True,
    'srl_aliase':'d2rl',
    'srl_hidden_dim':512,
})

# Agent / training-loop settings.
agent_kwargs.update({
    "env_train": env_train,
    "env_validation": env_validation,
    "env_test": env_test,
    "episodes": 10,
    "buffer_size": int(1e4),
    "batch_size": 3,
    "n_steps": 1,
    "if_prioritized": False,
    "n_updates": 3,
    "gamma": 0.99,
    "tau": 0.005,
    "policy_lr": 5e-8,
    'noise_kwargs':ou_noise_kwargs,
    'noise_aliase':'ou',
    "training_start": 200,
    "policy_kwargs":policy_kwargs,
    "target_update_interval": 1,
    "print_interval": 200,
    "figure_path":'/mnt/finrl/data/figure/figures_DJIA/',
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # notebook still runs on machines without CUDA.
    "device": 'cuda' if torch.cuda.is_available() else 'cpu',
    "task": 'portfolio',
})
agent = DDPG(**agent_kwargs)

------noise------
ou noise, randomness is True !
-----------------


In [1]:
# Run five training rounds (numbered 1..5). Each round builds a brand-new
# DDPG agent from the shared kwargs, tagged with its round number as a string
# under 'train_time', and trains it.
for i in range(1, 6):
    print(
        f'-------------------------------------------这是第{i}次训练---------------------------------------------'
    )
    agent_kwargs.update(train_time=str(i))
    agent = DDPG(**agent_kwargs)
    agent.train()