In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from stable_baselines3 import A2C, DDPG, PPO, SAC, TD3

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split
from finrl import config_tickers

import ta

import itertools

%matplotlib inline

In [2]:
tickers = [
    "AAPL", "MSFT", "GOOG", "AMZN", "TSLA", "META", "NVDA", "JPM", 
    "JNJ", "V", "PG", "UNH", "HD", "MA", "XOM", "CVX", "DIS", "INTC", 
    "CSCO", "PEP", "KO", "PFE", "MRK", "ABBV", "WFC", "BAC", "VZ", "T", 
    "CMCSA", "NFLX", "ADBE", "CRM", "PYPL", "QCOM", "AMD", "BA", "CAT", "HON", 
    "MMM", "IBM", "LMT", "RTX", "GE", "F", "GM", "SBUX", "MCD", "NKE", 
    "TGT", "WMT", "COST", "LOW", "MRNA", "LLY", "BMY", "AXP", "GS", "SCHW", 
    "MS", "C", "BLK", "NVO", "TSM", "UBER", "SNOW", "SPGI", "ADP", "A"
]

In [3]:
from datetime import datetime, timedelta

# 获取今天的日期
today = datetime.today()

# 格式化为 "YYYY-MM-DD" 形式
today_str = today.strftime('%Y-%m-%d')

# 获取一个月前的日期
one_month_ago = today - timedelta(days=365)
one_month_ago_str = one_month_ago.strftime('%Y-%m-%d')

if len(tickers) != 68:
    print('Please select 68 tickers.')
    exit()

df_raw = YahooDownloader(start_date = '2023-01-01',
                     end_date = '2023-12-29',
                     ticker_list = tickers).fetch_data()

100%|██████████| 68/68 [00:08<00:00,  8.31it/s]

Shape of DataFrame:  (16932, 8)





In [22]:
# Step 3: Define function to calculate indicators
def add_indicators(df):
    # Ensure the data is sorted by date
    df = df.sort_values(by=['tic', 'date']).reset_index(drop=True)
    
    # Calculate MACD
    df['macd'] = ta.trend.MACD(df['close']).macd()
    
    # Calculate Bollinger Bands
    boll = ta.volatility.BollingerBands(df['close'])
    df['boll_ub'] = boll.bollinger_hband()
    df['boll_lb'] = boll.bollinger_lband()
    
    # Calculate RSI (Relative Strength Index)
    df['rsi_30'] = ta.momentum.RSIIndicator(df['close'], window=30).rsi()
    
    # Calculate CCI (Commodity Channel Index)
    df['cci_30'] = ta.trend.CCIIndicator(df['high'], df['low'], df['close'], window=30).cci()
    
    # Calculate DX (Directional Movement Index)
    df['dx_30'] = ta.trend.ADXIndicator(df['high'], df['low'], df['close'], window=30).adx()
    
    # Calculate simple moving averages
    df['close_30_sma'] = df['close'].rolling(window=30).mean()
    df['close_60_sma'] = df['close'].rolling(window=60).mean()
    
    # Calculate Historical Volatility (as proxy for VIX)
    df['vix'] = df['close'].pct_change().rolling(window=30).std() * np.sqrt(252) * 100  # Annualized volatility approximation
    # Calculate Turbulence Index
    df['turbulence'] = calculate_turbulence(df)
    
    return df

# Function to calculate turbulence
def calculate_turbulence(df):
    turbulence_list = []
    # Calculate turbulence for each day based on past 252 days (one year)
    for i in range(252, len(df)):
        historical_data = df.loc[i-252:i, ['high', 'low', 'close']]
        cov_matrix = historical_data.cov()  # Covariance matrix over past 252 days
        cov_matrix_inv = np.linalg.inv(cov_matrix.values)  # Inverse of covariance matrix
        
        # Calculate mean difference from the current data
        current_data = df.loc[i, ['high', 'low', 'close']]
        mean_diff = current_data - historical_data.mean()
        
        # Compute turbulence as a Mahalanobis distance
        turbulence = np.dot(np.dot(mean_diff.values, cov_matrix_inv), mean_diff.values.T)
        turbulence_list.append(turbulence)
    
    # Prepend NaN for the first 252 rows since turbulence is undefined
    turbulence_list = [np.nan] * 252 + turbulence_list
    return np.array(turbulence_list)

In [23]:
# Step 4: Apply the function to add indicators
df_with_indicators = add_indicators(df_raw)

In [6]:
def reset_shared_index(df):
    # Create a unique sequence index (day number) for each date
    df = df.sort_values(by='date').reset_index(drop=True)

    # 创建一个基于日期的全局序号（唯一的顺序编号），让每个日期分配一个编号
    date_to_index = {date: idx for idx, date in enumerate(sorted(df['date'].unique()))}

    # 将编号作为新的索引
    df['shared_index'] = df['date'].map(date_to_index)

    # 最后设置 'shared_index' 作为索引
    df.set_index('shared_index', inplace=True)

    df.index.name = None

    return df

In [7]:
df_with_indicators = reset_shared_index(df_with_indicators)

In [8]:
df_with_indicators

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2023-01-03,151.960007,153.130005,148.470001,148.204666,1414300,A,1,,,,,,0.000000,,,,
0,2023-01-03,109.779999,110.019997,105.489998,100.414619,15146200,XOM,1,3.849356,74.988389,31.727982,90.050829,1000.000000,14.927876,52.679629,53.041199,271.706324,17.115890
0,2023-01-03,720.000000,723.539978,703.770020,679.291809,540800,BLK,1,52.731800,346.360004,-218.240077,99.509901,1000.000000,31.333499,52.392387,39.326346,5634.700595,354.952773
0,2023-01-03,149.539993,150.449997,145.729996,143.634781,2762200,AXP,1,2.119507,156.514303,142.303175,50.185030,-34.118937,13.669899,148.227826,140.695580,25.983354,1.636797
0,2023-01-03,201.490005,202.300003,197.139999,192.134491,2602000,LOW,1,-18.476552,535.118913,319.194420,13.908637,-1000.000000,11.398945,429.988233,430.149442,164.609105,10.369399
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,2023-12-28,359.700012,361.899994,357.809998,357.244385,11798800,META,3,8.370737,363.745440,309.439676,62.892378,164.225955,13.368269,336.208384,324.930173,22.205976,1.398845
248,2023-12-28,294.470001,296.470001,294.000000,290.577881,1907100,MCD,3,4.795494,291.413423,277.261382,64.873263,126.684757,19.858212,280.377581,266.429733,13.246391,0.834444
248,2023-12-28,423.769989,428.359985,423.679993,424.431702,1505000,MA,3,5.872671,428.349325,404.974213,61.485138,117.472089,15.475843,412.449013,399.806427,12.060676,0.759751
248,2023-12-28,146.000000,146.009995,145.039993,143.026108,5023000,PG,3,-1.115728,149.402478,138.728475,46.167307,-62.296718,9.805566,145.517035,145.312954,17.827191,1.123007


In [9]:
processed = df_with_indicators

In [10]:
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date','tic'])

processed_full = processed_full.fillna(0)

In [11]:
stock_dimension = len(processed_full.tic.unique())
state_space = 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    # "state_space": 690,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

Stock Dimension: 68, State Space: 681


In [12]:
trade=data_split(processed_full, '2023-01-01', '2024-01-01')
trade.head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2023-01-03,A,151.960007,153.130005,148.470001,148.204666,1414300.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2023-01-03,AAPL,130.279999,130.899994,124.169998,123.904625,112117500.0,1.0,4.082597,143.452937,122.219061,50.322277,-34.043933,21.988181,128.981886,118.331358,45.338018,2.856027
0,2023-01-03,ABBV,162.039993,163.020004,160.809998,151.773148,4937500.0,1.0,-1.624885,210.334499,172.810513,28.525945,-640.035929,18.449877,190.884629,183.741707,63.288357,3.986792
0,2023-01-03,ADBE,340.160004,345.820007,331.920013,336.920013,2229100.0,1.0,17.833936,239.659941,73.505876,94.929354,958.274874,21.552297,149.406649,144.65948,358.093475,22.557769
0,2023-01-03,ADP,240.789993,241.509995,235.270004,228.564484,1749800.0,1.0,-28.689814,750.602378,419.730078,19.341707,-950.612662,23.008109,594.729151,573.683075,180.487072,11.369617


In [13]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)
# env_trade, obs_trade = e_trade_gym.get_sb_env()

In [14]:
trained_a2c = A2C.load(TRAINED_MODEL_DIR + "/agent_a2c")
trained_ddpg = DDPG.load(TRAINED_MODEL_DIR + "/agent_ddpg")
trained_ppo = PPO.load(TRAINED_MODEL_DIR + "/agent_ppo")
trained_td3 = TD3.load(TRAINED_MODEL_DIR + "/agent_td3")
trained_sac = SAC.load(TRAINED_MODEL_DIR + "/agent_sac")

In [15]:
def DRL_ensemble_prediction(environment, a2c_model=None, ddpg_model=None, ppo_model=None, td3_model=None, sac_model=None, weights=[1,1,1,1,1], deterministic=True):
    test_env, test_obs = environment.get_sb_env()
    account_memory = None
    actions_memory = None


    test_env.reset()
    max_steps = len(environment.df.index.unique()) - 1

    print(max_steps)

    _weights = []
    if a2c_model is not None:
        _weights.append(weights[0])
    if ddpg_model is not None:
        _weights.append(weights[1])
    if ppo_model is not None:
        _weights.append(weights[2])
    if td3_model is not None:
        _weights.append(weights[3])
    if sac_model is not None:
        _weights.append(weights[4])
    
    weights = np.array(_weights)
    

    # for i in tqdm(range(len(environment.df.index.unique()))):
    if a2c_model is not None:
        a2c_action, _states = a2c_model.predict(test_obs, deterministic=deterministic)
    if ddpg_model is not None:
        ddpg_action, _states = ddpg_model.predict(test_obs, deterministic=deterministic)
    if ppo_model is not None:
        ppo_action, _states = ppo_model.predict(test_obs, deterministic=deterministic)
    if td3_model is not None:
        td3_action, _states = td3_model.predict(test_obs, deterministic=deterministic)
    if sac_model is not None:
        sac_action, _states = sac_model.predict(test_obs, deterministic=deterministic)
    

    actions = []
    action_length = 0

    if a2c_model is not None and len(a2c_action) > 0:
        action_length = len(a2c_action[0])
    elif ddpg_model is not None and len(ddpg_action) > 0:
        action_length = len(ddpg_action[0])
    elif ppo_model is not None and len(ppo_action) > 0:
        action_length = len(ppo_action[0])
    elif td3_model is not None and len(td3_action) > 0:
        action_length = len(td3_action[0])
    elif sac_model is not None and len(sac_action) > 0:
        action_length = len(sac_action[0])

    for j in range(action_length):
        _actions = []
        if a2c_model is not None:
            # print(a2c_action)
            _actions.append(a2c_action[0][j])
        if ddpg_model is not None:
            _actions.append(ddpg_action[0][j])
        if ppo_model is not None:
            _actions.append(ppo_action[0][j])
        if td3_model is not None:
            _actions.append(td3_action[0][j])
        if sac_model is not None:
            _actions.append(sac_action[0][j])
        _action = np.sum(_actions * weights) / np.sum(weights)
        actions.append(_action)
    actions[0] = np.array(actions)

    # print(actions)
    return actions[0]

In [16]:
actions = DRL_ensemble_prediction(e_trade_gym, 
                        a2c_model=trained_a2c, 
                        ddpg_model=trained_ddpg, 
                        ppo_model=trained_ppo, 
                        td3_model=trained_td3, 
                        sac_model=trained_sac)

248


In [17]:
buy_sell_actions = {}
for i in range(len(df_with_indicators['tic'].unique().tolist())):
    buy_sell_actions[df_with_indicators['tic'].unique().tolist()[i]] = round(actions[i]*100)

In [18]:
buy_sell_actions

{'A': 63,
 'XOM': 26,
 'BLK': 13,
 'AXP': 19,
 'LOW': -14,
 'MS': -28,
 'IBM': -26,
 'TGT': -10,
 'COST': 22,
 'F': -14,
 'CSCO': 21,
 'SNOW': 15,
 'NKE': 30,
 'MCD': -50,
 'HON': -43,
 'ADP': -33,
 'VZ': 20,
 'BMY': 27,
 'PEP': 26,
 'AMZN': 2,
 'AAPL': -10,
 'MA': 42,
 'HD': -26,
 'PG': -12,
 'INTC': -9,
 'CMCSA': 12,
 'PFE': -27,
 'PYPL': -20,
 'KO': 39,
 'UNH': -36,
 'JPM': -28,
 'UBER': 49,
 'DIS': -32,
 'BA': 7,
 'V': 36,
 'BAC': 29,
 'RTX': 63,
 'SPGI': -1,
 'LLY': 13,
 'WFC': 22,
 'JNJ': 50,
 'T': 2,
 'ADBE': -60,
 'NFLX': 40,
 'LMT': -24,
 'MSFT': -27,
 'GS': -21,
 'CRM': 33,
 'TSLA': 21,
 'MMM': 26,
 'TSM': 32,
 'MRNA': -9,
 'CAT': 38,
 'GE': 66,
 'SBUX': -67,
 'WMT': 7,
 'GOOG': 11,
 'QCOM': 29,
 'ABBV': 33,
 'NVO': 18,
 'C': -73,
 'MRK': 25,
 'META': 36,
 'CVX': 14,
 'SCHW': -31,
 'AMD': -63,
 'NVDA': 22,
 'GM': -46}