In [1]:
## LSTM model 
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Flatten, Dense
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tqdm import tqdm

In [2]:
class Model:
    '''
    LSTM network that learns long-only allocation weights by maximising the
    in-sample Sharpe ratio of the resulting portfolio.

    The Keras model is built lazily on the first ``get_allocations`` call and is
    reused by later calls on the same instance.
    '''

    # Fewest price rows get_allocations accepts: the first row is dropped (it has no
    # pct_change), and the loss needs at least two portfolio returns so that their
    # standard deviation is not trivially zero.
    _MIN_ROWS = 4

    def __init__(self,epochs, num_nodes, num_hidden_layers):
        '''
        inputs: epochs - maximum number of training epochs per get_allocations call
                num_nodes - number of units in the LSTM layer
                num_hidden_layers - stored on the instance only (see note below)
        '''
        self.data = None   # price tensor read by the loss closure; set in get_allocations
        self.model = None  # compiled Keras model; created on the first get_allocations call
        self.epochs = epochs
        self.num_nodes = num_nodes
        # NOTE(review): __build_model always creates exactly one LSTM layer and never
        # reads this attribute, so changing it does not change the network.
        self.num_hidden_layers = num_hidden_layers

    def __build_model(self, input_shape, outputs):
        '''
        Builds and returns the Deep Neural Network that will compute the allocation ratios
        that optimize the Sharpe Ratio of the portfolio

        inputs: input_shape - tuple of the input shape, outputs - the number of assets
        returns: a compiled Keras model whose softmax output has one weight per asset
        '''
        model = Sequential([
            LSTM(self.num_nodes,
                 input_shape=input_shape,
                 recurrent_dropout=0.1
                 ),
            # The LSTM above returns only its last state, so Flatten should be a
            # pass-through here; it is kept so the layer stack stays unchanged.
            Flatten(),
            # softmax keeps the weights non-negative and summing to one (long-only)
            Dense(outputs, activation='softmax')
        ])

        def sharpe_loss(_, y_pred):
            # y_true is a dummy target and is ignored; the loss depends only on the
            # predicted weights and on the prices stored in self.data.

            # make all time-series start at 1
            data = tf.divide(self.data, self.data[0])

            # value of the portfolio after allocations applied
            portfolio_values = tf.reduce_sum(tf.multiply(data, y_pred), axis=1)

            # % change formula
            portfolio_returns = (portfolio_values[1:] - portfolio_values[:-1]) / portfolio_values[:-1]

            # K.epsilon() keeps the ratio finite when the returns have zero variance;
            # without it the loss becomes NaN/inf and training yields NaN weights.
            sharpe = K.mean(portfolio_returns) / (K.std(portfolio_returns) + K.epsilon())

            # since we want to maximize Sharpe, while gradient descent minimizes the loss,
            #   we can negate Sharpe (the min of a negated function is its max)
            return -sharpe

        model.compile(loss=sharpe_loss, optimizer='adam')
        return model

    def get_allocations(self, data):
        '''
        Computes and returns the allocation ratios that optimize the Sharpe over the given data

        input: data - DataFrame of historical closing prices of various assets
                      (one column per asset, one row per date)

        return: the allocations ratios for each of the given assets

        raises: ValueError - if data is None or has fewer than 4 rows
        '''
        # Fail early with a clear message instead of deep inside TensorFlow.
        if data is None or len(data) < self._MIN_ROWS:
            raise ValueError(
                "get_allocations needs at least %d rows of prices, got %s"
                % (self._MIN_ROWS, "None" if data is None else len(data))
            )

        # data with returns: prices and their percentage changes side by side;
        # the first row is dropped because its pct_change is NaN
        data_w_ret = np.concatenate([data.values[1:], data.pct_change().values[1:]], axis=1)

        data = data.iloc[1:]
        self.data = tf.cast(tf.constant(data), float)

        # early stopping: halt once the training loss has not improved for 2 epochs
        early_stopping = EarlyStopping(monitor="loss", patience=2,)

        if self.model is None:
            self.model = self.__build_model(data_w_ret.shape, len(data.columns))

        # add a leading batch axis: the whole window is a single training sample
        fit_predict_data = data_w_ret[np.newaxis, :]
        # the zero target is a placeholder; sharpe_loss ignores y_true
        self.model.fit(fit_predict_data, np.zeros((1, len(data.columns))), epochs=self.epochs, shuffle=False, callbacks=[early_stopping])
        return self.model.predict(fit_predict_data)[0]

아래 셀의 데이터 파일 경로(`Data/ETF_final.pickle`)를 본인 환경에 맞게 변경하세요.

In [3]:
# Load the price table used by every later cell.
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
data = pd.read_pickle("Data/ETF_final.pickle") # change this path to wherever the pickle file is stored
data

Unnamed: 0,TLT,SHY,GLD,GSG,VNQ,BIL,DBA,IJS,QQQ,SPY,VWO
2007-05-31,86.38,80.07,65.54,41.200,78.95,91.60,26.35,40.87,47.21,153.32,43.66
2007-06-01,85.59,79.74,66.44,41.570,79.06,91.62,26.39,41.29,47.24,154.08,44.65
2007-06-04,85.98,79.77,66.54,42.100,79.38,91.64,26.23,41.35,47.38,154.10,44.51
2007-06-05,85.44,79.70,66.37,42.007,78.12,91.64,26.31,41.00,47.38,153.49,44.27
2007-06-06,85.40,79.80,66.41,42.030,77.59,91.68,26.11,40.51,46.88,151.84,43.53
...,...,...,...,...,...,...,...,...,...,...,...
2023-04-25,106.96,82.35,185.75,20.010,81.25,91.73,20.99,89.45,309.99,406.08,39.22
2023-04-26,105.82,82.25,184.74,19.600,80.47,91.75,21.02,88.66,311.87,404.36,39.49
2023-04-27,104.77,82.09,184.75,19.630,82.29,91.78,20.98,90.18,320.35,412.41,40.00
2023-04-28,106.46,82.18,184.80,19.910,83.30,91.78,20.95,91.35,322.56,415.93,40.24


In [4]:
# Simple pandas-based helper that produces the rolling training windows.
def split_time_series(df:pd.DataFrame, look_back_size:int, freq:str, start:str="2008", end:str="2023"):
    '''
    Lazily yield one look-back window of ``df`` per date in a regular schedule.

    df             : frame to slice; it is sliced by Timestamp labels, so it is
                     expected to carry a sorted DatetimeIndex
    look_back_size : window length in calendar days
                     (365 -> 1 year / 90 -> 1 quarter / 30 -> 1 month)
    freq           : pandas offset alias passed to ``pd.date_range``
                     (e.g. "M", "Q", "Y")
    start, end     : bounds of the schedule, passed to ``pd.date_range``; the
                     defaults reproduce the original hard-coded 2008-2023 range

    Yields ``df.loc[end_date - look_back_size days : end_date]`` for each schedule
    date; label slicing includes both end points.

    Note) Windows near the beginning of ``df`` can hold fewer rows than later ones
    (or be empty) when ``df`` does not reach back far enough; this is not checked.
    '''
    look_back = pd.Timedelta(days=look_back_size)

    for end_date in pd.date_range(start, end, freq=freq):
        yield df.loc[end_date - look_back:end_date]

In [5]:
# Rebalancing schedule; these values are reused later when the grid-search loop runs.
freq = "Q"  # offset alias handed to pd.date_range here and to split_time_series in the loop
idx_list = pd.date_range("2008","2023", freq=freq)  # same range that split_time_series builds internally
len(idx_list)  # number of schedule dates, i.e. training windows per parameter set

60

In [6]:
# Backtest editted by Hwang
def simulate_strategy(group_weight_df:pd.DataFrame, daily_rtn_df:pd.DataFrame, fee_rate:float):
    '''
    Back-test a long-only portfolio that is rebalanced to given target weights.

    group_weight_df : target weights, one row per rebalancing date
    daily_rtn_df    : daily simple returns, same asset columns as the weights
    fee_rate        : proportional cost charged on the traded amount at each rebalance

    Returns (daily portfolio returns, weights held after the last day,
    annualised Sharpe ratio using 252 periods per year).
    '''
    rebalance_dates = group_weight_df.index
    first_date = rebalance_dates[0]

    # The portfolio is formed at the close of the first rebalancing date with value 1.
    weight = group_weight_df.iloc[0]
    pf_value = 1
    value_by_date = {}

    # Performance is measured from the day after the first weights are set.
    evaluation_days = daily_rtn_df.loc[first_date:].iloc[1:]

    for day, day_returns in evaluation_days.iterrows():
        # Apply the day's returns first; rebalancing (if any) happens afterwards.
        holdings = weight * pf_value
        holdings = holdings * (1 + np.nan_to_num(day_returns))
        pf_value = np.nansum(holdings)
        weight = holdings / pf_value  # weights drift with the realised returns

        if day in rebalance_dates:
            # Switch to the new target weights just before the close and pay the fee
            # on the absolute amount traded in each asset.
            weight = group_weight_df.loc[day]
            target_holdings = np.nan_to_num(pf_value * weight)
            traded = np.abs(target_holdings - np.nan_to_num(holdings))
            pf_value = pf_value - np.nansum(traded * fee_rate)

        value_by_date[day] = pf_value

    # Turn the value path into returns; a starting value of 1 is inserted one
    # calendar day before the first evaluated day.
    values = pd.Series(value_by_date)
    values[values.index[0] - pd.Timedelta(days=1)] = 1
    values = values.sort_index()
    pf_result = values.pct_change().fillna(0)

    # annualised Sharpe ratio
    annual_mean = pf_result.mean() * 252
    annual_vol = pf_result.std() * np.sqrt(252)
    sharpe_ratio = annual_mean / annual_vol

    return pf_result, weight, sharpe_ratio

In [7]:
# Build the hyper-parameter grid that the back-test loop searches.
from sklearn.model_selection import ParameterGrid

param_grid = {
    "hidden_layers":[2],  # forwarded to Model(num_hidden_layers=...); NOTE(review): Model.__build_model never reads it
    "epochs":[100, 200, 300, 400, 500],  # upper bound on training epochs per window (early stopping may end sooner)
    "nodes" :[16, 32, 64, 80, 96, 112, 128],  # number of LSTM units
    "lookbackwindow":[100,200,300,400]  # window length in calendar days, passed to split_time_series
}

# Create the parameter grid: one dict per combination (1 * 5 * 7 * 4 = 140 entries)
grid = list(ParameterGrid(param_grid))

아래 셀의 결과 저장 경로(`output_dir`)를 본인 환경에 맞게 변경하세요.

In [8]:
import os

# One row per hyper-parameter combination: the settings plus the back-tested Sharpe ratio.
results = pd.DataFrame(columns = ['epochs', 'num_nodes', 'num_hidden_layers', 'look_back_window', 'sharpe_ratio'])
i = 0

# Folder that receives the result CSVs; change it to your own location
# (e.g. a mounted Google Drive directory).
output_dir = 'dl_result'
os.makedirs(output_dir, exist_ok=True)  # listdir/to_csv below would fail on a missing folder
existing_files = os.listdir(output_dir)

# Daily returns do not depend on the hyper-parameters, so compute them once.
daily_returns = data.pct_change()

# The grid is walked in reverse order.
for params in tqdm(grid[::-1]):
    print(params.values(), "Preprocessing.....")

    hidden_layer = params["hidden_layers"]
    epochs = params["epochs"]
    lookbackwindow = params["lookbackwindow"]
    nodes = params["nodes"]

    # Define the data generator (one training window per rebalancing date)
    data_generator = split_time_series(data, lookbackwindow, freq = freq)

    # DataFrame that collects the weights (reset for every parameter set)
    weight_df = pd.DataFrame(columns=data.columns)

    # Iterate the generator itself so the number of windows can never disagree
    # with a separately built date list.
    for sample_data in data_generator:
        # Start from a fresh model for every window
        tf.keras.backend.clear_session()
        model = Model(epochs=epochs, num_nodes=nodes, num_hidden_layers=hidden_layer)

        # The last date in the window is the rebalancing date
        idx = sample_data.index[-1]

        # Train the model and store the resulting weights
        allocations = model.get_allocations(data=sample_data)
        weight_df.loc[idx] = allocations

    # Save result: back-test the weights and write the cumulative results table
    pf_result, weight, sharpe_ratio = simulate_strategy(weight_df, daily_returns, fee_rate = 0.0015)
    results.loc[i] =[epochs, nodes, hidden_layer, lookbackwindow, sharpe_ratio]
    # os.path.join inserts the separator that plain string concatenation omitted,
    # so the file really lands inside output_dir.
    results.to_csv(os.path.join(output_dir, f'result_dataframe_ryu_rev{i}.csv'))
    i += 1

  0%|          | 0/140 [00:00<?, ?it/s]

dict_values([500, 2, 400, 128]) Preprocessing.....
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/

  1%|          | 1/140 [59:43<138:20:50, 3583.10s/it]

dict_values([500, 2, 400, 112]) Preprocessing.....
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/

  1%|▏         | 2/140 [2:00:07<138:16:50, 3607.32s/it]

dict_values([500, 2, 400, 96]) Preprocessing.....
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/5