In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.metrics import mean_absolute_percentage_error
import xgboost as xgb

import lightgbm as lgb
import joblib
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import datetime as dt
from torch.autograd import Variable 

from sklearn.metrics import mean_squared_error
import random
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM , Bidirectional
color_pal = sns.color_palette()
plt.style.use('fivethirtyeight')

In [106]:
def seed(id):
    torch.manual_seed(id)
    np.random.seed(id)
    random.seed(id)
    tf.random.set_seed(id)

In [107]:
df = pd.read_csv('filtered_features.csv', index_col='Date')
df['Date'] = df.index
df.index = np.array(range(df.shape[0]))
df = df.drop('Unnamed: 0', axis = 1)
df.head()

Unnamed: 0,^GSPC_Open,^GSPC_Volume,^GSPC_SQZ_20_2.0_20_1.5,^GSPC_SQZ_ON,^GSPC_ABER_ATR_5_15,^GSPC_ADOSC_3_10,^GSPC_ADX_14,^GSPC_DMP_14,^GSPC_DMN_14,^GSPC_AROOND_14,...,DX-Y.NYB level 50,^GSPC_High,^GSPC_Low,^GSPC_Close,DX-Y.NYB_High,DX-Y.NYB_Low,DX-Y.NYB_Close,VIX_Close,IXIC_Close,Date
0,438.670013,247200000,7.049998,False,2.902007,68868950.0,18.006061,28.075804,22.869827,7.142857,...,0.0,438.929993,436.910004,438.779999,92.610001,91.080002,92.459999,12.42,696.340027,1993-01-29
1,438.779999,238570000,7.881663,False,2.957872,170382700.0,18.460351,34.209112,20.804444,0.0,...,0.0,442.519989,438.779999,442.519989,93.730003,92.419998,93.559998,12.33,701.77002,1993-02-01
2,442.519989,271560000,7.234996,False,2.901346,256618200.0,18.970118,33.292546,19.722291,0.0,...,0.0,442.869995,440.76001,442.549988,94.040001,93.199997,93.919998,12.25,705.119995,1993-02-02
3,442.559998,345410000,7.144994,False,3.027924,371587500.0,20.417289,40.078885,17.493099,35.714286,...,0.0,447.350006,442.559998,447.200012,94.599998,93.599998,94.239998,12.12,708.669983,1993-02-03
4,447.200012,351140000,8.356664,False,3.003394,471365500.0,22.193214,43.509068,16.387681,28.571429,...,0.0,449.859985,447.200012,449.559998,94.860001,94.040001,94.529999,12.29,708.849976,1993-02-04


In [25]:
df['Date'].shape

(7534,)

In [109]:
def save_data(data, time_index_loc, name):
    data = pd.DataFrame(data)            
    data['Date'] = time_index_loc
    data.to_csv(name)

def series_data_split_v2(data , time_index,  lag, horizon, t_train, t_val, t_test, name, step = 500):
    # data - в виде серии
     
    data = np.array(data.to_list()).reshape(-1, 1)
    indexes = np.array(range(len(data)))
    
    k = 0

    while t_val < indexes[-1]:
        x_train = []
        y_train = []
        x_test = []
        y_test = []
        x_val = []
        y_val = []

        if k == 0:
            #scaler = MinMaxScaler(feature_range=(0,1))
            #scaled_train_data = scaler.fit_transform(data[:t_train]) 
            #scaled = scaler.transform(data[: t_val])

            scaled = np.array(data[: t_val]).reshape(1, -1)[0]
            

            
            
            
            for i in range(lag + 1, t_train + 1):
                x_train.append(np.log(scaled[i-lag:i] / scaled[i-lag-1:i-1])) 
                y_train.append(np.log(scaled[i]/scaled[i-1]))
            
            for i in range(t_train + 1, t_val):
                x_val.append(np.log(scaled[i-lag:i] / scaled[i-lag-1:i-1]))
                if i + horizon <= t_val:
                    y_val.append(np.log(scaled[i : i + horizon]/scaled[i-1:i+horizon-1]))
                else:
                    y_val.append(np.log(scaled[i : t_val]/scaled[i-1:t_val-1]))
            
            
            save_data(x_train, time_index.iloc[indexes[lag : t_train]].to_list(), f'x_train_v2_{name}_{k}.csv')
            save_data(y_train, time_index.iloc[indexes[lag : t_train]].to_list(), f'y_train_v2_{name}_{k}.csv')
            
            
            save_data(x_val, time_index.iloc[indexes[t_train: t_val - 1]].to_list(), f'x_val_v2_{name}_{k}.csv')
            save_data(y_val, time_index.iloc[indexes[t_train: t_val - 1]].to_list(), f'y_val_v2_{name}_{k}.csv')
            


        
        #scaler = MinMaxScaler(feature_range=(0,1))
        #scaled_train_data = scaler.fit_transform(data[:t_val])
        scaled_train_data = np.array(data[:t_val]).reshape(1, -1)[0]
        #scaler_filename = f"scaler{k + 1}.save"
        #joblib.dump(scaler, scaler_filename) 

        

        
        x_train = []
        y_train = []
        x_test = []
        y_test = []
        x_val = []
        y_val = []

        
        for i in range(lag + 1, t_val):
            x_train.append(np.log(scaled_train_data[i-lag:i]/scaled_train_data[i-lag-1:i-1]))
            y_train.append(np.log(scaled_train_data[i]/scaled_train_data[i-1]))

        save_data(x_train, time_index.iloc[indexes[lag : t_val - 1]].to_list(), f'x_train_v2_{name}_{k + 1}.csv')
        save_data(y_train, time_index.iloc[indexes[lag : t_val - 1]].to_list(), f'y_train_v2_{name}_{k + 1}.csv')
        
        


        scaled_test = np.array(data[:t_test])
        scaled_test = scaled_test.reshape(1, -1)[0]

        pointer = 0
        for i in range(t_val, t_test):
            
            x_test.append(np.log(scaled_test[i-lag:i]/scaled_test[i-lag-1:i-1])) 
            if i + horizon <= t_test:
                y_test.append(np.log(scaled_test[i : i + horizon]/scaled_test[i-1:i+horizon-1]))
            else:
                y_test.append(np.log(scaled_test[i : t_test]/scaled_test[i-1 : t_test-1]))
            
              
       
        save_data(x_test, time_index.iloc[indexes[t_val  - 1: t_test - 1]].to_list(), f'x_test_v2_{name}_{k + 1}.csv')
        save_data(y_test, time_index.iloc[indexes[t_val  - 1: t_test - 1]].to_list(), f'y_test_v2_{name}_{k + 1}.csv')
          
        
        
        #save_data(x_test[:-horizon], time_index.iloc[indexes[t_val - 1: t_test - horizon]].to_list(), f'x_val_{k + 1}.csv')
        #save_data(y_test[:-horizon], time_index.iloc[indexes[t_val - 1: t_test - horizon]].to_list(), f'y_val_{k + 1}.csv')
        
        k += 1
        t_train = t_val
        

        t_val = t_test 
        t_test += step
        if t_test > indexes[-1]:
            t_test = indexes[-1] + 1

           
        
    print('final k', k)
name = 'IXIC_Close' #'DX-Y.NYB_Close' 'VIX_Close       
ser = df[name]
#filtered['VIX_Close'] = data['^VIX_Close']
#filtered['IXIC_Close']
time_index = df['Date']
lag = 40
horizon = 10
t_train = 1500
t_val = 1550
t_test = 1600
step = 50

series_data_split_v2(ser , time_index, lag, horizon, t_train, t_val, t_test, name, step)

final k 120


In [71]:
class EarlyStopper:
    def __init__(self, patience=3, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [18]:
# LSTM, GRU, Bi-LSTM, Sequencies


class LSTM_Prediction_Model(nn.Module):
    def __init__(self,  input_size, hidden_size, num_layers, output_size, drop, bilstm=False):
        super(LSTM_Prediction_Model, self).__init__()
        
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        
       
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True, dropout=drop, bidirectional=bilstm) #lstm
        self.fc =  nn.Linear(hidden_size, output_size) #fully connected 1
        
       
    
    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_() #hidden state
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).requires_grad_() #internal state
       
        # Propagate input through LSTM
        print(x)
        output, (hn, cn) = self.lstm(x, (h_0.detach(), c_0.detach())) #lstm with input, hidden, and internal state
        output = self.fc(output[:, -1, :]) #reshaping the data for Dense layer next
        
        
        
        return output

In [19]:
class nn_wrapper_torch:
    def __init__(self, nn_type):
        self.type = nn_type
        self.num_epochs = 100
        self.trial = 0
        self.epochs = []
    def set_params(self, **params):
        
        
        self.num_epochs = params['num_epochs']
        self.learning_rate = params['learning_rate']
        self.batch_size = params['batch_size']
        self.hidden_size = params['hidden_size'] #, num_layers, seq_length, drop
        self.num_layers = params['num_layers']
        self.drop = params['drop']
        self.patience = params['patience']
        if 'bilstm' in params.keys():
            self.bilstm = True
        else:
            self.bilstm = False

    def fit(self, X, y, X_val = 0, y_val = 0):

        if isinstance(X, np.ndarray):
            self.X_train = torch.from_numpy(np.reshape(X, (X.shape[0], X.shape[1], 1))).to(device = config.device, dtype = torch.float)
            self.y_train = torch.from_numpy(np.array(y).reshape((len(y), 1))).to(device = config.device, dtype = torch.float)
        else:
            self.X_train = torch.from_numpy(np.reshape(X.values, (X.shape[0], X.shape[1], 1))).to(device = config.device, dtype = torch.float)
            self.y_train = torch.from_numpy(np.array(y.values).reshape((len(y), 1))).to(device = config.device, dtype = torch.float)
        

        
        if isinstance(X_val, np.ndarray):
            self.X_val = torch.from_numpy(np.reshape(X_val, (X_val.shape[0], X_val.shape[1], 1))).to(device = config.device, dtype = torch.float)
            self.y_val = torch.from_numpy(np.array(y_val[:, 0]).reshape((len(y_val[:, 0]), 1))).to(device = config.device, dtype = torch.float)

        early_stopper = EarlyStopper(patience=self.patience, min_delta=0.1)
        self.model = LSTM_Prediction_Model(X.shape[1], self.hidden_size, self.num_layers, 1, self.drop, self.bilstm)
        criterion = torch.nn.MSELoss()    # mean-squared error for regression
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate) 
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
        data_loss = []
        data_val_loss = []

        
        for epoch in range(self.num_epochs):
            self.model.train()
            outputs = self.model(self.X_train) #forward pass
            optimizer.zero_grad() #calculate the gradient, manually setting to 0
            
            # obtain the loss function
            loss = criterion(outputs, self.y_train)
            
            loss.backward() #calculates the loss of the loss function
            
            optimizer.step() #improve from loss, i.e backprop
            scheduler.step()
            if isinstance(X_val, np.ndarray):
                with torch.no_grad():
                    self.model.eval()
                    validation_loss = criterion(self.model(self.X_val), self.y_val)
                    print(epoch, loss.item(), validation_loss.item())
                    data_loss.append(loss.item())
                    data_val_loss.append(validation_loss.item())
                if early_stopper.early_stop(validation_loss) and epoch > 40:  
                    print(epoch)
                          
                    break
        return data_loss, data_val_loss            
    def predict(self, x):
        
        with torch.no_grad():
            self.model.eval()
            return self.model(torch.from_numpy(np.reshape(x, (1, x.shape[1], 1))).to(device = config.device, dtype = torch.float)).to(device = 'cpu').detach().numpy()[0]



In [77]:
class LSTM_predictor:
    def __init__(self, input_shape, seq_num, inner_output, output_shape, num_layers, drop, bilstm = False):
        
        self.model = Sequential()
        ret_seq = True if num_layers > 1 else False
        if not bilstm:
            self.model.add(LSTM(units = inner_output, return_sequences = ret_seq, input_shape = (input_shape, seq_num)))
            self.model.add(Dropout(drop))
            if num_layers > 1:
                if num_layers > 2:
                    for i in range(num_layers-2):
                        self.model.add(LSTM(units = inner_output, return_sequences = True))
                        self.model.add(Dropout(drop))

                self.model.add(LSTM(units = inner_output))
                self.model.add(Dropout(drop))
            self.model.add(Dense(units = output_shape))
        else:
        
            # First layer of BiLSTM
            self.model.add(Bidirectional(LSTM(units = inner_output, return_sequences = ret_seq, input_shape = (input_shape, seq_num))))
            self.model.add(Dropout(drop))
            if num_layers > 1:
                if num_layers > 2:
                    for i in range(num_layers-2):
                        self.model.add(Bidirectional(LSTM(units = inner_output, return_sequences = True)))
                        self.model.add(Dropout(drop))

            
                # Second layer of BiLSTM
                self.model.add(Bidirectional(LSTM(units = inner_output)))
                self.model.add(Dropout(drop))
            self.model.add(Dense(units = output_shape))
            
            
        
        

In [78]:
class nn_wrapper_tf:
    def __init__(self, nn_type):
        self.type = nn_type
        
        
    def set_params(self, **params):
    
        self.num_epochs = params['num_epochs']
        self.learning_rate = params['learning_rate']
        self.batch_size = params['batch_size']
        self.hidden_size = params['hidden_size'] #, num_layers, seq_length, drop
        self.num_layers = params['num_layers']
        self.drop = params['drop']
        if 'bilstm' in params.keys():

            self.bilstm = params['bilstm']
        else:
            self.bilstm = False
        

    def fit(self, X, y, X_val = 0, y_val = 0):
        
        
        if isinstance(X, np.ndarray):
            self.X_train = np.reshape(X, (X.shape[1], X.shape[2], X.shape[0]))
            self.y_train = np.array(y).reshape((len(y), 1))
        else:
            self.X_train = np.reshape(X.values, (X.shape[1], X.shape[2], X.shape[0]))
            self.y_train = np.array(y.values).reshape((len(y), 1))
        print(X.shape[2],'lll', X.shape[0])
        # (self, input_shape, inner_output, output_shape, num_layers, drop)
        self.nn = LSTM_predictor(X.shape[2], X.shape[0], self.hidden_size, 1, self.num_layers, self.drop, self.bilstm)
        self.nn.model.compile(optimizer = 'adam', loss = 'mean_squared_error')
        self.nn.model.fit(self.X_train, self.y_train, epochs = self.num_epochs, batch_size = self.batch_size, shuffle = False)
                
                   
    def predict(self, x):
        
        return self.nn.model.predict(np.reshape(x, (x.shape[1], x.shape[2], x.shape[0])))


In [166]:
# тестирую на бустинге
import optuna


def optuna_hyper_opt(model, X_train, y_train, X_val, y_val, params, metric_weights, num_lags):
    
    
    def objective(trial):
        parameters = {}
        for i in params:
            
            if type(params[i]) == list:
                if type(params[i][0]) == int:
                    parameters.update({i: trial.suggest_int(i, params[i][0], params[i][1])})
                elif type(params[i][0]) == float:
                    parameters.update({i: trial.suggest_float(i, params[i][0], params[i][1])})
                elif type(params[i][0]) == str:
                    parameters.update({i: trial.suggest_categorical(i, params[i])})
            else:
                parameters.update({i:params[i]})
        
        print(parameters)
        
        model.set_params(**parameters)

        if isinstance(model, nn_wrapper_tf):
            model.fit(X_train, y_train, X_val, y_val)
        else:
            print('k o')
            model.fit(X_train, y_train)

            '''
        pred = []
        for i in X_val:
            pred.append(horizon_forecasts(model, i, y_val.shape[1], num_lags))
         '''
        
        pred = horizon_forecasts_v2(model, X_val, y_val.shape[1], num_lags)
        score = np.array(scores_fun(pred, y_val))

        return np.dot(score, metric_weights)
    
    study = optuna.create_study(direction='minimize')
    study.optimize(objective, n_trials=3, timeout=300)
    return study.best_params

def horizon_forecasts_v2(model, x, horizon, num_lags):
    
    all_features = x
    if isinstance(model, nn_wrapper_tf):
        for i in range(horizon):
            f = model.predict(all_features).reshape(-1, 1)
            
            if i == 0:
                prediction = f
            else: 
                prediction = np.concatenate((prediction, f), axis=1)
            lags = all_features[0]
            
            not_lags = all_features[1:]
            
            lags = np.concatenate((lags[:, 1:], f), axis=1)
            #all_features = np.array([lags, not_lags])
            
            
            all_features = np.concatenate([np.array([lags]), not_lags])
    elif num_lags < x.shape[1]:
        not_lags = all_features[:, num_lags:]
        lags = all_features[:, :num_lags]
        for i in range(horizon):
            f = model.predict(all_features).reshape(-1, 1)
            
            if i == 0:
                prediction = f
            else: 
                prediction = np.concatenate((prediction, f), axis=1)
            
            lags = np.concatenate((lags[:, 1:], f), axis=1)
            all_features = np.concatenate((lags, not_lags), axis=1)
    else:
        lags = all_features
        for i in range(horizon):
            f = model.predict(lags).reshape(-1, 1)
            
            if i == 0:
                prediction = f
            else: 
                prediction = np.concatenate((prediction, f), axis=1)

            lags =  np.concatenate((lags[:, 1:], f), axis=1)

    return np.array(prediction)

def horizon_forecasts(model, x, horizon, num_lags):
    prediction = []
    all_features = x.reshape(1, -1)
    if num_lags < len(x):
        not_lags = all_features[0][num_lags:]
        lags = all_features[0][:num_lags]
        for i in range(horizon):
            f = model.predict(all_features)[0]
            prediction.append(f)
            lags = np.append(lags[1:], f)
            all_features = np.append(lags, not_lags).reshape(1, -1)
            
    else:
        lags = all_features
        for i in range(horizon):
            f = model.predict(lags)[0]
            prediction.append(f)
            lags = np.append(lags[0][1:], f).reshape(1, -1)
       
       

    return np.array(prediction)

def scores_fun(pred, y):
    horizon = y.shape[1]
    n = y.shape[0]
    metric_mse = 0
    metric_trend_detect = 0
    metric_weights = 0
    metric_mape = 0
    metric_true_pred = 0
    
    weights = np.array([i / horizon for i in range(1, horizon + 1)])

    for i in range(n - horizon + 1):
        diff = (pred[i] - y[i]) ** 2
        if pred[i][0] * y[i][0] > 0:
             metric_true_pred += 1
        metric_mape += mean_absolute_percentage_error(y[i], pred[i])

        metric_mse += np.sum(diff) / horizon
        
        if np.dot(np.sum(pred[i]), np.sum(y[i])) > 0:
            metric_trend_detect += 1
        
        metric_weights += np.sum(np.dot(weights, diff)) / horizon

    for ind, j in enumerate(range(n - horizon + 1, n)):
        s1 = 0
        s1_w = 0
        s_mape = 0
        s_tr_det = 0
        s_tr_det_pred = 0
        for i in range(horizon - ind - 1):
            diff = (pred[j][i] - y[j][i]) ** 2 
            s1 += diff
            if i == 0:
                if pred[j][i] * y[j][i] > 0:
                    metric_true_pred += 1
            if y[j][i] != 0:
                s_mape += np.abs(pred[j][i] - y[j][i]) / y[j][i]
            s1_w = diff * weights[i]
            s_tr_det += y[j][i]
            s_tr_det_pred += pred[j][i]
            if i == horizon - ind - 1 - 1:
                if np.dot(s_tr_det_pred, s_tr_det ) > 0:
                    metric_trend_detect += 1 - ind / horizon
        
        metric_mse += s1 / (horizon - ind - 1)
        metric_weights += s1_w / (horizon - ind -1)
        metric_mape += s_mape / (horizon - ind - 1)


    return [metric_mse/n, metric_weights/n, metric_trend_detect/n, 100 * metric_mape / n, metric_true_pred / n]


def horizon_prediction(model, X, y, k, num_lags, addit_data=0):
    time_index = X['Date']
    if isinstance(model, nn_wrapper_tf):
        X = X.drop('Date', axis = 1).values
        X = np.concatenate([np.array([X]), np.array(addit_data)])
        y = y.drop('Date', axis = 1).values
    else:
        X = X.drop('Date', axis = 1).values
        y = y.drop('Date', axis = 1).values


    hor = y.shape[1]
    pred = []
    '''
    for i in X:
        pred.append(horizon_forecasts_v2(model, i, hor, num_lags))
    '''
    
    pred = horizon_forecasts_v2(model, X, hor, num_lags)
    # pred в df превратить
    score = scores_fun(pred, y)
    
    score.append(time_index[0])
    score.append(time_index[time_index.shape[0] - 1])
    #scaler = joblib.load(f"scaler{k}.save")

    #pred = pd.DataFrame(list(map(lambda x: scaler.inverse_transform(x.reshape(-1, 1)).reshape(1, -1)[0], pred)), index=time_index)
    pred = pd.DataFrame(pred, index=time_index)
    return pred, score


def model_forecasts(model, params_set, k, HPO=False, metric_weights = np.array([0, 0, 0, 0, 0])):
    forecasts = []
    metrics = []
    df = pd.read_csv('filtered_features.csv', index_col='Date')
    df['Date'] = df.index
    df.index = np.array(range(df.shape[0]))
    df = df.drop(['Unnamed: 0'], axis = 1)
    tick = ['IXIC_Close', 'DX-Y.NYB_Close', 'VIX_Close']
    for i in range(k):

        print(i)
        if i == 0:
            data_x_train_val = pd.read_csv(f'x_train_v2_{i}.csv').drop(['Unnamed: 0', 'Date'], axis = 1)
            num_lags = data_x_train_val.shape[1] - 1
            if isinstance(model, nn_wrapper_tf):
                
                addit_data = []
                for t in tick:
                    add_data = pd.read_csv(f'x_train_v2_{t}_{i}.csv', index_col='Date')
                    add_data['Date'] = add_data.index
                    add_data.index = np.array(range(add_data.shape[0]))
                    add_data = add_data.drop(['Unnamed: 0', 'Date'], axis = 1)
                    #data_x_train_val = pd.merge(data_x_train_val, add_data, 'inner', 'Date').drop('Date', axis=1)
                    addit_data.append(add_data.values)
                
                data_x_train_val = np.concatenate([np.array([data_x_train_val.values]), np.array(addit_data)])
            else:
                data_x_train_val = pd.merge(data_x_train_val, df, 'inner', 'Date').drop('Date', axis=1)

            data_y_train_val = pd.read_csv(f'./y_train_v2_/y_train_v2_{i}.csv').drop(['Unnamed: 0', 'Date'], axis = 1)
            data_x_val = pd.read_csv(f'./x_val_v2_{i}.csv').drop(['Unnamed: 0'], axis = 1)
            if isinstance(model, nn_wrapper_tf):
                
                addit_data = []
                for t in tick:
                    add_data = pd.read_csv(f'./x_val_v2_{t}_{i}.csv', index_col='Date')
                    add_data['Date'] = add_data.index
                    add_data.index = np.array(range(add_data.shape[0]))
                    add_data = add_data.drop(['Unnamed: 0', 'Date'], axis = 1)
                    addit_data.append(add_data.values)
                data_x_val = np.array([data_x_val.values, addit_data])
                
            else:
                data_x_val = pd.merge(data_x_val, df, 'inner', 'Date').drop('Date', axis=1)

            data_y_val = pd.read_csv(f'y_val_v2_{i}.csv').drop(['Unnamed: 0', 'Date'], axis = 1)
        else:
            data_x_train_val = data_x_train_test
            data_y_train_val = data_y_train_test
            data_x_val = data_x_test.drop( 'Date', axis = 1)
            data_y_val = data_y_test.drop('Date', axis = 1)
        
        if HPO:
            params = optuna_hyper_opt(model, data_x_train_val.values, data_y_train_val.values, data_x_val.values, data_y_val.values, params_set, metric_weights, num_lags)
            for j in params_set:
                if j not in params.keys():
                    params.update({j : params_set[j]})    
        else:
            params = params_set 

        print(params)
        model.set_params(**params)
        

        data_x_train_test = pd.read_csv(f'./x_train_v2_/x_train_v2_{i + 1}.csv').drop(['Unnamed: 0'], axis = 1)
        if isinstance(model, nn_wrapper_tf):
            addit_data = []
            for t in tick:
                add_data = pd.read_csv(f'./x_train_v2_{t}_/x_train_v2_{t}_{i + 1}.csv', index_col='Date')
                add_data['Date'] = add_data.index
                add_data.index = np.array(range(add_data.shape[0]))
                add_data = add_data.drop(['Unnamed: 0', 'Date'], axis = 1)
                addit_data.append(add_data.values)
            
            data_x_train_test = np.concatenate([np.array([data_x_train_test.drop(['Date'], axis = 1).values]), np.array(addit_data)])
        else:
            data_x_train_test = pd.merge(data_x_train_test, df, 'inner', 'Date').drop('Date', axis=1)
            
        data_y_train_test = pd.read_csv(f'./y_train_v2_/y_train_v2_{i + 1}.csv').drop(['Unnamed: 0', 'Date'], axis = 1)
        data_x_test = pd.read_csv(f'./x_test_v2_/x_test_v2_{i + 1}.csv').drop('Unnamed: 0', axis = 1)
        if not isinstance(model, nn_wrapper_tf):
            data_x_test = pd.merge(data_x_test, df, 'inner', 'Date')
        else:
            addit_data = []
            for t in tick:
                add_data = pd.read_csv(f'./x_test_v2_{t}_/x_test_v2_{t}_{i + 1}.csv', index_col='Date')
                add_data['Date'] = add_data.index
                add_data.index = np.array(range(add_data.shape[0]))
                add_data = add_data.drop(['Unnamed: 0', 'Date'], axis = 1)
                addit_data.append(add_data.values)

        data_y_test = pd.read_csv(f'./y_test_v2_/y_test_v2_{i + 1}.csv').drop('Unnamed: 0', axis = 1)
        #model = model(params)
        model.fit(data_x_train_test, data_y_train_test)
        res = horizon_prediction(model, data_x_test, data_y_test, i + 1, num_lags, addit_data)

        
        forecasts.append(res[0])
        metrics.append(res[1])
        print(i)
    return pd.concat(forecasts), pd.DataFrame(metrics)


In [208]:
params = {
    'boosting_type': 'gbdt',
    'objective':'regression',
    'num_leaves': 100,
    'n_estimators': 100,
    'learning_rate': 0.1,
    'metric': 'l1',
    'num_iterations': 200
}

params1 = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'num_leaves':  100,
    'learning_rate': 0.1,
    'metric':  ['l1', 'l2']
}

params3 = {
    'num_epochs': 1,
    'batch_size' : 30,
    'hidden_size': 128,
    'num_layers': [1, 2],
    'drop': 0.25,
    'learning_rate':  0.1,
    'bilstm': True
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#tf.keras.utils.disable_interactive_logging()
tf.keras.utils.enable_interactive_logging()
seed(0)

#model = lgb.LGBMRegressor()
num_layers = 1
k = 2
model_types = ['LSTM', 'BiLSTM']
for j in model_types:
    bilstm = True if j == model_types[1] else False
    
    for i in range(num_layers, num_layers + 1):

       


        print(j, i)
        model = nn_wrapper_tf(j)
        #model = lgb.LGBMRegressor()
        params2 = {
        'num_epochs': 20,
        'batch_size' : 64,
        'hidden_size': 200,
        'num_layers': i,
        'drop': 0.3,
        'learning_rate': 0.1,
        'bilstm': bilstm
        }
        results = model_forecasts(model, params2, k)#, True, np.array([1, 1, 1, 1, 1]))

        results[0].to_csv(f'new_results_v4_{j}_{i}_{k}.csv')
        results[1].to_csv(f'new_results_metrics_v4_{j}_{i}_{k}.csv')
        

LSTM 1
0


FileNotFoundError: [Errno 2] No such file or directory: 'x_train_v2_0.csv'

In [206]:
df1 = pd.read_csv('new_results_metrics_v4_LSTM_1_2.csv')
#print(df1[['2', '4']].apply(np.mean))
df1

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6
0,0,0.000145,6.2e-05,0.554,74.533706,0.48,1999-03-19,1999-05-28
1,1,0.000103,4.6e-05,0.488,80.344355,0.6,1999-06-01,1999-08-10


In [7]:
df1.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6
0,0,0.000145,6.2e-05,0.43,75.2695,0.6,1999-03-19,1999-05-28
1,1,0.0001,4.6e-05,0.434,88.560639,0.54,1999-06-01,1999-08-10
2,2,0.000155,6.8e-05,0.632,124.601431,0.54,1999-08-11,1999-10-20
3,3,7.3e-05,3.4e-05,0.294,91.355377,0.5,1999-10-21,1999-12-31
4,4,0.000218,9.7e-05,0.526,91.268426,0.66,2000-01-03,2000-03-14


In [66]:
df = pd.read_csv('filtered_features.csv', index_col='Date').drop('target', axis=1)
df.to_csv('filtered_features.csv')


KeyError: "['target'] not found in axis"

In [138]:
num_lags = 40
data_x_train_test = pd.read_csv(f'x_train_v2_{1}.csv').drop(['Unnamed: 0', 'Date'], axis = 1)
print(np.array([data_x_train_test.values]).shape)
tick = ['IXIC_Close', 'DX-Y.NYB_Close', 'VIX_Close']
addit_data = []

for t in tick:
    add_data = pd.read_csv(f'x_train_v2_{t}_{1}.csv', index_col='Date')
    add_data['Date'] = add_data.index
    add_data.index = np.array(range(add_data.shape[0]))
    add_data = add_data.drop(['Unnamed: 0', 'Date'], axis = 1)
    addit_data.append(add_data.values)
data_x_train_test = np.concatenate([np.array([data_x_train_test.values]), np.array(addit_data)])


(1, 1509, 40)


In [139]:
data_x_train_test.shape

(4, 1509, 40)

In [120]:
np.array(addit_data).shape

(3, 1509, 40)