In [4]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [5]:
## EXTERNAL
import pandas as pd 
import numpy as np 
import pickle
import shutil
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import numpy as np
import sklearn
import time
import os
import random 
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

##INTERNAL
from models import Autoencoder
from models import Sequence
from models import waveletSmooth

from utils import prepare_data_lstm, ExampleDataset, save_checkpoint, evaluate_lstm, backtest


# Data loading

In [7]:
# Read the semicolon-separated S&P 500 table and index it by date.
path = "./data/S&P500IndexData-Table1.csv"
data_master = pd.read_csv(path, sep=";")
# "Ntime" is cast to text before parsing so pd.to_datetime reads it as a date string.
data_master["Date"] = pd.to_datetime(data_master["Ntime"].astype(str))
# Once "Date" exists the two raw time columns are dropped and "Date" becomes the index.
data_master = data_master.drop(["time", "Ntime"], axis=1).set_index("Date")
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
data_master

Unnamed: 0_level_0,Close Price,Open Price,High Price,Low Price,Volume,MACD,CCI,ATR,BOLL,EMA20,MA10,MTM6,MA5,MTM12,ROC,SMI,WVAD,US Dollar Index,Federal Fund Rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2008-07-01,1284.91,1276.69,1285.31,1260.68,584629.0,-25.545595,-127.422348,24.63,1347.154231,1327.453482,1307.927,-33.09,1289.682,-75.12,-5.523408,-0.039542,-3.772245e+07,72.3400,2.11
2008-07-02,1261.52,1285.82,1292.17,1261.51,527609.0,-27.571986,-115.864929,30.66,1342.391538,1321.174102,1300.298,-52.77,1277.592,-98.62,-7.250724,-0.060551,-5.204278e+07,71.9900,1.95
2008-07-03,1262.90,1262.96,1271.48,1252.01,324759.0,-28.735319,-115.171527,19.47,1337.470769,1315.624188,1292.305,-59.07,1273.542,-88.03,-6.516252,-0.041755,-5.556148e+07,72.7300,1.92
2008-07-07,1252.31,1262.90,1273.95,1240.68,526542.0,-30.164080,-111.302302,33.27,1331.857308,1309.594265,1285.743,-26.07,1268.328,-85.50,-6.740987,-0.049841,-5.352552e+07,72.7100,1.99
2008-07-08,1273.70,1251.84,1274.17,1242.84,603411.0,-29.233405,-82.732708,31.33,1326.985000,1306.175764,1281.313,-6.30,1267.068,-69.13,-3.356020,-0.030884,-3.909113e+07,72.9600,1.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-09-26,2146.10,2158.54,2158.54,2145.04,357811.0,-3.864936,-10.284991,19.65,2163.793077,2157.914061,2146.918,20.33,2158.170,-35.20,-1.846804,0.008891,-1.701630e+07,95.2986,0.40
2016-09-27,2159.93,2146.04,2161.13,2141.55,366113.0,-3.200729,22.969783,19.58,2162.872308,2158.106055,2150.209,20.81,2162.204,32.12,-1.199821,0.015815,-1.258582e+07,95.4499,0.40
2016-09-28,2171.37,2161.85,2172.40,2151.79,361908.0,-1.731270,100.879276,20.61,2162.438846,2159.369288,2154.769,31.61,2163.854,12.33,-0.455233,0.011104,-7.050632e+06,95.4275,0.40
2016-09-29,2151.13,2168.90,2172.67,2145.20,424922.0,-2.174842,39.501463,27.47,2161.063077,2158.584594,2155.156,-11.99,2158.644,24.11,1.095963,0.005565,-1.063365e+07,95.5172,0.40


# Training

In [8]:
def get_scaled_features(feats_to_scale):
    """Return a copy of ``feats_to_scale`` with each column rescaled by a fixed factor.

    The factors are hand-picked constants that bring the inputs into an
    appropriate range. The input frame is never modified.

    Parameters
    ----------
    feats_to_scale : pandas.DataFrame
        Frame containing every column named in the tables below
        (extra columns are passed through untouched).

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns, in the same order.

    Raises
    ------
    KeyError
        If any of the expected columns is missing.
    """
    # column -> value the column is divided by
    divisors = {
        "Close Price": 1000,
        "Open Price": 1000,
        "High Price": 1000,
        "Low Price": 1000,
        "Volume": 1000000,
        "MACD": 10,
        "CCI": 100,
        "ATR": 100,
        "BOLL": 1000,
        "EMA20": 1000,
        "MA10": 1000,
        "MTM6": 100,
        "MA5": 1000,
        "MTM12": 100,
        "ROC": 10,
        "WVAD": 100000000,
        "US Dollar Index": 100,
    }
    # column -> value the column is multiplied by
    multipliers = {"SMI": 10}
    # columns that are required but kept at their original scale
    unscaled = ("Federal Fund Rate",)

    missing = [
        name
        for name in (*divisors, *multipliers, *unscaled)
        if name not in feats_to_scale.columns
    ]
    if missing:
        raise KeyError("missing feature column(s): {}".format(missing))

    feats = feats_to_scale.copy()
    # Assign whole columns on the frame itself. Writing through
    # ``feats[col].loc[:] = ...`` is chained assignment: it targets a temporary
    # Series and does not update ``feats`` when pandas Copy-on-Write is active.
    for name, divisor in divisors.items():
        feats[name] = feats[name] / divisor
    for name, factor in multipliers.items():
        feats[name] = feats[name] * factor
    return feats

def train_valid_test_split(data, step_size, out_type=float):
    """Split ``data`` by row position into train, validation and test arrays.

    The last ``step_size`` rows form the test set, the ``step_size`` rows
    before them the validation set, and everything earlier the training set.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Object supporting positional slicing and ``.to_numpy``.
    step_size : int
        Number of rows in the validation set and in the test set; must be >= 1.
    out_type : dtype, optional
        dtype of the returned arrays. Defaults to the builtin ``float``, which
        is what the removed ``np.float`` alias referred to.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train, valid, test)``, each an independent copy.

    Raises
    ------
    ValueError
        If ``step_size`` is smaller than 1. With ``step_size == 0`` the
        negative-index slices would put every row in the test set.
    """
    if step_size < 1:
        raise ValueError("step_size must be a positive integer, got {!r}".format(step_size))

    train = data[:-2 * step_size]
    valid = data[-2 * step_size:-step_size]
    test = data[-step_size:]
    return tuple(part.to_numpy(dtype=out_type, copy=True) for part in (train, valid, test))

In [58]:
# TODO: add a "look into the future" boolean parameter
def apply_wavelet_transform(data, consider_future=True):
    """Return a column-wise wavelet-smoothed copy of ``data``.

    A 1-D input is first promoted to a single-column 2-D array, so the result
    always has two dimensions. With ``consider_future=False`` the (possibly
    reshaped) copy is returned without any smoothing.

    Each column is passed as a whole to ``waveletSmooth(..., level=1)``, and the
    last ``len(data)`` values of its output replace the column.
    """
    smoothed = data.copy()
    if smoothed.ndim == 1:
        smoothed = smoothed[..., np.newaxis]

    if not consider_future:
        return smoothed

    n_rows = len(smoothed)
    for col in range(smoothed.shape[1]):
        # keep only the trailing n_rows values of the smoothed signal
        smoothed[:, col] = waveletSmooth(smoothed[:, col].copy(), level=1)[-n_rows:]
    return smoothed

In [9]:
## Rolling-window training parameters

# Each window holds 600 rows, which is a bit more than 2 years of data.
num_datapoints = 600
# The window advances by a tenth of its length:
# approx. 60 days - 3 months of trading days.
step_size = int(num_datapoints * 0.1)
# Number of window positions over the entire data set: the rows left after the
# first window, divided into steps and rounded up, plus 2.
rows_after_first_window = len(data_master) - num_datapoints
num_iterations = int(np.ceil(rows_after_first_window / step_size)) + 2

In [10]:
## Initialise the state used by the training cells

# index of the current window position
n = 0
# counter that the scaling cell increments
ct = 0
# y_test_lst collects test targets; preds is only initialised here
y_test_lst, preds = [], []

In [11]:
## Scale the current window
# The window is the num_datapoints rows of the master table that start at row
# n * step_size; it is copied so the master table stays untouched.
window_start = n * step_size
window_stop = window_start + num_datapoints
data = data_master.iloc[window_start:window_stop, :].copy()
# Strip surrounding whitespace from the column labels.
data.columns = [label.strip() for label in data.columns.tolist()]
ct += 1
feats = get_scaled_features(data)

In [60]:
## Build inputs and target, then split them by row position
# Target: the scaled close price. Inputs: every other scaled column.
Y = feats["Close Price"].copy()
X = feats.drop(columns="Close Price")

# Both are split with the same step size, so the three parts stay row-aligned.
X_train, X_valid, X_test = train_valid_test_split(X, step_size)
Y_train, Y_valid, Y_test = train_valid_test_split(Y, step_size)

In [61]:
## Denoise with the discrete wavelet transform (DWT)
# TODO: check whether the original method gives different results and, if so, find out why (is future data actually leaking?)
# TODO: understand wavelets well enough to explain them in the report and to resolve the TODO above (also read the wavelet code and docs)

# Validation is transformed together with the training rows that precede it,
# and test together with training + validation. The *_valid_denoised and
# *_test_denoised arrays therefore have as many rows as the stacked input.
X_train_denoised = apply_wavelet_transform(X_train)
X_valid_denoised = apply_wavelet_transform(np.concatenate((X_train, X_valid), axis=0))
X_test_denoised = apply_wavelet_transform(np.concatenate((X_train, X_valid, X_test), axis=0))

Y_train_denoised = apply_wavelet_transform(Y_train)
Y_valid_denoised = apply_wavelet_transform(np.concatenate((Y_train, Y_valid), axis=0))
Y_test_denoised = apply_wavelet_transform(np.concatenate((Y_train, Y_valid, Y_test), axis=0))

In [None]:
## TODO: show the data before and after applying the wavelet transform

In [None]:
## ENCODE FEATURES USING STACKED AUTOENCODER
# TODO: understand and refactor
# NOTE(review): `feats_norm_train`, `feats_norm_validate_WT` and
# `feats_norm_test_WT` are read below but not assigned in this cell; confirm
# they exist in the kernel before running it.


def _as_float_variable(array):
    """Wrap a NumPy array, cast to float32, in a torch Variable."""
    return Variable(torch.from_numpy(array.astype(np.float32)))


def _encode_stack(batch):
    """Pass `batch` through the four encoders in order; return a NumPy array."""
    return auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(batch)))).data.numpy()


# Size of the hidden layer of each of the four autoencoders.
num_hidden_1 = 10
num_hidden_2 = 10
num_hidden_3 = 10
num_hidden_4 = 10

n_epoch = 100  # 20000

# ---- train using training data
# Each autoencoder is created only when n == 0, so that the networks are
# initialised once and keep training as we move through time. Every stage is
# fit on the encoded output of the stage before it.

if n == 0:
    auto1 = Autoencoder(feats_norm_train.shape[1], num_hidden_1)
auto1.fit(feats_norm_train, n_epoch=n_epoch)

inputs = _as_float_variable(feats_norm_train)

if n == 0:
    auto2 = Autoencoder(num_hidden_1, num_hidden_2)
auto1_out = auto1.encoder(inputs).data.numpy()
auto2.fit(auto1_out, n_epoch=n_epoch)

if n == 0:
    auto3 = Autoencoder(num_hidden_2, num_hidden_3)
auto1_out = _as_float_variable(auto1_out)
auto2_out = auto2.encoder(auto1_out).data.numpy()
auto3.fit(auto2_out, n_epoch=n_epoch)

if n == 0:
    auto4 = Autoencoder(num_hidden_3, num_hidden_4)
auto2_out = _as_float_variable(auto2_out)
auto3_out = auto3.encoder(auto2_out).data.numpy()
auto4.fit(auto3_out, n_epoch=n_epoch)

# Change to evaluation mode; in this mode the network behaves differently,
# e.g. dropout is switched off.
for stage in (auto1, auto2, auto3, auto4):
    stage.eval()

X_train = _as_float_variable(feats_norm_train)
train_encoded = _encode_stack(X_train)

# ---- encode validation and test data using the autoencoders trained only on training data
X_validate = _as_float_variable(feats_norm_validate_WT)
validate_encoded = _encode_stack(X_validate)

X_test = _as_float_variable(feats_norm_test_WT)
test_encoded = _encode_stack(X_test)

# switch back to training mode
for stage in (auto1, auto2, auto3, auto4):
    stage.train()

In [None]:
# 

In [None]:
# Split in train, test and validation set

# y_test = data_close_new[-step_size:].to_numpy()
# y_validate = data_close_new[-2*step_size:-step_size].to_numpy()
# y_train = data_close_new[:-2*step_size].to_numpy()
# feats_train = train.to_numpy().astype(np.float)
# feats_validate = validate.to_numpy().astype(np.float)
# feats_test = test.to_numpy().astype(np.float)
# ---------------------------------------------------------------------------
# ----------------------- STEP 2.0: NORMALIZE DATA --------------------------
# ---------------------------------------------------------------------------

# REMOVED THE NORMALIZATION AND MANUALLY SCALED TO APPROPRIATE VALUES ABOVE

# """
# scaler = StandardScaler().fit(feats_train)

# feats_norm_train = scaler.transform(feats_train)
# feats_norm_validate = scaler.transform(feats_validate)
# feats_norm_test = scaler.transform(feats_test)
# """
# """
# scaler = MinMaxScaler(feature_range=(0,1))
# scaler.fit(feats_train)

# feats_norm_train = scaler.transform(feats_train)
# feats_norm_validate = scaler.transform(feats_validate)
# feats_norm_test = scaler.transform(feats_test)
# """    
# data_close = pd.Series(np.concatenate((y_train, y_validate, y_test)))

# feats_norm_train = feats_train.copy()
# feats_norm_validate = feats_validate.copy()
# feats_norm_test = feats_test.copy()

# ---------------------------------------------------------------------------
# ----------------------- STEP 2.1: DENOISE USING DWT -----------------------
# ---------------------------------------------------------------------------

# for i in range(feats_norm_train.shape[1]):
#     feats_norm_train[:,i] = waveletSmooth(feats_norm_train[:,i], level=1)[-len(feats_norm_train):]

# # for the validation we have to do the transform using training data + the current and past validation data
# # i.e. we CAN'T USE all the validation data because we would then look into the future 
# temp = np.copy(feats_norm_train)
# feats_norm_validate_WT = np.copy(feats_norm_validate)
# for j in range(feats_norm_validate.shape[0]):
#     #first concatenate train with the latest validation sample
#     temp = np.append(temp, np.expand_dims(feats_norm_validate[j,:], axis=0), axis=0)
#     for i in range(feats_norm_validate.shape[1]):
#         feats_norm_validate_WT[j,i] = waveletSmooth(temp[:,i], level=1)[-1]

# # for the test we have to do the transform using training data + validation data + current and past test data
# # i.e. we CAN'T USE all the test data because we would then look into the future 
# temp_train = np.copy(feats_norm_train)
# temp_val = np.copy(feats_norm_validate)
# temp = np.concatenate((temp_train, temp_val))
# feats_norm_test_WT = np.copy(feats_norm_test)
# for j in range(feats_norm_test.shape[0]):
#     #first concatenate train with the latest validation sample
#     temp = np.append(temp, np.expand_dims(feats_norm_test[j,:], axis=0), axis=0)
#     for i in range(feats_norm_test.shape[1]):
#         feats_norm_test_WT[j,i] = waveletSmooth(temp[:,i], level=1)[-1]

# ---------------------------------------------------------------------------
# ------------- STEP 3: ENCODE FEATURES USING STACKED AUTOENCODER -----------
# ---------------------------------------------------------------------------

# num_hidden_1 = 10
# num_hidden_2 = 10
# num_hidden_3 = 10
# num_hidden_4 = 10

# n_epoch=100#20000

# # ---- train using training data

# # The n==0 statement is done because we only want to initialize the network once and then keep training
# # as we move through time 

# if n == 0:
#     auto1 = Autoencoder(feats_norm_train.shape[1], num_hidden_1)
# auto1.fit(feats_norm_train, n_epoch=n_epoch)

# inputs = torch.autograd.Variable(torch.from_numpy(feats_norm_train.astype(np.float32)))

# if n == 0:
#     auto2 = Autoencoder(num_hidden_1, num_hidden_2)
# auto1_out = auto1.encoder(inputs).data.numpy()
# auto2.fit(auto1_out, n_epoch=n_epoch)

# if n == 0:
#     auto3 = Autoencoder(num_hidden_2, num_hidden_3)
# auto1_out = torch.autograd.Variable(torch.from_numpy(auto1_out.astype(np.float32)))
# auto2_out = auto2.encoder(auto1_out).data.numpy()
# auto3.fit(auto2_out, n_epoch=n_epoch)

# if n == 0:
#     auto4 = Autoencoder(num_hidden_3, num_hidden_4)
# auto2_out = torch.autograd.Variable(torch.from_numpy(auto2_out.astype(np.float32)))
# auto3_out = auto3.encoder(auto2_out).data.numpy()
# auto4.fit(auto3_out, n_epoch=n_epoch)


# # Change to evaluation mode, in this mode the network behaves differently, e.g. dropout is switched off and so on
# auto1.eval()        
# auto2.eval()
# auto3.eval()
# auto4.eval()

# X_train = feats_norm_train
# X_train = torch.autograd.Variable(torch.from_numpy(X_train.astype(np.float32)))
# train_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_train))))
# train_encoded = train_encoded.data.numpy()

# # ---- encode validation and test data using autoencoder trained only on training data 
# X_validate = feats_norm_validate_WT   
# X_validate = torch.autograd.Variable(torch.from_numpy(X_validate.astype(np.float32)))
# validate_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_validate))))
# validate_encoded = validate_encoded.data.numpy()

# X_test = feats_norm_test_WT
# X_test = torch.autograd.Variable(torch.from_numpy(X_test.astype(np.float32)))
# test_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_test))))
# test_encoded = test_encoded.data.numpy()

# # switch back to training mode
# auto1.train()        
# auto2.train()
# auto3.train()
# auto4.train()


# ---------------------------------------------------------------------------
# -------------------- STEP 4: PREPARE TIME-SERIES --------------------------
# ---------------------------------------------------------------------------

# Split the encoded series into pieces, depending on the number of time steps
# for the LSTM.
# NOTE(review): `train_encoded`, `validate_encoded`, `test_encoded` and
# `data_close` are read here but not assigned in this cell; confirm they exist
# in the kernel before running it.

time_steps = 4

args = (train_encoded, validate_encoded, test_encoded)

x_concat = np.concatenate(args)

# Validation/test inputs are prefixed with the last `time_steps` rows of the
# split that precedes them (presumably so that their first sample has a full
# look-back window; confirm against prepare_data_lstm).
validate_encoded_extra = np.concatenate((train_encoded[-time_steps:], validate_encoded))
test_encoded_extra = np.concatenate((validate_encoded[-time_steps:], test_encoded))

# Target slices; the validation and test slices start one element before their split.
y_train_input = data_close[:-len(validate_encoded)-len(test_encoded)]
y_val_input = data_close[-len(test_encoded)-len(validate_encoded)-1:-len(test_encoded)]
y_test_input = data_close[-len(test_encoded)-1:]

x, y = prepare_data_lstm(train_encoded, y_train_input, time_steps, log_return=True, train=True)
x_v, y_v = prepare_data_lstm(validate_encoded_extra, y_val_input, time_steps, log_return=False, train=False)
x_te, y_te = prepare_data_lstm(test_encoded_extra, y_test_input, time_steps, log_return=False, train=False)

x_test = x_te
x_validate = x_v
x_train = x

y_test = y_te
y_validate = y_v
y_train = y

# `.as_matrix()` was removed in pandas 1.0; `.to_numpy()` is its replacement.
y_train = y_train.to_numpy()

# ---------------------------------------------------------------------------
# ------------- STEP 5: TIME-SERIES REGRESSION USING LSTM -------------------
# ---------------------------------------------------------------------------

batchsize = 60

trainloader = ExampleDataset(x_train, y_train, batchsize)
valloader = ExampleDataset(x_validate, y_validate, 1)
testloader = ExampleDataset(x_test, y_test, 1)

# Set every random seed to 0. `random` is seeded as well because it drives the
# shuffle of the training set below.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Build the model only for the first window (n == 0); later windows keep
# training the same network.
if n == 0:
    seq = Sequence(num_hidden_4, hidden_size=100, nb_layers=3)

resume = ""

# if a path is given in resume, we resume from a checkpoint
if os.path.isfile(resume):
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch']
    seq.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(resume, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(resume))

# get the number of model parameters
print('Number of model parameters: {}'.format(
    sum([p.data.nelement() for p in seq.parameters()])))

# we use the mean squared error loss
criterion = nn.MSELoss()

optimizer = optim.Adam(params=seq.parameters(), lr=0.0005)

# NOTE(review): this overwrites the `start_epoch` read from a checkpoint above,
# so a resumed run restarts its epoch counter at 0. Confirm that is intended.
start_epoch = 0
epochs = 1  # 5000

global_loss_val = np.inf
# best validation backtest value seen so far
global_profit_val = -np.inf

# begin to train
for i in range(start_epoch, epochs):
    seq.train()
    loss_train = 0

    # shuffle ONLY the training set, keeping inputs and targets paired
    combined = list(zip(x_train, y_train))
    random.shuffle(combined)
    x_train, y_train = (list(part) for part in zip(*combined))

    # initialize trainloader with newly shuffled training data
    trainloader = ExampleDataset(x_train, y_train, batchsize)

    pred_train = []
    target_train = []
    for j in range(len(trainloader)):
        sample = trainloader[j]
        sample_x = sample["x"]

        # skip empty batches
        if len(sample_x) != 0:

            sample_x = np.stack(sample_x)
            # named `batch_input` so the builtin `input` is not shadowed
            batch_input = Variable(torch.FloatTensor(sample_x), requires_grad=False)
            # swap the first two axes before feeding the model
            batch_input = torch.transpose(batch_input, 0, 1)
            target = Variable(torch.FloatTensor(list(sample["y"])), requires_grad=False)

            optimizer.zero_grad()
            out = seq(batch_input)
            loss = criterion(out, target)

            loss_train += float(loss.data.numpy())
            pred_train.extend(out.data.numpy().flatten().tolist())
            target_train.extend(target.data.numpy().flatten().tolist())

            loss.backward()

            optimizer.step()

    # every 100 epochs: plot, evaluate on the validation set and checkpoint
    if i % 100 == 0:

        plt.plot(pred_train)
        plt.plot(target_train)
        plt.show()

        loss_val, pred_val, target_val = evaluate_lstm(dataloader=valloader, model=seq, criterion=criterion)

        plt.scatter(range(len(pred_val)), pred_val)
        plt.scatter(range(len(pred_val)), target_val)
        plt.show()

        index, real = backtest(pred_val, y_validate)

        print(index[-1])
        # save according to profitability, but only after epoch 200
        if index[-1] > global_profit_val and i > 200:
            print("CURRENT BEST")
            global_profit_val = index[-1]
            save_checkpoint({'epoch': i + 1, 'state_dict': seq.state_dict()}, is_best=True, filename='checkpoint_lstm.pth.tar')

        save_checkpoint({'epoch': i + 1, 'state_dict': seq.state_dict()}, is_best=False, filename='checkpoint_lstm.pth.tar')

        print("LOSS TRAIN: " + str(float(loss_train)))
        print("LOSS VAL: " + str(float(loss_val)))
        print(i)

# do the final test
# first load the best checkpoint on the val set

resume = "./runs/checkpoint/model_best.pth.tar"
#resume = "./runs/HF/checkpoint_lstm.pth.tar"

if os.path.isfile(resume):
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    start_epoch = checkpoint['epoch']
    seq.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (epoch {})"
          .format(resume, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(resume))

seq.eval()

loss_test, preds_test, target_test = evaluate_lstm(dataloader=testloader, model=seq, criterion=criterion)

print("LOSS TEST: " + str(float(loss_test)))

# `.as_matrix()` was removed in pandas 1.0; `.to_numpy()` is its replacement.
temp2 = y_test.to_numpy().flatten().tolist()
y_test_lst.extend(temp2)

plt.plot(preds_test)
plt.plot(y_test_lst)
plt.scatter(range(len(preds_test)), preds_test)
plt.scatter(range(len(y_test_lst)), y_test_lst)
plt.savefig("test_preds.pdf")

# ---------------------------------------------------------------------------
# ------------------ STEP 6: BACKTEST (ARTICLE WAY) -------------------------
# ---------------------------------------------------------------------------

index, real = backtest(preds_test, pd.DataFrame(y_test_lst))

plt.close()
plt.plot(index, label="strat")
plt.plot(real, label="bm")
plt.legend()
plt.savefig("performance_article_way.pdf")
plt.close()

In [None]:
## Rolling-window training loop
#
# For every window position n the loop:
#   1. scales the window and splits it into train / validation / test,
#   2. denoises the features with the wavelet transform,
#   3. encodes them with four stacked autoencoders,
#   4. trains the LSTM and evaluates it on the test part of the window.
# The autoencoders and the LSTM are created only for n == 0 and keep training
# across the following windows.

y_test_lst = []
preds = []
ct = 0

# TODO: create the batches beforehand

# Hyper-parameters do not change between windows, so they are set once here.
num_hidden_1 = 10
num_hidden_2 = 10
num_hidden_3 = 10
num_hidden_4 = 10
n_epoch = 100  # 20000
time_steps = 4
batchsize = 60

for n in range(num_iterations):
    # Copy the slice so that renaming its columns never touches `data_master`.
    data = data_master.iloc[n*step_size:num_datapoints+n*step_size, :].copy()
    data.columns = [col.strip() for col in data.columns.tolist()]
    ct += 1

    # The loading cell already removes the raw time columns and moves "Date"
    # into the index, so every column is a feature. Slicing with `iloc[:, 2:]`
    # here would drop "Close Price" and "Open Price" and make
    # get_scaled_features raise KeyError.
    feats = get_scaled_features(data)

    data_close = feats["Close Price"].copy()
    data_close_new = data_close

    # Split in train, validation and test set (by row position).
    y_test = data_close_new[-step_size:].to_numpy()
    y_validate = data_close_new[-2*step_size:-step_size].to_numpy()
    y_train = data_close_new[:-2*step_size].to_numpy()
    # `out_type=float` is passed explicitly rather than relying on the default.
    feats_train, feats_validate, feats_test = train_valid_test_split(feats, step_size, out_type=float)

    # ---------------------------------------------------------------------------
    # ----------------------- STEP 2.0: NORMALIZE DATA --------------------------
    # ---------------------------------------------------------------------------

    # REMOVED THE NORMALIZATION AND MANUALLY SCALED TO APPROPRIATE VALUES ABOVE
    # (StandardScaler / MinMaxScaler were tried before and are no longer used).

    # Rebuild the close series with a fresh 0..N-1 integer index.
    data_close = pd.Series(np.concatenate((y_train, y_validate, y_test)))

    feats_norm_train = feats_train.copy()
    feats_norm_validate = feats_validate.copy()
    feats_norm_test = feats_test.copy()

    # ---------------------------------------------------------------------------
    # ----------------------- STEP 2.1: DENOISE USING DWT -----------------------
    # ---------------------------------------------------------------------------

    for col in range(feats_norm_train.shape[1]):
        feats_norm_train[:, col] = waveletSmooth(feats_norm_train[:, col], level=1)[-len(feats_norm_train):]

    # for the validation we have to do the transform using training data + the current and past validation data
    # i.e. we CAN'T USE all the validation data because we would then look into the future
    temp = np.copy(feats_norm_train)
    feats_norm_validate_WT = np.copy(feats_norm_validate)
    for row in range(feats_norm_validate.shape[0]):
        # first concatenate train with the latest validation sample
        temp = np.append(temp, np.expand_dims(feats_norm_validate[row, :], axis=0), axis=0)
        for col in range(feats_norm_validate.shape[1]):
            # keep only the newest smoothed value
            feats_norm_validate_WT[row, col] = waveletSmooth(temp[:, col], level=1)[-1]

    # for the test we have to do the transform using training data + validation data + current and past test data
    # i.e. we CAN'T USE all the test data because we would then look into the future
    temp_train = np.copy(feats_norm_train)
    temp_val = np.copy(feats_norm_validate)
    temp = np.concatenate((temp_train, temp_val))
    feats_norm_test_WT = np.copy(feats_norm_test)
    for row in range(feats_norm_test.shape[0]):
        # first concatenate train + validation with the latest test sample
        temp = np.append(temp, np.expand_dims(feats_norm_test[row, :], axis=0), axis=0)
        for col in range(feats_norm_test.shape[1]):
            feats_norm_test_WT[row, col] = waveletSmooth(temp[:, col], level=1)[-1]

    # ---------------------------------------------------------------------------
    # ------------- STEP 3: ENCODE FEATURES USING STACKED AUTOENCODER -----------
    # ---------------------------------------------------------------------------

    # ---- train using training data

    # The n == 0 checks exist because we only want to initialize each network
    # once and then keep training it as we move through time.

    if n == 0:
        auto1 = Autoencoder(feats_norm_train.shape[1], num_hidden_1)
    auto1.fit(feats_norm_train, n_epoch=n_epoch)

    inputs = torch.autograd.Variable(torch.from_numpy(feats_norm_train.astype(np.float32)))

    # every following autoencoder is fit on the encoded output of the previous one
    if n == 0:
        auto2 = Autoencoder(num_hidden_1, num_hidden_2)
    auto1_out = auto1.encoder(inputs).data.numpy()
    auto2.fit(auto1_out, n_epoch=n_epoch)

    if n == 0:
        auto3 = Autoencoder(num_hidden_2, num_hidden_3)
    auto1_out = torch.autograd.Variable(torch.from_numpy(auto1_out.astype(np.float32)))
    auto2_out = auto2.encoder(auto1_out).data.numpy()
    auto3.fit(auto2_out, n_epoch=n_epoch)

    if n == 0:
        auto4 = Autoencoder(num_hidden_3, num_hidden_4)
    auto2_out = torch.autograd.Variable(torch.from_numpy(auto2_out.astype(np.float32)))
    auto3_out = auto3.encoder(auto2_out).data.numpy()
    auto4.fit(auto3_out, n_epoch=n_epoch)

    # Change to evaluation mode, in this mode the network behaves differently, e.g. dropout is switched off and so on
    auto1.eval()
    auto2.eval()
    auto3.eval()
    auto4.eval()

    X_train = feats_norm_train
    X_train = torch.autograd.Variable(torch.from_numpy(X_train.astype(np.float32)))
    train_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_train))))
    train_encoded = train_encoded.data.numpy()

    # ---- encode validation and test data using autoencoder trained only on training data
    X_validate = feats_norm_validate_WT
    X_validate = torch.autograd.Variable(torch.from_numpy(X_validate.astype(np.float32)))
    validate_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_validate))))
    validate_encoded = validate_encoded.data.numpy()

    X_test = feats_norm_test_WT
    X_test = torch.autograd.Variable(torch.from_numpy(X_test.astype(np.float32)))
    test_encoded = auto4.encoder(auto3.encoder(auto2.encoder(auto1.encoder(X_test))))
    test_encoded = test_encoded.data.numpy()

    # switch back to training mode
    auto1.train()
    auto2.train()
    auto3.train()
    auto4.train()

    # ---------------------------------------------------------------------------
    # -------------------- STEP 4: PREPARE TIME-SERIES --------------------------
    # ---------------------------------------------------------------------------

    # split the entire training time-series into pieces, depending on the number
    # of time steps for the LSTM

    args = (train_encoded, validate_encoded, test_encoded)

    x_concat = np.concatenate(args)

    # Validation/test inputs are prefixed with the last `time_steps` rows of the
    # split that precedes them (presumably so that their first sample has a full
    # look-back window; confirm against prepare_data_lstm).
    validate_encoded_extra = np.concatenate((train_encoded[-time_steps:], validate_encoded))
    test_encoded_extra = np.concatenate((validate_encoded[-time_steps:], test_encoded))

    # Target slices; the validation and test slices start one element before their split.
    y_train_input = data_close[:-len(validate_encoded)-len(test_encoded)]
    y_val_input = data_close[-len(test_encoded)-len(validate_encoded)-1:-len(test_encoded)]
    y_test_input = data_close[-len(test_encoded)-1:]

    x, y = prepare_data_lstm(train_encoded, y_train_input, time_steps, log_return=True, train=True)
    x_v, y_v = prepare_data_lstm(validate_encoded_extra, y_val_input, time_steps, log_return=False, train=False)
    x_te, y_te = prepare_data_lstm(test_encoded_extra, y_test_input, time_steps, log_return=False, train=False)

    x_test = x_te
    x_validate = x_v
    x_train = x

    y_test = y_te
    y_validate = y_v
    y_train = y

    # `.as_matrix()` was removed in pandas 1.0; `.to_numpy()` is its replacement.
    y_train = y_train.to_numpy()

    # ---------------------------------------------------------------------------
    # ------------- STEP 5: TIME-SERIES REGRESSION USING LSTM -------------------
    # ---------------------------------------------------------------------------

    trainloader = ExampleDataset(x_train, y_train, batchsize)
    valloader = ExampleDataset(x_validate, y_validate, 1)
    testloader = ExampleDataset(x_test, y_test, 1)

    # Set every random seed to 0. `random` is seeded as well because it drives
    # the shuffle of the training set below.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)

    # build the model (first window only)
    if n == 0:
        seq = Sequence(num_hidden_4, hidden_size=100, nb_layers=3)

    resume = ""

    # if a path is given in resume, we resume from a checkpoint
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        seq.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(resume))

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in seq.parameters()])))

    # we use the mean squared error loss
    criterion = nn.MSELoss()

    optimizer = optim.Adam(params=seq.parameters(), lr=0.0005)

    # NOTE(review): this overwrites the `start_epoch` read from a checkpoint
    # above, so a resumed run restarts its epoch counter at 0. Confirm that
    # is intended.
    start_epoch = 0
    epochs = 1  # 5000

    global_loss_val = np.inf
    # best validation backtest value seen so far in this window
    global_profit_val = -np.inf

    # begin to train
    for i in range(start_epoch, epochs):
        seq.train()
        loss_train = 0

        # shuffle ONLY the training set, keeping inputs and targets paired
        combined = list(zip(x_train, y_train))
        random.shuffle(combined)
        x_train, y_train = (list(part) for part in zip(*combined))

        # initialize trainloader with newly shuffled training data
        trainloader = ExampleDataset(x_train, y_train, batchsize)

        pred_train = []
        target_train = []
        for j in range(len(trainloader)):
            sample = trainloader[j]
            sample_x = sample["x"]

            # skip empty batches
            if len(sample_x) != 0:

                sample_x = np.stack(sample_x)
                # named `batch_input` so the builtin `input` is not shadowed
                batch_input = Variable(torch.FloatTensor(sample_x), requires_grad=False)
                # swap the first two axes before feeding the model
                batch_input = torch.transpose(batch_input, 0, 1)
                target = Variable(torch.FloatTensor(list(sample["y"])), requires_grad=False)

                optimizer.zero_grad()
                out = seq(batch_input)
                loss = criterion(out, target)

                loss_train += float(loss.data.numpy())
                pred_train.extend(out.data.numpy().flatten().tolist())
                target_train.extend(target.data.numpy().flatten().tolist())

                loss.backward()

                optimizer.step()

        # every 100 epochs: plot, evaluate on the validation set and checkpoint
        if i % 100 == 0:

            plt.plot(pred_train)
            plt.plot(target_train)
            plt.show()

            loss_val, pred_val, target_val = evaluate_lstm(dataloader=valloader, model=seq, criterion=criterion)

            plt.scatter(range(len(pred_val)), pred_val)
            plt.scatter(range(len(pred_val)), target_val)
            plt.show()

            index, real = backtest(pred_val, y_validate)

            print(index[-1])
            # save according to profitability, but only after epoch 200
            if index[-1] > global_profit_val and i > 200:
                print("CURRENT BEST")
                global_profit_val = index[-1]
                save_checkpoint({'epoch': i + 1, 'state_dict': seq.state_dict()}, is_best=True, filename='checkpoint_lstm.pth.tar')

            save_checkpoint({'epoch': i + 1, 'state_dict': seq.state_dict()}, is_best=False, filename='checkpoint_lstm.pth.tar')

            print("LOSS TRAIN: " + str(float(loss_train)))
            print("LOSS VAL: " + str(float(loss_val)))
            print(i)

    # do the final test
    # first load the best checkpoint on the val set

    resume = "./runs/checkpoint/model_best.pth.tar"
    #resume = "./runs/HF/checkpoint_lstm.pth.tar"

    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        seq.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(resume))

    seq.eval()

    loss_test, preds_test, target_test = evaluate_lstm(dataloader=testloader, model=seq, criterion=criterion)

    print("LOSS TEST: " + str(float(loss_test)))

    # `.as_matrix()` was removed in pandas 1.0; `.to_numpy()` is its replacement.
    temp2 = y_test.to_numpy().flatten().tolist()
    y_test_lst.extend(temp2)

    # NOTE(review): `y_test_lst` accumulates the targets of every window while
    # `preds_test` holds only the current window, so from the second window on
    # the two series below have different lengths. Confirm that is intended.
    plt.plot(preds_test)
    plt.plot(y_test_lst)
    plt.scatter(range(len(preds_test)), preds_test)
    plt.scatter(range(len(y_test_lst)), y_test_lst)
    plt.savefig("test_preds.pdf")

    # ---------------------------------------------------------------------------
    # ------------------ STEP 6: BACKTEST (ARTICLE WAY) -------------------------
    # ---------------------------------------------------------------------------

    index, real = backtest(preds_test, pd.DataFrame(y_test_lst))

    plt.close()
    plt.plot(index, label="strat")
    plt.plot(real, label="bm")
    plt.legend()
    plt.savefig("performance_article_way.pdf")
    plt.close()