In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [69]:
class Encoder(nn.Module):
    def __init__(self, time_size, hidden_size, feature_size):
        super(Encoder, self).__init__()
        self.time_size = time_size
        self.hidden_size = hidden_size
        self.feature_size = feature_size

        self.lstm = nn.LSTM(input_size=feature_size, 
                             hidden_size=hidden_size,
                             batch_first=True)

    def forward(self, xs):
        '''
        xd : batch, timestep, features
        '''
        outputs, (hidden, cell) = self.lstm(xs)

        return hidden, cell

class Decoder(nn.Module):
    def __init__(self, output_size, hidden_size, feature_size):
        super(Decoder, self).__init__()

        self.hidden_size = hidden_size
        self.feature_size = feature_size
        self.output_size = output_size

        self.lstm = nn.LSTM(input_size=feature_size,
                            hidden_size=hidden_size,
                            batch_first=True)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, xs, hidden, cell):
        '''
        xs : batch, time, feture
        '''
        output, (hidden, cell) = self.lstm(xs, (hidden, cell))
        output = self.fc1(output.squeeze(0))
        output = self.fc2(output)

        return output, hidden, cell

class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.device = device


    def forward(self, xs):
        hidden, cell = self.encoder(xs)
        output, hidden, cell = self.decoder(torch.zeros(xs.size(0), 1, 9), hidden, cell)

        return output

def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)

In [48]:
import pandas as pd
import numpy
import time
from datetime import datetime

np.random.seed(42)
numpy.random.seed(42)

data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00247/'
data_name = 'data_akbilgic.xlsx'
df = pd.read_excel(data_url+data_name, header=1)
df.drop(columns=df.columns[[0]], axis=1, inplace=True)
df.head()

Unnamed: 0,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
0,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524
1,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773
2,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
3,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
4,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802


In [49]:
# series data to img function
def series_to_img(dataset, time_step=1):
    num = dataset.shape[1]      # features num
    df = pd.DataFrame(dataset)
    cols, names = list(), list()
    # sequence t-n to t-1
    for i in range(time_step, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(num)]

    for i in range(0, 1):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(num)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(num)]

    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg

from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler

dataset = df.values
dataset = dataset.astype('float')

n_inputs = 24
n_features = 9
del_idx = n_inputs * n_features + 1
del_cols = [i for i in range(del_idx, del_idx+n_features-1)]
new_df = series_to_img(dataset, n_inputs)
new_df.drop(new_df.columns[del_cols], axis=1, inplace=True)
new_df.head()

Unnamed: 0,var1(t-24),var2(t-24),var3(t-24),var4(t-24),var5(t-24),var6(t-24),var7(t-24),var8(t-24),var9(t-24),var1(t-23),var2(t-23),var3(t-23),var4(t-23),var5(t-23),var6(t-23),var7(t-23),var8(t-23),var9(t-23),var1(t-22),var2(t-22),var3(t-22),var4(t-22),var5(t-22),var6(t-22),var7(t-22),var8(t-22),var9(t-22),var1(t-21),var2(t-21),var3(t-21),var4(t-21),var5(t-21),var6(t-21),var7(t-21),var8(t-21),var9(t-21),var1(t-20),var2(t-20),var3(t-20),var4(t-20),...,var7(t-5),var8(t-5),var9(t-5),var1(t-4),var2(t-4),var3(t-4),var4(t-4),var5(t-4),var6(t-4),var7(t-4),var8(t-4),var9(t-4),var1(t-3),var2(t-3),var3(t-3),var4(t-3),var5(t-3),var6(t-3),var7(t-3),var8(t-3),var9(t-3),var1(t-2),var2(t-2),var3(t-2),var4(t-2),var5(t-2),var6(t-2),var7(t-2),var8(t-2),var9(t-2),var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var9(t-1),var1(t)
24,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,0.00986,0.009658,-0.021533,-0.019873,...,-0.008538,-0.007201,0.002243,-0.025919,-0.035607,-0.000533,-0.015637,-0.017454,-0.015134,-0.016289,-0.019739,-0.019091,0.015279,0.022403,0.01571,0.02404,0.021039,-0.006175,0.027574,0.017862,0.012719,0.018578,0.023231,-0.007518,0.026577,0.015275,0.026908,0.009565,0.01877,0.015166,-0.014133,-0.014571,0.016233,0.003932,7.1e-05,-0.011169,0.024128,-0.004139,0.002073,0.036607
25,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,-0.029191,-0.042361,-0.022823,-0.013526,...,-0.016289,-0.019739,-0.019091,0.015279,0.022403,0.01571,0.02404,0.021039,-0.006175,0.027574,0.017862,0.012719,0.018578,0.023231,-0.007518,0.026577,0.015275,0.026908,0.009565,0.01877,0.015166,-0.014133,-0.014571,0.016233,0.003932,7.1e-05,-0.011169,0.024128,-0.004139,0.002073,0.036607,0.042759,0.026541,0.029306,0.014788,0.015846,0.039282,0.019127,0.032338,0.011353
26,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,-0.029191,-0.042361,-0.022823,-0.013526,-0.005026,-0.049039,-0.053849,-0.012451,-0.02263,0.015445,-0.000272,0.001757,-0.017674,...,0.027574,0.017862,0.012719,0.018578,0.023231,-0.007518,0.026577,0.015275,0.026908,0.009565,0.01877,0.015166,-0.014133,-0.014571,0.016233,0.003932,7.1e-05,-0.011169,0.024128,-0.004139,0.002073,0.036607,0.042759,0.026541,0.029306,0.014788,0.015846,0.039282,0.019127,0.032338,0.011353,0.021468,0.001484,0.004766,0.003651,-0.013411,-0.015462,0.005627,0.007895,-0.040542
27,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,-0.029191,-0.042361,-0.022823,-0.013526,-0.005026,-0.049039,-0.053849,-0.012451,-0.02263,0.015445,-0.000272,0.001757,-0.017674,-0.006141,0.0,0.003572,-0.01222,-0.004827,-0.041168,-0.035552,-0.034032,-0.047383,...,0.009565,0.01877,0.015166,-0.014133,-0.014571,0.016233,0.003932,7.1e-05,-0.011169,0.024128,-0.004139,0.002073,0.036607,0.042759,0.026541,0.029306,0.014788,0.015846,0.039282,0.019127,0.032338,0.011353,0.021468,0.001484,0.004766,0.003651,-0.013411,-0.015462,0.005627,0.007895,-0.040542,-0.043907,-0.050369,-0.03517,-0.022182,-0.002902,-0.02144,-0.024388,-0.002139,-0.022106
28,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802,-0.029191,-0.042361,-0.022823,-0.013526,-0.005026,-0.049039,-0.053849,-0.012451,-0.02263,0.015445,-0.000272,0.001757,-0.017674,-0.006141,0.0,0.003572,-0.01222,-0.004827,-0.041168,-0.035552,-0.034032,-0.047383,-0.050945,0.002912,-0.040302,-0.04522,-0.008677,0.000662,-0.017268,0.001328,-0.019551,...,0.024128,-0.004139,0.002073,0.036607,0.042759,0.026541,0.029306,0.014788,0.015846,0.039282,0.019127,0.032338,0.011353,0.021468,0.001484,0.004766,0.003651,-0.013411,-0.015462,0.005627,0.007895,-0.040542,-0.043907,-0.050369,-0.03517,-0.022182,-0.002902,-0.02144,-0.024388,-0.002139,-0.022106,-0.033893,0.007923,0.005434,0.005019,-0.030745,-0.008799,0.001097,-0.007926,-0.014888


In [71]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    return device


enc = Encoder(24, 64, n_features)
dec = Decoder(1, 64, n_features)

device = get_device()
model = Seq2Seq(enc, dec, device).to(device)

model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (lstm): LSTM(9, 64, batch_first=True)
  )
  (decoder): Decoder(
    (lstm): LSTM(9, 64, batch_first=True)
    (fc1): Linear(in_features=64, out_features=64, bias=True)
    (fc2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [72]:
def np2tensor(data):
    device = get_device()
    return torch.tensor(data).float().to(device)

length = len(new_df)
n_batch = 32

train_X, train_y = new_df.iloc[:int(length*0.8), :-1].values, new_df.iloc[:int(length*0.8), -1].values
test_X, test_y = new_df.iloc[int(length*0.8):, :-1].values, new_df.iloc[int(length*0.8):, -1].values

scaler = MinMaxScaler()
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)

train_X = train_X.reshape(-1, 24, 9)
test_X = test_X.reshape(-1, 24, 9)
train_y = train_y.reshape(-1, 1)
test_y = test_y.reshape(-1, 1)

train_X, train_y = np2tensor(train_X), np2tensor(train_y)
test_X, test_y = np2tensor(test_X), np2tensor(test_y)



criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

data_size = train_X.size(0)
max_iters = data_size//n_batch

for epoch in range(10):
    
    epoch_loss = 0
    start_time = time.time()
    for it in range(max_iters):
        batch_x = train_X[it*n_batch:(it+1)*n_batch]
        batch_y = train_y[it*n_batch:(it+1)*n_batch]

        optimizer.zero_grad()
        predict = model(batch_x)
        loss = criterion(predict, batch_y)
        loss.backward()
        optimizer.step()
    
        epoch_loss+= loss.item()
    avg_loss = epoch_loss/max_iters
    duration = time.time()-start_time
    print(f'epoch:{epoch}/{30}, duration:{duration:.2f}[s], loss:{avg_loss:.5f}')

  return F.mse_loss(input, target, reduction=self.reduction)


epoch:0/30, duration:0.12[s], loss:0.00198
epoch:1/30, duration:0.12[s], loss:0.00043
epoch:2/30, duration:0.13[s], loss:0.00035
epoch:3/30, duration:0.11[s], loss:0.00034
epoch:4/30, duration:0.12[s], loss:0.00032
epoch:5/30, duration:0.11[s], loss:0.00029
epoch:6/30, duration:0.11[s], loss:0.00028
epoch:7/30, duration:0.12[s], loss:0.00029
epoch:8/30, duration:0.11[s], loss:0.00030
epoch:9/30, duration:0.11[s], loss:0.00030


In [73]:
test_y

tensor([[-0.0045],
        [ 0.0008],
        [ 0.0060],
        [ 0.0148],
        [-0.0006],
        [ 0.0034],
        [-0.0136],
        [ 0.0082],
        [ 0.0132],
        [ 0.0090],
        [-0.0018],
        [ 0.0068],
        [ 0.0262],
        [ 0.0097],
        [ 0.0070],
        [-0.0135],
        [ 0.0126],
        [ 0.0051],
        [-0.0088],
        [-0.0068],
        [ 0.0155],
        [ 0.0079],
        [-0.0012],
        [-0.0025],
        [-0.0310],
        [ 0.0071],
        [-0.0080],
        [ 0.0012],
        [ 0.0330],
        [-0.0018],
        [ 0.0023],
        [ 0.0085],
        [-0.0138],
        [-0.0068],
        [-0.0076],
        [ 0.0064],
        [-0.0300],
        [-0.0237],
        [ 0.0134],
        [-0.0012],
        [-0.0150],
        [-0.0319],
        [ 0.0198],
        [ 0.0122],
        [ 0.0118],
        [-0.0012],
        [ 0.0127],
        [ 0.0024],
        [-0.0025],
        [-0.0268],
        [-0.0177],
        [ 0.0247],
        [ 0.

In [74]:
model(test_X)

tensor([[[-0.0030]],

        [[-0.0030]],

        [[-0.0028]],

        [[-0.0029]],

        [[-0.0029]],

        [[-0.0030]],

        [[-0.0030]],

        [[-0.0029]],

        [[-0.0030]],

        [[-0.0027]],

        [[-0.0029]],

        [[-0.0031]],

        [[-0.0029]],

        [[-0.0028]],

        [[-0.0029]],

        [[-0.0028]],

        [[-0.0030]],

        [[-0.0030]],

        [[-0.0029]],

        [[-0.0031]],

        [[-0.0029]],

        [[-0.0028]],

        [[-0.0029]],

        [[-0.0029]],

        [[-0.0030]],

        [[-0.0032]],

        [[-0.0028]],

        [[-0.0028]],

        [[-0.0030]],

        [[-0.0026]],

        [[-0.0030]],

        [[-0.0031]],

        [[-0.0030]],

        [[-0.0031]],

        [[-0.0031]],

        [[-0.0031]],

        [[-0.0029]],

        [[-0.0031]],

        [[-0.0033]],

        [[-0.0027]],

        [[-0.0029]],

        [[-0.0031]],

        [[-0.0032]],

        [[-0.0029]],

        [[-0.0025]],

        [[