In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model import Iwata_simple
import numpy as np
import time

from utils import StandardScaler
import pandas as pd
import psycopg2 as pg

In [13]:
bidirectional = True
seq_len=13
enc_in = 5
hidden_size = 64
c_out = 1
s_n_layers = 2
batch_size = 32 # = support size
direcs = 2 if bidirectional else 1
model = Iwata_simple(enc_in, hidden_size, c_out, s_n_layers)
support_set = torch.rand(batch_size, seq_len, enc_in)
query_set = torch.rand(1, seq_len, enc_in)
output = model(support_set, query_set)
device = torch.device('cuda:0')

In [7]:
print(model)

Iwata_simple(
  (support_encoder): LSTM(5, 32, num_layers=2, bidirectional=True)
  (query_encoder): LSTM(5, 64)
  (attention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
  )
  (g): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)


In [18]:
# train model
S = torch.rand(batch_size, seq_len, enc_in)
Q = torch.rand(1, seq_len, enc_in)
y = torch.rand(c_out)
epochs = 10

In [49]:
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_f = nn.MSELoss()

In [50]:
batch_s, batch_Q, y = S, Q, y
# batch_s = batch_s.to(device)
# batch_Q = batch_Q.to(device)
# y = y.to(device)

for epoch in range(10):
    train_loss = []
    epoch_time = time.time()
    
    optimizer.zero_grad()
    output = model(batch_s, batch_Q)
    loss = loss_f(output, y)
    train_loss.append(loss.item())
    loss.backward()
    optimizer.step()

    print('Epoch: {}/{} \t Time: {:.2f}s \t Loss: {:.4f}'.format(
        epoch+1, epochs, time.time() - epoch_time, loss.item()))

train_loss = np.average(train_loss)
print('Epoch {}/{} \t Time: {:.2f}s \t Loss: {:.4f}'.format(epoch+1, epochs, time.time() - epoch_time, train_loss))


Epoch: 1/10 	 Time: 0.02s 	 Loss: 0.0000
Epoch: 2/10 	 Time: 0.03s 	 Loss: 0.2903
Epoch: 3/10 	 Time: 0.02s 	 Loss: 0.0175
Epoch: 4/10 	 Time: 0.02s 	 Loss: 0.0456
Epoch: 5/10 	 Time: 0.02s 	 Loss: 0.1187
Epoch: 6/10 	 Time: 0.02s 	 Loss: 0.0704
Epoch: 7/10 	 Time: 0.02s 	 Loss: 0.0190
Epoch: 8/10 	 Time: 0.02s 	 Loss: 0.0009
Epoch: 9/10 	 Time: 0.02s 	 Loss: 0.0075
Epoch: 10/10 	 Time: 0.02s 	 Loss: 0.0042
Epoch 10/10 	 Time: 0.02s 	 Loss: 0.0042


In [76]:
class Iwata_Dataset_DB_Stock(torch.utils.data.Dataset):
    def __init__(self, conn, S_N, Q_N, sup_stck_ids=None, q_stck_ids=None, flag='train', size=None, 
                 features='M', data_path='ETTh1.csv', 
                 target='close', scale=True, inverse=False,):
        # size [seq_len, label_len, pred_len]
        # info
        assert len(size)==3
        # init
        assert flag in ['train', 'test']
        type_map = {'train':0, 'test':1}
        self.set_type = type_map[flag]
        
        self.agg = {'open': 'first',
        'high': 'max', 
        'low': 'min', 
        'close': 'last',
        'volume': 'sum'}
        self.freq = '1T'
        self.conn = conn
        self.S_N = S_N
        self.Q_N = Q_N
        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2] 
        assert self.pred_len == 1 # for now 

        self.__read_data__()

    def __stock_id_to_df__(self, stock_id, pg_conn, agg, sample_freq):
        query = f'SELECT time AS date, open, high, low, volume, close \
                            FROM stock WHERE identifier = {stock_id} \
                            ORDER BY time ASC;'
        df = pd.read_sql(query, pg_conn)
        df.set_index(pd.DatetimeIndex(df['date']), inplace=True)
        # resample 
        df = df.resample(sample_freq).agg(agg).dropna()
        df.reset_index(inplace=True)
        return df

    def __read_data__(self):
        """Reads Q_N df from the db as the query set, and S_N df from the db as the support set.
           - Only works for random selection of stocks currently."""
        
        # sample stock ids 
        stock_meta = pd.read_sql('SELECT dataset.identifier AS id, dataset.description AS name \
                                 FROM dataset;', self.conn)
        self.S_stock_meta = stock_meta.sample(n=self.S_N)
        self.Q_stock_meta = stock_meta.sample(n=self.Q_N)
        
        # read data
        '''
        df.columns: ['date', ...(other features), target feature]
        '''
        S_dfs = []
        Q_dfs = []
        for s_id in self.S_stock_meta.id:
            S_dfs.append(self.__stock_id_to_df__(s_id, self.conn, self.agg, self.freq))
        
        for q_id in self.Q_stock_meta.id:
            Q_dfs.append(self.__stock_id_to_df__(q_id, self.conn, self.agg, self.freq))

        # only tuned for Q_N = 1
        cols = list(Q_dfs[0].columns); cols.remove(self.target); cols.remove('date')
        df_raw = Q_dfs[0][['date']+cols+[self.target]]

        num_train = int(len(df_raw)*0.7)
        num_test = len(df_raw) - num_train
        border1s = [0, num_train-self.seq_len]
        border2s = [num_train, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]
        
        if self.features=='M' or self.features=='MS':
            cols_data = df_raw.columns[1:]
        elif self.features=='S':
            cols_data = [self.target]

        self.S_dfs_x = []
        self.Q_dfs_x = []
        if self.scale:
            self.s_scalers = [StandardScaler() for i in range(self.S_N)]
            self.q_scalers = [StandardScaler() for i in range(self.Q_N)]
            for i in range(self.S_N):
                train_data = S_dfs[i][cols_data][border1s[0]:border2s[0]]
                self.s_scalers[i].fit(train_data)
                self.S_dfs_x.append(self.s_scalers[i].transform(S_dfs[i][cols_data][border1:border2]))
            for i in range(self.Q_N):
                train_data = Q_dfs[i][cols_data][border1s[0]:border2s[0]]
                self.q_scalers[i].fit(train_data)
                self.Q_dfs_x.append(self.q_scalers[i].transform(Q_dfs[i][cols_data][border1:border2]))
        else:
            for i in range(self.S_N):
                self.S_dfs_x.append(S_dfs[i][cols_data][border1:border2])
            for i in range(self.Q_N):
                self.Q_dfs_x.append(Q_dfs[i][cols_data][border1:border2])
    
    def __getitem__(self, index):
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end
        r_end = r_begin + self.pred_len

        # check date for all support set is < query index data
        # query_date = self.Q_dfs[0].iloc[index]['date']
        # for i in range(self.S_N):
        #     s_date = self.S_dfs[i].iloc[0]['date']
        #     # check support set date - 

        q_seq_x = self.Q_dfs_x[0][s_begin:s_end][:-1] # remove target
        q_seq_y = seq_y = self.Q_dfs_x[0][r_begin:r_end][-1]

        # randomly sample support sequence for each support timeseries 
        # (only works for Q_N = 1)
        s_seq_x = []
        for i in range(self.S_N):
            end = len(self.S_dfs_x[i]) - self.label_len - self.pred_len
            s_begin = np.random.randint(0, end)
            s_end = s_begin + self.seq_len
            r_begin = s_end
            r_end = r_begin + self.pred_len
            s_seq_x.append(self.S_dfs_x[i][s_begin:s_end][:-1])

        s_seq_x = np.array(s_seq_x)
            
        return s_seq_x, q_seq_x, q_seq_y
    
    def __len__(self):
        return len(self.data_x) - self.seq_len- self.pred_len + 1

    def inverse_transform(self, data):
        return self.scaler.inverse_transform(data)

In [77]:
conn = pg.connect(database='stonksdb', user='postgres', password='admin')
iwata_stck_ds = Iwata_Dataset_DB_Stock(conn, 32, 1, size=[seq_len, seq_len, 1], flag='train', features='M', scale=True)



In [78]:
iwata_stck_ds.__getitem__(0)

KeyError: -1

In [84]:
iwata_stck_ds.Q_dfs_x[0].values.shape

(3028, 5)

In [89]:
iwata_stck_ds.S_dfs[2]

AttributeError: 'Iwata_Dataset_DB_Stock' object has no attribute 'S_dfs'