# TESTING FILE 
# [ **IGNORE** ]

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle as pkl
import numpy as np

In [111]:
class SentientAnalizer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, vocab_size, embedding_dim):
        super(SentientAnalizer, self).__init__()
        """
        [Vocab_size -> Embedding_size]
        input_size: [Seq, Batch, Inputs]
        """
        # input_size == 1
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim

        self.Vocab_Embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.LSTM = nn.LSTM(self.embedding_dim, self.hidden_size,num_layers=2, batch_first=True)
        self.Linear1 = nn.Linear(self.hidden_size, self.hidden_size)
        self.Linear2 = nn.Linear(self.hidden_size, self.output_size)


    def forward(self,x, hidden):
        emb = self.Vocab_Embedding(x).view(x.shape[0],x.shape[1],-1)
        #print (emb.shape)
        #print (hidden[0].shape)
        out, hidden = self.LSTM(emb, hidden)
        out = self.Linear1(out)
        out = self.Linear2(out)
        return out, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(2, batch_size, self.hidden_size),
                torch.zeros(2, batch_size, self.hidden_size))

In [112]:
class StockPredictor(nn.Module):

    def __init__(self, input_size_stock,input_size_sentiment, hidden_size, num_layers, max_length):
        super(StockPredictor, self).__init__()
        """
        [Vocab_size -> Embedding_size]
        input_size: [Seq, Batch, Inputs]
        """
        # input_size == 1
        self.input_size_stock = input_size_stock
        self.input_size_sentiment = input_size_sentiment
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.attn_hidd = nn.Linear(self.hidden_size, max_length)
        self.attn_out = nn.Linear(self.hidden_size*2, self.hidden_size)
        self.CombinedLayerSS = nn.Linear(self.input_size_stock+self.input_size_sentiment, self.hidden_size)
        self.LSTM = nn.LSTM(self.input_size_sentiment+self.input_size_stock, self.hidden_size,num_layers=num_layers, batch_first=True)
        self.Linear1 = nn.Linear(self.hidden_size, self.hidden_size)
        self.Linear2 = nn.Linear(self.hidden_size, self.input_size_stock)


    def forward(self,x, hidden, stock_news):
        attn_wights = F.softmax(self.attn_hidd(hidden[0][0]), dim=1)
        context = torch.bmm(attn_wights.unsqueeze(0), stock_news.unsqueeze(0))
        x_SS = torch.cat((x,context),dim=2)
        out, hidden = self.LSTM(x_SS, hidden)
        out = self.Linear1(out)
        out = self.Linear2(out)
        return out, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size),
                torch.zeros(self.num_layers, batch_size, self.hidden_size))

In [113]:
Senti = SentientAnalizer(1, 128, 256, 50000, 128)
Stock = StockPredictor(5,256,256,5,10)
# input of sentient
batch_size_senti = 10
# input of Sentiment
# InputSize [Batch, Seq, Input] = [Cantidad de noticias, Cantidad de palabras, Cantidad de palabras]
#                                                                               <Recuerda que es 1>
seq_len = 5
noticias = torch.zeros(batch_size_senti,seq_len,1, dtype=torch.long)
hidde_senti = Senti.init_hidden(batch_size_senti)
out_senti, hidden_senti = Senti(noticias, hidde_senti)
print (out_senti.shape) # [Batch, Seq, Output] -> [10, 5, 256] 
# Pero solo queremos las ultimas salidas, osea [:,-1,:]
out_senti = out_senti[:,-1,:]
print (out_senti.shape) # [Batch, Output] -> [10, 256]
# input of Stock
batch_size_stock = 1
# InputSize [Batch, Seq, Input] = [Cantidad de noticias, Cantidad de palabras, Cantidad de variables]
contexto_len = 1
var_stock = 5
x = torch.zeros(batch_size_stock,contexto_len,var_stock, dtype=torch.float)
hidde_stock = Stock.init_hidden(batch_size_stock)
#print (x.shape, hidde_stock[0].shape, out_senti.shape)
out_stock, hidden_stock = Stock(x, hidde_stock, out_senti)
print (out_stock.shape) # [Batch, Seq, Output] -> [1, 1, 5]

torch.Size([10, 5, 256])
torch.Size([10, 256])
torch.Size([1, 1, 5])


In [114]:
class Dataset (torch.utils.data.Dataset):
    def __init__(self, data_stock, notice, target):
        self.data = data_stock
        self.notice = notice
        self.target = target

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.notice[idx], self.target[idx]

In [115]:
# minidataset
data_stock = torch.zeros(10,1,5, dtype=torch.float)
notice = torch.zeros(10,10,5,1, dtype=torch.long)
target = data_stock[:-1,:,:]
data_stock = data_stock[1:,:,:]
print (data_stock.shape, notice.shape, target.shape)
dataset = Dataset(data_stock, notice, target)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

torch.Size([9, 1, 5]) torch.Size([10, 10, 5, 1]) torch.Size([9, 1, 5])


In [18]:
test_list_1 = torch.zeros(10,1,5, dtype=torch.float)
# fill list with incremental numbers
for i in range(10):
    test_list_1[i,0,0] = i

test_list_2 = test_list_1[:-1,:,:]
test_list_1 = test_list_1[1:,:,:]

print(test_list_1)
print('change')
print(test_list_2)



tensor([[[1., 0., 0., 0., 0.]],

        [[2., 0., 0., 0., 0.]],

        [[3., 0., 0., 0., 0.]],

        [[4., 0., 0., 0., 0.]],

        [[5., 0., 0., 0., 0.]],

        [[6., 0., 0., 0., 0.]],

        [[7., 0., 0., 0., 0.]],

        [[8., 0., 0., 0., 0.]],

        [[9., 0., 0., 0., 0.]]])
change
tensor([[[0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0.]],

        [[2., 0., 0., 0., 0.]],

        [[3., 0., 0., 0., 0.]],

        [[4., 0., 0., 0., 0.]],

        [[5., 0., 0., 0., 0.]],

        [[6., 0., 0., 0., 0.]],

        [[7., 0., 0., 0., 0.]],

        [[8., 0., 0., 0., 0.]]])


In [116]:
# Optimizer
optimizer = torch.optim.Adam(list(Senti.parameters())+list(Stock.parameters()), lr=0.001)
criterion = nn.MSELoss()

In [117]:
# test the dataloader
for i, (data_stock, notice, target) in enumerate(train_loader):
    print (data_stock.shape, notice.shape, target.shape)
    # Test the model
    notice = notice.squeeze(0)
    hidde_senti = Senti.init_hidden(notice.shape[0])
    out_senti, hidden_senti = Senti(notice, hidde_senti)
    out_senti = out_senti[:,-1,:]
    hidde_stock = Stock.init_hidden(data_stock.shape[0])
    out_stock, hidden_stock = Stock(data_stock, hidde_stock, out_senti)
    print (out_stock.shape)
    # Test the optimizer
    optimizer.zero_grad()   
    loss = criterion(out_stock, target)
    print (loss.item())
    loss.backward()
    optimizer.step()

torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.002249381970614195
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.000624887878075242
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
2.7504755053087138e-05
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.00021058274433016777
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.0005327091785147786
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.000563718203920871
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.0003882246383000165
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
0.00017890959861688316
torch.Size([1, 1, 5]) torch.Size([1, 10, 5, 1]) torch.Size([1, 1, 5])
torch.Size([1, 1, 

In [118]:
# predict the next 5 days
for i in range(5):
    out_stock, hidden_stock = Stock(out_stock, hidde_stock, out_senti)
    print (out_stock.shape)

torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
torch.Size([1, 1, 5])
torch.Size([1, 1, 5])


In [22]:
# import csv as numpy
# import numpy as np
import pandas as pd
import pickle as pkl
# news_1 = np.genfromtxt('../data/Preprocess/final_data/Crypto_Currency_News.csv', delimiter=',')
news_1 = pd.read_csv('../data/Preprocess/final_data/Crypto_Currency_News.csv')
news_2 = pd.read_csv('../data/Preprocess/final_data/CryptoCurrencies.csv')
news_3 = pd.read_csv('../data/Preprocess/final_data/CryptoCurrency.csv')
news_4 = pd.read_csv('../data/Preprocess/final_data/Cryptomarkets.csv')
news_5 = pd.read_csv('../data/Preprocess/final_data/eth.csv')
news_6 = pd.read_csv('../data/Preprocess/final_data/ethfinance.csv')
news_7 = pd.read_csv('../data/Preprocess/final_data/ethtrader.csv')
# stocks_norm = np.genfromtxt('../data/Preprocess/final_data/finance_norm.csv', delimiter=',')
stocks_norm = pd.read_csv('../data/Preprocess/final_data/finance_norm.csv')


news_dfs = [news_1, news_2, news_3, news_4, news_5, news_6, news_7]

# import scaler
with open('../data/Preprocess/scaler.pkl', 'rb') as f:
    scaler = pkl.load(f)

In [23]:
# change type
import ast
for df in news_dfs:
    for j in range(1,11):        
        df['tok_text_'+str(j)] = np.array(df['tok_text_'+str(j)].apply(lambda x: ast.literal_eval (x)))

In [24]:
# convert all dataframes to numpy
# news_1_np = news_1.to_numpy()
import numpy as np
news_np = []
for df in news_dfs:
    news_np.append(df.to_numpy())

stocks_norm_np = stocks_norm.to_numpy()

# delete 1st column
# news_1_np = news_1_np[:,1:]
for i in range(len(news_np)):
    news_np[i] = np.delete(news_np[i], 0, 1)
stocks_norm_np = stocks_norm_np[:,1:]

# reshape from (n,10) lists to (n,10,15) numbers
for k in range(len(news_np)):
    news_np[k] = np.array([np.array([np.array(news_np[k][i][j]) for j in range(10)]) for i in range(news_np[k].shape[0])])

# print
for i in range(len(news_np)):
    print(news_np[i].shape)
print (stocks_norm_np.shape)
#first a row
print (news_np[0][0])
print(stocks_norm_np[0])

(1096, 10, 15)
(1096, 10, 15)
(1096, 10, 15)
(1096, 10, 15)
(1096, 10, 15)
(1096, 10, 15)
(1096, 10, 15)
(1096, 9)
[[    1    60    61    62    63    64    65    66    67    60    63    62
     68    69    70]
 [    1    93    94    60    95    96    60    84    97    98    99   100
     82    83   101]
 [    1   110   111   112   113   114    84    19    56   115    24   116
  12421   117   118]
 [    1   120   121    43   122   123   124   125    67   126     2     0
      0     0     0]
 [    1     5    10   127   129   119    67   130   131   132   133   134
    135     2     0]
 [    1   136   137   138   139   140   141     2     0     0     0     0
      0     0     0]
 [    1   142   143   144   145     2     0     0     0     0     0     0
      0     0     0]
 [    1   147   148   149    67   126   150     2   151     2     0     0
      0     0     0]
 [    1    29   152   153   154     2     0     0     0     0     0     0
      0     0     0]
 [    1    13   155    49    8

In [25]:
# shape dataset to pytorch dataset class    

class Dataset (torch.utils.data.Dataset):
    def __init__(self, data_stock, notice, target):
        self.data = data_stock.unsqueeze(1)
        self.notice = notice.unsqueeze(3)
        self.target = target.unsqueeze(1)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.notice[idx], self.target[idx]

# using news_1 and stocks_norm
data_stock = torch.Tensor(stocks_norm_np[:,:]).float()
data_news = torch.Tensor(news_np[0]).long()
target = data_stock[1:,:]
data_stock = data_stock[:-1,:]
print (data_stock.shape, data_news.shape, target.shape)

# create dataset
dataset = Dataset(data_stock, data_news, target)

torch.Size([1095, 9]) torch.Size([1096, 10, 15]) torch.Size([1095, 9])


In [27]:
print(data_stock)
print(target)

tensor([[0.0142, 0.0323, 0.0189,  ..., 0.0253, 0.7960, 0.0347],
        [0.0149, 0.0155, 0.0058,  ..., 0.0127, 0.7904, 0.0329],
        [0.0137, 0.0160, 0.0066,  ..., 0.0118, 0.7981, 0.0232],
        ...,
        [0.2594, 0.1608, 0.1806,  ..., 0.0757, 0.0952, 0.0987],
        [0.2609, 0.1749, 0.1843,  ..., 0.0995, 0.0954, 0.0856],
        [0.2606, 0.1095, 0.1615,  ..., 0.0769, 0.1145, 0.1167]])
tensor([[0.0149, 0.0155, 0.0058,  ..., 0.0127, 0.7904, 0.0329],
        [0.0137, 0.0160, 0.0066,  ..., 0.0118, 0.7981, 0.0232],
        [0.0141, 0.0144, 0.0045,  ..., 0.0092, 0.8153, 0.0261],
        ...,
        [0.2609, 0.1749, 0.1843,  ..., 0.0995, 0.0954, 0.0856],
        [0.2606, 0.1095, 0.1615,  ..., 0.0769, 0.1145, 0.1167],
        [0.2589, 0.1152, 0.1572,  ..., 0.0749, 0.1119, 0.1429]])


In [26]:
# print last in target
print (dataset.__getitem__(len(dataset)-1)[2])

tensor([[0.2589, 0.1152, 0.1572, 0.7236, 0.2515, 0.7386, 0.0749, 0.1119, 0.1429]])


In [121]:
'''
example of cell in news_1:
'[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]'
convert to:
[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
'''
# change from string to list
import ast
for j in range(1,11):        
    news_1['tok_text_'+str(j)] = np.array(news_1['tok_text_'+str(j)].apply(lambda x: ast.literal_eval (x)))

In [122]:
# convert all dataframes to numpy
news_1_np = news_1.to_numpy()
stocks_norm_np = stocks_norm.to_numpy()

# delete 1st column
news_1_np = news_1_np[:,1:]
stocks_norm_np = stocks_norm_np[:,1:]

# reshape from (n,10) lists to (n,10,15) numbers
news_1_np = np.array([np.array([np.array(news_1_np[i][j]) for j in range(10)]) for i in range(news_1_np.shape[0])])

# print
print (news_1_np.shape, stocks_norm_np.shape)
#first a row
# print (news_1_np[0])
# print(stocks_norm_np[0])

(1096, 10, 15) (1096, 9)


In [123]:
# shape dataset to pytorch dataset class    
class Dataset (torch.utils.data.Dataset):
    def __init__(self, data_stock, notice, target):
        self.data = data_stock.unsqueeze(1)
        self.notice = notice.unsqueeze(3)
        self.target = target.unsqueeze(1)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.notice[idx], self.target[idx]

# using news_1 and stocks_norm
# minidataset using the class
data_stock = torch.Tensor(stocks_norm_np[:-1,:]).float()
data_news = torch.Tensor(news_1_np).long()
target = data_stock[1:,:]
data_stock = data_stock[:-1,:]
print (data_stock.shape, data_news.shape, target.shape)

# create dataset
dataset = Dataset(data_stock, data_news, target)

torch.Size([1094, 9]) torch.Size([1096, 10, 15]) torch.Size([1094, 9])


In [124]:
Senti_n = SentientAnalizer(1, 128, 256, 50000, 128)
Stock_n = StockPredictor(9,256,256,9,10)

In [125]:
# dataloader
train_loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)

In [126]:

# test the dataloader
for i, (data_stock, data_news, target) in enumerate(train_loader):
    print (data_stock.shape, data_news.shape, target.shape)
    # Test the model
    data_news = data_news.squeeze(0)
    hidde_senti = Senti_n.init_hidden(data_news.shape[0])
    out_senti, hidden_senti = Senti(data_news, hidde_senti)
    out_senti = out_senti[:,-1,:]
    hidde_stock = Stock_n.init_hidden(data_stock.shape[0])
    out_stock, hidden_stock = Stock_n(data_stock, hidde_stock, out_senti)
    print (out_stock.shape)
    break

torch.Size([1, 1, 9]) torch.Size([1, 10, 15, 1]) torch.Size([1, 1, 9])
torch.Size([1, 1, 9])


In [127]:
# optimizer
optimizer = torch.optim.Adam(list(Senti_n.parameters())+list(Stock_n.parameters()), lr=0.001)
# loss function
criterion = nn.MSELoss()

In [132]:
# train
epochs = 10
for epoch in range(epochs):
    epoch_loss = 0
    hidde_senti = Senti_n.init_hidden(10)
    hidde_stock = Stock_n.init_hidden(1)
    for i, (data_stock, data_news, target) in enumerate(train_loader):
        data_news = data_news.squeeze(0)
        out_senti, hidden_senti = Senti(data_news, hidde_senti)
        out_senti = out_senti[:,-1,:]
        out_stock, hidden_stock = Stock_n(data_stock, hidde_stock, out_senti)
        # Test the optimizer
        optimizer.zero_grad()   
        loss = criterion(out_stock, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        if i % 100 == 0:
            print (i, epoch_loss/(i+1))
    print (epoch, epoch_loss/(i+1))

0 0.0014296459266915917
100 0.000650658946169372
200 0.0014070377174762844
300 0.0011551575679493492
400 0.0012379382433368449
500 0.002957914513046377
600 0.005011761623063346
700 0.005140971974256944
800 0.004851941173131828
900 0.004586146082950529
1000 0.004664083888935413
0.002988739637658
0 0.14213499426841736
100 0.008965301318258383
200 0.00556265594468415
300 0.003933456321178756
400 0.0033425372967665914


KeyboardInterrupt: 

In [None]:
# save model
torch.save(Senti_n.state_dict(), 'saved_models/Senti_n.pth')
torch.save(Stock_n.state_dict(), 'saved_models/Stock_n.pth')