## Model for price prediction from sentiment data.
> We need it to take a dictionary of sentence : sentiment pairs.
> This size of the dictionary is variable/flexible.

We could potentially design this module as an Transformer encoder model.


References:
> https://towardsdatascience.com/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1/

> [Set Transformer: A Framework for Attention-based
 Permutation-Invariant Neural Networks](https://arxiv.org/pdf/1810.00825)

 > https://github.com/juho-lee/set_transformer

 > [Deep sets](https://papers.nips.cc/paper_files/paper/2017/hash/f22e4747da1aa27e363d86d40ff442fe-Abstract.html)

Since we want our model to be invarient to permutations in the order in which we feed the news articles and associated sentiments, we can proceed using a set-transformer model.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class MAB(nn.Module):
    def __init__(self, dim_Q, dim_K, dim_V, num_heads, ln=False):
        super(MAB, self).__init__()
        self.dim_V = dim_V
        self.num_heads = num_heads
        self.fc_q = nn.Linear(dim_Q, dim_V)
        self.fc_k = nn.Linear(dim_K, dim_V)
        self.fc_v = nn.Linear(dim_K, dim_V)
        if ln:
            self.ln0 = nn.LayerNorm(dim_V)
            self.ln1 = nn.LayerNorm(dim_V)
        self.fc_o = nn.Linear(dim_V, dim_V)

    def forward(self, Q, K):
        Q = self.fc_q(Q)
        K, V = self.fc_k(K), self.fc_v(K)

        dim_split = self.dim_V // self.num_heads
        Q_ = torch.cat(Q.split(dim_split, 2), 0)
        K_ = torch.cat(K.split(dim_split, 2), 0)
        V_ = torch.cat(V.split(dim_split, 2), 0)

        A = torch.softmax(Q_.bmm(K_.transpose(1,2))/math.sqrt(self.dim_V), 2)
        O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2)
        O = O if getattr(self, 'ln0', None) is None else self.ln0(O)
        O = O + F.relu(self.fc_o(O))
        O = O if getattr(self, 'ln1', None) is None else self.ln1(O)
        return O

class SAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, ln=False):
        super(SAB, self).__init__()
        self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(X, X)

class ISAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, num_inds, ln=False):
        super(ISAB, self).__init__()
        self.I = nn.Parameter(torch.Tensor(1, num_inds, dim_out))
        nn.init.xavier_uniform_(self.I)
        self.mab0 = MAB(dim_out, dim_in, dim_out, num_heads, ln=ln)
        self.mab1 = MAB(dim_in, dim_out, dim_out, num_heads, ln=ln)

    def forward(self, X):
        H = self.mab0(self.I.repeat(X.size(0), 1, 1), X)
        return self.mab1(X, H)

class PMA(nn.Module):
    def __init__(self, dim, num_heads, num_seeds, ln=False):
        super(PMA, self).__init__()
        self.S = nn.Parameter(torch.Tensor(1, num_seeds, dim))
        nn.init.xavier_uniform_(self.S)
        self.mab = MAB(dim, dim, dim, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(self.S.repeat(X.size(0), 1, 1), X)

In [2]:
class DeepSet(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output, dim_hidden=128):
        super(DeepSet, self).__init__()
        self.num_outputs = num_outputs
        self.dim_output = dim_output
        self.enc = nn.Sequential(
                nn.Linear(dim_input, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden))
        self.dec = nn.Sequential(
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, num_outputs*dim_output))

    def forward(self, X):
        X = self.enc(X).mean(-2)
        X = self.dec(X).reshape(-1, self.num_outputs, self.dim_output)
        return X

class SetTransformer(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output,
            num_inds=32, dim_hidden=128, num_heads=4, ln=False):
        super(SetTransformer, self).__init__()
        self.enc = nn.Sequential(
                ISAB(dim_input, dim_hidden, num_heads, num_inds, ln=ln),
                ISAB(dim_hidden, dim_hidden, num_heads, num_inds, ln=ln))
        self.dec = nn.Sequential(
                PMA(dim_hidden, num_heads, num_outputs, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                nn.Linear(dim_hidden, dim_output))

    def forward(self, X):
        return self.dec(self.enc(X))

In [56]:
#Generate some Dummy data to test out the model using
import pandas as pd
import numpy as np

embedding_dim = 256
encodings = np.random.rand(1000, 10, embedding_dim).astype(np.float32)
print(encodings.shape,encodings[:5])

sentiments = np.random.rand(1000, 1).astype(np.float32)
print(sentiments.shape, sentiments[:5])

price_percentage_changes = np.random.rand(1000, 1).astype(np.float32)* 0.2;
print(price_percentage_changes.shape, price_percentage_changes[:5])


(1000, 10, 256) [[[0.70924795 0.32848698 0.82711387 ... 0.49807447 0.90072435 0.8727512 ]
  [0.9046867  0.60809374 0.53850275 ... 0.6635218  0.29242167 0.12887849]
  [0.5412136  0.41911846 0.6955833  ... 0.5759633  0.15471639 0.8632402 ]
  ...
  [0.9149834  0.848888   0.8758864  ... 0.23433454 0.3700483  0.7704213 ]
  [0.6062975  0.66818774 0.49246314 ... 0.90030676 0.47267076 0.05881489]
  [0.10701042 0.9551531  0.45426187 ... 0.6437231  0.94655716 0.53181237]]

 [[0.51977044 0.6124255  0.04410373 ... 0.14944035 0.29738176 0.42086688]
  [0.02331264 0.06788605 0.77830243 ... 0.83413535 0.08700421 0.7267088 ]
  [0.04104438 0.6400285  0.34719583 ... 0.7246397  0.823761   0.558414  ]
  ...
  [0.597084   0.48698038 0.06504077 ... 0.87367594 0.28494206 0.70533794]
  [0.25869054 0.06908277 0.9689559  ... 0.7227215  0.27238044 0.10473777]
  [0.97081715 0.8583806  0.34349772 ... 0.5373928  0.89081556 0.78511953]]

 [[0.80206525 0.43210518 0.9051501  ... 0.9667514  0.48719826 0.17716876]
  [0.5

In [57]:
dummy_df = pd.DataFrame({
    'encodings': list(encodings),
    'sentiments': list(sentiments),
    'price_percentage_changes': list(price_percentage_changes)
})    
dummy_df.head()

Unnamed: 0,encodings,sentiments,price_percentage_changes
0,"[[0.70924795, 0.32848698, 0.82711387, 0.715978...",[0.015435915],[0.1231597]
1,"[[0.51977044, 0.6124255, 0.044103727, 0.079766...",[0.7557631],[0.08905373]
2,"[[0.80206525, 0.43210518, 0.9051501, 0.7511358...",[0.105215505],[0.08373296]
3,"[[0.12910023, 0.6075177, 0.48562065, 0.452957,...",[0.6496918],[0.04768773]
4,"[[0.15135363, 0.5149915, 0.31344974, 0.5500606...",[0.24089135],[0.086240925]


In [79]:
def data_generator(batch_size=32):
    for _, row in dummy_df.iterrows():
        input = row['encodings'] * row['sentiments'][0]
        output = torch.tensor(row['price_percentage_changes'], dtype=torch.float32).unsqueeze(0)
        yield torch.tensor(input, dtype=torch.float32), torch.tensor(output, dtype=torch.float32)
    
    def __len__(self):
        return len(dummy_df)

In [80]:
#Testing the data generator
generator_object = data_generator()

In [81]:
x, y = generator_object.__next__()
print(x,y)

tensor([[0.0109, 0.0051, 0.0128,  ..., 0.0077, 0.0139, 0.0135],
        [0.0140, 0.0094, 0.0083,  ..., 0.0102, 0.0045, 0.0020],
        [0.0084, 0.0065, 0.0107,  ..., 0.0089, 0.0024, 0.0133],
        ...,
        [0.0141, 0.0131, 0.0135,  ..., 0.0036, 0.0057, 0.0119],
        [0.0094, 0.0103, 0.0076,  ..., 0.0139, 0.0073, 0.0009],
        [0.0017, 0.0147, 0.0070,  ..., 0.0099, 0.0146, 0.0082]]) tensor([[0.1232]])


  yield torch.tensor(input, dtype=torch.float32), torch.tensor(output, dtype=torch.float32)


In [82]:
model = SetTransformer(
    dim_input = embedding_dim, 
    num_outputs = 1, #One final prediction
    dim_output = 1, #1D output for price change
    num_inds=32, 
    dim_hidden=10, 
    num_heads=4, 
    ln=True #Layer normalization
    )
optimizer = torch.optim.Adam(model.parameters(), lr=0.03)
criterion = nn.CrossEntropyLoss()
model = nn.DataParallel(model)
model = model.cuda()

In [83]:
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    losses, total, correct = [], 0, 0
    for inps,outs in data_generator():
        inps = torch.Tensor(inps).cuda()
        outs = torch.Tensor(outs).long().cuda()
        preds = model(inps)
        loss = criterion(preds, outs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        total += outs.shape[0]
        correct += (preds.argmax(dim=1) == outs).sum().item()

    avg_loss, avg_acc = np.mean(losses), correct / total
    print(f"Epoch {epoch}: train loss {avg_loss:.3f} train acc {avg_acc:.3f}")

    if epoch % 10 == 0:
        print(inps)
        print(outs)
        model.eval()
        losses, total, correct = [], 0, 0
        for inps, outs in data_generator():
            inps = torch.Tensor(inps).cuda()
            outs = torch.Tensor(outs).long().cuda()
            preds = model(inps)
            loss = criterion(preds, outs)

            losses.append(loss.item())
            total += outs.shape[0]
            correct += (preds.argmax(dim=1) == outs).sum().item()
        avg_loss, avg_acc = np.mean(losses), correct / total
        print(f"Epoch {epoch}: test loss {avg_loss:.3f} test acc {avg_acc:.3f}")

  yield torch.tensor(input, dtype=torch.float32), torch.tensor(output, dtype=torch.float32)


IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)