## Model for price prediction from sentiment data.
> We need it to take a dictionary of sentence : sentiment pairs.
> This size of the dictionary is variable/flexible.

We could potentially design this module as an Transformer encoder model.


References:
> https://towardsdatascience.com/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1/

> [Set Transformer: A Framework for Attention-based
 Permutation-Invariant Neural Networks](https://arxiv.org/pdf/1810.00825)

 > https://github.com/juho-lee/set_transformer

 > [Deep sets](https://papers.nips.cc/paper_files/paper/2017/hash/f22e4747da1aa27e363d86d40ff442fe-Abstract.html)

Since we want our model to be invarient to permutations in the order in which we feed the news articles and associated sentiments, we can proceed using a set-transformer model.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class MAB(nn.Module):
    def __init__(self, dim_Q, dim_K, dim_V, num_heads, ln=False):
        super(MAB, self).__init__()
        self.dim_V = dim_V
        self.num_heads = num_heads
        self.fc_q = nn.Linear(dim_Q, dim_V)
        self.fc_k = nn.Linear(dim_K, dim_V)
        self.fc_v = nn.Linear(dim_K, dim_V)
        if ln:
            self.ln0 = nn.LayerNorm(dim_V)
            self.ln1 = nn.LayerNorm(dim_V)
        self.fc_o = nn.Linear(dim_V, dim_V)

    def forward(self, Q, K):
        Q = self.fc_q(Q)
        K, V = self.fc_k(K), self.fc_v(K)

        dim_split = self.dim_V // self.num_heads
        Q_ = torch.cat(Q.split(dim_split, 2), 0)
        K_ = torch.cat(K.split(dim_split, 2), 0)
        V_ = torch.cat(V.split(dim_split, 2), 0)

        A = torch.softmax(Q_.bmm(K_.transpose(1,2))/math.sqrt(self.dim_V), 2)
        O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2)
        O = O if getattr(self, 'ln0', None) is None else self.ln0(O)
        O = O + F.relu(self.fc_o(O))
        O = O if getattr(self, 'ln1', None) is None else self.ln1(O)
        return O

class SAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, ln=False):
        super(SAB, self).__init__()
        self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(X, X)

class ISAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, num_inds, ln=False):
        super(ISAB, self).__init__()
        self.I = nn.Parameter(torch.Tensor(1, num_inds, dim_out))
        nn.init.xavier_uniform_(self.I)
        self.mab0 = MAB(dim_out, dim_in, dim_out, num_heads, ln=ln)
        self.mab1 = MAB(dim_in, dim_out, dim_out, num_heads, ln=ln)

    def forward(self, X):
        H = self.mab0(self.I.repeat(X.size(0), 1, 1), X)
        return self.mab1(X, H)

class PMA(nn.Module):
    def __init__(self, dim, num_heads, num_seeds, ln=False):
        super(PMA, self).__init__()
        self.S = nn.Parameter(torch.Tensor(1, num_seeds, dim))
        nn.init.xavier_uniform_(self.S)
        self.mab = MAB(dim, dim, dim, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(self.S.repeat(X.size(0), 1, 1), X)

In [93]:
class DeepSet(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output, dim_hidden=128):
        super(DeepSet, self).__init__()
        self.num_outputs = num_outputs
        self.dim_output = dim_output
        self.enc = nn.Sequential(
                nn.Linear(dim_input, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden))
        self.dec = nn.Sequential(
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, num_outputs*dim_output))

    def forward(self, X):
        X = self.enc(X).mean(-2)
        X = self.dec(X).reshape(-1, self.num_outputs, self.dim_output)
        return X

class SetTransformer(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output,
            num_inds=32, dim_hidden=128, num_heads=4, ln=False):
        super(SetTransformer, self).__init__()
        self.enc = nn.Sequential(
                ISAB(dim_input, dim_hidden, num_heads, num_inds, ln=ln),
                ISAB(dim_hidden, dim_hidden, num_heads, num_inds, ln=ln))
        self.dec = nn.Sequential(
                PMA(dim_hidden, num_heads, num_outputs, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                nn.Linear(dim_hidden, dim_output))

    def forward(self, X):
        return self.dec(self.enc(X))

In [112]:
#Generate some Dummy data to test out the model using
import pandas as pd
import numpy as np

dataset_size = 100
embedding_dim = 256
encodings = np.random.rand(dataset_size, 10, embedding_dim).astype(np.float32)
print(encodings.shape,encodings[:5])

sentiments = np.random.rand(dataset_size, 1).astype(np.float32)
print(sentiments.shape, sentiments[:5])

price_percentage_changes = np.random.rand(dataset_size, 1).astype(np.float32)* 0.2;
print(price_percentage_changes.shape, price_percentage_changes[:5])


(100, 10, 256) [[[0.49391288 0.5181262  0.3267757  ... 0.43159577 0.607781   0.19591542]
  [0.14672875 0.32331625 0.8917091  ... 0.36367247 0.86712664 0.3618663 ]
  [0.95741004 0.60514283 0.24322791 ... 0.35557702 0.00403093 0.21990432]
  ...
  [0.8560478  0.99948746 0.27371994 ... 0.01117089 0.10729098 0.38278502]
  [0.5368003  0.08103587 0.01180919 ... 0.32980546 0.6937125  0.58217657]
  [0.636435   0.8439539  0.39706945 ... 0.21897729 0.7338553  0.9068168 ]]

 [[0.63911015 0.15714303 0.8798483  ... 0.8810984  0.64553446 0.18334495]
  [0.294584   0.02615817 0.95609117 ... 0.10610003 0.2852764  0.38951737]
  [0.32694098 0.22553347 0.68783647 ... 0.9789811  0.22953351 0.7581387 ]
  ...
  [0.6629332  0.17697397 0.8830423  ... 0.21080467 0.9974504  0.18054199]
  [0.35023713 0.31883088 0.6572026  ... 0.57077485 0.32763436 0.9826105 ]
  [0.23525004 0.9426987  0.64778847 ... 0.917448   0.802853   0.6638273 ]]

 [[0.16090491 0.3736248  0.36928993 ... 0.402598   0.7880287  0.12224684]
  [0.22

In [113]:
dummy_df = pd.DataFrame({
    'encodings': list(encodings),
    'sentiments': list(sentiments),
    'price_percentage_changes': list(price_percentage_changes)
})    
dummy_df.head()

Unnamed: 0,encodings,sentiments,price_percentage_changes
0,"[[0.49391288, 0.5181262, 0.3267757, 0.419439, ...",[0.4470893],[0.08461728]
1,"[[0.63911015, 0.15714303, 0.8798483, 0.5889911...",[0.24631687],[0.1471193]
2,"[[0.16090491, 0.3736248, 0.36928993, 0.0652277...",[0.7953618],[0.15261748]
3,"[[0.69185007, 0.744658, 0.2523723, 0.9564506, ...",[0.68668514],[0.018491564]
4,"[[0.34584802, 0.12259918, 0.7084872, 0.6204603...",[0.9208119],[0.037749376]


In [114]:
from torch.utils.data import Dataset, DataLoader
# Create proper dataset class instead of generator
class NewsDataset(Dataset):
    def __init__(self, encodings, sentiments, price_changes):
        self.encodings = encodings
        self.sentiments = sentiments
        self.price_changes = price_changes
    
    def __len__(self):
        return len(self.encodings)
    
    def __getitem__(self, idx):
        # Multiply encodings by sentiment (broadcasting)
        input_data = self.encodings[idx] * self.sentiments[idx][0]
        target = self.price_changes[idx][0]  # Single value, not array
        
        return torch.tensor(input_data, dtype=torch.float32), torch.tensor(target, dtype=torch.float32)

# Create dataset and dataloader
dataset = NewsDataset(encodings, sentiments, price_percentage_changes)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [115]:
model = SetTransformer(
    dim_input = embedding_dim, 
    num_outputs = 1, #One final prediction
    dim_output = 1, #1D output for price change
    num_inds=32, 
    dim_hidden=128, 
    num_heads=4, 
    ln=True #Layer normalization
    )
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [116]:
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    losses = []
    
    for batch_idx, (inps, outs) in enumerate(train_loader):
        # Move to device
        inps = inps.to(device)
        outs = outs.to(device)
        
        # Forward pass
        preds = model(inps)
        
        # Ensure output shapes match
        if preds.dim() > 1: #Output will be 32*1*1 if batch size is 32
            preds = preds.squeeze(-1)  # Remove last dimension if it's 1
        
        loss = criterion(preds, outs)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    
    avg_loss = np.mean(losses)
    print(f"Epoch {epoch}: train loss {avg_loss:.4f}")
    
    # Validation every 5 epochs
    if epoch % 5 == 0:
        model.eval()
        val_losses = []
        
        with torch.no_grad():
            for inps, outs in train_loader:  # Using same data for demo
                inps = inps.to(device)
                outs = outs.to(device)
                
                preds = model(inps)
                if preds.dim() > 1:
                    preds = preds.squeeze(-1)
                
                loss = criterion(preds, outs)
                val_losses.append(loss.item())
        
        avg_val_loss = np.mean(val_losses)
        print(f"Epoch {epoch}: val loss {avg_val_loss:.4f}")
        
        # Print sample predictions
        # print(f"Sample predictions: {preds[:5].cpu().numpy()}")
        # print(f"Sample targets: {outs[:5].cpu().numpy()}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0: train loss 2.9138
Epoch 0: val loss 0.7320
Epoch 1: train loss 0.2499
Epoch 2: train loss 0.0873
Epoch 3: train loss 0.0527
Epoch 4: train loss 0.0180
Epoch 5: train loss 0.0343
Epoch 5: val loss 0.0213
Epoch 6: train loss 0.0092
Epoch 7: train loss 0.0159
Epoch 8: train loss 0.0052
Epoch 9: train loss 0.0099
Epoch 10: train loss 0.0040
Epoch 10: val loss 0.0066
Epoch 11: train loss 0.0076
Epoch 12: train loss 0.0036
Epoch 13: train loss 0.0066
Epoch 14: train loss 0.0041
Epoch 15: train loss 0.0043
Epoch 15: val loss 0.0029
Epoch 16: train loss 0.0027
Epoch 17: train loss 0.0031
Epoch 18: train loss 0.0027
Epoch 19: train loss 0.0030


In [121]:
print (inps.shape,outs.shape)

torch.Size([4, 10, 256]) torch.Size([4])
