## Model for price prediction from sentiment data.
> We need it to take a dictionary of sentence : sentiment pairs.
> This size of the dictionary is variable/flexible.

We could potentially design this module as an Transformer encoder model.


References:
> https://towardsdatascience.com/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1/

> [Set Transformer: A Framework for Attention-based
 Permutation-Invariant Neural Networks](https://arxiv.org/pdf/1810.00825)

 > https://github.com/juho-lee/set_transformer

 > [Deep sets](https://papers.nips.cc/paper_files/paper/2017/hash/f22e4747da1aa27e363d86d40ff442fe-Abstract.html)

Since we want our model to be invarient to permutations in the order in which we feed the news articles and associated sentiments, we can proceed using a set-transformer model.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class MAB(nn.Module):
    def __init__(self, dim_Q, dim_K, dim_V, num_heads, ln=False):
        super(MAB, self).__init__()
        self.dim_V = dim_V
        self.num_heads = num_heads
        self.fc_q = nn.Linear(dim_Q, dim_V)
        self.fc_k = nn.Linear(dim_K, dim_V)
        self.fc_v = nn.Linear(dim_K, dim_V)
        if ln:
            self.ln0 = nn.LayerNorm(dim_V)
            self.ln1 = nn.LayerNorm(dim_V)
        self.fc_o = nn.Linear(dim_V, dim_V)

    def forward(self, Q, K):
        Q = self.fc_q(Q)
        K, V = self.fc_k(K), self.fc_v(K)

        dim_split = self.dim_V // self.num_heads
        Q_ = torch.cat(Q.split(dim_split, 2), 0)
        K_ = torch.cat(K.split(dim_split, 2), 0)
        V_ = torch.cat(V.split(dim_split, 2), 0)

        A = torch.softmax(Q_.bmm(K_.transpose(1,2))/math.sqrt(self.dim_V), 2)
        O = torch.cat((Q_ + A.bmm(V_)).split(Q.size(0), 0), 2)
        O = O if getattr(self, 'ln0', None) is None else self.ln0(O)
        O = O + F.relu(self.fc_o(O))
        O = O if getattr(self, 'ln1', None) is None else self.ln1(O)
        return O

class SAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, ln=False):
        super(SAB, self).__init__()
        self.mab = MAB(dim_in, dim_in, dim_out, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(X, X)

class ISAB(nn.Module):
    def __init__(self, dim_in, dim_out, num_heads, num_inds, ln=False):
        super(ISAB, self).__init__()
        self.I = nn.Parameter(torch.Tensor(1, num_inds, dim_out))
        nn.init.xavier_uniform_(self.I)
        self.mab0 = MAB(dim_out, dim_in, dim_out, num_heads, ln=ln)
        self.mab1 = MAB(dim_in, dim_out, dim_out, num_heads, ln=ln)

    def forward(self, X):
        H = self.mab0(self.I.repeat(X.size(0), 1, 1), X)
        return self.mab1(X, H)

class PMA(nn.Module):
    def __init__(self, dim, num_heads, num_seeds, ln=False):
        super(PMA, self).__init__()
        self.S = nn.Parameter(torch.Tensor(1, num_seeds, dim))
        nn.init.xavier_uniform_(self.S)
        self.mab = MAB(dim, dim, dim, num_heads, ln=ln)

    def forward(self, X):
        return self.mab(self.S.repeat(X.size(0), 1, 1), X)

In [93]:
class DeepSet(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output, dim_hidden=128):
        super(DeepSet, self).__init__()
        self.num_outputs = num_outputs
        self.dim_output = dim_output
        self.enc = nn.Sequential(
                nn.Linear(dim_input, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden))
        self.dec = nn.Sequential(
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, dim_hidden),
                nn.ReLU(),
                nn.Linear(dim_hidden, num_outputs*dim_output))

    def forward(self, X):
        X = self.enc(X).mean(-2)
        X = self.dec(X).reshape(-1, self.num_outputs, self.dim_output)
        return X

class SetTransformer(nn.Module):
    def __init__(self, dim_input, num_outputs, dim_output,
            num_inds=32, dim_hidden=128, num_heads=4, ln=False):
        super(SetTransformer, self).__init__()
        self.enc = nn.Sequential(
                ISAB(dim_input, dim_hidden, num_heads, num_inds, ln=ln),
                ISAB(dim_hidden, dim_hidden, num_heads, num_inds, ln=ln))
        self.dec = nn.Sequential(
                PMA(dim_hidden, num_heads, num_outputs, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                SAB(dim_hidden, dim_hidden, num_heads, ln=ln),
                nn.Linear(dim_hidden, dim_output))

    def forward(self, X):
        return self.dec(self.enc(X))

In [106]:
#Generate some Dummy data to test out the model using
import pandas as pd
import numpy as np

dataset_size = 100
embedding_dim = 256
encodings = np.random.rand(dataset_size, 10, embedding_dim).astype(np.float32)
print(encodings.shape,encodings[:5])

sentiments = np.random.rand(dataset_size, 1).astype(np.float32)
print(sentiments.shape, sentiments[:5])

price_percentage_changes = np.random.rand(dataset_size, 1).astype(np.float32)* 0.2;
print(price_percentage_changes.shape, price_percentage_changes[:5])


(100, 10, 256) [[[0.91048276 0.19734186 0.22486499 ... 0.40618408 0.92178136 0.5661647 ]
  [0.7591644  0.87074685 0.31699267 ... 0.32964826 0.36503044 0.40797657]
  [0.5969745  0.05806377 0.7231523  ... 0.7675753  0.12269455 0.40065286]
  ...
  [0.0321703  0.8939506  0.953571   ... 0.79273903 0.53147393 0.7241469 ]
  [0.10080913 0.83438474 0.98348325 ... 0.69011456 0.98091567 0.83473915]
  [0.9856216  0.13100976 0.36658415 ... 0.5846759  0.01886287 0.14966795]]

 [[0.27407527 0.2191102  0.3515185  ... 0.893201   0.3406108  0.10998099]
  [0.8265604  0.9763401  0.8191433  ... 0.40011767 0.87134945 0.43417493]
  [0.66520786 0.48240307 0.95109856 ... 0.4819437  0.4842333  0.4407385 ]
  ...
  [0.22965983 0.6492614  0.03905968 ... 0.94743043 0.17267446 0.05969359]
  [0.07663344 0.3735552  0.45401952 ... 0.4988756  0.4566521  0.3246412 ]
  [0.73924375 0.20909086 0.49296996 ... 0.80788386 0.7847491  0.3605323 ]]

 [[0.29940197 0.1599657  0.227631   ... 0.31971666 0.05820284 0.7321239 ]
  [0.71

In [107]:
dummy_df = pd.DataFrame({
    'encodings': list(encodings),
    'sentiments': list(sentiments),
    'price_percentage_changes': list(price_percentage_changes)
})    
dummy_df.head()

Unnamed: 0,encodings,sentiments,price_percentage_changes
0,"[[0.91048276, 0.19734186, 0.22486499, 0.018048...",[0.73971826],[0.08840006]
1,"[[0.27407527, 0.2191102, 0.3515185, 0.3321529,...",[0.24886653],[0.081905775]
2,"[[0.29940197, 0.1599657, 0.227631, 0.8078488, ...",[0.6676872],[0.12570345]
3,"[[0.7902688, 0.76318496, 0.9087278, 0.6236607,...",[0.10046396],[0.14250155]
4,"[[0.4374825, 0.13676775, 0.9476627, 0.6723774,...",[0.3344104],[0.1833147]


In [108]:
from torch.utils.data import Dataset, DataLoader
# Create proper dataset class instead of generator
class NewsDataset(Dataset):
    def __init__(self, encodings, sentiments, price_changes):
        self.encodings = encodings
        self.sentiments = sentiments
        self.price_changes = price_changes
    
    def __len__(self):
        return len(self.encodings)
    
    def __getitem__(self, idx):
        # Multiply encodings by sentiment (broadcasting)
        input_data = self.encodings[idx] * self.sentiments[idx][0]
        target = self.price_changes[idx][0]  # Single value, not array
        
        return torch.tensor(input_data, dtype=torch.float32), torch.tensor(target, dtype=torch.float32)

# Create dataset and dataloader
dataset = NewsDataset(encodings, sentiments, price_percentage_changes)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [109]:
model = SetTransformer(
    dim_input = embedding_dim, 
    num_outputs = 1, #One final prediction
    dim_output = 1, #1D output for price change
    num_inds=32, 
    dim_hidden=128, 
    num_heads=4, 
    ln=True #Layer normalization
    )
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [111]:
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    losses = []
    
    for batch_idx, (inps, outs) in enumerate(train_loader):
        # Move to device
        inps = inps.to(device)
        outs = outs.to(device)
        
        # Forward pass
        preds = model(inps)
        
        # Ensure output shapes match
        if preds.dim() > 1: #Output will be 32*1*1 if batch size is 32
            preds = preds.squeeze(-1)  # Remove last dimension if it's 1
        
        loss = criterion(preds, outs)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        losses.append(loss.item())
    
    avg_loss = np.mean(losses)
    print(f"Epoch {epoch}: train loss {avg_loss:.4f}")
    
    # Validation every 5 epochs
    if epoch % 5 == 0:
        model.eval()
        val_losses = []
        
        with torch.no_grad():
            for inps, outs in train_loader:  # Using same data for demo
                inps = inps.to(device)
                outs = outs.to(device)
                
                preds = model(inps)
                if preds.dim() > 1:
                    preds = preds.squeeze(-1)
                
                loss = criterion(preds, outs)
                val_losses.append(loss.item())
        
        avg_val_loss = np.mean(val_losses)
        print(f"Epoch {epoch}: val loss {avg_val_loss:.4f}")
        
        # Print sample predictions
        # print(f"Sample predictions: {preds[:5].cpu().numpy()}")
        # print(f"Sample targets: {outs[:5].cpu().numpy()}")

Epoch 0: train loss 0.0034
Epoch 0: val loss 0.0032
Epoch 1: train loss 0.0040
Epoch 2: train loss 0.0040
Epoch 3: train loss 0.0035
Epoch 4: train loss 0.0036
Epoch 5: train loss 0.0052
Epoch 5: val loss 0.0058
Epoch 6: train loss 0.0073
Epoch 7: train loss 0.0063
Epoch 8: train loss 0.0049
Epoch 9: train loss 0.0051
Epoch 10: train loss 0.0060
Epoch 10: val loss 0.0035
Epoch 11: train loss 0.0058
Epoch 12: train loss 0.0056
Epoch 13: train loss 0.0038
Epoch 14: train loss 0.0054
Epoch 15: train loss 0.0044
Epoch 15: val loss 0.0044
Epoch 16: train loss 0.0041
Epoch 17: train loss 0.0039
Epoch 18: train loss 0.0042
Epoch 19: train loss 0.0038
