In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence

torch.manual_seed(1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
class BookDataset(Dataset):
    def __init__(self, scalars, sequences, targets):
        self.scalars = scalars
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.targets)
    
    def __getitem__(self, idx):
        scalar_input = torch.tensor(self.scalars[idx], dtype=torch.float)
        sequence_input = torch.tensor(self.sequences[idx], dtype=torch.float)
        target = torch.tensor(self.targets[idx], dtype=float)
        return scalar_input, sequence_input, target
    
    @staticmethod
    def collate_fn(batch):
        scalars, sequences, targets = zip(*batch)
        sequences_padded = pad_sequence(sequences, batch_first=True)
        return torch.stack(scalars), sequences_padded, torch.stack(targets)

In [9]:
class NewModel(nn.Module):
    def __init__(self):
        super().__init__()
        SCALAR_LAYER_SIZE=10
        SEQUENCE_LAYER_SIZE=20
        FINAL_LAYER_SIZE=10
        self.scalar_processor = nn.Linear(2, SCALAR_LAYER_SIZE)
        self.sequence_processor = nn.Linear(2, 1)
        self.combiner = nn.Linear(SCALAR_LAYER_SIZE + SEQUENCE_LAYER_SIZE, FINAL_LAYER_SIZE)
        self.output_layer = nn.Linear(FINAL_LAYER_SIZE, 1)
        self.flatten = nn.Flatten

    def forward(self, scalar_input, sequence_input):
        scalar_output = self.scalar_processor(scalar_input)
        sequence_output, _ = self.sequence_processor(sequence_input)
        sequence_output = sequence_output[:, -1, :]
        combined = torch.cat((scalar_output, sequence_output), dim=1)
        x = F.relu(self.combiner(combined))
        return self.output_layer(x)
    
model = NewModel()

In [10]:
flatten = nn.Flatten()
listing_inputs = [torch.randn((3, 2)), torch.randn((5, 2)), torch.randn((4, 2))]
formatted_inputs = [flatten(x.unsqueeze(dim=0)).transpose(0, 1) for x in listing_inputs]
x = pad_sequence(formatted_inputs, batch_first=True)
x = x.transpose(1, 2).squeeze(dim=1)
x, model(torch.randn((3)), x)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x3 and 2x10)

In [None]:
flatten = nn.Flatten()
first = torch.randn((3, 2))
second = torch.randn((5, 2))
first = flatten(first.unsqueeze(dim=0))
second = flatten(second.unsqueeze(dim=0))
first = first.transpose(0, 1)
second = second.transpose(0, 1)
print(first, second)
x = pad_sequence([first, second], batch_first=True)
print(x)
x = x.transpose(1, 2)
print(x)
x = x.squeeze(dim=1)
x

tensor([[ 0.3255],
        [-0.4791],
        [ 1.3790],
        [ 2.5286],
        [ 0.4107],
        [-0.9880]]) tensor([[-0.9081],
        [ 0.5423],
        [ 0.1103],
        [-2.2590],
        [ 0.6067],
        [-0.1383],
        [ 0.8310],
        [-0.2477],
        [-0.8029],
        [ 0.2366]])
tensor([[[ 0.3255],
         [-0.4791],
         [ 1.3790],
         [ 2.5286],
         [ 0.4107],
         [-0.9880],
         [ 0.0000],
         [ 0.0000],
         [ 0.0000],
         [ 0.0000]],

        [[-0.9081],
         [ 0.5423],
         [ 0.1103],
         [-2.2590],
         [ 0.6067],
         [-0.1383],
         [ 0.8310],
         [-0.2477],
         [-0.8029],
         [ 0.2366]]])
tensor([[[ 0.3255, -0.4791,  1.3790,  2.5286,  0.4107, -0.9880,  0.0000,
           0.0000,  0.0000,  0.0000]],

        [[-0.9081,  0.5423,  0.1103, -2.2590,  0.6067, -0.1383,  0.8310,
          -0.2477, -0.8029,  0.2366]]])


tensor([[ 0.3255, -0.4791,  1.3790,  2.5286,  0.4107, -0.9880,  0.0000,  0.0000,
          0.0000,  0.0000],
        [-0.9081,  0.5423,  0.1103, -2.2590,  0.6067, -0.1383,  0.8310, -0.2477,
         -0.8029,  0.2366]])

In [None]:
model = NewModel()
scalar_input = torch.randn((1, 2))
sequence_input = torch.randn((1, 5))
target = model(scalar_input, sequence_input)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x5 and 2x1)

In [None]:
class Model(nn.Module):
    def __init__(self):
        self.feature_extractor = nn.Linear(in_features=3, out_features=64)
        self.attention_layer = nn.Linear(in_features=64, out_features=1)
        self.reducer = nn.Linear(in_features=64, out_features=1)
    
    def forward(self, x):
        batch_size, seq_len, _ = x.size()

        features = F.relu(self.feature_extractor(x))
        attention_weights = F.softmax(self.attention_layer(features), dim=1)

        weighted_features = features * attention_weights
        aggregated_features = weighted_features.sum(dim=1)

        output = self.reducer(aggregated_features)
        final_output = output.view(batch_size)

        return final_output

model = Model().to(device)

In [None]:
X = torch.randn((2, 4, 3)).to(device)
y = model(X)

X, y

(tensor([[[ 1.5043, -1.3950,  0.8008],
          [-0.6619,  1.2563,  0.5000],
          [ 0.0402,  0.4647,  0.3453],
          [-0.5506,  1.3261,  0.5883]],
 
         [[-1.2548,  1.3883, -0.4552],
          [ 0.3186,  0.9738,  1.5453],
          [ 0.8202, -0.1630, -0.8551],
          [-0.7517, -0.0947, -2.3144]]], device='cuda:0'),
 tensor([ 0.0038, -0.0527], device='cuda:0', grad_fn=<ViewBackward0>))