In [8]:
import torch
import torch.nn as nn

class SentimentAnalysis(nn.Module):
    """GRU over one-hot encoded token indices, followed by a linear projection.

    Token indices are turned into one-hot vectors by indexing rows of an
    identity matrix, run through a (possibly stacked) GRU, and every time
    step of the GRU output is projected to ``vocab_size`` logits.

    NOTE(review): despite the class name, the decoder emits one
    ``vocab_size``-wide logit vector per time step (next-token style), not a
    single sentiment score per sequence — confirm this is the intended head.

    Args:
        vocab_size: number of distinct token ids; also the one-hot width and
            the width of the output logits.
        hidden_size: dimension of the GRU hidden state.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, vocab_size, hidden_size, n_layers=1):
        super().__init__()

        # Row i of the identity matrix is the one-hot vector for token i.
        # Registered as a buffer so it follows the module through
        # .to(device) / .cuda() / .double(); a plain tensor attribute would
        # stay behind on the CPU and break the lookup in forward().
        # persistent=False keeps it out of state_dict, so checkpoints saved
        # before this change still load with strict=True.
        self.register_buffer("ident", torch.eye(vocab_size), persistent=False)

        # input_size is vocab_size because the GRU consumes one-hot vectors.
        self.rnn = nn.GRU(
            input_size=vocab_size,
            hidden_size=hidden_size,
            num_layers=n_layers,
            batch_first=True,
        )

        # Maps each GRU output vector (hidden_size) to vocab_size logits.
        self.decoder = nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, inp, hidden=None):
        """Run one sequence, or a batch of sequences, through the network.

        Args:
            inp: integer tensor of token indices, either ``[seq_length]``
                (a single sequence) or ``[batch_size, seq_length]``.
            hidden: initial GRU hidden state of shape
                ``[n_layers, batch_size, hidden_size]`` (``batch_size`` is 1
                for a single sequence). ``None`` lets the GRU start from an
                all-zero state.

        Returns:
            logits: ``[seq_length, vocab_size]`` for a single sequence, or
                ``[batch_size, seq_length, vocab_size]`` for a batch.
            hidden: final GRU hidden state,
                ``[n_layers, batch_size, hidden_size]``.

        Raises:
            ValueError: if ``inp`` has more than two dimensions.
        """
        if inp.dim() > 2:
            raise ValueError(
                "inp must have shape [seq_length] or [batch_size, seq_length], "
                f"got {tuple(inp.shape)}"
            )

        # A single sequence gets a temporary batch dimension of 1; a 2-D
        # input is kept as a real batch instead of being flattened into one
        # long sequence.
        single_sequence = inp.dim() < 2
        if single_sequence:
            inp = inp.reshape(1, -1)

        # Index rows of the identity matrix: [batch, seq] -> [batch, seq, vocab].
        one_hot = self.ident[inp]

        # output: [batch, seq, hidden_size]; hidden: [n_layers, batch, hidden_size]
        output, hidden = self.rnn(one_hot, hidden)

        # nn.Linear acts on the last dimension, so batch and time are preserved.
        logits = self.decoder(output)

        # Only drop the batch dimension that was added above, so the output
        # rank depends on the input rank and never on the batch size.
        if single_sequence:
            logits = logits.squeeze(0)

        return logits, hidden


In [9]:
# Seed the global RNG so the randomly initialised weights and the random
# initial hidden state are identical on every Restart & Run All.
torch.manual_seed(0)

model = SentimentAnalysis(vocab_size=10, hidden_size=16)
inp = torch.tensor([1, 0, 3, 2])  # one sequence of 4 token ids
hidden = torch.randn(1, 1, 16)    # [n_layers, batch_size, hidden_size]
out, _ = model(inp, hidden)
out.shape

torch.Size([4, 10])

In [7]:
# Shape of the initial hidden state passed to the model above.
hidden.size()

torch.Size([1, 1, 16])