In [2]:
import torch
import torch.nn.functional as F

class SimpleTransformer:
    def __init__(self, embedding_dim):
        self.embedding = {}
        self.embedding_dim = embedding_dim
        # Initialize weights for query, key, value transformations
        self.W_q = torch.rand((embedding_dim, embedding_dim))
        self.W_k = torch.rand((embedding_dim, embedding_dim))
        self.W_v = torch.rand((embedding_dim, embedding_dim))

    def add_word(self, word, vector):
        """
        Add a word with its embedding vector to the embedding dictionary.
        """
        if len(vector) != self.embedding_dim:
            raise ValueError("Embedding vector must have the correct dimension.")
        self.embedding[word] = torch.tensor(vector)

    def get_query(self, word):
        """
        Get the query vector for a given word.
        """
        return torch.matmul(self.embedding[word], self.W_q)

    def get_keys_and_values(self):
        """
        Get the key and value vectors for all words.
        """
        K = {word: torch.matmul(embedding_vector, self.W_k) for word, embedding_vector in self.embedding.items()}
        V = {word: torch.matmul(embedding_vector, self.W_v) for word, embedding_vector in self.embedding.items()}
        return K, V

    def calculate_attention(self, Q, K):
        """
        Calculate attention scores for a query Q with respect to all keys K.
        """
        scores = {word: torch.dot(Q, K[word]) / torch.sqrt(torch.tensor(float(self.embedding_dim))) for word in K}
        weights = F.softmax(torch.tensor(list(scores.values())), dim=-1)
        return scores, weights

    def get_new_representation(self, word):
        """
        Calculate the new representation of a word using the attention mechanism.
        """
        Q = self.get_query(word)
        K, V = self.get_keys_and_values()
        _, weights = self.calculate_attention(Q, K)
        new_representation = sum(weights[i] * V[word] for i, word in enumerate(K))
        return new_representation

# Example usage:
embedding_dim = 4
transformer = SimpleTransformer(embedding_dim)

# Add words to the embedding dictionary
transformer.add_word("the", [0.2, 0.1, 0.4, 0.5])
transformer.add_word("cat", [0.6, 0.1, 0.8, 0.3])
transformer.add_word("sat", [0.5, 0.3, 0.7, 0.2])
transformer.add_word("on", [0.4, 0.2, 0.5, 0.6])
transformer.add_word("mat", [0.3, 0.5, 0.6, 0.4])

# Get the new representation for the word "sat"
new_representation = transformer.get_new_representation("sat")
print("New representation for 'sat':", new_representation)


New representation for 'sat': tensor([0.5006, 0.7689, 0.3331, 0.5602])


In [1]:
import torch
import torch.nn.functional as F

class SimpleTransformer:
    def __init__(self, embedding_dim):
        self.embedding = {}
        self.embedding_dim = embedding_dim
        # Initialize weights for query, key, value transformations (encoder)
        self.W_q = torch.rand((embedding_dim, embedding_dim))
        self.W_k = torch.rand((embedding_dim, embedding_dim))
        self.W_v = torch.rand((embedding_dim, embedding_dim))
        # Initialize weights for decoder self-attention and encoder-decoder attention
        self.W_q_dec = torch.rand((embedding_dim, embedding_dim))
        self.W_k_dec = torch.rand((embedding_dim, embedding_dim))
        self.W_v_dec = torch.rand((embedding_dim, embedding_dim))
        self.W_q_encdec = torch.rand((embedding_dim, embedding_dim))
        self.W_k_encdec = torch.rand((embedding_dim, embedding_dim))
        self.W_v_encdec = torch.rand((embedding_dim, embedding_dim))

    def add_word(self, word, vector):
        """
        Add a word with its embedding vector to the embedding dictionary.
        """
        if len(vector) != self.embedding_dim:
            raise ValueError("Embedding vector must have the correct dimension.")
        self.embedding[word] = torch.tensor(vector)

    def get_query(self, word):
        """
        Get the query vector for a given word (encoder).
        """
        return torch.matmul(self.embedding[word], self.W_q)

    def get_keys_and_values(self):
        """
        Get the key and value vectors for all words (encoder).
        """
        K = {word: torch.matmul(embedding_vector, self.W_k) for word, embedding_vector in self.embedding.items()}
        V = {word: torch.matmul(embedding_vector, self.W_v) for word, embedding_vector in self.embedding.items()}
        return K, V

    def calculate_attention(self, Q, K):
        """
        Calculate attention scores for a query Q with respect to all keys K.
        """
        scores = {word: torch.dot(Q, K[word]) / torch.sqrt(torch.tensor(float(self.embedding_dim))) for word in K}
        weights = F.softmax(torch.tensor(list(scores.values())), dim=-1)
        return scores, weights

    def get_new_representation(self, word):
        """
        Calculate the new representation of a word using the attention mechanism (encoder).
        """
        Q = self.get_query(word)
        K, V = self.get_keys_and_values()
        _, weights = self.calculate_attention(Q, K)
        new_representation = sum(weights[i] * V[word] for i, word in enumerate(K))
        return new_representation

    def decoder_attention(self, word, encoder_outputs):
        """
        Perform self-attention for the decoder and encoder-decoder attention.
        """
        Q_dec = torch.matmul(self.embedding[word], self.W_q_dec)
        K_dec = {w: torch.matmul(enc_out, self.W_k_dec) for w, enc_out in encoder_outputs.items()}
        V_dec = {w: torch.matmul(enc_out, self.W_v_dec) for w, enc_out in encoder_outputs.items()}
        _, weights_dec = self.calculate_attention(Q_dec, K_dec)
        dec_self_attention = sum(weights_dec[i] * V_dec[w] for i, w in enumerate(K_dec))

        # Encoder-Decoder Attention
        Q_encdec = torch.matmul(dec_self_attention, self.W_q_encdec)
        K_encdec = {w: torch.matmul(enc_out, self.W_k_encdec) for w, enc_out in encoder_outputs.items()}
        V_encdec = {w: torch.matmul(enc_out, self.W_v_encdec) for w, enc_out in encoder_outputs.items()}
        _, weights_encdec = self.calculate_attention(Q_encdec, K_encdec)
        new_representation = sum(weights_encdec[i] * V_encdec[w] for i, w in enumerate(K_encdec))

        return new_representation

# Example usage:
embedding_dim = 4
transformer = SimpleTransformer(embedding_dim)

# Add words to the embedding dictionary (English to French example)
transformer.add_word("the", [0.2, 0.1, 0.4, 0.5])
transformer.add_word("cat", [0.6, 0.1, 0.8, 0.3])
transformer.add_word("sat", [0.5, 0.3, 0.7, 0.2])
transformer.add_word("on", [0.4, 0.2, 0.5, 0.6])
transformer.add_word("mat", [0.3, 0.5, 0.6, 0.4])
transformer.add_word("le", [0.1, 0.2, 0.4, 0.6])
transformer.add_word("chat", [0.5, 0.6, 0.3, 0.2])
transformer.add_word("assis", [0.7, 0.4, 0.1, 0.5])
transformer.add_word("sur", [0.3, 0.7, 0.2, 0.4])
transformer.add_word("tapis", [0.4, 0.6, 0.3, 0.5])

# Step 1: Encoder processes the input sentence
encoder_outputs = {word: transformer.get_new_representation(word) for word in ["the", "cat", "sat", "on", "mat"]}

# Step 2: Decoder processes the target sentence (with encoder outputs)
decoded_representation = transformer.decoder_attention("le", encoder_outputs)
print("Decoder output for 'le':", decoded_representation)


Decoder output for 'le': tensor([1.3656, 1.6018, 2.0012, 1.5555])
