<a href="https://colab.research.google.com/github/TD008/OTransfomer/blob/main/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import os
import numpy as np

In [3]:
class AttHead(torch.nn.Module):
    def __init__(self, dmodel, dk, dv, decoder=True):
        super(AttHead, self).__init__()
        self.dmodel = dmodel
        self.dk = dk
        self.dv = dv
        self.decoder = decoder

        self.key = torch.nn.Linear(dmodel, dk)
        self.query = torch.nn.Linear(dmodel, dk)
        self.value = torch.nn.Linear(dmodel, dv)
        self.out = torch.nn.Linear(dv, dmodel) # Linear layers applied on last dimension
        self.smax = torch.nn.Softmax(dim = -1) # Will be applied along the dk dimension

    def forward(self, x):
    # want to input a tensor of (batch_size, block_size, dmodel) and output
    # of dmodel/h ??

        k = self.key(x)
        q = self.query(x)
        v = self.value(x)

        affinities = q @ k.transpose(-2, -1)   # dot product of q and t packaged into a matrix mult.
        affinities = affinities / (self.dk ** 0.5)  # Scale by sqrt(dk)

        if self.decoder:
            mask = torch.triu(torch.ones(x.size(1), x.size(1)), diagonal=1)
            mask = mask.masked_fill(mask==1, float('-inf'))
            affinities += mask

        affinities = self.smax(affinities)   # Transform the affinities into weights summing to 1

        output = affinities @ v  # (B, T, dv)
        output = self.out(output)  # (B, T, dmodel)
        return output


In [4]:
class AttBlock(torch.nn.Module):
    def __init__(self, num_heads, dmodel):
        super(AttBlock, self).__init__()
        self.num_heads = num_heads
        self.dmodel = dmodel
        self.heads = [AttHead(dmodel, dmodel, dmodel) for i in range(num_heads)]

    def forward(self, x):
        pass

In [11]:
# Testing the model
batch_size = 2
block_size = 4
dmodel, dk, dv = 7, 7, 7

data = torch.randn(batch_size, block_size)
embeddings = torch.nn.Embedding(data.size(1), dmodel)
embeddings = embeddings(data) # Apparently feeding in the wrong type
print(embeddings.size)
print("data:\n", data)

model = AttHead(dmodel, dk, dv)

forward = model(data)
print("forward:\n", forward)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [None]:
class TestModel(torch.nn.Module):
    def __init__(self, dmodel, string_len):
        super(TestModel, self).__init__()

        self.dmodel = dmodel
        self.string_len = string_len

        self.embeddings = torch.nn.Embedding(self.string_len ,self.dmodel)
        self.attHead = AttHead(self.dmodel, dk, dv)

    def forward(self, x):
        embeddings = self.embeddings(x)
