In [24]:
'''
    Rachel's attempt at writing a transformer more or less from scratch

    Input:
        pair of floats between 0 and 1 (or a sequence of any seqLen such floats)
    Output:
        float between 0 and 2 (in general, between 0 and seqLen): the sum of the inputs
'''

import numpy as np
import torch
import torch.nn as nn
#import time
import math

#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [25]:
#Generate Data

def generateData(size, seqLen):
    '''
    Build a random "sum of the inputs" dataset framed by start/end marker values.

    Args:
        size: number of examples (rows) to generate.
        seqLen: number of random floats per example, each drawn from
            np.random.random, i.e. uniformly from [0, 1).

    Returns:
        (inputData, target), both torch.float64 tensors:
            inputData: shape (size, seqLen + 2); each row is
                [3.0, x_1, ..., x_seqLen, 4.0].
            target: shape (size, targetLen + 2) where targetLen is
                max(1, seqLen // 2) for seqLen >= 1 and 0 for seqLen == 0.
                Each row is [3.0, sum(x), 0, ..., 0, 4.0]: only the first
                payload column holds the sum, any further columns stay zero.
    '''
    # Marker values wrapped around every sequence (start / end of sequence).
    SOSToken = 3.
    EOSToken = 4.

    inputData = np.random.random(size = (size, seqLen))

    # seqLen // 2 is 0 for seqLen == 1, which left no column to hold the sum
    # (IndexError on target[:, 0]); keep at least one column whenever there
    # is something to sum.
    targetLen = max(1, seqLen // 2) if seqLen > 0 else 0
    target = np.zeros((size, targetLen))
    if seqLen > 0:
        target[:, 0] = inputData.sum(axis=1)

    inputData = torch.tensor(inputData)
    target = torch.tensor(target)

    # Build the marker columns in the payload dtype (float64) so the
    # concatenation does not depend on implicit dtype promotion.
    sosColumn = torch.full((size, 1), SOSToken, dtype=inputData.dtype)
    eosColumn = torch.full((size, 1), EOSToken, dtype=inputData.dtype)
    inputData = torch.cat((sosColumn, inputData, eosColumn), 1)
    target = torch.cat((sosColumn, target, eosColumn), 1)

    return inputData, target


def batchify(data, target, batchSize=100):
    '''
    Split inputs and targets into matching mini-batches along dim 0.

    Args:
        data: tensor of inputs; dim 0 indexes the examples.
        target: tensor of targets. It is chunked with the same chunk count as
            data, so it should have the same number of rows for the batches
            to line up (not checked here).
        batchSize: approximate number of examples per mini-batch (default
            100). floor(rows / batchSize) chunks are requested, so individual
            chunks can hold more than batchSize rows.

    Returns:
        (numMiniBatch, inputMiniBatches, outputMiniBatches) where the last two
        are the tuples returned by Tensor.chunk and numMiniBatch is the number
        of input mini-batches actually produced.

    Raises:
        ValueError: if batchSize is not positive.
    '''
    if batchSize <= 0:
        raise ValueError(f'batchSize must be positive, got {batchSize}')

    # With fewer than batchSize rows the floor is 0 and chunk(0) raises, so
    # always ask for at least one chunk.
    requestedChunks = max(1, data.shape[0] // batchSize)
    inputMiniBatches = data.chunk(requestedChunks)
    outputMiniBatches = target.chunk(requestedChunks)

    # chunk() may return fewer pieces than requested when the rows do not
    # divide evenly; report the real count so callers can index safely.
    numMiniBatch = len(inputMiniBatches)

    return numMiniBatch, inputMiniBatches, outputMiniBatches


In [26]:
#Test Data Generation
print(generateData(size=20, seqLen=2))  # 20 examples; seqLen=2 because each example is a pair of numbers


(tensor([[3.0000, 0.1705, 0.4442, 4.0000],
        [3.0000, 0.4803, 0.9540, 4.0000],
        [3.0000, 0.5810, 0.1027, 4.0000],
        [3.0000, 0.4413, 0.2754, 4.0000],
        [3.0000, 0.7165, 0.4234, 4.0000],
        [3.0000, 0.1566, 0.1660, 4.0000],
        [3.0000, 0.5235, 0.7619, 4.0000],
        [3.0000, 0.2001, 0.3135, 4.0000],
        [3.0000, 0.5073, 0.9845, 4.0000],
        [3.0000, 0.5306, 0.1302, 4.0000],
        [3.0000, 0.6939, 0.3495, 4.0000],
        [3.0000, 0.0915, 0.3242, 4.0000],
        [3.0000, 0.1453, 0.2819, 4.0000],
        [3.0000, 0.3159, 0.0753, 4.0000],
        [3.0000, 0.2520, 0.6531, 4.0000],
        [3.0000, 0.4345, 0.8597, 4.0000],
        [3.0000, 0.4864, 0.9902, 4.0000],
        [3.0000, 0.3614, 0.4958, 4.0000],
        [3.0000, 0.4243, 0.0996, 4.0000],
        [3.0000, 0.2251, 0.9123, 4.0000]], dtype=torch.float64), tensor([[3.0000, 0.6147, 4.0000],
        [3.0000, 1.4343, 4.0000],
        [3.0000, 0.6837, 4.0000],
        [3.0000, 0.7167, 4.0000],


In [27]:

class TransformerModel(nn.Module):
    '''
    Thin sequence-to-sequence wrapper around nn.Transformer.

    Token indices are embedded, run through an encoder/decoder nn.Transformer
    and projected back to one score per token id.

    Args:
        n_token: number of rows in the embedding table and size of the output
            score vector.
        d_model: embedding / transformer feature size.
        n_head: number of attention heads passed to nn.Transformer.
        n_layers: number of layers used for both the encoder and the decoder.

    NOTE(review): nn.Embedding looks up integer indices, whereas generateData
    produces continuous float sequences; those need to be discretised (or the
    embedding swapped for a linear projection) before being fed to this model.
    '''
    def __init__(self, n_token, d_model, n_head, n_layers):
        super().__init__()

        self.d_model = d_model
        # One embedding table shared by the source and target sequences.
        self.embedding = nn.Embedding(n_token, d_model)
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=n_head,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers)
        self.out = nn.Linear(d_model, n_token) # Learned linear at the end where output of decoder is run through

    def forward(self, src, tgt, tgt_mask=None):
        '''
        Run the encoder/decoder and return per-position token scores.

        Args:
            src: integer index tensor of shape (batch, src_len).
            tgt: integer index tensor of shape (batch, tgt_len).
            tgt_mask: optional (tgt_len, tgt_len) attention mask forwarded to
                nn.Transformer. With the default None the decoder can attend
                to every target position, including later ones; pass a causal
                mask for autoregressive training.

        Returns:
            Tensor of shape (batch, tgt_len, n_token).
        '''
        scale = math.sqrt(self.d_model)
        src = self.embedding(src) * scale
        tgt = self.embedding(tgt) * scale

        # nn.Transformer is constructed with its default layout, which puts the
        # sequence dimension first: (seq, batch, feature).
        src = src.permute(1, 0, 2)
        tgt = tgt.permute(1, 0, 2)

        transformer_output = self.transformer(src, tgt, tgt_mask=tgt_mask)
        output = self.out(transformer_output)

        output = output.permute(1, 0, 2) # Permute so that batch num is first dimension again

        return output
 