In [8]:
from concurrent.futures import ThreadPoolExecutor
import math
import random

import pretty_traceback
pretty_traceback.install()

import numpy as np
import rich
import time
import torch
import transformers



In [3]:
seq_len = 5
batch_size = 3
num_embeddings = 32
embedding_depth = 55

relative_embs = torch.nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_depth)
# attention_mask = torch.randn(batch_size, seq_len) <= 0
attention_mask = torch.ones(batch_size, seq_len)
relative_ids = attention_mask.cumsum(dim=-1) + num_embeddings // 2
relative_ids = relative_ids.unsqueeze(1).repeat(1, seq_len, 1)

@torch.jit.script
def build_rel_att_mat(
    attention_mask: torch.Tensor, 
    num_embeddings: int=num_embeddings, 
    seq_len: int=seq_len, 
    batch_size: int=batch_size,
):
    relative_ids_test = torch.empty(batch_size, seq_len, seq_len, dtype=torch.long)
    for i in range(batch_size):
        for j in range(seq_len):
            incr_int = 0
            for k in range(seq_len):
                relative_ids_test[i, j, k] = -i + incr_int + num_embeddings // 2
                if attention_mask[i, j]:
                    incr_int += 1
    return relative_ids_test        

relative_ids_test = build_rel_att_mat(attention_mask=attention_mask, num_embeddings=num_embeddings, seq_len=seq_len, batch_size=batch_size)

In [9]:
relative_ids_test = build_rel_att_mat(attention_mask=attention_mask, num_embeddings=num_embeddings, seq_len=seq_len, batch_size=batch_size)


In [7]:
print(dir(build_rel_att_mat))
print(build_rel_att_mat.code)


['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '_debug_flush_compilation_cache', 'code', 'get_debug_state', 'graph', 'graph_for', 'inlined_graph', 'name', 'qualified_name', 'save', 'save_to_buffer', 'schema']
def build_rel_att_mat(attention_mask: Tensor,
    num_embeddings: int=32,
    seq_len: int=5,
    batch_size: int=3) -> Tensor:
  relative_ids_test = torch.empty([batch_size, seq_len, seq_len], dtype=4)
  for i in range(batch_size):
    for j in range(seq_len):
      incr_int = 0
      for k in range(seq_len):
        _0 = torch.add(torch.neg(i), incr_int)
        _1 = torch.floordiv(num_embeddings, 2)
        _2 = torch.add(_0, _1)
        _3 = torch.select(relative_ids_test, 0, i)
        

In [2]:
# load distilgpt2 model
name = "distilgpt2"
model = transformers.AutoModelForCausalLM.from_pretrained(name)
tok = transformers.AutoTokenizer.from_pretrained(name)

In [3]:
model.cuda();

In [4]:
def p(x):
    output = tok.batch_encode_plus(x, return_tensors="pt")
    for k, v in output.items():
        output[k] = v.cuda()
    return output


In [5]:
stuff = p(["hello world how are you doing"])

output = model.generate(**stuff, max_length=len(stuff["input_ids"][0]) + 10,)
print(output.shape)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


torch.Size([1, 16])


In [6]:
def batcher(dataset, batch_size):
    for i in range(0, len(dataset), batch_size):
        yield dataset[i:i+batch_size]


with ThreadPoolExecutor(max_workers=1) as executor:
    map_ = executor.map(
        lambda x: x,
        batcher(range(10000), 10),
    )

list(map_)

[range(0, 10),
 range(10, 20),
 range(20, 30),
 range(30, 40),
 range(40, 50),
 range(50, 60),
 range(60, 70),
 range(70, 80),
 range(80, 90),
 range(90, 100),
 range(100, 110),
 range(110, 120),
 range(120, 130),
 range(130, 140),
 range(140, 150),
 range(150, 160),
 range(160, 170),
 range(170, 180),
 range(180, 190),
 range(190, 200),
 range(200, 210),
 range(210, 220),
 range(220, 230),
 range(230, 240),
 range(240, 250),
 range(250, 260),
 range(260, 270),
 range(270, 280),
 range(280, 290),
 range(290, 300),
 range(300, 310),
 range(310, 320),
 range(320, 330),
 range(330, 340),
 range(340, 350),
 range(350, 360),
 range(360, 370),
 range(370, 380),
 range(380, 390),
 range(390, 400),
 range(400, 410),
 range(410, 420),
 range(420, 430),
 range(430, 440),
 range(440, 450),
 range(450, 460),
 range(460, 470),
 range(470, 480),
 range(480, 490),
 range(490, 500),
 range(500, 510),
 range(510, 520),
 range(520, 530),
 range(530, 540),
 range(540, 550),
 range(550, 560),
 range(560, 

In [7]:
MODEL = "facebook/bart-base"
tokenizer = transformers.BartTokenizer.from_pretrained(MODEL)
model = transformers.BartForConditionalGeneration.from_pretrained(MODEL)


In [8]:
input_ids = tokenizer.batch_encode_plus(
    [
        "Hello, my friends.", 
        "This is a different sentence.",
    ], 
    return_tensors="pt",
    padding=True
)
print(f"{input_ids = }")

decoder_input_ids = tokenizer.batch_encode_plus(
    [
        "How are you?", 
        "Ah",
    ], 
    padding=True,
    add_special_tokens=False, 
    return_tensors="pt",
)
print(f"{decoder_input_ids = }")

output = model.generate(
    **input_ids, 
    decoder_attention_mask
    =[1,2,3]
)
print(output)
print([tokenizer.decode(x) for x in output])

input_ids = {'input_ids': tensor([[    0, 31414,     6,   127,   964,     4,     2,     1],
        [    0,   713,    16,    10,   430,  3645,     4,     2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1]])}
decoder_input_ids = {'input_ids': tensor([[ 6179,    32,    47,   116],
        [17986,     1,     1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1],
        [1, 0, 0, 0]])}
tensor([[    2,     0, 31414,     6,   127,   964,     4,     2,     1],
        [    2,     0,   713,    16,    10,   430,  3645,     4,     2]])


2022-05-20 14:10:39.902914: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


['</s><s>Hello, my friends.</s><pad>', '</s><s>This is a different sentence.</s>']


In [9]:
torch.stack([x for x in torch.tensor([1, 2, 3, 3, 3]) if x != 3])

tensor([1, 2])