BPE Tokenzier

In [33]:
import tiktoken
from tiktoken.load import load_tiktoken_bpe


tokenizer_path = "gpt//tokenizer.model"
special_tokens = [
            "<|begin_of_text|>",
            "<|end_of_text|>",
            "<|reserved_special_token_0|>",
            "<|reserved_special_token_1|>",
            "<|reserved_special_token_2|>",
            "<|reserved_special_token_3|>",
            "<|start_header_id|>",
            "<|end_header_id|>",
            "<|reserved_special_token_4|>",
            "<|eot_id|>",  # end of turn
        ] + [f"<|reserved_special_token_{i}|>" for i in range(5, 256 - 5)]
mergeable_ranks = load_tiktoken_bpe(tokenizer_path)

tokenizer = tiktoken.Encoding(
    name='tokenizer.model',
    pat_str=r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+",
    mergeable_ranks=mergeable_ranks,
    special_tokens={token: len(mergeable_ranks) + i for i, token in enumerate(special_tokens)},
)

print('------ ENCODE AND DECODE：------')
tokenizer.decode(tokenizer.encode("I'm always looking for new challenges and growth opportunities !"))

------ ENCODE AND DECODE：------


"I'm always looking for new challenges and growth opportunities !"

In [34]:
len(mergeable_ranks)

128000

In [1]:
import torch
import torch.nn as nn 

x = torch.tensor([3, 2, 1], dtype = torch.float, requires_grad = True) 
y = torch.dot(x, x) #y = x1^2 + x2^2 + x3^2 
y.backward(torch.ones_like(y)) 
print(x.grad) 

tensor([6., 4., 2.])


In [5]:
import numpy as np 

x = torch.tensor(np.random.randint(11), dtype = torch.float, requires_grad=True)

y = torch.dot(x, torch.tensor([2]))

y.backward()

RuntimeError: 1D tensors expected, but got 0D and 1D tensors

In [26]:
import numpy as np
import torch

# 生成一个1D数组，其中包含一个随机整数
x = torch.tensor([2,3,4], dtype=torch.float, requires_grad=True)
print(x)
# 创建一个1D张量，包含一个元素2
y = 2 * x
# 进行反向传播
y.sum().backward()

tensor([2., 3., 4.], requires_grad=True)


In [27]:
x.grad

tensor([2., 2., 2.])

In [17]:
x

tensor([8.], requires_grad=True)

In [1]:
import torch

input_ids = torch.rand(size=(5, 128))

position_ids = torch.arange(0, input_ids.size(1), dtype=torch.long).unsqueeze(0)

In [5]:
position_ids

tensor([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
          14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
          28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
          42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
          56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
          70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
          84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
          98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
         126, 127]])

In [7]:
position_index = torch.index_select(position_ids, 1, position_ids.view(-1))
 

In [10]:
position_index.shape

torch.Size([1, 128])

In [5]:
import torch

class TriangularCausalMask():
    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask



# attention mask
t_mask = TriangularCausalMask(B=2, L=128)
t_mask.mask.shape

torch.Size([2, 1, 128, 128])

In [9]:
t_mask.mask * 1e5

tensor([[[[     0., 100000., 100000.,  ..., 100000., 100000., 100000.],
          [     0.,      0., 100000.,  ..., 100000., 100000., 100000.],
          [     0.,      0.,      0.,  ..., 100000., 100000., 100000.],
          ...,
          [     0.,      0.,      0.,  ...,      0., 100000., 100000.],
          [     0.,      0.,      0.,  ...,      0.,      0., 100000.],
          [     0.,      0.,      0.,  ...,      0.,      0.,      0.]]],


        [[[     0., 100000., 100000.,  ..., 100000., 100000., 100000.],
          [     0.,      0., 100000.,  ..., 100000., 100000., 100000.],
          [     0.,      0.,      0.,  ..., 100000., 100000., 100000.],
          ...,
          [     0.,      0.,      0.,  ...,      0., 100000., 100000.],
          [     0.,      0.,      0.,  ...,      0.,      0., 100000.],
          [     0.,      0.,      0.,  ...,      0.,      0.,      0.]]]])