In [None]:
import torch

In [None]:
print(torch.cuda.is_available())

True


In [None]:
print(torch.__version__)

2.5.1+cu121


In [None]:
print(torch.backends.mps.is_available())

False


tensor0d = torch.tensor(1)

In [None]:
tensor1d = torch.tensor([1,2,3])

In [None]:
tensor1d

tensor([1, 2, 3])

In [None]:
tensor2d = torch.tensor([[1, 2], # Creates a two-dimensional tensor from a nested Python list
                         [3, 4]])

In [None]:
tensor3d = torch.tensor([[[1, 2], [3, 4]], # Creates a three-dimensional tensor from a nested Python list
                         [[5, 6],  [7, 8]]])

In [None]:
print("0d tensor: \n", tensor0d, "\n")
print("1d tensor: \n", tensor1d, "\n")
print("2d tensor: \n", tensor2d, "\n")
print("3d tensor: \n", tensor3d, "\n")

0d tensor: 
 tensor(1) 

1d tensor: 
 tensor([1, 2, 3]) 

2d tensor: 
 tensor([[1, 2],
        [3, 4]]) 

3d tensor: 
 tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]]) 



In [None]:
tensor1d = torch.tensor([1,2,3])
print(tensor1d.dtype)

torch.int64


In [None]:
torch.tensor([1,2,3], dtype=torch.float64)

tensor([1., 2., 3.], dtype=torch.float64)

In [None]:
floatvec = torch.tensor([1., 2., 3.])

In [None]:
print(floatvec.dtype)

torch.float32


In [None]:
floatvec = tensor1d.to(torch.float32)
print(floatvec.dtype) # torch.float32

torch.float32


In [None]:
print(floatvec)

tensor([1., 2., 3.])


In [None]:
tensor2d = torch.tensor([[1, 2, 3],
                         [4, 5, 6]])

In [None]:
print(tensor2d.view (3,2))

tensor([[1, 2],
        [3, 4],
        [5, 6]])


In [None]:
print("Transpose: \n ", tensor2d.T, "\n")

Transpose: 
  tensor([[1, 4],
        [2, 5],
        [3, 6]]) 



In [None]:
print(tensor2d.matmul(tensor2d.T))

tensor([[14, 32],
        [32, 77]])


In [None]:
print(tensor2d @ tensor2d.T)

tensor([[14, 32],
        [32, 77]])


In [None]:
import torch.nn.functional as F

In [None]:
# Forward pass of a single logistic-regression neuron.
y = torch.tensor([1.0])   # target label
x1 = torch.tensor([1.1])  # the one input feature
w1 = torch.tensor([2.2])  # weight applied to x1
b = torch.tensor([0.0])   # bias term
z = w1 * x1 + b           # net input (pre-activation)
a = z.sigmoid()           # sigmoid activation, used as the model output

In [None]:
print(z)
print(a)

loss = F.binary_cross_entropy(a, y)
print(loss)

tensor([2.4200])
tensor([0.9183])
tensor(0.0852)


In [None]:
import torch
import torch.nn.functional as F
from torch.autograd import grad

In [None]:
y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b = torch.tensor([0.0], requires_grad=True)

In [None]:
z = x1 * w1 + b
a = torch.sigmoid(z)

In [None]:
loss = F.binary_cross_entropy(a, y)



In [None]:
# Gradients of the loss with respect to each trainable parameter.
# retain_graph=True keeps the autograd graph alive so that `loss` can be
# differentiated more than once.
grad_L_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
grad_L_b = grad(outputs=loss, inputs=b, retain_graph=True)
for gradient in (grad_L_w1, grad_L_b):
    print(gradient)

(tensor([-0.0898]),)
(tensor([-0.0817]),)


class NeuralNetwork(torch.nn.Module):
    """Fully connected network with two ReLU hidden layers (30 and 20 units).

    Args:
        num_inputs: Number of input features per example.
        num_outputs: Number of output units produced per example.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()

        nn = torch.nn
        first_hidden = [nn.Linear(num_inputs, 30), nn.ReLU()]
        second_hidden = [nn.Linear(30, 20), nn.ReLU()]
        output_layer = [nn.Linear(20, num_outputs)]

        # Unpack everything into one Sequential so the modules keep the
        # flat indices 0-4 (e.g. self.layers[0] is the first Linear layer).
        self.layers = nn.Sequential(*first_hidden, *second_hidden, *output_layer)

    def forward(self, x):
        """Run x through the layer stack and return the output-layer values (no activation is applied to them)."""
        return self.layers(x)

In [None]:
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model)

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [None]:
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable model parameters: ", num_params)

Total number of trainable model parameters:  2213


In [None]:
print(model.layers[0].weight)

Parameter containing:
tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],
        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],
        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],
        ...,
        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],
        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],
        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],
       requires_grad=True)


In [None]:
X = torch.rand((1, 50))

In [None]:
with torch.no_grad():
    out = torch.softmax(model(X), dim=1)

In [None]:
print(out)

tensor([[0.2983, 0.3896, 0.3121]])


In [None]:
from torch.utils.data import Dataset, DataLoader

In [None]:
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

In [None]:
X_train.shape

torch.Size([5, 2])

In [None]:
y_train = torch.tensor([0, 0, 0, 1, 1]) # class labels

In [None]:
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6]
])

y_test = torch.tensor([0, 1])

In [None]:
class ToyDataset(Dataset):
    """Dataset that pairs each entry of X with the entry of y at the same index.

    Args:
        X: Indexable collection of features.
        y: Indexable collection of labels; its ``shape[0]`` defines the
            dataset length.
    """

    def __init__(self, X, y):
        self.features, self.labels = X, y

    def __len__(self):
        # The dataset size is read from the first dimension of the labels.
        return self.labels.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [None]:
train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [None]:
print(len(train_ds))

5


torch.manual_seed(123)

In [None]:
# Training loader: shuffled mini-batches of 2 examples, loaded in the main process.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True # drops the final incomplete batch: 5 samples with batch_size=2 leave one sample over
)

In [None]:
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=False,
    num_workers=0
)

In [None]:
for idx, (x, y) in enumerate(train_loader):
    print(f"Batch {idx + 1}: ", x, y)
    print()

Batch 1:  tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]) tensor([1, 0])

Batch 2:  tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]) tensor([0, 0])



In [None]:
import urllib.request
import re

In [None]:
url = ("https://raw.githubusercontent.com/rasbt/"
	   "LLMs-from-scratch/main/ch02/01_main-chapter-code/"
	   "the-verdict.txt")

In [None]:
# Download the .txt file to a relative path, i.e. into the current working
# directory. NOTE(review): this runs unconditionally, so the file is fetched
# again on every execution of the cell.
file_path = "the-verdict.txt"
urllib.request.urlretrieve(url, file_path)

('the-verdict.txt', <http.client.HTTPMessage at 0x7b9d0f6a1e10>)

In [None]:
with open("the-verdict.txt", "r", encoding="utf-8") as f:
	raw_text = f.read()

In [None]:
print(raw_text[:1000])

I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no great surprise to me to hear that, in the height of his glory, he had dropped his painting, married a rich widow, and established himself in a villa on the Riviera. (Though I rather thought it would have been Rome or Florence.)

"The height of his glory"--that was what the women called it. I can hear Mrs. Gideon Thwing--his last Chicago sitter--deploring his unaccountable abdication. "Of course it's going to send the value of my picture 'way up; but I don't think of that, Mr. Rickham--the loss to Arrt is all I think of." The word, on Mrs. Thwing's lips, multiplied its _rs_ as though they were reflected in an endless vista of mirrors. And it was not only the Mrs. Thwings who mourned. Had not the exquisite Hermia Croft, at the last Grafton Gallery show, stopped me before Gisburn's "Moon-dancers" to say, with tears in her eyes: "We shall not look upon its like again"?

Well!--even through th

In [None]:
print("Total number of character:", len(raw_text))
print(raw_text[:99])

Total number of character: 20479
I HAD always thought Jack Gisburn rather a cheap genius--though a good fellow enough--so it was no 


In [None]:
#           Tokenization
# Split text on whitespace characters
text = "Hello, world. This, is a test."
result = re.split(r"(\s)", text)
print("\n", result)


 ['Hello,', ' ', 'world.', ' ', 'This,', ' ', 'is', ' ', 'a', ' ', 'test.']


In [None]:
# Split text on whitespace, commas, and periods
result = re.split(r"([,.]|\s)", text)
print("\n", result)


 ['Hello', ',', '', ' ', 'world', '.', '', ' ', 'This', ',', '', ' ', 'is', ' ', 'a', ' ', 'test', '.', '']


In [None]:
# Optional, remove redundant whitespace characters
result = [item for item in result if item.strip()]
print("\n", result)


 ['Hello', ',', 'world', '.', 'This', ',', 'is', 'a', 'test', '.']


In [None]:
# Split text to handle more such as question marks, quotation marks, and double-dashes.
text = "Hello, world. Is this-- a test?"
result = re.split(r'([,.:;?_!"()\']|--|\s)', text)
result = [item.strip() for item in result if item.strip()] # fully removes whitespaces
print("\n", result, " \n Token Count:", len(result))


 ['Hello', ',', 'world', '.', 'Is', 'this', '--', 'a', 'test', '?']  
 Token Count: 10


In [None]:
# Basic Tokenizer applied to full short story, "the-verdict.txt"
preprocessed = re.split(r'([,.:;?_!"()\']|--|\s)', raw_text)
preprocessed = [item.strip() for item in preprocessed if item.strip()]
print("\n Sample of Tokenized output: \n", preprocessed[:30], "\n Full Token Count:", len(preprocessed))


 Sample of Tokenized output: 
 ['I', 'HAD', 'always', 'thought', 'Jack', 'Gisburn', 'rather', 'a', 'cheap', 'genius', '--', 'though', 'a', 'good', 'fellow', 'enough', '--', 'so', 'it', 'was', 'no', 'great', 'surprise', 'to', 'me', 'to', 'hear', 'that', ',', 'in'] 
 Full Token Count: 4690


In [None]:
import re


class SimpleTokenizerV1:
    """Minimal word-level tokenizer backed by a fixed vocabulary.

    Args:
        vocab: Mapping from token string to integer ID. The inverse mapping
            (ID to token string) is derived from it for decoding.
    """

    def __init__(self, vocab):
        self.str_to_int = vocab
        self.int_to_str = {i: s for s, i in vocab.items()}

    def encode(self, text):
        """Split text into tokens and convert them to token IDs.

        The text is split on whitespace, ``--`` and the punctuation
        characters ``, . : ; ? _ ! " ( ) '``; the delimiters are kept as
        tokens, while whitespace-only pieces are discarded.

        Args:
            text: The string to tokenize.

        Returns:
            List of integer token IDs.

        Raises:
            KeyError: If a token is not present in the vocabulary.
        """
        preprocessed = re.split(r'([,.:;?_!"()\']|--|\s)', text)
        preprocessed = [item.strip() for item in preprocessed if item.strip()]
        ids = [self.str_to_int[s] for s in preprocessed]
        return ids

    def decode(self, ids):
        """Convert token IDs back into text.

        Tokens are joined with single spaces, then the whitespace in front
        of punctuation is removed. The round trip is not exact: for
        instance, a space remains after an apostrophe or an opening quote.

        Args:
            ids: Iterable of integer token IDs.

        Returns:
            The reconstructed string.

        Raises:
            KeyError: If an ID is not present in the vocabulary.
        """
        text = " ".join([self.int_to_str[i] for i in ids])
        # Remove spaces before punctuation. ':' and ';' are included because
        # encode() splits them off as separate tokens as well.
        text = re.sub(r'\s+([,.:;?!"()\'])', r'\1', text)
        return text

In [None]:
with open("the-verdict.txt", "r", encoding="utf-8") as f:
	raw_text = f.read()

In [None]:
#           Tokenization
preprocessed = re.split(r'([,.:;?_!"()\']|--|\s)', raw_text)
preprocessed = [item.strip() for item in preprocessed if item.strip()]

In [None]:
print(preprocessed[:30])
# Converting Tokens into Token IDs
all_words = sorted(set(preprocessed))
vocab_size = len(all_words)
print(all_words)

['I', 'HAD', 'always', 'thought', 'Jack', 'Gisburn', 'rather', 'a', 'cheap', 'genius', '--', 'though', 'a', 'good', 'fellow', 'enough', '--', 'so', 'it', 'was', 'no', 'great', 'surprise', 'to', 'me', 'to', 'hear', 'that', ',', 'in']
['!', '"', "'", '(', ')', ',', '--', '.', ':', ';', '?', 'A', 'Ah', 'Among', 'And', 'Are', 'Arrt', 'As', 'At', 'Be', 'Begin', 'Burlington', 'But', 'By', 'Carlo', 'Chicago', 'Claude', 'Come', 'Croft', 'Destroyed', 'Devonshire', 'Don', 'Dubarry', 'Emperors', 'Florence', 'For', 'Gallery', 'Gideon', 'Gisburn', 'Gisburns', 'Grafton', 'Greek', 'Grindle', 'Grindles', 'HAD', 'Had', 'Hang', 'Has', 'He', 'Her', 'Hermia', 'His', 'How', 'I', 'If', 'In', 'It', 'Jack', 'Jove', 'Just', 'Lord', 'Made', 'Miss', 'Money', 'Monte', 'Moon-dancers', 'Mr', 'Mrs', 'My', 'Never', 'No', 'Now', 'Nutley', 'Of', 'Oh', 'On', 'Once', 'Only', 'Or', 'Perhaps', 'Poor', 'Professional', 'Renaissance', 'Rickham', 'Riviera', 'Rome', 'Russian', 'Sevres', 'She', 'Stroud', 'Strouds', 'Suddenly', '

In [None]:
vocab = {token:integer for integer, token in enumerate(all_words)}

In [None]:
print(vocab)

{'!': 0, '"': 1, "'": 2, '(': 3, ')': 4, ',': 5, '--': 6, '.': 7, ':': 8, ';': 9, '?': 10, 'A': 11, 'Ah': 12, 'Among': 13, 'And': 14, 'Are': 15, 'Arrt': 16, 'As': 17, 'At': 18, 'Be': 19, 'Begin': 20, 'Burlington': 21, 'But': 22, 'By': 23, 'Carlo': 24, 'Chicago': 25, 'Claude': 26, 'Come': 27, 'Croft': 28, 'Destroyed': 29, 'Devonshire': 30, 'Don': 31, 'Dubarry': 32, 'Emperors': 33, 'Florence': 34, 'For': 35, 'Gallery': 36, 'Gideon': 37, 'Gisburn': 38, 'Gisburns': 39, 'Grafton': 40, 'Greek': 41, 'Grindle': 42, 'Grindles': 43, 'HAD': 44, 'Had': 45, 'Hang': 46, 'Has': 47, 'He': 48, 'Her': 49, 'Hermia': 50, 'His': 51, 'How': 52, 'I': 53, 'If': 54, 'In': 55, 'It': 56, 'Jack': 57, 'Jove': 58, 'Just': 59, 'Lord': 60, 'Made': 61, 'Miss': 62, 'Money': 63, 'Monte': 64, 'Moon-dancers': 65, 'Mr': 66, 'Mrs': 67, 'My': 68, 'Never': 69, 'No': 70, 'Now': 71, 'Nutley': 72, 'Of': 73, 'Oh': 74, 'On': 75, 'Once': 76, 'Only': 77, 'Or': 78, 'Perhaps': 79, 'Poor': 80, 'Professional': 81, 'Renaissance': 82, 'Ri

In [None]:
tokenizer = SimpleTokenizerV1(vocab)

In [None]:
text = """"It's the last he painted, you know,"
           Mrs. Gisburn said with pardonable pride."""

In [None]:
ids = tokenizer.encode(text)
print("\n Token IDs:", ids)


 Token IDs: [1, 56, 2, 850, 988, 602, 533, 746, 5, 1126, 596, 5, 1, 67, 7, 38, 851, 1108, 754, 793, 7]


In [None]:
decoded_ids = tokenizer.decode(ids)
print("\n Decoded IDs:", decoded_ids)


 Decoded IDs: " It' s the last he painted, you know," Mrs. Gisburn said with pardonable pride.


In [None]:
text = "Hello, do you like tea?"
print(tokenizer.encode(text))

KeyError: 'Hello'

In [None]:
# Show the first 51 (token, id) pairs of the vocabulary
for entry in list(vocab.items())[:51]:
    print(entry)

('!', 0)
('"', 1)
("'", 2)
('(', 3)
(')', 4)
(',', 5)
('--', 6)
('.', 7)
(':', 8)
(';', 9)
('?', 10)
('A', 11)
('Ah', 12)
('Among', 13)
('And', 14)
('Are', 15)
('Arrt', 16)
('As', 17)
('At', 18)
('Be', 19)
('Begin', 20)
('Burlington', 21)
('But', 22)
('By', 23)
('Carlo', 24)
('Chicago', 25)
('Claude', 26)
('Come', 27)
('Croft', 28)
('Destroyed', 29)
('Devonshire', 30)
('Don', 31)
('Dubarry', 32)
('Emperors', 33)
('Florence', 34)
('For', 35)
('Gallery', 36)
('Gideon', 37)
('Gisburn', 38)
('Gisburns', 39)
('Grafton', 40)
('Greek', 41)
('Grindle', 42)
('Grindles', 43)
('HAD', 44)
('Had', 45)
('Hang', 46)
('Has', 47)
('He', 48)
('Her', 49)
('Hermia', 50)


In [None]:
all_tokens = sorted(list(set(preprocessed)))

In [None]:
print(all_tokens)

['!', '"', "'", '(', ')', ',', '--', '.', ':', ';', '?', 'A', 'Ah', 'Among', 'And', 'Are', 'Arrt', 'As', 'At', 'Be', 'Begin', 'Burlington', 'But', 'By', 'Carlo', 'Chicago', 'Claude', 'Come', 'Croft', 'Destroyed', 'Devonshire', 'Don', 'Dubarry', 'Emperors', 'Florence', 'For', 'Gallery', 'Gideon', 'Gisburn', 'Gisburns', 'Grafton', 'Greek', 'Grindle', 'Grindles', 'HAD', 'Had', 'Hang', 'Has', 'He', 'Her', 'Hermia', 'His', 'How', 'I', 'If', 'In', 'It', 'Jack', 'Jove', 'Just', 'Lord', 'Made', 'Miss', 'Money', 'Monte', 'Moon-dancers', 'Mr', 'Mrs', 'My', 'Never', 'No', 'Now', 'Nutley', 'Of', 'Oh', 'On', 'Once', 'Only', 'Or', 'Perhaps', 'Poor', 'Professional', 'Renaissance', 'Rickham', 'Riviera', 'Rome', 'Russian', 'Sevres', 'She', 'Stroud', 'Strouds', 'Suddenly', 'That', 'The', 'Then', 'There', 'They', 'This', 'Those', 'Though', 'Thwing', 'Thwings', 'To', 'Usually', 'Venetian', 'Victor', 'Was', 'We', 'Well', 'What', 'When', 'Why', 'Yes', 'You', '_', 'a', 'abdication', 'able', 'about', 'above',

In [None]:
all_tokens.extend(["<|endoftext|>", "<|unk|>"])

In [None]:
vocab_size = len(all_tokens)
print("\n Vocabular Size: ", vocab_size, "\n")


 Vocabular Size:  1132 



In [None]:
class SimpleTokenizerV2:
    """Word-level tokenizer that maps out-of-vocabulary tokens to ``<|unk|>``.

    Args:
        vocab: Mapping from token string to integer ID. It must contain the
            ``"<|unk|>"`` token for unknown words to be encodable.
    """

    def __init__(self, vocab):
        self.str_to_int = vocab
        self.int_to_str = {i: s for s, i in vocab.items()}

    def encode(self, text):
        """Split text into tokens and convert them to token IDs.

        The text is split on whitespace, ``--`` and the punctuation
        characters ``, . : ; ? _ ! " ( ) '``; the delimiters are kept as
        tokens, while whitespace-only pieces are discarded. Tokens missing
        from the vocabulary are replaced by ``"<|unk|>"``.

        Args:
            text: The string to tokenize.

        Returns:
            List of integer token IDs.

        Raises:
            KeyError: If an unknown token is met and the vocabulary has no
                ``"<|unk|>"`` entry.
        """
        preprocessed = re.split(r'([,.:;?_!"()\']|--|\s)', text)
        preprocessed = [item.strip() for item in preprocessed if item.strip()]
        preprocessed = [item if item in self.str_to_int else "<|unk|>" for item in preprocessed]
        ids = [self.str_to_int[s] for s in preprocessed]
        return ids

    def decode(self, ids):
        """Convert token IDs back into text.

        Tokens are joined with single spaces, then the whitespace in front
        of punctuation is removed.

        Args:
            ids: Iterable of integer token IDs.

        Returns:
            The reconstructed string.

        Raises:
            KeyError: If an ID is not present in the vocabulary.
        """
        text = " ".join([self.int_to_str[i] for i in ids])
        # Remove spaces before punctuation. ':' and ';' are included because
        # encode() splits them off as separate tokens as well.
        text = re.sub(r'\s+([,.:;?!"()\'])', r'\1', text)
        return text

In [None]:
# Creating Vocabulary dictionary
vocab = {token:integer for integer, token in enumerate(all_tokens)}

tokenizer = SimpleTokenizerV2(vocab)

In [None]:
text1 = "Hello, do you like tea?"
text2 = "In the sunlit terraces of the palace."

In [None]:
text = " <|endoftext|> ".join((text1, text2))
print(text)

Hello, do you like tea? <|endoftext|> In the sunlit terraces of the palace.


In [None]:
ids = tokenizer.encode(text)
print("\n Token IDs:", ids)


 Token IDs: [1131, 5, 355, 1126, 628, 975, 10, 1130, 55, 988, 956, 984, 722, 988, 1131, 7]


In [None]:
decoded_ids = tokenizer.decode(ids)
print("\n Decoded IDs:", decoded_ids)


 Decoded IDs: <|unk|>, do you like tea? <|endoftext|> In the sunlit terraces of the <|unk|>.


In [None]:
# Printing last 5 entries of the updated vocabulary
# (the loop index from enumerate was never used, so iterate the slice directly)
for item in list(vocab.items())[-5:]:
    print(item)

('younger', 1127)
('your', 1128)
('yourself', 1129)
('<|endoftext|>', 1130)
('<|unk|>', 1131)


In [None]:
!pip install tiktoken
from importlib.metadata import version
import tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [None]:
print("tiktoken version:", version("tiktoken"))

tiktoken version: 0.8.0


In [None]:
tokenizer = tiktoken.get_encoding("gpt2")

In [None]:
# Adjacent string literals are concatenated verbatim, so the first literal
# must end with a space; without it the text reads "terracesof".
# NOTE(review): previously recorded token IDs were produced from the old text.
text = (
    "Hello, do you like tea? <|endoftext|> In the sunlit terraces "
    "of someunknownPlace."
)


In [None]:
integers = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
print(integers)

[15496, 11, 466, 345, 588, 8887, 30, 220, 50256, 554, 262, 4252, 18250, 8812, 2114, 1659, 617, 34680, 27271, 13]


In [None]:
strings = tokenizer.decode(integers)
print(strings)

Hello, do you like tea? <|endoftext|> In the sunlit terracesof someunknownPlace.


In [None]:
# Exercise 2.1
text = "Akwirw ier"
token_ids = tokenizer.encode(text)
print("\n", token_ids)
decoded_token_ids = tokenizer.decode(token_ids)
print(decoded_token_ids)


 [33901, 86, 343, 86, 220, 959]
Akwirw ier


In [None]:
!pip install tiktoken
from importlib.metadata import version
import tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [None]:
print("tiktoken version:", version("tiktoken"))

tiktoken version: 0.8.0


In [None]:
tokenizer = tiktoken.get_encoding("gpt2")

In [None]:
# Adjacent string literals are concatenated verbatim, so the first literal
# must end with a space; without it the text reads "terracesof".
# NOTE(review): previously recorded token IDs were produced from the old text.
text = (
    "Hello, do you like tea? <|endoftext|> In the sunlit terraces "
    "of someunknownPlace."
)

In [None]:
integers = tokenizer.encode(text, allowed_special={"<|endoftext|>"})
print(integers)

[15496, 11, 466, 345, 588, 8887, 30, 220, 50256, 554, 262, 4252, 18250, 8812, 2114, 1659, 617, 34680, 27271, 13]


In [None]:
strings = tokenizer.decode(integers)
print(strings)

Hello, do you like tea? <|endoftext|> In the sunlit terracesof someunknownPlace.


In [None]:
# Exercise 2.1
text = "Akwirw ier"
token_ids = tokenizer.encode(text)
print("\n", token_ids)
decoded_token_ids = tokenizer.decode(token_ids)
print(decoded_token_ids)


 [33901, 86, 343, 86, 220, 959]
Akwirw ier
