In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from transformers import GPT2Model, GPT2TokenizerFast

  from .autonotebook import tqdm as notebook_tqdm


In [22]:
class Tokenizer_Fast:
    def __init__(self):
        self.tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
        self.model = GPT2Model.from_pretrained('gpt2')
        self.tokens = None
        self.tokens_ids = None
        self.embeddings = None

    def encode(self, text):
        self.tokens = self.tokenizer.tokenize(text)
        self.tokens_ids = self.tokenizer.convert_tokens_to_ids(self.tokens)

        input_ids = torch.tensor(self.tokens_ids).unsqueeze(0)
        with torch.no_grad():
            outputs = self.model(input_ids)
            self.embeddings = outputs.last_hidden_state
        # print(self.embeddings)
    
    def decode(self, output_ids):
        return self.tokenizer.decode(output_ids, skip_special_tokens=True)

In [33]:
class GPT_2_small:
    def __init__(self):
        self.tokenizer = Tokenizer_Fast()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer.model.to(self.device)

    def forward(self, input_ids):  
        None    
    
    def inference (self, text):
        return self.tokenizer.encode(text)


In [34]:
GPT = GPT_2_small()

In [35]:
%time
text = "Bonjour, je m'appelle Jean"

GPT.inference(text)

CPU times: user 4 μs, sys: 0 ns, total: 4 μs
Wall time: 6.91 μs


In [26]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained model and tokenizer
model_name = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Define a prompt
prompt_text = "Once upon a time,"
input_ids = tokenizer.encode(prompt_text, return_tensors='pt')

# Generate text
output = model.generate(input_ids, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, num_beams=5)

# Decode and print the generated text
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Once upon a time, there was no such thing as a man who was not a member of the Church of Jesus Christ of Latter-day Saints.

The Church was founded by Joseph Smith, the Prophet of Mormonism, and his wife, Lucy Mack Smith. They were the first women to be ordained to the priesthood. The Church's first president, Joseph Fielding Smith Jr., was born in Nauvoo, Illinois, in 1844. He was the youngest of five children. His father


In [27]:
from transformers import GPT2Model, GPT2Tokenizer

# Load pre-trained model and tokenizer
model_name = 'gpt2'  # You can also use 'gpt2-medium', 'gpt2-large', 'gpt2-xl' for larger models
model = GPT2Model.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)


In [28]:
%pip install tiktoken

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [29]:

import tiktoken

enc = tiktoken.get_encoding('gpt2')

test = enc.encode("Bonjour, je m'appelle Jean")

print(test)

print(enc.decode(test))


[20682, 73, 454, 11, 11223, 285, 6, 1324, 13485, 11320]
Bonjour, je m'appelle Jean


In [30]:
%time
test = enc.encode("Bonjour, je m'appelle Jean")


CPU times: user 4 μs, sys: 0 ns, total: 4 μs
Wall time: 10 μs


In [36]:
%time

from transformers import GPT2Tokenizer

# Load the tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Encode a text string
text = "Hello, how are you?"
encoded_input = tokenizer.encode(text)
print("Encoded input:", encoded_input)

# Decode the token IDs back to text
decoded_output = tokenizer.decode(encoded_input)
print("Decoded output:", decoded_output)


CPU times: user 2 μs, sys: 1 μs, total: 3 μs
Wall time: 4.53 μs
Encoded input: [15496, 11, 703, 389, 345, 30]
Decoded output: Hello, how are you?


In [39]:
%time
import tiktoken

# Load the tokenizer for the desired model
enc = tiktoken.get_encoding("gpt2")

# Encode a text string
text = "Hello, how are you?"
encoded_input = enc.encode(text)
print("Encoded input:", encoded_input)

# Decode the token IDs back to text
decoded_output = enc.decode(encoded_input)
print("Decoded output:", decoded_output)


CPU times: user 3 μs, sys: 1 μs, total: 4 μs
Wall time: 7.15 μs
Encoded input: [15496, 11, 703, 389, 345, 30]
Decoded output: Hello, how are you?
