In [2]:
!pip install transformers
from transformers import BertTokenizer, BertForTokenClassification
import torch

# Load the tokenizer and the CoNLL-03 fine-tuned token-classification model.
# NOTE(review): the tokenizer is loaded from a different checkpoint than the
# model; this presumably relies on both sharing the same cased WordPiece
# vocabulary — confirm before swapping either checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = BertForTokenClassification.from_pretrained('dbmdz/bert-large-cased-finetuned-conll03-english')

# Original sentence
sentence = "John lives in New York City."

# Encode the sentence. Calling the tokenizer adds the [CLS]/[SEP] special
# tokens and also returns the attention mask the model expects.
encoding = tokenizer(sentence, return_tensors="pt")
input_ids = encoding["input_ids"]
token_ids = input_ids[0].tolist()
tokens = tokenizer.convert_ids_to_tokens(token_ids)

# Perform NER prediction with BERT (inference only, so no gradients)
with torch.no_grad():
    outputs = model(**encoding)
    logits = outputs.logits

# Most likely label id per token. The single batch element is indexed
# explicitly: squeeze() would collapse *every* size-1 axis, not just the batch.
predicted_labels = torch.argmax(logits, dim=2)[0].tolist()

# Map label ids to entity types using the mapping stored with the checkpoint,
# so the names cannot drift from what the model was actually trained on.
label_map = dict(model.config.id2label)
predicted_entities = [label_map[label] for label in predicted_labels]

# Rebuild whole words from WordPiece tokens:
#   * [CLS]/[SEP] are dropped regardless of the label predicted for them;
#   * "##" continuation pieces are glued back onto the preceding word, which
#     keeps the label predicted for its first piece.
special_tokens = {tokenizer.cls_token, tokenizer.sep_token}
words = []  # each entry: [surface text, entity label of the first piece]
for token, entity in zip(tokens, predicted_entities):
    if token in special_tokens:
        continue
    if token.startswith("##") and words:
        words[-1][0] += token[2:]
        continue
    words.append([token, entity])

# Replace every word that belongs to an entity with its label tag
labeled_sentence = " ".join(
    text if entity == 'O' else f'[{entity}]' for text, entity in words
)

print("Original Sentence:", sentence)
print("Labeled Sentence:", labeled_sentence)


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m88.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/998 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Original Sentence: John lives in New York City.
Labeled Sentence: [I-PER] lives in [I-LOC] [I-LOC] [I-LOC] .


In [3]:
!pip install transformers
from transformers import GPT2Tokenizer, GPT2LMHeadModel

import random

# GPT-2 tokenizer and language-model head, both from the 'gpt2' checkpoint
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Sentence skeleton; every bracketed tag is a slot to be filled with a word
template = "The [ADJECTIVE] [NOUN] [VERB] [ADVERB] in the [ADJECTIVE] [NOUN]."

# Candidate words for each kind of slot
adjectives = [
    "quick",
    "lazy",
    "happy",
    "sad",
]
nouns = [
    "cat",
    "dog",
    "sun",
    "moon",
]
verbs = [
    "jumped",
    "ran",
    "slept",
    "cried",
]
adverbs = [
    "quickly",
    "slowly",
    "happily",
    "sadly",
]

# Function to generate text based on template and word lists
def generate_text(template, adjectives, nouns, verbs, adverbs, max_length=100):
    """Fill a placeholder template with random words and let the model continue it.

    Args:
        template: Text containing any of the placeholders ``[ADJECTIVE]``,
            ``[NOUN]``, ``[VERB]`` and ``[ADVERB]``.
        adjectives: Candidate words for ``[ADJECTIVE]``.
        nouns: Candidate words for ``[NOUN]``.
        verbs: Candidate words for ``[VERB]``.
        adverbs: Candidate words for ``[ADVERB]``.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The decoded text produced by the module-level ``model`` when prompted
        with the filled-in template (decoded with the module-level
        ``tokenizer``, special tokens skipped).

    Raises:
        IndexError: If a placeholder occurs in ``template`` but its word list
            is empty (raised by ``random.choice``).
    """
    # Explicit placeholder -> word-list table (replaces an eval() name lookup).
    word_lists = {
        "[ADJECTIVE]": adjectives,
        "[NOUN]": nouns,
        "[VERB]": verbs,
        "[ADVERB]": adverbs,
    }

    # Fill the occurrences one at a time so that each one gets its own random
    # draw; a plain str.replace() would put the same word in every slot.
    generated_text = template
    for placeholder, words in word_lists.items():
        for _ in range(template.count(placeholder)):
            generated_text = generated_text.replace(placeholder, random.choice(words), 1)

    # Encode the complete prompt. Nothing is trimmed from the end: slicing off
    # the last id would discard the prompt's real final token (the closing "."
    # of the template), not an end-of-text marker.
    encoded = tokenizer(generated_text, return_tensors="pt")

    # Sample a continuation; the encoded dict is unpacked so that everything
    # the tokenizer returned is forwarded to generate().
    output = model.generate(
        **encoded,
        max_length=max_length,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Fill the template with random words, let the model continue it, and show the result
generated_text = generate_text(
    template=template,
    adjectives=adjectives,
    nouns=nouns,
    verbs=verbs,
    adverbs=adverbs,
)
print(generated_text)


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The happy dog cried slowly in the happy dog.

Now there's nothing you can do to keep his love in your heart. But not for no reason. There won't ever ever be love, because he can be hurt. The only way to kill him is to make a painful but inevitable change in him, no matter what he did.

Love, compassion, love is everything.

When love is only as unconditional as it is unconditional, and when every thing in its place
