In [1]:
# Import libraries
import gensim.downloader as api
from transformers import pipeline
import nltk
import string
from nltk.tokenize import word_tokenize

# Download the correct NLTK tokenizer
nltk.download('punkt')

# Load pre-trained word vectors
print("Loading pre-trained word vectors...")
word_vectors = api.load("glove-wiki-gigaword-100")  # Load GloVe model

# Load GPT-2 model for text generation
print("Loading GPT-2 model...")
generator = pipeline("text-generation", model="gpt2")

# Function to replace words in the prompt with their most similar words
def replace_keyword_in_prompt(prompt, keyword, word_vectors, topn=1):
    words = word_tokenize(prompt)  # Tokenize the prompt into words
    enriched_words = []

    for word in words:
        cleaned_word = word.lower().strip(string.punctuation)  # Normalize word
        if cleaned_word == keyword.lower():  # Replace only if it matches the keyword
            try:
                # Retrieve similar word
                similar_words = word_vectors.most_similar(cleaned_word, topn=topn)
                if similar_words:
                    replacement_word = similar_words[0][0]  # Choose the most similar word
                    print(f"Replacing '{word}' → '{replacement_word}'")
                    enriched_words.append(replacement_word)
                    continue  # Skip appending the original word
            except KeyError:
                print(f"'{keyword}' not found in the vocabulary. Using original word.")

        enriched_words.append(word)  # Keep original if no replacement was made

    enriched_prompt = " ".join(enriched_words)
    print(f"\n🔹 Enriched Prompt: {enriched_prompt}")
    return enriched_prompt

# Function to generate responses using the Generative AI model
def generate_response(prompt, max_length=100):
    try:
        response = generator(prompt, max_length=max_length, num_return_sequences=1)
        return response[0]['generated_text']
    except Exception as e:
        print(f"Error generating response: {e}")
        return None

# Example original prompt
original_prompt = "Who is king."
print(f"\n🔹 Original Prompt: {original_prompt}")

# Retrieve similar words for key terms in the prompt
key_term = "king"

# Enrich the original prompt
enriched_prompt = replace_keyword_in_prompt(original_prompt, key_term, word_vectors)

# Generate responses for the original and enriched prompts
print("\nGenerating response for the original prompt...")
original_response = generate_response(original_prompt)
print("\nOriginal Prompt Response:")
print(original_response)

print("\nGenerating response for the enriched prompt...")
enriched_response = generate_response(enriched_prompt)
print("\nEnriched Prompt Response:")
print(enriched_response)

# Compare the outputs safely
if original_response and enriched_response:
    print("\nComparison of Responses:")
    print("\nOriginal Prompt Response Length:", len(original_response))
    print("Enriched Prompt Response Length:", len(enriched_response))
    print("\nOriginal Prompt Response Detail:", original_response.count("."))
    print("Enriched Prompt Response Detail:", enriched_response.count("."))
else:
    print("\nOne of the responses could not be generated.")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Loading pre-trained word vectors...
Loading GPT-2 model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu



🔹 Original Prompt: Who is king.


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Replacing 'king' → 'prince'

🔹 Enriched Prompt: Who is prince .

Generating response for the original prompt...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Original Prompt Response:
Who is king. The king I see is King Of Babylon. You are like a horse that has been left all day. I'm not going to be happy unless you take out your hand of that horse and get your hands on my daughter...and it's going to get hurt."

"You needn't be afraid of me!" Cyrus demanded, "this does not matter if you're pregnant, or if you're just doing it with a pen."

Makashi, who

Generating response for the enriched prompt...

Enriched Prompt Response:
Who is prince ..." And thus he says, not out of the mouths of men and beasts that be.

And it is a terrible thing that he said, O King, that he may walk to all of you,

and all of you, O King, as thou art wont...and I will never leave thee, for it is my duty, thy father, and thy son, to stand before thee on my right side now: for this will never be seen

Comparison of Responses:

Original Prompt Response Length: 375
Enriched Prompt Response Length: 355

Original Prompt Response Detail: 8
Enriched Prompt Response Deta