In [None]:
import os
from gensim.models import KeyedVectors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from kaggle_secrets import UserSecretsClient

def load_word2vec_model(path="/kaggle/input/google-word2vec/GoogleNews-vectors-negative300.bin"):
    """Load word vectors from a binary word2vec-format file.

    Args:
        path: Location of the binary word2vec file. Defaults to the
            GoogleNews vectors path under ``/kaggle/input``.

    Returns:
        The object produced by ``KeyedVectors.load_word2vec_format``.
    """
    word_vectors = KeyedVectors.load_word2vec_format(path, binary=True)
    # Emit a confirmation once loading has finished.
    print("Word2Vec model loaded!")
    return word_vectors

# Load the word vectors once at module level using the default path; this
# object is later passed to enrich_prompt as its `model` argument.
model = load_word2vec_model()

def get_similar_words(word, model, topn=2):
    """Return up to ``topn`` purely alphabetic neighbours of ``word``.

    Args:
        word: Token to look up; it must match a key of
            ``model.key_to_index`` exactly.
        model: Object exposing ``key_to_index`` (used for a membership
            test) and ``most_similar(word, topn=...)`` yielding
            ``(word, score)`` pairs.
        topn: Maximum number of neighbours to return.

    Returns:
        A list of at most ``topn`` neighbour words, in the order that
        ``most_similar`` produced them. The list is empty when ``word`` is
        not in the vocabulary or ``topn`` is less than 1.
    """
    if topn < 1 or word not in model.key_to_index:
        return []

    # Filtering happens after the query, so requesting exactly ``topn``
    # candidates would return fewer words whenever some of them contain
    # non-alphabetic characters (underscores, digits, ...). Ask for extra
    # candidates and trim to ``topn`` once the filter has been applied.
    overfetch_factor = 5
    candidates = model.most_similar(word, topn=topn * overfetch_factor)
    alphabetic = [neighbour for neighbour, _ in candidates if neighbour.isalpha()]
    return alphabetic[:topn]

def enrich_prompt(prompt, model, max_sim=2):
    """Annotate each word of ``prompt`` with similar words from ``model``.

    The prompt is split on whitespace. Every token for which
    ``get_similar_words`` finds neighbours is rewritten as
    ``"token (sim1, sim2)"``; all other tokens are kept unchanged. The
    tokens are re-joined with single spaces.

    Args:
        prompt: Text to enrich.
        model: Word-vector model forwarded to ``get_similar_words``.
        max_sim: Maximum number of similar words requested per token.

    Returns:
        The enriched prompt as a single string.
    """
    # Function-scope import: the file does not import `string` at top level.
    import string

    annotated = []
    for word in prompt.split():
        sims = get_similar_words(word, model, topn=max_sim)
        if not sims:
            # Whitespace splitting leaves punctuation attached ("points.",
            # "(AI)"), which makes the raw token miss the vocabulary. Retry
            # with the surrounding punctuation removed, but only when the raw
            # token produced nothing, so tokens that already matched keep
            # their previous result.
            bare = word.strip(string.punctuation)
            if bare and bare != word:
                sims = get_similar_words(bare, model, topn=max_sim)
        annotated.append(f"{word} ({', '.join(sims)})" if sims else word)
    return " ".join(annotated)

def generate_response(prompt, model_name="llama3-70b-8192"):
    """Return a simulated response for ``prompt``.

    No model is called: the function prints a notice and echoes the prompt
    back inside a fixed template.

    Args:
        prompt: Prompt text to respond to.
        model_name: Accepted for interface purposes; not used by this
            simulated implementation.

    Returns:
        The string ``"Response to: "`` followed by the prompt.
    """
    notice = "Simulated response for: {}".format(prompt)
    print(notice)
    reply = "Response to: {}".format(prompt)
    return reply

def analyze_responses(orig, enriched):
    """Print a short comparison of two response texts.

    A TF-IDF vectorizer is fitted on the two texts, and the cosine
    similarity between their vectors is printed, followed by the
    whitespace-delimited word count of each text.

    Args:
        orig: Response produced from the original prompt.
        enriched: Response produced from the enriched prompt.

    Returns:
        None; the results are only printed.
    """
    vectors = TfidfVectorizer().fit_transform([orig, enriched])
    # Row 0 holds the original text, row 1 the enriched one; the slices keep
    # each operand two-dimensional for cosine_similarity.
    first_row = vectors[0:1]
    second_row = vectors[1:2]
    similarity = cosine_similarity(first_row, second_row)[0, 0]

    print("\n==== Response Analysis ====")
    print(f"Similarity Score: {similarity:.4f}")
    orig_word_count = len(orig.split())
    print(f"Original Word Count: {orig_word_count}")
    enriched_word_count = len(enriched.split())
    print(f"Enriched Word Count: {enriched_word_count}")

# Demo driver: build an enriched variant of a fixed prompt, obtain a response
# for each variant, and compare the two responses.
original_prompt = "Describe the future of artificial intelligence in healthcare in 2 concise bullet points."
# Annotate the prompt's words with similar words from the loaded vectors.
enriched_prompt = enrich_prompt(original_prompt, model)

print(f"Original Prompt:\n{original_prompt}")
print(f"\nEnriched Prompt:\n{enriched_prompt}")

# generate_response is called with its default model_name for both prompts.
original_response = generate_response(original_prompt)
enriched_response = generate_response(enriched_prompt)
print(f"\n==== Original Response ====\n{original_response}")
print(f"\n==== Enriched Response ====\n{enriched_response}")
# Print the TF-IDF cosine similarity and word counts of the two responses.
analyze_responses(original_response, enriched_response)
