# In [None]:
##singular 100 words

import gc
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sentence_transformers import SentenceTransformer
from sentence_transformers import util

# ✅ Model list
# Candidate model names. Each entry is passed as-is to SentenceTransformer(...)
# by the evaluation functions in this file.
model_list = [
    "all-MiniLM-L6-v2",
    "all-MiniLM-L12-v2",
    "all-mpnet-base-v2",
    "paraphrase-MiniLM-L6-v2",
    "paraphrase-MiniLM-L12-v2",
    "paraphrase-mpnet-base-v2",
    "paraphrase-distilroberta-base-v1",
    "distiluse-base-multilingual-cased-v2",
    "LaBSE",
    "sentence-t5-base",
    "sentence-t5-large",
    "nli-roberta-base-v2",
    "nli-mpnet-base-v2",
    "stsb-roberta-large"
]

# ✅ Reference answers: 104 sentences, one string per list entry.
# The accuracy loop later in this file treats answerlist[i] as the expected
# answer for testlist[i], so the two lists must stay index-aligned.
answerlist = [
    "Paris is the capital of France.",
    "Sure! Here's a funny joke.",
    "Airplanes fly due to lift generated by their wings.",
    "Quantum physics explains the behavior of particles at a small scale.",
    "You can check today's weather using a weather app.",
    "The president of the USA is Joe Biden (as of 2025).",
    "Here's a recipe for chocolate cake.",
    "You can learn Python through tutorials and practice.",
    "Rainbows are caused by light refraction and reflection in water droplets.",
    "Space is full of amazing facts!",
    "Plants make food using photosynthesis.",
    "Dreams occur during REM sleep and their purpose is still studied.",
    "‘Hello’ in Spanish is ‘Hola’.",
    "Tokyo is in the Japan Standard Time zone.",
    "Try maintaining a consistent sleep schedule.",
    "Here’s a simple daily workout routine.",
    "Photosynthesis is the process by which plants make food from sunlight.",
    "Caffeine has both benefits and drawbacks.",
    "The moon is about 384,400 km from Earth.",
    "No, chocolate is toxic to dogs.",
    "Mount Everest is the tallest mountain on Earth.",
    "Starting a business involves planning and registration.",
    "A black hole is a region of space with intense gravity.",
    "Here's how you can tie a tie.",
    "A healthy breakfast includes protein and fiber.",
    "The sky appears blue due to Rayleigh scattering.",
    "Philosophers have debated the meaning of life for centuries.",
    "Vaccines train the immune system to fight diseases.",
    "AI stands for Artificial Intelligence.",
    "The speed of light is approximately 299,792 km/s.",
    "Check out these critically acclaimed movies.",
    "Leonardo da Vinci painted the Mona Lisa.",
    "Meditation involves focusing the mind for clarity.",
    "Inflation is the rise in prices over time.",
    "Flu symptoms include fever, cough, and fatigue.",
    "Magnets attract materials due to magnetic fields.",
    "Gravity is the force that pulls objects toward each other.",
    "Sure, here’s a short bedtime story.",
    "A resume showcases your skills and experience.",
    "Stocks represent ownership in a company.",
    "Here's how to make pancakes step-by-step.",
    "DNA stands for Deoxyribonucleic Acid.",
    "Pluto is classified as a dwarf planet.",
    "The square root of 64 is 8.",
    "Leaves change color due to chlorophyll breakdown.",
    "Blockchain is a secure, decentralized ledger.",
    "Popular Asian destinations include Japan and Thailand.",
    "The human genome is the complete set of human DNA.",
    "Albert Einstein was a theoretical physicist.",
    "Birds migrate to warmer climates seasonally.",
    "Climate change is the long-term shift in temperatures.",
    "Sure! Try reading ‘To Kill a Mockingbird’.",
    "Democracy is a government by the people.",
    "The Big Bang is the origin of the universe theory.",
    "You can learn guitar through practice and lessons.",
    "Did you know Napoleon was once exiled?",
    "Renewable energy comes from natural sources like wind.",
    "Earthquakes are caused by tectonic plate movements.",
    "Set goals and celebrate small wins to stay motivated.",
    "Mindfulness is being fully present in the moment.",
    "Use repetition and associations to boost memory.",
    "A black swan event is rare and unpredictable.",
    "Here’s a haiku: Gentle breeze whispers / Through the trees on a spring day / Nature's breath in bloom.",
    "Serotonin is a neurotransmitter linked to mood.",
    "Shakespeare wrote many famous plays.",
    "Create a budget and track spending to save money.",
    "Machine learning lets computers learn from data.",
    "The Internet is a network of connected computers.",
    "Wi-Fi transmits data wirelessly using radio waves.",
    "Newton's third law: Every action has an equal and opposite reaction.",
    "Cats purr when they are relaxed or content.",
    "A virus is a tiny infectious agent.",
    "Human rights are basic freedoms and protections.",
    "Start with basic shapes to draw a cat.",
    "A synonym for happy is joyful.",
    "Irony is when the opposite of what's expected happens.",
    "“Life is what happens when you’re busy making other plans.” – John Lennon",
    "The Mariana Trench is the deepest ocean point.",
    "Isaac Newton discovered gravity.",
    "Elephants live up to 60–70 years.",
    "Blockchain is a decentralized digital record.",
    "A palindrome reads the same forward and backward.",
    "Yawning may help regulate brain temperature.",
    "“The only limit is your mind.”",
    "Earth's tilt causes the seasons.",
    "Use tools like calendars and to-do lists.",
    "Computers process data using binary logic.",
    "Shooting stars are meteors burning in the atmosphere.",
    "An algorithm is a step-by-step problem-solving method.",
    "Here’s a fun riddle: What has keys but can't open locks?",
    "Empathy is understanding others' feelings.",
    "Lightning is caused by electric discharge in clouds.",
    "Bees make honey by converting flower nectar.",
    "Democracy allows citizens to vote and participate.",
    "The ozone layer protects Earth from UV rays.",
    "Evolution is the change in species over time.",
    "The Pacific is the largest ocean.",
    "Alexander Graham Bell invented the telephone.",
    "Water is essential for all living things.",
    "Art expresses emotions, ideas, and culture.",
    "A metaphor compares two things symbolically.",
    "A Rubik’s cube is solved using algorithms.",
    "Try making a volcano using baking soda and vinegar.",
    "Stress can be reduced with deep breathing and rest."
]

def plot_pca_embeddings(answerlist, model_name):
    """
    Show a 2-D PCA scatter plot of the embeddings one model produces.

    The sentences are encoded with the named SentenceTransformer model,
    projected onto two principal components, and drawn as unlabeled points.

    Args:
        answerlist (list): Sentences to embed.
        model_name (str): Name handed to ``SentenceTransformer``.
    """
    print(f"\n📌 Plotting PCA for model: {model_name}")

    # Encode the sentences; vectors are explicitly left un-normalized.
    encoder = SentenceTransformer(model_name)
    vectors = encoder.encode(answerlist, normalize_embeddings=False)

    # Reduce to two components and split them into plot axes.
    projected = PCA(n_components=2).fit_transform(vectors)
    first_axis, second_axis = projected[:, 0], projected[:, 1]

    # Draw the scatter plot (points only, no text labels).
    plt.figure(figsize=(10, 6))
    plt.scatter(first_axis, second_axis, marker='o')
    plt.title(f'PCA of Sentence Embeddings - {model_name}')
    plt.xlabel('PCA Component 1')
    plt.ylabel('PCA Component 2')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def _two_component_variance(answerlist, model_name):
    """Return the fraction of embedding variance explained by two PCA components."""
    model = SentenceTransformer(model_name)
    embeddings = model.encode(answerlist, normalize_embeddings=False)
    pca = PCA(n_components=2)
    # Only the fitted variance ratios are needed, so the projection is skipped.
    pca.fit(embeddings)
    # Native float keeps the caller's result dictionary free of NumPy scalars.
    return float(pca.explained_variance_ratio_.sum())


# ✅ Best model based on PCA variance
def get_best_model_by_variance(answerlist, model_list):
    """
    Rank models by how much embedding variance two PCA components explain.

    For every model name the sentences are embedded, a 2-component PCA is
    fitted, the summed explained-variance ratio is recorded, and the PCA
    scatter plot is shown via ``plot_pca_embeddings``. A model that raises at
    any step is reported on stdout and skipped; the remaining models are
    still evaluated.

    Args:
        answerlist (list): Sentences to embed.
        model_list (list): SentenceTransformer model names to evaluate.

    Returns:
        tuple: ``(best_model, variance_data)``. ``best_model`` is the name
            with the highest summed ratio, or ``None`` if no model produced a
            ratio above 0. ``variance_data`` maps each model whose variance
            was computed to that ratio as a native ``float``.
    """
    best_model = None
    best_variance = 0.0
    variance_data = {}

    for model_name in model_list:
        try:
            print(f"\n🔄 Evaluating model: {model_name}")
            # The helper's locals (model, embeddings, PCA) go out of scope on
            # return, so no copy of the model is still referenced here when
            # plot_pca_embeddings loads its own instance below.
            total_var = _two_component_variance(answerlist, model_name)
            print(f"📊 {model_name}: Total PCA variance = {total_var * 100:.2f}%")
            variance_data[model_name] = total_var

            if total_var > best_variance:
                best_variance = total_var
                best_model = model_name
            plot_pca_embeddings(answerlist, model_name)

        except Exception as e:
            # Deliberate best-effort: one failing model must not abort the sweep.
            print(f"⚠️ {model_name}: Error: {e}")

        finally:
            # Runs on success and on failure, so memory is reclaimed even
            # when a model errors out partway through.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    return best_model, variance_data

    
# ✅ Run model evaluation
# Executes immediately when this code runs: every model in model_list is
# evaluated on answerlist and the winner's name is printed.
best_model_name, variance_data = get_best_model_by_variance(answerlist, model_list)
print(f"\n✅ Best model based on PCA variance: {best_model_name}")

# (Optional) print all variances
# variance_data only has entries for models whose variance was computed;
# models that raised before that point are absent.
for model, var in variance_data.items():
    print(f"{model}: {var * 100:.2f}% variance")
import numpy as np

def convert_np_floats_to_float(d):
    """
    Return a copy of a dictionary with NumPy floating scalars made native.

    Args:
        d (dict): Mapping whose values may include NumPy floating scalars.

    Returns:
        dict: New dictionary with the same keys in the same order. Values
            that are ``np.floating`` instances become built-in ``float``;
            every other value is carried over unchanged.
    """
    converted = {}
    for key, value in d.items():
        if isinstance(value, np.floating):
            value = float(value)
        converted[key] = value
    return converted
# Copy of the per-model variance results with NumPy floats made native.
variancedict=convert_np_floats_to_float(variance_data)
# Test queries: 104 strings. testlist[i] is the question whose expected
# answer is answerlist[i]; the accuracy loop in this file compares by index.
testlist = [
    "What's the capital of France?",
    "Tell me a joke.",
    "How do airplanes fly?",
    "Explain quantum physics in simple terms.",
    "What's the weather like today?",
    "Who is the president of the USA?",
    "How to bake a chocolate cake?",
    "Best way to learn Python?",
    "What causes rainbows?",
    "Tell me a fun fact about space.",
    "How do plants make food?",
    "Why do we dream?",
    "Translate 'hello' to Spanish.",
    "What's the time in Tokyo?",
    "Tips for better sleep?",
    "Give me a workout routine.",
    "What is photosynthesis?",
    "Is caffeine good or bad?",
    "How far is the moon?",
    "Can dogs eat chocolate?",
    "What's the tallest mountain?",
    "Steps to start a business?",
    "What is a black hole?",
    "How to tie a tie?",
    "What's a healthy breakfast?",
    "Why is the sky blue?",
    "Meaning of life?",
    "How do vaccines work?",
    "What is AI?",
    "What’s the speed of light?",
    "Best movies of all time?",
    "Who painted the Mona Lisa?",
    "How to meditate?",
    "What is inflation?",
    "Symptoms of flu?",
    "How do magnets work?",
    "What is gravity?",
    "Tell me a bedtime story.",
    "How to write a resume?",
    "What are stocks?",
    "Give me a recipe for pancakes.",
    "What does DNA stand for?",
    "Is Pluto a planet?",
    "What's the square root of 64?",
    "Why do leaves change color?",
    "Explain blockchain simply.",
    "Best travel destinations in Asia?",
    "What is the human genome?",
    "Who was Albert Einstein?",
    "How do birds migrate?",
    "What is climate change?",
    "Can you suggest a book?",
    "Define democracy.",
    "What is the Big Bang?",
    "How to learn guitar?",
    "Tell me a historical fact.",
    "What is renewable energy?",
    "What causes earthquakes?",
    "How do I stay motivated?",
    "What is mindfulness?",
    "How to improve memory?",
    "What's a black swan event?",
    "Give me a haiku.",
    "What is serotonin?",
    "Tell me about Shakespeare.",
    "Best way to save money?",
    "What is machine learning?",
    "Explain the Internet.",
    "How does Wi-Fi work?",
    "What's Newton's third law?",
    "Why do cats purr?",
    "What is a virus?",
    "What are human rights?",
    "How to draw a cat?",
    "What’s a synonym for happy?",
    "Can you define irony?",
    "Give a quote about life.",
    "What's the deepest ocean?",
    "Who discovered gravity?",
    "How long do elephants live?",
    "What is blockchain?",
    "What's a palindrome?",
    "Why do we yawn?",
    "Tell me a motivational quote.",
    "How do seasons work?",
    "How to be more productive?",
    "How do computers work?",
    "What are shooting stars?",
    "What's an algorithm?",
    "Tell me a riddle.",
    "What is empathy?",
    "What causes lightning?",
    "How is honey made?",
    "What is a democracy?",
    "What is the ozone layer?",
    "Explain evolution briefly.",
    "What's the largest ocean?",
    "Who invented the telephone?",
    "Why is water important?",
    "What is the purpose of art?",
    "What is a metaphor?",
    "How do you solve a Rubik's cube?",
    "What's a good science experiment?",
    "How to reduce stress?"
]

def batch_answer(queries, model_name):
    """
    Return, for each query, the most similar sentence from ``answerlist``.

    Both the queries and the module-level ``answerlist`` are embedded with
    the named model; each query is matched to the answer with the highest
    cosine similarity.

    Args:
        queries (str | list): One query string or a list of query strings.
        model_name (str): Name handed to ``SentenceTransformer``.

    Returns:
        list: One entry of ``answerlist`` per query, in query order. An
            empty ``queries`` yields an empty list without loading a model.
    """
    if isinstance(queries, str):
        queries = [queries]
    if len(queries) == 0:
        # Nothing to match; skip the model load entirely.
        return []

    model = SentenceTransformer(model_name)
    try:
        real_embeddings = model.encode(answerlist, convert_to_tensor=False)
        query_embeddings = model.encode(queries, convert_to_tensor=False)

        # util.cos_sim already returns a torch.Tensor, so the best match per
        # query is one row-wise argmax; wrapping each row in torch.tensor()
        # would only copy it and emit a copy-construct warning.
        # NOTE(review): relies on rows = queries, columns = answers, per the
        # sentence-transformers cos_sim documentation.
        similarities = util.cos_sim(query_embeddings, real_embeddings)
        best_indices = similarities.argmax(dim=1).tolist()
        return [answerlist[idx] for idx in best_indices]
    finally:
        # Release the model even if encoding or scoring raised.
        del model
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
# Per model, count how many test queries retrieve exactly the answer stored
# at the same index in answerlist. The stored value is a raw hit count.
accuracydict = {}
for m in model_list:
    L = batch_answer(testlist, m)
    accuracy = 0
    for i, predicted in enumerate(L):
        if predicted == answerlist[i]:
            accuracy += 1
    print(accuracy, m)
    accuracydict[m] = accuracy
import numpy as np

def correlation_between_dicts(dict1, dict2):
    """
    Compute the correlation coefficient between two dictionaries' values.

    Values are paired by key, following the key order of ``dict1``.

    Args:
        dict1 (dict): First mapping of key -> number.
        dict2 (dict): Second mapping; must have exactly the same keys.

    Returns:
        The off-diagonal entry of ``np.corrcoef`` for the two value lists.

    Raises:
        ValueError: If the two dictionaries do not have the same keys.
    """
    if dict1.keys() != dict2.keys():
        raise ValueError("Dictionaries must have the same keys.")

    # Fix one key order so both value lists line up.
    shared_keys = list(dict1)
    first_values = [dict1[k] for k in shared_keys]
    second_values = [dict2[k] for k in shared_keys]

    corr_matrix = np.corrcoef(first_values, second_values)
    return corr_matrix[0, 1]
# Correlate per-model hit counts with per-model PCA variance.
# This is a bare expression: nothing is printed or stored, so the value is
# only visible where the environment echoes the last expression.
# Raises ValueError if any model was skipped during variance evaluation,
# because variancedict would then be missing a key that accuracydict has.
correlation_between_dicts(accuracydict,variancedict)