In [6]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from sentence_transformers import SentenceTransformer, util
import torch

# Checkpoint identifiers for the pretrained models loaded below.
QA_CHECKPOINT = "distilbert-base-uncased-distilled-squad"
RETRIEVAL_CHECKPOINT = "paraphrase-MiniLM-L6-v2"

# Extractive question answering: the tokenizer and the model are loaded from
# the same checkpoint.
qa_tokenizer = AutoTokenizer.from_pretrained(QA_CHECKPOINT)
qa_model = AutoModelForQuestionAnswering.from_pretrained(QA_CHECKPOINT)

# Sentence-embedding model used to score how well a passage matches a question.
retrieval_model = SentenceTransformer(RETRIEVAL_CHECKPOINT)

def get_answer(question, context):
    """Extract the answer to ``question`` from ``context``.

    Feeds the question/context pair through the module-level ``qa_tokenizer``
    and ``qa_model`` and decodes the highest-scoring token span.

    Args:
        question: Question text.
        context: Passage expected to contain the answer.

    Returns:
        The decoded answer span as a string. The text is rebuilt from token
        ids, so its casing and spacing may differ from ``context``.
    """
    # Truncate only the context: an over-long passage would otherwise push the
    # input past the model's maximum length and fail inside the forward pass.
    inputs = qa_tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation="only_second",
    )
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = qa_model(input_ids=input_ids, attention_mask=attention_mask)

    # A single example is encoded, so work on the 1-D score vectors.
    start_scores = outputs.start_logits[0]
    end_scores = outputs.end_logits[0]

    # Score every (start, end) pair jointly and rule out spans that end before
    # they start. Taking the two argmaxes independently can produce such a
    # span, which silently decodes to an empty answer. Whenever the independent
    # argmaxes already form a valid span, this picks the same one.
    span_scores = start_scores.unsqueeze(1) + end_scores.unsqueeze(0)
    positions = torch.arange(span_scores.size(0), device=span_scores.device)
    ends_before_start = positions.unsqueeze(1) > positions.unsqueeze(0)
    span_scores = span_scores.masked_fill(ends_before_start, float("-inf"))

    # argmax runs over the flattened matrix; recover (row, column) = (start, end).
    start_idx, end_idx = divmod(int(torch.argmax(span_scores)), span_scores.size(1))

    # end_idx is inclusive, hence the +1 for the slice.
    return qa_tokenizer.decode(
        input_ids[0][start_idx:end_idx + 1],
        skip_special_tokens=True,
    )

def retrieve_context(query, contexts):
    """Return the passage most similar to ``query`` and its similarity score.

    Both the query and every candidate passage are embedded with the
    module-level ``retrieval_model`` and compared by cosine similarity.

    Args:
        query: Question text to match against the passages.
        contexts: Non-empty, indexable collection of candidate passages.

    Returns:
        A ``(best_context, score)`` tuple: the element of ``contexts`` with
        the highest cosine similarity to ``query`` and that similarity as a
        Python float.

    Raises:
        ValueError: If ``contexts`` is empty.
    """
    # Fail early with a clear message instead of an unrelated error from the
    # encoder or from argmax over an empty tensor.
    if len(contexts) == 0:
        raise ValueError("contexts must contain at least one passage")

    query_embedding = retrieval_model.encode(query, convert_to_tensor=True)
    context_embeddings = retrieval_model.encode(contexts, convert_to_tensor=True)

    # One query against all passages gives a single row of scores; take that
    # row so the argmax index maps directly onto ``contexts``.
    similarity_scores = util.pytorch_cos_sim(query_embedding, context_embeddings)[0]
    best_match_idx = int(torch.argmax(similarity_scores))
    return contexts[best_match_idx], similarity_scores[best_match_idx].item()

if __name__ == "__main__":
    # Demo knowledge base: one passage per topic. Each passage is a single
    # string assembled from adjacent literals.
    sample_contexts = [
        (
            "The Eiffel Tower, a wrought-iron lattice tower, is located in Paris, France. "
            "It was named after the engineer Gustave Eiffel, whose company designed and built the tower. "
            "The structure is 330 meters tall and is one of the most iconic landmarks in the world."
        ),
        (
            "The Great Wall of China is a historic fortification that stretches across northern China. "
            "Built to protect against invasions, the wall spans approximately 13,000 miles. "
            "It is made of stone, brick, and other materials, and parts of it date back to the 7th century BC."
        ),
        (
            "Mount Everest is the highest mountain in the world, with a peak reaching 8,848.86 meters above sea level. "
            "Located in the Himalayas, it lies on the border between Nepal and the Tibet Autonomous Region of China. "
            "The mountain was first successfully climbed by Sir Edmund Hillary and Tenzing Norgay in 1953."
        ),
        (
            "Machine learning (ML) is a subset of artificial intelligence (AI) that focuses on enabling machines to learn from data. "
            "ML models can be classified into supervised, unsupervised, and reinforcement learning categories. "
            "Popular algorithms include linear regression, decision trees, and neural networks."
        ),
        (
            "Deep learning (DL) is a specialized branch of ML that uses neural networks with multiple layers, known as deep neural networks. "
            "DL has driven advancements in fields such as computer vision, natural language processing, and speech recognition. "
            "Key frameworks include TensorFlow and PyTorch."
        ),
    ]

    # One question per passage above, in the same order.
    sample_questions = [
        "Where is the Eiffel Tower located?",
        "How long is the Great Wall of China?",
        "Who first climbed Mount Everest?",
        "What are types of machine learning?",
        "Name a framework used in deep learning.",
    ]

    # Retrieve the best-matching passage for each question, then read the
    # answer out of that passage.
    for query in sample_questions:
        passage, score = retrieve_context(query, sample_contexts)
        reply = get_answer(query, passage)
        # The trailing "\n" on the last field leaves a blank line between
        # consecutive results.
        print(
            f"Question: {query}",
            f"Answer: {reply}",
            f"Cosine Similarity: {score:.4f}\n",
            sep="\n",
        )

Question: Where is the Eiffel Tower located?
Answer: paris, france
Cosine Similarity: 0.7997

Question: How long is the Great Wall of China?
Answer: 13, 000 miles
Cosine Similarity: 0.7977

Question: Who first climbed Mount Everest?
Answer: sir edmund hillary and tenzing norgay
Cosine Similarity: 0.6243

Question: What are types of machine learning?
Answer: supervised, unsupervised, and reinforcement learning
Cosine Similarity: 0.6485

Question: Name a framework used in deep learning.
Answer: tensorflow and pytorch
Cosine Similarity: 0.5892

