In [None]:
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import torch

# --- 1. Retriever: MiniLM sentence encoder, pinned to the CPU ---
# device='cpu' keeps the encoder off the GPU. The stated rationale is that the
# retriever is fast enough there and this leaves the VRAM for the generator.
retriever_model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
print("‚úÖ Retriever model (MiniLM) loaded. Using device: cpu")


# --- 2. Generator: DistilBERT question-answering pipeline ---
# The pipeline is given a device index: 0 = first GPU, -1 = CPU.
if torch.cuda.is_available():
    pipeline_device = 0
else:
    pipeline_device = -1

generator_model = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
    device=pipeline_device,
)

print("‚úÖ Generator model (DistilBERT) loaded.")
# Report which device the generator ended up on.
print(
    "   -> Running on GPU (Good!)"
    if pipeline_device == 0
    else "   -> WARNING: Running on CPU (Will be slow, but should work)"
)

In [None]:

# Toy corpus for the retriever: one short, self-contained fact per entry.
# The list order matters, because documents are later looked up by position.
knowledge_base = [
    "Buddy is a 3-year-old Golden Retriever who loves to play fetch.",
    "The capital of France is Paris, which is known for the Eiffel Tower.",
    "Python is an interpreted, high-level, general-purpose programming language.",
    "The first person to walk on the Moon was Neil Armstrong in 1969.",
    "Climate change is the long-term alteration of temperature and typical weather patterns.",
]

# Confirm how many documents were registered.
print(f"üìö Knowledge base created with {len(knowledge_base)} documents.")

In [None]:
# Code Cell 4: Task 1 - Encode Knowledge
print("--- Task 1: Encoding Knowledge Base ---")

# Embed the whole corpus in a single call, asking for a tensor result.
knowledge_embeddings = retriever_model.encode(
    knowledge_base,
    convert_to_tensor=True,
)

# --- Verification ---
# The task passes only when the name is defined and its first dimension
# matches the number of documents.
if not ('knowledge_embeddings' in locals() and knowledge_embeddings.shape[0] == len(knowledge_base)):
    print("‚ö†Ô∏è Task 1 not complete. 'knowledge_embeddings' not found or has wrong shape.")
else:
    print("‚úÖ Success! Knowledge base has been encoded.")
    print(f"   -> Embedding shape: {knowledge_embeddings.shape}")

In [None]:
print("--- Task 2: Building the Retriever ---")

def retrieve_context(query):
    """Return the knowledge-base document most similar to ``query``.

    The query is embedded with ``retriever_model`` and scored by cosine
    similarity against the precomputed ``knowledge_embeddings``; the document
    at the highest-scoring position is returned.

    Args:
        query: Text to look up; passed straight to ``retriever_model.encode``.

    Returns:
        The entry of ``knowledge_base`` at the index of the highest score.

    Note:
        Reads the module-level ``retriever_model``, ``knowledge_embeddings``
        and ``knowledge_base``. ``knowledge_embeddings`` must have been
        computed from the current ``knowledge_base`` for the index to line up.
    """
    # Embed the query as a tensor so it can be compared with knowledge_embeddings.
    query_embedding = retriever_model.encode(query, convert_to_tensor=True)

    # The similarity call yields one row of scores per query; row 0 holds this
    # query's score against every document.
    cos_scores = util.pytorch_cos_sim(query_embedding, knowledge_embeddings)[0]

    # argmax yields a 0-dim tensor. Convert it to a plain int before indexing
    # the Python list rather than relying on implicit tensor-to-index coercion.
    top_result_index = int(torch.argmax(cos_scores))

    return knowledge_base[top_result_index]


# --- Verification ---
# Smoke test: a question about Python should bring back a document that
# mentions Python.
print("Testing retrieve_context('What is Python?')...")
retrieved = retrieve_context("What is Python?")
print(f"   -> Retrieved: '{retrieved}'")
if "Python" not in retrieved:
    print("‚ö†Ô∏è Retriever function failed to find the right document.")
else:
    print("‚úÖ Success! Retriever function works.")

In [None]:
print("\n--- Task 3: Building the Generator ---")

def generate_answer(question, context):
    """Answer ``question`` from ``context`` with the question-answering pipeline.

    Args:
        question: The question text.
        context: The passage handed to the pipeline as its context.

    Returns:
        The value stored under the ``'answer'`` key of the pipeline's result.
    """
    # The pipeline is called with keyword arguments; only the 'answer' entry
    # of what it returns is handed back to the caller.
    return generator_model(question=question, context=context)['answer']


# --- Verification ---
# Smoke test on a hand-written context, so the generator is checked on its
# own without going through the retriever.
print("Testing generate_answer('What is Python?', '...')...")
test_question = "What is Python?"
test_context = "Python is a popular programming language."
answer = generate_answer(test_question, test_context)
print(f"   -> Question: '{test_question}'")
print(f"   -> Context: '{test_context}'")
print(f"   -> Answer: '{answer}'")

# The extracted span must contain the expected phrase.
if "popular programming language" not in answer:
    print("‚ö†Ô∏è Generator function failed to extract the answer.")
else:
    print("‚úÖ Success! Generator function works.")

In [None]:
print("\n--- Task 4: Building the Full RAG Pipeline ---")

def ask_rag_pipeline(query):
    """Run retrieval followed by answer extraction for ``query``.

    Args:
        query: The user's question. It is used both to select the context
            and as the question given to the generator.

    Returns:
        A ``(final_answer, best_context)`` tuple: the generated answer and
        the retrieved document it was produced from.
    """
    # Retrieve: pick the document to answer from.
    best_context = retrieve_context(query)

    # Generate, and return the answer together with its supporting context.
    return generate_answer(question=query, context=best_context), best_context

# --- Verification ---
# End-to-end smoke test: retrieval and generation together on a single query.
print("Testing the full RAG pipeline...")
print("Query: 'What is the capital of France?'")
final_answer, retrieved_context = ask_rag_pipeline("What is the capital of France?")

print(f"   -> Retrieved Context: '{retrieved_context}'")
print(f"   -> Final Answer: '{final_answer}'")

# Case-insensitive exact match against the expected answer.
if final_answer.lower() != "paris":
    print("‚ö†Ô∏è RAG pipeline failed. Expected 'Paris'.")
else:
    print("‚úÖ Success! Your RAG pipeline is working!")