In [1]:
import wikipedia
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm


To simulate an external knowledge base, we’ll fetch the Wikipedia article for a given topic:

In [2]:
def get_wikipedia_content(topic):
    """Fetch the plain-text content of the Wikipedia page for ``topic``.

    Args:
        topic: Title (or search phrase) of the page to fetch, as a string.

    Returns:
        The page content as a string, or ``None`` when no topic was given,
        no matching page exists, or the topic is ambiguous. A short
        explanation is printed in every ``None`` case so the caller (and the
        notebook reader) can see why the lookup failed.
    """
    # Reject empty / whitespace-only input up front: the lookup would
    # otherwise fail with an error that is not one of the two handled below.
    if not topic or not topic.strip():
        print("No topic given. Please enter a non-empty topic.")
        return None

    try:
        return wikipedia.page(topic.strip()).content
    except wikipedia.exceptions.PageError:
        # Report the miss explicitly instead of failing silently, matching
        # the feedback given for ambiguous topics.
        print(f"No Wikipedia page found for '{topic}'.")
        return None
    except wikipedia.exceptions.DisambiguationError as e:
        # handle cases where the topic is ambiguous
        print(f"Ambiguous topic. Please be more specific. Options: {e.options}")
        return None

# user input
topic = input("Enter a topic to learn about: ")
document = get_wikipedia_content(topic)

if not document:
    # Stop this cell (and a "Run All") without asking the kernel to shut
    # down, which is what calling exit() does inside a notebook. Every later
    # cell depends on `document`, so there is nothing useful to continue with.
    raise SystemExit("Could not retrieve information.")

    

Since Wikipedia articles can be long, we will split the text into smaller overlapping chunks for better retrieval:

In [3]:
# Tokenizer for the same checkpoint that is later loaded as the embedding
# model, so chunk lengths below are counted in that model's tokens.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")

def split_text(text, chunk_size=256, chunk_overlap=20, tok=None):
    """Split ``text`` into overlapping chunks measured in tokens.

    Consecutive chunks share ``chunk_overlap`` tokens; the last chunk may be
    shorter than ``chunk_size``. Chunk text is rebuilt from tokens via
    ``convert_tokens_to_string``, so it is the tokenizer's rendering of the
    text and may differ from the input in casing/spacing.

    Args:
        text: The text to split.
        chunk_size: Maximum number of tokens per chunk (must be > 0).
        chunk_overlap: Number of tokens shared by neighbouring chunks
            (must satisfy ``0 <= chunk_overlap < chunk_size``).
        tok: Optional tokenizer exposing ``tokenize`` and
            ``convert_tokens_to_string``. Defaults to the module-level
            ``tokenizer``.

    Returns:
        A list of chunk strings; empty when ``text`` yields no tokens.

    Raises:
        ValueError: If ``chunk_size``/``chunk_overlap`` would prevent the
            window from advancing.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # With overlap >= size the window would never move forward and the loop
    # below would never terminate.
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError("chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size")

    if tok is None:
        tok = tokenizer

    tokens = tok.tokenize(text)
    total = len(tokens)
    step = chunk_size - chunk_overlap

    chunks = []
    for start in range(0, total, step):
        end = min(start + chunk_size, total)
        chunks.append(tok.convert_tokens_to_string(tokens[start:end]))
        # Once a window reaches the end of the text, any further window would
        # only repeat a suffix of it.
        if end == total:
            break
    return chunks

# Chunk the fetched article using split_text's default window size and
# overlap, then report how many chunks were produced.
chunks = split_text(document)
print(f"Number of chunks: {len(chunks)}")


Token indices sequence length is longer than the specified maximum sequence length for this model (3322 > 512). Running this sequence through the model will result in indexing errors


Number of chunks: 14


To efficiently search for relevant chunks, we will use Sentence Transformers to convert text into embeddings and store them in a FAISS index:

In [4]:
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Fail with a clear message instead of an opaque shape/index error further
# down when there is nothing to embed.
if not chunks:
    raise ValueError("No text chunks to index; the document produced no chunks.")

# FAISS works on C-contiguous float32 matrices. Converting explicitly here
# (a no-op when encode() already returns that layout) avoids both a redundant
# copy at add() time and a dtype surprise if the encoder output ever changes.
embeddings = np.ascontiguousarray(embedding_model.encode(chunks), dtype=np.float32)

# One row per chunk; the second axis is the embedding dimensionality.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

Now, we will take user input for the RAG pipeline. When a user asks a question, we will:

    Convert the query into an embedding.
    Retrieve the top-k most relevant chunks using FAISS.
    Use a pre-trained extractive question-answering model to pull the answer out of the retrieved chunks.


In [7]:
query = input("Ask a question about the topic: ")
query_embedding = embedding_model.encode([query])

if index.ntotal == 0:
    raise ValueError("The FAISS index is empty; build it from at least one chunk first.")

# Never ask for more neighbours than the index holds: FAISS pads missing
# results with id -1, and chunks[-1] would silently return the last chunk.
k = min(3, index.ntotal)
distances, indices = index.search(np.array(query_embedding), k)

# Defensive filter for any padded (-1) ids; row 0 belongs to the single query.
retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]
print("Retrieved chunks:")
for chunk in retrieved_chunks:
    print("- " + chunk)

Retrieved chunks:
- , jewellery and dinner sets. khan claimed his arrest was due to a " london plan " to keep him out of politics, a term used by him to refer to an alleged plot between army chief asim munir and nawaz sharif. al jazeera reported khan " has yet to provide evidence of its existence. " the 5 august arrest and conviction was criticized by interviewed lawyers for being hasty, the verdict being announced in the absence of the imran khan, and for being politically motivated. = = = reactions = = = on 6 august, a day after khan had been arrested, dawn reported that despite heavy police deployment, only a few street protests were reported in khyber pakhtunkhwa, and that pti activists in kpk had " largely remained ' silent ' over the arrest of their leader ". the bbc said that " there was barely a whimper of protest. " and that no major protests had been reported in the country. analyst ali akbar attributed this to the defection and lack of pti leaders following the may 9 riots a

Now, we will use a pre-trained question-answering model to extract the final answer from the retrieved context:

In [8]:
qa_model_name = "deepset/roberta-base-squad2"
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)

# The retrieved chunks are concatenated into a single context string for the
# extractive QA model.
context = " ".join(retrieved_chunks)

# Let the model decline to answer: without handle_impossible_answer the
# pipeline always returns some span, even when the retrieved context does not
# contain the answer. An empty "answer" string signals "no answer".
answer = qa_pipeline(question=query, context=context, handle_impossible_answer=True)
if answer["answer"].strip():
    print(f"Answer: {answer['answer']}")
else:
    print("Answer: (no answer found in the retrieved context)")

Answer: jewellery and dinner sets
