In [1]:
# ✅ Install dependencies
!pip install -q sentence-transformers faiss-cpu transformers langchain unstructured pdfminer.six

# ✅ Imports
import os
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader

# ✅ Step 1: Load your document
# Write the sample corpus to disk and read it back through LangChain's loader.
# The encoding is pinned on both sides so the round trip does not depend on
# the platform's default locale encoding.
with open("sample_text.txt", "w", encoding="utf-8") as f:
    f.write("""
    Artificial Intelligence (AI) is transforming industries by enabling machines to learn, reason, and interact with humans. Retrieval-Augmented Generation (RAG) combines the power of large language models with external knowledge bases to provide accurate and context-aware responses. FAISS, developed by Facebook AI, is a highly efficient library for similarity search and clustering of dense vectors. It is widely used for building vector databases in RAG pipelines. HuggingFace provides open-source models like flan-t5-base, which are perfect for question answering without needing expensive APIs.
    """)

loader = TextLoader("sample_text.txt", encoding="utf-8")
documents = loader.load()

# ✅ Step 2: Chunk the text
# CharacterTextSplitter only cuts at its separator, which defaults to a blank
# line ("\n\n"). The sample is a single paragraph, so with the default the
# whole document would come back as one oversized chunk and chunk_size /
# chunk_overlap would never apply. Splitting on spaces lets them take effect.
text_splitter = CharacterTextSplitter(separator=" ", chunk_size=200, chunk_overlap=20)
chunks = text_splitter.split_documents(documents)
texts = [chunk.page_content for chunk in chunks]
if not texts:
    # Fail early with a clear message instead of an IndexError further down.
    raise ValueError("No text chunks were produced from sample_text.txt")

# ✅ Step 3: Embed with Sentence Transformers
model = SentenceTransformer("all-MiniLM-L6-v2")
# FAISS expects a C-contiguous float32 matrix of shape (n_chunks, dim);
# make that explicit rather than relying on the encoder's output layout.
embeddings = np.ascontiguousarray(
    model.encode(texts, show_progress_bar=True), dtype=np.float32
)

# ✅ Step 4: Store in FAISS index
dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)  # exact (brute-force) L2 search
index.add(embeddings)

# ✅ Step 5: Setup Question Answering pipeline
qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

# ✅ Step 6: Ask a question
query = "What is RAG?"
query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)

# Never ask for more neighbours than there are chunks: FAISS pads missing
# results with label -1, and texts[-1] would silently repeat the last chunk.
top_k = min(3, len(texts))
D, I = index.search(query_embedding, k=top_k)

# Keep the defensive >= 0 filter in case the index ever returns padding.
context = "\n".join(texts[i] for i in I[0] if i >= 0)
prompt = f"Answer the question based on context:\n{context}\n\nQ: {query}\nA:"

result = qa_pipeline(prompt, max_length=100)[0]['generated_text']
print("Q:", query)
print("A:", result)

^C


ModuleNotFoundError: No module named 'sentence_transformers'

