In [8]:

!pip install -q --upgrade langchain langchain-community sentence-transformers faiss-cpu "transformers[torch]" accelerate torch

# 2) Now the implementation
try:
    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
    from sentence_transformers import SentenceTransformer
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.schema import Document
    from langchain.vectorstores import FAISS
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.memory import ConversationBufferMemory
    from langchain.chains import ConversationalRetrievalChain
    from langchain.llms import HuggingFacePipeline
except Exception as e:
    # Any failure raised while importing (not only ImportError) is re-reported
    # with a setup hint. `from e` records the original exception as the explicit
    # cause so its full traceback stays attached to the RuntimeError.
    raise RuntimeError(
        "Import failed. Make sure the pip install step above ran successfully.\n"
        "If you're in Colab, try restarting the runtime (Runtime -> Restart runtime) and re-run this cell.\n\n"
        f"Original error: {e}"
    ) from e

# 3) Example corpus: five one-sentence facts, joined one per line.
corpus_text = "\n".join(
    (
        "Machine learning enables computers to learn patterns from data without explicit programming.",
        "Vector databases like FAISS make it possible to search large collections of embeddings quickly.",
        "Transformers are neural network architectures designed for handling sequential data.",
        "Jupyter notebooks are widely used for interactive coding and data analysis.",
        "Streamlit helps developers convert Python scripts into shareable web apps with minimal effort.",
    )
)

# 4) Split the corpus into Document chunks (up to 400 characters, 50 overlap).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=50,
)
docs = splitter.create_documents([corpus_text])

# 5) Sentence-transformers embedding model, wrapped for LangChain.
embed_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embed_model_name)

# 6) Embed the chunks and index them in an in-memory FAISS store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# 7) Local generation model (flan-t5-small) exposed to LangChain as an LLM.
hf_model_name = "google/flan-t5-small"

# Query CUDA availability once; `device` follows the transformers pipeline
# convention used below (0 = first GPU, -1 = CPU).
cuda_available = torch.cuda.is_available()
device = 0 if cuda_available else -1
device_label = "cuda" if cuda_available else "cpu"

print(f"Using device for generation: {device_label}")

tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(hf_model_name)
if cuda_available:
    model = model.to("cuda")

# Greedy decoding (do_sample=False) with generations capped at max_length=256.
pipeline_options = {
    "model": model,
    "tokenizer": tokenizer,
    "device": device,
    "framework": "pt",
    "max_length": 256,
    "do_sample": False,
}
gen_pipeline = pipeline("text2text-generation", **pipeline_options)

llm = HuggingFacePipeline(pipeline=gen_pipeline)

# 8) Create memory and the Conversational Retrieval Chain
# The chain below is built with return_source_documents=True, so each call
# returns both "answer" and "source_documents". output_key tells the memory
# which of the two to record as the assistant turn; without it, saving the
# first turn fails with "One output key expected".
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    # Retrieve up to 4 chunks per question as context for the answer.
    retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
    memory=memory,
    return_source_documents=True,
)

# 9) Colab-friendly chat loop
# Reads questions until the user types 'exit'/'quit', stdin is closed, or the
# cell is interrupted. Each question is answered through qa_chain and the
# retrieved source chunks are listed under the answer.
print("\n✅ RAG Chatbot ready — type questions below. Type 'exit' to quit.\n")
while True:
    try:
        q = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted cell ends the session cleanly instead
        # of surfacing a traceback.
        print("\nGoodbye 👋")
        break
    if not q:
        # Ignore blank input and prompt again.
        continue
    if q.lower() in ("exit", "quit"):
        print("Goodbye 👋")
        break

    # invoke() is the supported entry point; calling the chain object directly
    # is deprecated in current LangChain releases.
    result = qa_chain.invoke({"question": q})
    answer = result.get("answer", "").strip()
    src_docs = result.get("source_documents", [])

    print("\nBot:", answer, "\n")
    if src_docs:
        print("----- Retrieved Sources -----")
        for i, d in enumerate(src_docs, 1):
            # Fall back to a positional label when a chunk has no "source"
            # metadata entry.
            src = d.metadata.get("source", f"doc_{i}")
            # Single-line preview, truncated to 400 characters.
            preview = d.page_content.replace("\n", " ")[:400]
            print(f"[{i}] source={src} | preview: {preview}")
        print("-----------------------------\n")


Using device for generation: cpu


Device set to use cpu



✅ RAG Chatbot ready — type questions below. Type 'exit' to quit.

You: exit
Goodbye 👋
