In [None]:
# === SETUP ===
import os
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""

!pip install langchain chromadb sentence-transformers faiss-cpu huggingface_hub pypdf
!pip install -U langchain-community

# First install Gradio
!pip install gradio

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
import gradio as gr
from langchain.document_loaders import PyPDFLoader


In [None]:

import os
from huggingface_hub import login
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from transformers import pipeline

# === SIMPLE DATASET ===
# The path and encoding are named once so the write and the load below cannot
# drift apart, and so the file round-trips identically on every platform
# instead of depending on the locale's default encoding.
DOC_PATH = "stanford_info.txt"
DOC_ENCODING = "utf-8"

sample_text = """
Stanford University is a private research university in Stanford, California.
It is known for its academic strength, proximity to Silicon Valley, and ranking among the world's top universities.
"""
with open(DOC_PATH, "w", encoding=DOC_ENCODING) as f:
    f.write(sample_text)

# Read the file back as LangChain documents using the same encoding it was written with.
loader = TextLoader(DOC_PATH, encoding=DOC_ENCODING)
documents = loader.load()

# === CHUNKING ===
# Split into chunks of at most 500 characters with a 50-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# === EMBEDDING MODEL ===
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# === VECTOR DB (Chroma) ===
# No persist directory is passed, so nothing is configured here to save the index.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the default collection again -- confirm against the installed chromadb.
db = Chroma.from_documents(texts, embedding_model)

# === LOCAL SMALL LLM (Flan-T5 small) ===
generator = pipeline("text2text-generation", model="google/flan-t5-small")

# === SIMPLE retrieval + QA manually ===
def retrieve_and_answer(query, k=3, max_length=200):
    """Answer ``query`` from the indexed document with a retrieve-then-generate step.

    The ``k`` most similar chunks are fetched from the module-level ``db``,
    joined into a context block, and passed with the question to the
    module-level ``generator``.

    Args:
        query: The user's question. ``None`` or a blank string is rejected
            without touching the retriever or the model.
        k: Number of chunks to retrieve (defaults to the original value, 3).
        max_length: ``max_length`` forwarded to the generator (defaults to
            the original value, 200).

    Returns:
        The generated answer text, or a short prompt asking for a question
        when ``query`` is empty.
    """
    question = (query or "").strip()
    if not question:
        # An empty UI submission would otherwise run the model on nothing.
        return "Please enter a question."

    docs = db.similarity_search(question, k=k)
    context = "\n\n".join(d.page_content for d in docs)
    prompt = f"Answer based on context:\n{context}\n\nQuestion: {question}"
    # The generator returns a list of dicts; the first entry holds the text.
    return generator(prompt, max_length=max_length)[0]['generated_text']


In [3]:
# === INTERACTIVE TEST ===
# Simple console loop: read a question, print the answer, stop on "exit".

while True:
    try:
        query = input("\nAsk me anything (type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # No stdin available or the user interrupted the prompt: stop cleanly
        # instead of leaving a traceback in the cell output.
        break
    command = query.strip()
    if command.lower() == "exit":
        # Surrounding whitespace no longer prevents the exit command from matching.
        break
    if not command:
        # Nothing was typed; ask again rather than querying the model with an empty string.
        continue
    answer = retrieve_and_answer(query)
    print(f"\n🔎 Answer: {answer}")


Ask me anything (type 'exit' to quit): where is stanford

🔎 Answer: Stanford, California

Ask me anything (type 'exit' to quit): exit


In [4]:

# (Assuming previous code is already run and you have `retrieve_and_answer` defined)

# Define a Gradio function
def qa_gradio(query):
    """Gradio callback: forward the textbox contents to ``retrieve_and_answer``.

    Returns whatever ``retrieve_and_answer`` returns for ``query``.
    """
    return retrieve_and_answer(query)

# Launch Gradio app
# Input widget: a two-line textbox with a hint about what to ask.
question_box = gr.Textbox(placeholder="Ask a question about Stanford...", lines=2)

# Wire the textbox to `qa_gradio` and show its return value as plain text.
iface = gr.Interface(
    title="Mini RAG Chatbot",
    description="Ask questions about the document loaded!",
    fn=qa_gradio,
    inputs=question_box,
    outputs="text",
)

# Start serving the app.
iface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f96b2fa2fcc5b72257.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [5]:
# Tiny keyword-based evaluation: an answer counts as correct when it contains
# at least one of the expected keywords (case-insensitive).
eval_set = [
    {"question": "Where is Stanford located?", "expected_keywords": ["California"]},
    {"question": "What is Stanford famous for?", "expected_keywords": ["Silicon Valley", "academic strength"]},
]

# Running count of questions whose answer matched a keyword.
correct = 0

for item in eval_set:
    answer = retrieve_and_answer(item['question'])
    print(f"\nQuestion: {item['question']}")
    print(f"Answer: {answer}")

    # Lower-case the answer once, then look for any expected keyword in it.
    answer_lc = answer.lower()
    matched = False
    for keyword in item['expected_keywords']:
        if keyword.lower() in answer_lc:
            matched = True
            break
    if matched:
        correct += 1

# Fraction of questions answered with at least one expected keyword.
accuracy = correct / len(eval_set)
print(f"\n✅ Simple Retrieval QA Accuracy: {accuracy * 100:.2f}%")



Question: Where is Stanford located?
Answer: Stanford, California

Question: What is Stanford famous for?
Answer: academic strength, proximity to Silicon Valley, and ranking among the world's top universities

✅ Simple Retrieval QA Accuracy: 100.00%
