In [1]:
pip install wikipedia-api openai faiss-cpu sentence-transformers

Collecting wikipedia-api
  Downloading wikipedia_api-0.8.1.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu

**Console bot**

In [3]:
pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<1.0.0,>=0.3.51 (from langchain_community)
  Downloading langchain_core-0.3.51-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.23 (from langchain_community)
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [4]:
import requests
import faiss
import numpy as np
import wikipediaapi
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import getpass

# ✅ Get OpenRouter API key
api_key = getpass.getpass("Enter your OpenRouter API key: ")

# ✅ OpenRouter API URL
API_URL = "https://openrouter.ai/api/v1/chat/completions"

# ✅ Wikipedia API
wiki_wiki = wikipediaapi.Wikipedia(user_agent="MyWikipediaChatbot/1.0", language="en")

# ✅ Embedding Model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ FAISS Index for storing Wikipedia summaries
embedding_dim = 384  # Dimension of the MiniLM model
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []  # Store Wikipedia text

# ✅ Fetch Wikipedia summary
def fetch_wikipedia_summary(topic):
    page = wiki_wiki.page(topic)
    if page.exists():
        return page.summary
    return None

# ✅ Convert text to embeddings
def embed_text(text):
    return embedder.encode([text])[0]

# ✅ Add Wikipedia text to FAISS
def add_to_index(text):
    vector = embed_text(text)
    index.add(np.array([vector], dtype=np.float32))
    stored_texts.append(text)

# ✅ Retrieve relevant text using FAISS
def retrieve_similar_text(query, top_k=1):
    if len(stored_texts) == 0:
        return ["No relevant Wikipedia data found."]

    query_vector = embed_text(query).reshape(1, -1)
    distances, indices = index.search(query_vector, top_k)
    return [stored_texts[i] for i in indices[0] if i < len(stored_texts)]

# ✅ Set up OpenRouter LLM in LangChain
llm = ChatOpenAI(
    model="mistralai/mistral-small",  # Use OpenRouter-supported model
    openai_api_key=api_key,
    openai_api_base="https://openrouter.ai/api/v1"
)

# ✅ Prompt Template
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="Context: {context}\nQuestion: {query}\nAnswer:"
)

# ✅ LangChain LLM Chain
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

# ✅ Generate response using OpenRouter LLM
def generate_response(query):
    context = retrieve_similar_text(query)
    context_str = "\n".join(context) if context else "No relevant Wikipedia data found."

    response = llm_chain.invoke({"context": context_str, "query": query})
    return response

# ✅ Chatbot loop
def start_chat():
    print("📚 Wikipedia Chatbot (RAG) - Type 'exit' to quit.")

    while True:
        query = input("\nYou: ")
        if query.lower() == "exit":
            print("Goodbye! 👋")
            break

        if query not in stored_texts:
            summary = fetch_wikipedia_summary(query)
            if summary:
                add_to_index(summary)

        response = generate_response(query)
        print(f"🤖 Chatbot: {response}")

# ✅ Run chatbot
start_chat()


Enter your OpenRouter API key: ··········


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  llm = ChatOpenAI(
  llm_chain = LLMChain(llm=llm, prompt=prompt_template)


📚 Wikipedia Chatbot (RAG) - Type 'exit' to quit.

You: When was MS Dhoni Born?
🤖 Chatbot: {'context': 'No relevant Wikipedia data found.', 'query': 'When was MS Dhoni Born?', 'text': 'MS Dhoni, full name Mahendra Singh Dhoni, was born on July 7, 1981.'}

You: Explain about Mahendra Singh Dhoni
🤖 Chatbot: {'context': 'No relevant Wikipedia data found.', 'query': 'Explain about Mahendra Singh Dhoni', 'text': "Mahendra Singh Dhoni, often referred to as MS Dhoni, is a former Indian international cricketer who is widely regarded as one of the greatest finishers and captains in the history of the game. Here are some key aspects of his career:\n\n1. **Early Life and Career:**\n   - Born on July 7, 1981, in Ranchi, Jharkhand, Dhoni started his career as a ticket collector for the Indian Railways.\n   - He began playing cricket at a young age and was selected for the Indian team in 2004.\n\n2. **Cricket Career:**\n   - **Batsman:** Dhoni is known for his aggressive batting style, particularly h

KeyboardInterrupt: Interrupted by user

**Gradio bot**

In [None]:
import gradio as gr
import requests
import faiss
import numpy as np
import wikipediaapi
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory   # Updated Memory
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# ✅ OpenRouter API Configuration
API_KEY = getpass.getpass("Enter your OpenRouter API key: ")
BASE_URL = "https://openrouter.ai/api/v1"

# ✅ Wikipedia API
wiki = wikipediaapi.Wikipedia(user_agent="MyWikipediaChatbot/1.0", language="en")

# ✅ Load Sentence Transformer for Embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embedding_dim = 384

# ✅ FAISS Vector Store
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []

# ✅ LLM Model (OpenRouter)
llm = ChatOpenAI(
    model="mistralai/mistral-small",
    openai_api_key=API_KEY,
    openai_api_base=BASE_URL
)

# ✅ Updated Memory (Fixing Deprecation Warning)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# ✅ Function to Fetch Wikipedia Summary
def fetch_wikipedia_summary(topic):
    page = wiki.page(topic)
    return page.summary if page.exists() else None

# ✅ Convert Text to Embeddings
def embed_text(text):
    return embedder.encode([text])[0]

# ✅ Add Wikipedia Text to FAISS
def add_to_index(text):
    vector = embed_text(text)
    index.add(np.array([vector], dtype=np.float32))
    stored_texts.append(text)

# ✅ Retrieve Similar Text
def retrieve_similar_text(query, top_k=1):
    if len(stored_texts) == 0:
        return ["No relevant Wikipedia data found."]

    query_vector = embed_text(query).reshape(1, -1)
    distances, indices = index.search(query_vector, top_k)
    return [stored_texts[i] for i in indices[0] if i < len(stored_texts)]

# ✅ Create Prompt Template
prompt = PromptTemplate(
    input_variables=["input_text"],  # Use a single variable
    template="""
    You are an AI assistant answering questions using Wikipedia.
    {input_text}
    Answer:
    """
)


# ✅ Create LLM Chain
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

# ✅ Generate Response
def generate_response(query):
    if query not in stored_texts:
        summary = fetch_wikipedia_summary(query)
        if summary:
            add_to_index(summary)

    context = retrieve_similar_text(query)
    context_str = "\n".join(context) if context else "No relevant Wikipedia data found."

    input_text = f"Context: {context_str}\nQuery: {query}"

    response = llm_chain.run(input_text)  # Now using a single input variable
    return response


# ✅ Create Gradio Interface (Fixed `debug` Issue)
def chatbot_interface(query):
    return generate_response(query)

gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="📚 Wikipedia Chatbot (RAG)",
    description="Ask me anything! I use Wikipedia to find relevant info and generate responses.",
    theme="default"  # Fixed theme issue
).launch(debug=True)  # Automatically sets `share=True` in Colab


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://116e569b469196f8bc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  response = llm_chain.run(input_text)  # Now using a single input variable


In [6]:
pip install gradio

Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 