<a href="https://colab.research.google.com/github/GrandWizard1102/NM_projects/blob/main/Wikipedia_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install wikipedia-api openai faiss-cpu sentence-transformers

Collecting wikipedia-api
  Downloading wikipedia_api-0.8.1.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu

**Console bot**

In [28]:
import requests
import faiss
import numpy as np
import wikipediaapi
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import getpass

# ✅ Get OpenRouter API key
api_key = getpass.getpass("Enter your OpenRouter API key: ")

# ✅ OpenRouter API URL
API_URL = "https://openrouter.ai/api/v1/chat/completions"

# ✅ Wikipedia API
wiki_wiki = wikipediaapi.Wikipedia(user_agent="MyWikipediaChatbot/1.0", language="en")

# ✅ Embedding Model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ FAISS Index for storing Wikipedia summaries
embedding_dim = 384  # Dimension of the MiniLM model
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []  # Store Wikipedia text

# ✅ Fetch Wikipedia summary
def fetch_wikipedia_summary(topic):
    page = wiki_wiki.page(topic)
    if page.exists():
        return page.summary
    return None

# ✅ Convert text to embeddings
def embed_text(text):
    return embedder.encode([text])[0]

# ✅ Add Wikipedia text to FAISS
def add_to_index(text):
    vector = embed_text(text)
    index.add(np.array([vector], dtype=np.float32))
    stored_texts.append(text)

# ✅ Retrieve relevant text using FAISS
def retrieve_similar_text(query, top_k=1):
    if len(stored_texts) == 0:
        return ["No relevant Wikipedia data found."]

    query_vector = embed_text(query).reshape(1, -1)
    distances, indices = index.search(query_vector, top_k)
    return [stored_texts[i] for i in indices[0] if i < len(stored_texts)]

# ✅ Set up OpenRouter LLM in LangChain
llm = ChatOpenAI(
    model="mistralai/mistral-small",  # Use OpenRouter-supported model
    openai_api_key=api_key,
    openai_api_base="https://openrouter.ai/api/v1"
)

# ✅ Prompt Template
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="Context: {context}\nQuestion: {query}\nAnswer:"
)

# ✅ LangChain LLM Chain
llm_chain = LLMChain(llm=llm, prompt=prompt_template)

# ✅ Generate response using OpenRouter LLM
def generate_response(query):
    context = retrieve_similar_text(query)
    context_str = "\n".join(context) if context else "No relevant Wikipedia data found."

    response = llm_chain.invoke({"context": context_str, "query": query})
    return response

# ✅ Chatbot loop
def start_chat():
    print("📚 Wikipedia Chatbot (RAG) - Type 'exit' to quit.")

    while True:
        query = input("\nYou: ")
        if query.lower() == "exit":
            print("Goodbye! 👋")
            break

        if query not in stored_texts:
            summary = fetch_wikipedia_summary(query)
            if summary:
                add_to_index(summary)

        response = generate_response(query)
        print(f"🤖 Chatbot: {response}")

# ✅ Run chatbot
start_chat()


Enter your OpenRouter API key: ··········


  llm = ChatOpenAI(


📚 Wikipedia Chatbot (RAG) - Type 'exit' to quit.

You: Hi
🤖 Chatbot: {'context': 'HI or Hi may refer to:', 'query': 'Hi', 'text': 'Hello! How can I assist you today?'}

You: exit
Goodbye! 👋


**Gradio bot**

In [45]:
import gradio as gr
import requests
import faiss
import numpy as np
import wikipediaapi
from sentence_transformers import SentenceTransformer
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory   # Updated Memory
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# ✅ OpenRouter API Configuration
API_KEY = getpass.getpass("Enter your OpenRouter API key: ")
BASE_URL = "https://openrouter.ai/api/v1"

# ✅ Wikipedia API
wiki = wikipediaapi.Wikipedia(user_agent="MyWikipediaChatbot/1.0", language="en")

# ✅ Load Sentence Transformer for Embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embedding_dim = 384

# ✅ FAISS Vector Store
index = faiss.IndexFlatL2(embedding_dim)
stored_texts = []

# ✅ LLM Model (OpenRouter)
llm = ChatOpenAI(
    model="mistralai/mistral-small",
    openai_api_key=API_KEY,
    openai_api_base=BASE_URL
)

# ✅ Updated Memory (Fixing Deprecation Warning)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# ✅ Function to Fetch Wikipedia Summary
def fetch_wikipedia_summary(topic):
    page = wiki.page(topic)
    return page.summary if page.exists() else None

# ✅ Convert Text to Embeddings
def embed_text(text):
    return embedder.encode([text])[0]

# ✅ Add Wikipedia Text to FAISS
def add_to_index(text):
    vector = embed_text(text)
    index.add(np.array([vector], dtype=np.float32))
    stored_texts.append(text)

# ✅ Retrieve Similar Text
def retrieve_similar_text(query, top_k=1):
    if len(stored_texts) == 0:
        return ["No relevant Wikipedia data found."]

    query_vector = embed_text(query).reshape(1, -1)
    distances, indices = index.search(query_vector, top_k)
    return [stored_texts[i] for i in indices[0] if i < len(stored_texts)]

# ✅ Create Prompt Template
prompt = PromptTemplate(
    input_variables=["input_text"],  # Use a single variable
    template="""
    You are an AI assistant answering questions using Wikipedia.
    {input_text}
    Answer:
    """
)


# ✅ Create LLM Chain
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

# ✅ Generate Response
def generate_response(query):
    if query not in stored_texts:
        summary = fetch_wikipedia_summary(query)
        if summary:
            add_to_index(summary)

    context = retrieve_similar_text(query)
    context_str = "\n".join(context) if context else "No relevant Wikipedia data found."

    input_text = f"Context: {context_str}\nQuery: {query}"

    response = llm_chain.run(input_text)  # Now using a single input variable
    return response


# ✅ Create Gradio Interface (Fixed `debug` Issue)
def chatbot_interface(query):
    return generate_response(query)

gr.Interface(
    fn=chatbot_interface,
    inputs="text",
    outputs="text",
    title="📚 Wikipedia Chatbot (RAG)",
    description="Ask me anything! I use Wikipedia to find relevant info and generate responses.",
    theme="default"  # Fixed theme issue
).launch(debug=True)  # Automatically sets `share=True` in Colab


Enter your OpenRouter API key: ··········
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://90947f7f637faf469f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7862 <> https://90947f7f637faf469f.gradio.live


