In [None]:
from IPython import get_ipython
from IPython.display import display
# %%
# Install required libraries
!pip install -q langchain langchain-community faiss-cpu sentence-transformers huggingface_hub datasets

# Import modules
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# 1. Load Documents: fetch the Wikipedia article on Artificial Intelligence
loader = WebBaseLoader(["https://en.wikipedia.org/wiki/Artificial_intelligence"])
docs = loader.load()

# 2. Split Documents: chunk_size=1000 with chunk_overlap=200 between neighbours
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# 3. Create Embeddings: sentence-transformers model used to vectorise each chunk
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Store in VectorDB: embed every chunk and index it in FAISS
vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)

# 5. Initialize Free LLM (Using Hugging Face Free Inference API)
import os
from getpass import getpass

# Never hardcode the API token in the notebook: anyone who can read the file can
# use it. The token is taken from the HUGGINGFACEHUB_API_TOKEN environment
# variable; if that is unset, it is requested interactively without echoing.
# Create a token at https://huggingface.co/settings/tokens
# NOTE: a token that was previously committed in this cell should be treated as
# leaked and revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 0.5, "max_length": 512},
)

# 6. Create RAG Chain: the LLM answers using chunks fetched by the FAISS retriever;
#    return_source_documents=True keeps the retrieved chunks in the result
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
)

# 7. Query the System with one sample question
query = "What are the main applications of AI?"
result = qa_chain({"query": query})

# Show the answer, then at most three sources with a 150-character preview each
print("Answer:", result["result"])
print("\nSources:")
for doc in result["source_documents"][:3]:
    print(f"- {doc.metadata['source']}: {doc.page_content[:150]}...")

In [None]:
!pip install gradio

In [4]:
from IPython import get_ipython
from IPython.display import display
# %%
# Install required libraries
!pip install -q langchain langchain-community faiss-cpu sentence-transformers huggingface_hub datasets gradio

# Import modules
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import gradio as gr

# 1. Load Documents: fetch the Wikipedia article on Artificial Intelligence
loader = WebBaseLoader(["https://en.wikipedia.org/wiki/Artificial_intelligence"])
docs = loader.load()

# 2. Split Documents: chunk_size=1000 with chunk_overlap=200 between neighbours
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# 3. Create Embeddings: sentence-transformers model used to vectorise each chunk
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Store in VectorDB: embed every chunk and index it in FAISS
vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)

# 5. Initialize Free LLM (Using Hugging Face Free Inference API)
import os
from getpass import getpass

# Never hardcode the API token in the notebook: anyone who can read the file can
# use it. The token is taken from the HUGGINGFACEHUB_API_TOKEN environment
# variable; if that is unset, it is requested interactively without echoing.
# Create a token at https://huggingface.co/settings/tokens
# NOTE: a token that was previously committed in this cell should be treated as
# leaked and revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Using a smaller model to avoid size limits
# Try 'google/flan-t5-large' or 'google/flan-t5-base'
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",  # Switched to a smaller model
    model_kwargs={"temperature": 0.5, "max_length": 512},
)

# 6. Create RAG Chain: the LLM answers using chunks fetched by the FAISS retriever;
#    return_source_documents=True keeps the retrieved chunks in the result
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
)

# Function to get answer from the RAG chain
def get_answer(query):
    """Run ``query`` through ``qa_chain`` and format the reply as plain text.

    Args:
        query: Question text, passed to the chain under the ``"query"`` key.

    Returns:
        The chain's ``"result"`` value followed by a ``Sources:`` section with
        one line per source document (first three only). Each line shows the
        document's ``source`` metadata and the first 150 characters of its
        page content.
    """
    response = qa_chain({"query": query})
    answer = response["result"]
    source_lines = [
        f"- {doc.metadata['source']}: {doc.page_content[:150]}...\n"
        for doc in response["source_documents"][:3]
    ]
    return answer + ("\nSources:\n" + "".join(source_lines))

# Build the Gradio UI: one two-line question box wired to get_answer, text output
iface = gr.Interface(
    title="AI Wikipedia RAG",
    description="Ask questions about Artificial Intelligence from Wikipedia.",
    fn=get_answer,
    inputs=gr.Textbox(placeholder="Enter your question here...", lines=2),
    outputs="text",
)

# Start serving the interface
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3af2de8ede8f7de769.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [4]:
from IPython import get_ipython
from IPython.display import display
# %%
# Install required libraries
!pip install -q langchain langchain-community faiss-cpu sentence-transformers huggingface_hub datasets

# Import modules
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# 1. Load Documents: fetch the Wikipedia article on Artificial Intelligence
loader = WebBaseLoader(["https://en.wikipedia.org/wiki/Artificial_intelligence"])
docs = loader.load()

# 2. Split Documents: chunk_size=1000 with chunk_overlap=200 between neighbours
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# 3. Create Embeddings: sentence-transformers model used to vectorise each chunk
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4. Store in VectorDB: embed every chunk and index it in FAISS
vectorstore = FAISS.from_documents(documents=splits, embedding=embedding_model)

# 5. Initialize Free LLM (Using Hugging Face Free Inference API)
import os
from getpass import getpass

# Never hardcode the API token in the notebook: anyone who can read the file can
# use it. The token is taken from the HUGGINGFACEHUB_API_TOKEN environment
# variable; if that is unset, it is requested interactively without echoing.
# Create a token at https://huggingface.co/settings/tokens
# NOTE: a token that was previously committed in this cell should be treated as
# leaked and revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

# Using a smaller model to avoid size limits
# Try 'google/flan-t5-large' or 'google/flan-t5-base'
llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",  # Switched to a smaller model
    model_kwargs={"temperature": 0.5, "max_length": 512},
)

# 6. Create RAG Chain: the LLM answers using chunks fetched by the FAISS retriever;
#    return_source_documents=True keeps the retrieved chunks in the result
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
)

# 7. Query the System with one sample question
query = "What are the main applications of AI?"
result = qa_chain({"query": query})

# Show the answer, then at most three sources with a 150-character preview each
print("Answer:", result["result"])
print("\nSources:")
for doc in result["source_documents"][:3]:
    print(f"- {doc.metadata['source']}: {doc.page_content[:150]}...")

Answer: advanced web search engines

Sources:
- https://en.wikipedia.org/wiki/Artificial_intelligence: High-profile applications of AI include advanced web search engines (e.g., Google Search); recommendation systems (used by YouTube, Amazon, and Netfli...
- https://en.wikipedia.org/wiki/Artificial_intelligence: Various subfields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include re...
- https://en.wikipedia.org/wiki/Artificial_intelligence: Applications
Main article: Applications of artificial intelligenceAI and machine learning technology is used in most of the essential applications of ...
