In [None]:
# !pip install gradio
!pip install -U langchain-huggingface
!pip install tf-keras
!pip install streamlit
!pip install unstructured
!pip install langchain-community
!pip install langchain-huggingface
!pip install faiss-cpu
!pip install sentence-transformers

Collecting langchain-huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Downloading langchain_huggingface-0.1.2-py3-none-any.whl (21 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.1.2
Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m67.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import gradio as gr
import pickle
from langchain.llms import HuggingFaceEndpoint
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from langchain.vectorstores import FAISS

# Set HuggingFace API token
# SECURITY: API tokens are secrets and must not be written into the source.
# Any token that was ever committed with this file should be revoked.
# The token is taken from the environment when already set; otherwise the user
# is prompted for it without echoing the input.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    from getpass import getpass

    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass('HuggingFace API token: ')

# Initialize the model


# Function to load and process URLs
def process_urls(url1, url2, url3):
    """Build a retrieval question-answering chain over up to three web pages.

    The pages are loaded, split into overlapping chunks, embedded, and indexed
    in an in-memory FAISS store. The returned chain retrieves from that store
    and answers with the ``mistralai/Mistral-Nemo-Instruct-2407`` endpoint.

    Args:
        url1: First page URL. Blank or ``None`` values are skipped.
        url2: Second page URL. Blank or ``None`` values are skipped.
        url3: Third page URL. Blank or ``None`` values are skipped.

    Returns:
        The chain created by ``RetrievalQAWithSourcesChain.from_llm``.

    Raises:
        ValueError: If every URL is blank, or no content could be loaded.
    """
    # Empty text boxes arrive as "" (or None); handing those to the loader only
    # produces fetch errors, so keep the real URLs only.
    urls = [
        str(url).strip()
        for url in (url1, url2, url3)
        if url is not None and str(url).strip()
    ]
    if not urls:
        raise ValueError("At least one non-empty URL is required.")

    llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-Nemo-Instruct-2407")
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()

    # Split documents into smaller chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )
    docs = text_splitter.split_documents(data)
    if not docs:
        # Fail with a clear message instead of letting the index build fail
        # on an empty document list.
        raise ValueError("No content could be loaded from the provided URLs.")

    # Create embeddings and store in FAISS index
    embedding = HuggingFaceEndpointEmbeddings()
    vectorindex_hugging = FAISS.from_documents(docs, embedding)

    # Save the vector index to file (optional). This is best-effort: the path
    # only exists when Google Drive is mounted, and a failed save must not
    # prevent answering questions from the in-memory index.
    # NOTE(review): pickling the whole vector store is fragile; consider the
    # store's own save_local/load_local instead — confirm before switching,
    # since it changes the on-disk format.
    file_path = "/content/drive/MyDrive/pythongenai/vector_db5.pkl"
    if os.path.isdir(os.path.dirname(file_path)):
        try:
            with open(file_path, 'wb') as f:
                pickle.dump(vectorindex_hugging, f)
        except (OSError, pickle.PicklingError, TypeError) as exc:
            print(f"Warning: could not save vector index to {file_path}: {exc}")

    # Create the RetrievalQA chain with sources, using the index that is
    # already in memory (no need to read the pickle back).
    chain = RetrievalQAWithSourcesChain.from_llm(
        llm=llm,
        retriever=vectorindex_hugging.as_retriever()
    )

    return chain

# Run a single question through an already-built retrieval chain
def answer_question(question, chain):
    """Return the chain's answer text for ``question``.

    ``chain`` is called with a ``{'question': ...}`` mapping and
    ``return_only_outputs=True``; only the ``'answer'`` entry of what it
    returns is handed back to the caller.
    """
    payload = dict(question=question)
    outputs = chain(payload, return_only_outputs=True)
    return outputs['answer']

# Create Gradio interface using Blocks API
def create_gradio_interface():
    """Build the Gradio Blocks UI for URL processing and question answering.

    The UI has three URL text boxes, a question box, a read-only answer box
    and two buttons. "Submit URLs" builds a chain with ``process_urls`` and
    stores it in a ``gr.State``; "Submit Question" passes the question and the
    stored chain to ``answer_question`` and shows the result.

    Returns:
        The assembled ``gr.Blocks`` object (not launched).
    """
    with gr.Blocks() as interface:
        gr.Markdown("### URL Processing and Question Answering")

        with gr.Row():
            url1 = gr.Textbox(label="Enter URL 1", placeholder="Enter the first URL here")
            url2 = gr.Textbox(label="Enter URL 2", placeholder="Enter the second URL here")
            url3 = gr.Textbox(label="Enter URL 3", placeholder="Enter the third URL here")

        with gr.Row():
            question = gr.Textbox(label="Enter your Question", placeholder="Ask your question here")

        answer_area = gr.Textbox(label="Answer", interactive=False)
        chain_state = gr.State()  # Store chain state after processing URLs

        with gr.Row():
            url_button = gr.Button("Submit URLs")
            question_button = gr.Button("Submit Question")

        # Define button actions
        def on_urls_submit(url1, url2, url3):
            """Build the chain for the submitted URLs and report success."""
            try:
                chain_value = process_urls(url1, url2, url3)
            except Exception as exc:
                # UI boundary: surface the failure to the user instead of a
                # generic error. Raising leaves the outputs (and therefore any
                # previously stored chain) untouched.
                raise gr.Error(f"Could not process URLs: {exc}") from exc
            return chain_value, "URLs processed successfully!"

        def on_question_submit(question, chain):
            """Answer the question with the stored chain, if one exists."""
            if chain is None:
                return "Please submit URLs first!"
            if not question or not question.strip():
                # Avoid sending an empty prompt to the model.
                return "Please enter a question!"
            try:
                return answer_question(question, chain)
            except Exception as exc:
                # UI boundary: show the reason rather than a generic error.
                raise gr.Error(f"Could not answer the question: {exc}") from exc

        # Connect buttons to functions
        url_button.click(
            on_urls_submit,
            inputs=[url1, url2, url3],
            outputs=[chain_state, answer_area]
        )
        question_button.click(
            on_question_submit,
            inputs=[question, chain_state],
            outputs=answer_area
        )

    return interface

# Launch the Gradio interface
# Guarded so the UI is built and served only when this file runs as the main
# module, not when it is imported. The Blocks object is bound to the
# module-level name `interface` before launch() is called on it.
if __name__ == "__main__":
    interface = create_gradio_interface()
    interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fc5b1c2b1117cc2008.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
