<a href="https://colab.research.google.com/github/NormLorenz/ai-llm-gradio-rag/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Main Notebook
Build a RAG pipeline from a file!

## Documentation
https://docs.google.com/document/d/1iRPcqsYZj0Jmd6QqI6UoT2mVbs3BjGkpIpGCI5SFQCM/edit?tab=t.0

In [2]:
# Install all packages

!uv pip install gradio openai pinecone langchain langchain-openai langchain_pinecone pypdf tiktoken

# Pinecone with optional features
# uv add "pinecone[asyncio,grpc]"

# LangChain with OpenAI integration
# !uv add langchain langchain-openai


[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m86 packages[0m [2min 1.11s[0m[0m
[2K[2mPrepared [1m14 packages[0m [2min 750ms[0m[0m
[2mUninstalled [1m5 packages[0m [2min 58ms[0m[0m
[2K[2mInstalled [1m14 packages[0m [2min 92ms[0m[0m
 [31m-[39m [1maiohttp[0m[2m==3.13.2[0m
 [32m+[39m [1maiohttp[0m[2m==3.9.5[0m
 [31m-[39m [1mlangchain[0m[2m==1.1.0[0m
 [32m+[39m [1mlangchain[0m[2m==0.3.27[0m
 [31m-[39m [1mlangchain-core[0m[2m==1.1.0[0m
 [32m+[39m [1mlangchain-core[0m[2m==0.3.80[0m
 [32m+[39m [1mlangchain-openai[0m[2m==0.3.35[0m
 [32m+[39m [1mlangchain-pinecone[0m[2m==0.2.0[0m
 [32m+[39m [1mlangchain-text-splitters[0m[2m==0.3.11[0m
 [31m-[39m [1mnumpy[0m[2m==2.0.2[0m
 [32m+[39m [1mnumpy[0m[2m==1.26.4[0m
 [31m-[39m [1mpackaging[0m[2m==25.0[0m
 [32m+[39m [1mpackaging[0m[2m==24.2[0m
 [32m+[39m [1mpinecone[0m[2m==8.0.0[0m
 [32m+[39m [1mpinecone-client[0m[2m==5.0

In [14]:
# Install dependencies:
!uv pip install langchain<0.1.0 openai pinecone-client gradio pypdf tiktoken langchain_openai langchain_pinecone

/bin/bash: line 1: 0.1.0: No such file or directory


In [5]:
# !pip install langchain --upgrade --force-reinstall # Reinstall langchain to ensure 'chains' module is present and correct
# !pip install langchain_openai # Install langchain_openai

import os
import gradio as gr
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
import pinecone
from pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
# from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from google.colab import userdata

DeprecatedPluginError: The `pinecone-plugin-inference` package has been deprecated. The features from that plugin have been incorporated into the main `pinecone` package with no need for additional plugins. Please remove the `pinecone-plugin-inference` package from your dependencies to ensure you have the most up-to-date version of these features.

In [None]:
# Set API keys
os.environ["OPENAI_API_KEY"] =  userdata.get("OPENAI_API_KEY")
os.environ["PINECONE_API_KEY"] =  userdata.get("PINECONE_API_KEY")
os.environ["PINECONE_ENVIRONMENT"] =  userdata.get("PINECONE_ENVIRONMENT")

In [None]:
# Initialize Pinecone
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment=os.environ["PINECONE_ENVIRONMENT"])
index_name = "rag-demo"

if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=1536)  # matches OpenAI embedding size

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [None]:
# Function to process uploaded document
def process_document(file):
    # Load PDF or text
    if file.name.endswith(".pdf"):
        loader = PyPDFLoader(file.name)
        docs = loader.load()
    else:
        text = open(file.name, "r", encoding="utf-8").read()
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = splitter.create_documents([text])

    # Store in Pinecone
    vectorstore = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    return "Document uploaded and indexed successfully!"

In [None]:
# Function to answer questions
def answer_question(query):
    vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    llm = ChatOpenAI(model="gpt-4", temperature=0)
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
    return qa_chain.run(query)

In [None]:
# Build a Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## üìò RAG Q&A Demo with Document Upload")
    with gr.Row():
        upload = gr.File(label="Upload a PDF or TXT file")
        upload_output = gr.Textbox(label="Upload Status")
    upload.upload(process_document, upload, upload_output)
    query = gr.Textbox(lines=2, placeholder="Ask a question...")
    answer = gr.Textbox(label="Answer")
    query.submit(answer_question, query, answer)
demo.launch()

In [4]:
# install packages

!uv pip install gradio
!uv pip install openai
!uv pip install pinecone
!uv pip install langchain
!uv pip install langchain-openai
!uv pip install langchain-pinecone
!uv pip install langchain-community
!uv pip install pypdf

[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 83ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 76ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 73ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 78ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 74ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 75ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 74ms[0m[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 72ms[0m[0m


In [7]:
# install text splitters
!pip install langchain_text_splitters

# declare imports
import gradio as gr
import os
from pinecone import Pinecone, ServerlessSpec
from langchain_text_splitters import RecursiveCharacterTextSplitter # Corrected import path
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader, TextLoader
import tempfile



AttributeError: module 'numpy._core._multiarray_umath' has no attribute '_blas_supports_fpe'

In [5]:


# Initialize clients (set your API keys as environment variables)
# export OPENAI_API_KEY="your-key"
# export PINECONE_API_KEY="your-key"

PINECONE_INDEX_NAME = "rag-qa-index"

class RAGPipeline:
    def __init__(self):
        self.embeddings = None
        self.vectorstore = None
        self.qa_chain = None
        self.pc = None
        self.index = None

    def initialize_pinecone(self, api_key):
        """Initialize Pinecone client and create/connect to index"""
        try:
            self.pc = Pinecone(api_key=api_key)

            # Check if index exists, if not create it
            existing_indexes = [index.name for index in self.pc.list_indexes()]

            if PINECONE_INDEX_NAME not in existing_indexes:
                self.pc.create_index(
                    name=PINECONE_INDEX_NAME,
                    dimension=1536,  # OpenAI embeddings dimension
                    metric='cosine',
                    spec=ServerlessSpec(
                        cloud='aws',
                        region='us-east-1'
                    )
                )

            self.index = self.pc.Index(PINECONE_INDEX_NAME)
            return "‚úì Pinecone initialized successfully"
        except Exception as e:
            return f"‚úó Pinecone initialization failed: {str(e)}"

    def process_document(self, file, openai_key, pinecone_key, chunk_size, chunk_overlap):
        """Process uploaded document and store in Pinecone"""
        try:
            # Initialize APIs
            os.environ["OPENAI_API_KEY"] = openai_key
            pinecone_status = self.initialize_pinecone(pinecone_key)

            if "failed" in pinecone_status:
                return pinecone_status

            # Save uploaded file temporarily
            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.name)[1]) as tmp_file:
                tmp_file.write(file.read() if hasattr(file, 'read') else open(file.name, 'rb').read())
                tmp_path = tmp_file.name

            # Load document based on file type
            if file.name.endswith('.pdf'):
                loader = PyPDFLoader(tmp_path)
            elif file.name.endswith('.txt'):
                loader = TextLoader(tmp_path)
            else:
                return "‚úó Unsupported file format. Please upload PDF or TXT file."

            documents = loader.load()

            # Split documents into chunks
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=int(chunk_size),
                chunk_overlap=int(chunk_overlap),
                length_function=len
            )
            chunks = text_splitter.split_documents(documents)

            # Initialize embeddings
            self.embeddings = OpenAIEmbeddings(openai_api_key=openai_key)

            # Create vector store
            self.vectorstore = PineconeVectorStore.from_documents(
                documents=chunks,
                embedding=self.embeddings,
                index_name=PINECONE_INDEX_NAME
            )

            # Initialize QA chain
            llm = ChatOpenAI(
                model_name="gpt-4",
                temperature=0,
                openai_api_key=openai_key
            )

            self.qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=self.vectorstore.as_retriever(
                    search_kwargs={"k": 3}
                ),
                return_source_documents=True
            )

            # Clean up temporary file
            os.unlink(tmp_path)

            return f"‚úì Document processed successfully!\n- File: {file.name}\n- Chunks created: {len(chunks)}\n- Ready for questions!"

        except Exception as e:
            return f"‚úó Error processing document: {str(e)}"

    def answer_question(self, question):
        """Answer question using RAG pipeline"""
        if not self.qa_chain:
            return "‚ö† Please upload and process a document first!"

        if not question.strip():
            return "‚ö† Please enter a question!"

        try:
            result = self.qa_chain.invoke({"query": question})

            answer = result['result']
            sources = result.get('source_documents', [])

            # Format response with sources
            response = f"**Answer:**\n{answer}\n\n"

            if sources:
                response += "**Sources:**\n"
                for i, doc in enumerate(sources[:3], 1):
                    content_preview = doc.page_content[:200] + "..." if len(doc.page_content) > 200 else doc.page_content
                    response += f"\n{i}. {content_preview}\n"

            return response

        except Exception as e:
            return f"‚úó Error answering question: {str(e)}"

# Initialize pipeline
pipeline = RAGPipeline()

# Create Gradio interface
with gr.Blocks(title="RAG Q&A Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üìö RAG Q&A Pipeline")
    gr.Markdown("Upload a document (PDF or TXT) and ask questions about its content using AI-powered retrieval.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### üîë API Configuration")
            openai_key = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="sk-..."
            )
            pinecone_key = gr.Textbox(
                label="Pinecone API Key",
                type="password",
                placeholder="your-pinecone-key"
            )

            gr.Markdown("### üìÑ Document Upload")
            file_input = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".txt"]
            )

            with gr.Accordion("‚öôÔ∏è Advanced Settings", open=False):
                chunk_size = gr.Slider(
                    minimum=100,
                    maximum=2000,
                    value=1000,
                    step=100,
                    label="Chunk Size"
                )
                chunk_overlap = gr.Slider(
                    minimum=0,
                    maximum=500,
                    value=200,
                    step=50,
                    label="Chunk Overlap"
                )

            process_btn = gr.Button("üöÄ Process Document", variant="primary")
            status_output = gr.Textbox(
                label="Status",
                lines=5,
                interactive=False
            )

        with gr.Column(scale=1):
            gr.Markdown("### üí¨ Ask Questions")
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything about the uploaded document...",
                lines=3
            )
            ask_btn = gr.Button("üîç Get Answer", variant="primary")
            answer_output = gr.Markdown(label="Answer")

            gr.Markdown("### üìù Example Questions")
            gr.Examples(
                examples=[
                    ["What is the main topic of this document?"],
                    ["Can you summarize the key points?"],
                    ["What are the main conclusions?"],
                ],
                inputs=question_input
            )

    # Event handlers
    process_btn.click(
        fn=pipeline.process_document,
        inputs=[file_input, openai_key, pinecone_key, chunk_size, chunk_overlap],
        outputs=status_output
    )

    ask_btn.click(
        fn=pipeline.answer_question,
        inputs=question_input,
        outputs=answer_output
    )

    question_input.submit(
        fn=pipeline.answer_question,
        inputs=question_input,
        outputs=answer_output
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(share=False)

ModuleNotFoundError: No module named 'langchain.text_splitter'

#