In [1]:
# RAG System with Groq LLM - Complete Implementation
# Run this in Google Colab

# ============ INSTALLATION ============
# Run this cell first to install required packages
"""
!pip install -q langchain langchain-groq langchain-community
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl
!pip install -q gradio
"""

# ============ IMPORTS ============
import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from langchain.docstore.document import Document
import PyPDF2
import docx
import pandas as pd
from typing import List, Tuple
import tempfile

# ============ GLOBAL VARIABLES ============
vector_store = None
qa_chain = None
groq_api_key = None

# ============ DOCUMENT PROCESSING FUNCTIONS ============

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
    return text

def extract_text_from_docx(file_path: str) -> str:
    """Extract text from DOCX file"""
    try:
        doc = docx.Document(file_path)
        text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")
    return text

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel file"""
    try:
        df = pd.read_excel(file_path, sheet_name=None)
        text = ""
        for sheet_name, sheet_data in df.items():
            text += f"\n--- Sheet: {sheet_name} ---\n"
            text += sheet_data.to_string(index=False) + "\n"
    except Exception as e:
        raise Exception(f"Error reading Excel: {str(e)}")
    return text

def extract_text_from_csv(file_path: str) -> str:
    """Extract text from CSV file"""
    try:
        df = pd.read_csv(file_path)
        text = df.to_string(index=False)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")
    return text

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from TXT file"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
    except Exception as e:
        raise Exception(f"Error reading TXT: {str(e)}")
    return text

def process_uploaded_file(file_path: str) -> str:
    """Process uploaded file based on its extension"""
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        return extract_text_from_docx(file_path)
    elif file_extension in ['.xlsx', '.xls']:
        return extract_text_from_excel(file_path)
    elif file_extension == '.csv':
        return extract_text_from_csv(file_path)
    elif file_extension == '.txt':
        return extract_text_from_txt(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

# ============ RAG CORE FUNCTIONS ============

def verify_groq_api_key(api_key: str) -> Tuple[str, str]:
    """Verify if the Groq API key is valid"""
    if not api_key or api_key.strip() == "":
        return "‚ùå Please enter an API key", ""

    try:
        # Test the API key with a simple request
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0
        )
        # Make a simple test call
        response = llm.invoke("Say 'API key is valid'")

        global groq_api_key
        groq_api_key = api_key

        return "‚úÖ API Key is Valid! You can now upload documents.", "verified"
    except Exception as e:
        return f"‚ùå Invalid API Key: {str(e)}", ""

def create_vector_store(documents: List[Document]) -> FAISS:
    """Create FAISS vector store from documents"""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def process_documents(files, api_status) -> str:
    """Process uploaded documents and create vector store"""
    global vector_store, qa_chain, groq_api_key

    # Check if API key is verified
    if api_status != "verified":
        return "‚ùå Please verify your Groq API key first!"

    if not files:
        return "‚ùå Please upload at least one document!"

    try:
        all_texts = []

        # Process each uploaded file
        for file in files:
            file_path = file.name
            text = process_uploaded_file(file_path)
            all_texts.append(text)

        # Combine all texts
        combined_text = "\n\n".join(all_texts)

        # Split text into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        chunks = text_splitter.split_text(combined_text)
        documents = [Document(page_content=chunk) for chunk in chunks]

        # Create vector store
        vector_store = create_vector_store(documents)

        # Create QA chain
        llm = ChatGroq(
            groq_api_key=groq_api_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=False
        )

        return f"‚úÖ Successfully processed {len(files)} document(s) with {len(chunks)} text chunks!"

    except Exception as e:
        return f"‚ùå Error processing documents: {str(e)}"

def answer_question(question: str, chat_history: List) -> Tuple[str, List]:
    """Answer question based on uploaded documents"""
    global qa_chain

    if qa_chain is None:
        response = "‚ùå Please upload and process documents first!"
        chat_history.append((question, response))
        return "", chat_history

    if not question or question.strip() == "":
        response = "‚ùå Please enter a question!"
        chat_history.append((question, response))
        return "", chat_history

    try:
        # Get answer from QA chain
        result = qa_chain.invoke({"query": question})
        answer = result['result']

        chat_history.append((question, answer))
        return "", chat_history

    except Exception as e:
        response = f"‚ùå Error: {str(e)}"
        chat_history.append((question, response))
        return "", chat_history

def clear_chat():
    """Clear chat history"""
    return []

def reset_system():
    """Reset the entire system"""
    global vector_store, qa_chain, groq_api_key
    vector_store = None
    qa_chain = None
    groq_api_key = None
    return "", "", [], "Please verify your API key and upload documents again."

# ============ GRADIO INTERFACE ============

def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # ü§ñ RAG System with Groq LLM
            ### Upload your documents and ask questions!
            Supports: PDF, DOCX, TXT, CSV, Excel files
            """
        )

        api_status = gr.State("")

        with gr.Row():
            with gr.Column(scale=2):
                api_key_input = gr.Textbox(
                    label="üîë Groq API Key",
                    placeholder="Enter your Groq API key here...",
                    type="password"
                )
            with gr.Column(scale=1):
                verify_btn = gr.Button("‚úÖ Verify API Key", variant="primary")

        api_status_text = gr.Textbox(label="API Status", interactive=False)

        gr.Markdown("---")

        with gr.Row():
            file_upload = gr.Files(
                label="üìÅ Upload Documents",
                file_types=[".pdf", ".docx", ".txt", ".csv", ".xlsx", ".xls"],
                file_count="multiple"
            )

        process_btn = gr.Button("üîÑ Process Documents", variant="primary", size="lg")
        process_status = gr.Textbox(label="Processing Status", interactive=False)

        gr.Markdown("---")

        chatbot = gr.Chatbot(label="üí¨ Chat with your Documents", height=400)

        with gr.Row():
            question_input = gr.Textbox(
                label="‚ùì Ask a Question",
                placeholder="Type your question here...",
                scale=4
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("üóëÔ∏è Clear Chat")
            reset_btn = gr.Button("üîÑ Reset System", variant="stop")

        gr.Markdown(
            """
            ---
            ### üìù Instructions:
            1. Enter your Groq API key and verify it
            2. Upload one or more documents (PDF, DOCX, TXT, CSV, Excel)
            3. Click "Process Documents" to create the knowledge base
            4. Ask questions about your documents in the chat
            """
        )

        # Event handlers
        verify_btn.click(
            fn=verify_groq_api_key,
            inputs=[api_key_input],
            outputs=[api_status_text, api_status]
        )

        process_btn.click(
            fn=process_documents,
            inputs=[file_upload, api_status],
            outputs=[process_status]
        )

        submit_btn.click(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        question_input.submit(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot]
        )

        reset_btn.click(
            fn=reset_system,
            outputs=[api_key_input, api_status_text, chatbot, process_status]
        )

    return demo

# ============ MAIN EXECUTION ============

if __name__ == "__main__":
    demo = create_interface()
    demo.launch(debug=True, share=True)

ModuleNotFoundError: No module named 'langchain.text_splitter'

In [4]:
# ============================================================
# CELL 1: INSTALLATION (Run this first!)
# ============================================================
# Copy and run this cell first, wait for it to complete

!pip install -q langchain langchain-groq langchain-community
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl pandas
!pip install -q gradio

print("‚úÖ All packages installed successfully!")
print("Now run CELL 2 to start the application")

# ============================================================
# CELL 2: MAIN APPLICATION (Run after Cell 1 completes)
# ============================================================

import os
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from langchain.docstore.document import Document
import PyPDF2
import docx
import pandas as pd
from typing import List, Tuple

# ============ GLOBAL VARIABLES ============
vector_store = None
qa_chain = None
groq_api_key = None

# ============ DOCUMENT PROCESSING FUNCTIONS ============

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
    return text

def extract_text_from_docx(file_path: str) -> str:
    """Extract text from DOCX file"""
    try:
        doc = docx.Document(file_path)
        text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")
    return text

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel file"""
    try:
        df = pd.read_excel(file_path, sheet_name=None)
        text = ""
        for sheet_name, sheet_data in df.items():
            text += f"\n--- Sheet: {sheet_name} ---\n"
            text += sheet_data.to_string(index=False) + "\n"
    except Exception as e:
        raise Exception(f"Error reading Excel: {str(e)}")
    return text

def extract_text_from_csv(file_path: str) -> str:
    """Extract text from CSV file"""
    try:
        df = pd.read_csv(file_path)
        text = df.to_string(index=False)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")
    return text

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from TXT file"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
    except Exception as e:
        raise Exception(f"Error reading TXT: {str(e)}")
    return text

def process_uploaded_file(file_path: str) -> str:
    """Process uploaded file based on its extension"""
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        return extract_text_from_docx(file_path)
    elif file_extension in ['.xlsx', '.xls']:
        return extract_text_from_excel(file_path)
    elif file_extension == '.csv':
        return extract_text_from_csv(file_path)
    elif file_extension == '.txt':
        return extract_text_from_txt(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

# ============ RAG CORE FUNCTIONS ============

def verify_groq_api_key(api_key: str) -> Tuple[str, str]:
    """Verify if the Groq API key is valid"""
    if not api_key or api_key.strip() == "":
        return "‚ùå Please enter an API key", ""

    try:
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0
        )
        response = llm.invoke("Say 'API key is valid'")

        global groq_api_key
        groq_api_key = api_key

        return "‚úÖ API Key is Valid! You can now upload documents.", "verified"
    except Exception as e:
        return f"‚ùå Invalid API Key: {str(e)}", ""

def create_vector_store(documents: List[Document]) -> FAISS:
    """Create FAISS vector store from documents"""
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def process_documents(files, api_status) -> str:
    """Process uploaded documents and create vector store"""
    global vector_store, qa_chain, groq_api_key

    if api_status != "verified":
        return "‚ùå Please verify your Groq API key first!"

    if not files:
        return "‚ùå Please upload at least one document!"

    try:
        all_texts = []

        for file in files:
            file_path = file.name
            text = process_uploaded_file(file_path)
            all_texts.append(text)

        combined_text = "\n\n".join(all_texts)

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        chunks = text_splitter.split_text(combined_text)
        documents = [Document(page_content=chunk) for chunk in chunks]

        vector_store = create_vector_store(documents)

        llm = ChatGroq(
            groq_api_key=groq_api_key,
            model_name="llama-3.3-70b-versatile",
            temperature=0
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=False
        )

        return f"‚úÖ Successfully processed {len(files)} document(s) with {len(chunks)} text chunks!"

    except Exception as e:
        return f"‚ùå Error processing documents: {str(e)}"

def answer_question(question: str, chat_history: List) -> Tuple[str, List]:
    """Answer question based on uploaded documents"""
    global qa_chain

    if qa_chain is None:
        response = "‚ùå Please upload and process documents first!"
        chat_history.append((question, response))
        return "", chat_history

    if not question or question.strip() == "":
        response = "‚ùå Please enter a question!"
        chat_history.append((question, response))
        return "", chat_history

    try:
        result = qa_chain.invoke({"query": question})
        answer = result['result']

        chat_history.append((question, answer))
        return "", chat_history

    except Exception as e:
        response = f"‚ùå Error: {str(e)}"
        chat_history.append((question, response))
        return "", chat_history

def clear_chat():
    """Clear chat history"""
    return []

def reset_system():
    """Reset the entire system"""
    global vector_store, qa_chain, groq_api_key
    vector_store = None
    qa_chain = None
    groq_api_key = None
    return "", "", [], "Please verify your API key and upload documents again."

# ============ GRADIO INTERFACE ============

def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # ü§ñ RAG System with Groq LLM
            ### Upload your documents and ask questions!
            **Supports:** PDF, DOCX, TXT, CSV, Excel files
            """
        )

        api_status = gr.State("")

        with gr.Row():
            with gr.Column(scale=2):
                api_key_input = gr.Textbox(
                    label="üîë Groq API Key",
                    placeholder="Enter your Groq API key here...",
                    type="password"
                )
            with gr.Column(scale=1):
                verify_btn = gr.Button("‚úÖ Verify API Key", variant="primary")

        api_status_text = gr.Textbox(label="API Status", interactive=False)

        gr.Markdown("---")

        with gr.Row():
            file_upload = gr.Files(
                label="üìÅ Upload Documents",
                file_types=[".pdf", ".docx", ".txt", ".csv", ".xlsx", ".xls"],
                file_count="multiple"
            )

        process_btn = gr.Button("üîÑ Process Documents", variant="primary", size="lg")
        process_status = gr.Textbox(label="Processing Status", interactive=False)

        gr.Markdown("---")

        chatbot = gr.Chatbot(label="üí¨ Chat with your Documents", height=400)

        with gr.Row():
            question_input = gr.Textbox(
                label="‚ùì Ask a Question",
                placeholder="Type your question here...",
                scale=4
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("üóëÔ∏è Clear Chat")
            reset_btn = gr.Button("üîÑ Reset System", variant="stop")

        gr.Markdown(
            """
            ---
            ### üìù How to Use:
            1. **Enter** your Groq API key and click **Verify**
            2. **Upload** documents (PDF, DOCX, TXT, CSV, Excel)
            3. Click **Process Documents** to build knowledge base
            4. **Ask questions** about your documents!

            üí° Get your free Groq API key: [console.groq.com/keys](https://console.groq.com/keys)
            """
        )

        # Event handlers
        verify_btn.click(
            fn=verify_groq_api_key,
            inputs=[api_key_input],
            outputs=[api_status_text, api_status]
        )

        process_btn.click(
            fn=process_documents,
            inputs=[file_upload, api_status],
            outputs=[process_status]
        )

        submit_btn.click(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        question_input.submit(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot]
        )

        reset_btn.click(
            fn=reset_system,
            outputs=[api_key_input, api_status_text, chatbot, process_status]
        )

    return demo

# ============ LAUNCH APPLICATION ============

print("üöÄ Starting RAG System with Groq LLM...")
demo = create_interface()
demo.launch(debug=True, share=True)

‚úÖ All packages installed successfully!
Now run CELL 2 to start the application


ModuleNotFoundError: No module named 'langchain.text_splitter'

In [7]:
# ============================================================
# CELL 1: INSTALLATION (Run this first!)
# ============================================================

!pip install -q -U langchain-groq
!pip install -q -U langchain-community
!pip install -q -U langchain-core
!pip install -q -U langchain-text-splitters
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl pandas
!pip install -q gradio

print("‚úÖ All packages installed successfully!")
print("üìå Now run CELL 2 to start the application")

# ============================================================
# CELL 2: MAIN APPLICATION (Run after Cell 1 completes)
# ============================================================

import os
import gradio as gr
import PyPDF2
import pandas as pd
from typing import List, Tuple

# LangChain imports
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except:
    from langchain.text_splitter import RecursiveCharacterTextSplitter

try:
    from langchain_core.documents import Document
except:
    from langchain.schema import Document

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

# python-docx import
try:
    import docx
except ImportError:
    print("Warning: python-docx not available")

# ============ GLOBAL VARIABLES ============
vector_store = None
qa_chain = None
groq_api_key = None

# ============ DOCUMENT PROCESSING FUNCTIONS ============

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                extracted = page.extract_text()
                if extracted:
                    text += extracted + "\n"
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
    return text

def extract_text_from_docx(file_path: str) -> str:
    """Extract text from DOCX file"""
    try:
        doc = docx.Document(file_path)
        text = "\n".join([paragraph.text for paragraph in doc.paragraphs if paragraph.text])
        return text
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel file"""
    try:
        df = pd.read_excel(file_path, sheet_name=None)
        text = ""
        for sheet_name, sheet_data in df.items():
            text += f"\n=== Sheet: {sheet_name} ===\n"
            text += sheet_data.to_string(index=False) + "\n"
        return text
    except Exception as e:
        raise Exception(f"Error reading Excel: {str(e)}")

def extract_text_from_csv(file_path: str) -> str:
    """Extract text from CSV file"""
    try:
        df = pd.read_csv(file_path)
        return df.to_string(index=False)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from TXT file"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as file:
                return file.read()
        except Exception as e:
            raise Exception(f"Error reading TXT: {str(e)}")
    except Exception as e:
        raise Exception(f"Error reading TXT: {str(e)}")

def process_uploaded_file(file_path: str) -> str:
    """Process uploaded file based on its extension"""
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        return extract_text_from_docx(file_path)
    elif file_extension in ['.xlsx', '.xls']:
        return extract_text_from_excel(file_path)
    elif file_extension == '.csv':
        return extract_text_from_csv(file_path)
    elif file_extension == '.txt':
        return extract_text_from_txt(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

# ============ RAG CORE FUNCTIONS ============

def verify_groq_api_key(api_key: str) -> Tuple[str, str]:
    """Verify if the Groq API key is valid"""
    if not api_key or api_key.strip() == "":
        return "‚ùå Please enter an API key", ""

    try:
        llm = ChatGroq(
            api_key=api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )
        response = llm.invoke("Say 'OK'")

        global groq_api_key
        groq_api_key = api_key

        return "‚úÖ API Key is Valid! You can now upload documents.", "verified"
    except Exception as e:
        error_msg = str(e)
        if "authentication" in error_msg.lower() or "api" in error_msg.lower():
            return "‚ùå Invalid API Key. Please check your key and try again.", ""
        return f"‚ùå Error: {error_msg}", ""

def create_vector_store(documents: List[Document]) -> FAISS:
    """Create FAISS vector store from documents"""
    print("Creating embeddings...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    print("Building vector store...")
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def process_documents(files, api_status) -> str:
    """Process uploaded documents and create vector store"""
    global vector_store, qa_chain, groq_api_key

    if api_status != "verified":
        return "‚ùå Please verify your Groq API key first!"

    if not files:
        return "‚ùå Please upload at least one document!"

    try:
        all_texts = []
        file_names = []

        print(f"Processing {len(files)} file(s)...")

        for file in files:
            file_path = file.name
            file_name = os.path.basename(file_path)
            print(f"Processing: {file_name}")

            text = process_uploaded_file(file_path)
            if text.strip():
                all_texts.append(text)
                file_names.append(file_name)
            else:
                print(f"Warning: No text extracted from {file_name}")

        if not all_texts:
            return "‚ùå No text could be extracted from the uploaded files!"

        combined_text = "\n\n".join(all_texts)
        print(f"Total text length: {len(combined_text)} characters")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        chunks = text_splitter.split_text(combined_text)
        documents = [Document(page_content=chunk) for chunk in chunks]

        print(f"Created {len(chunks)} chunks")

        vector_store = create_vector_store(documents)

        print("Creating QA chain...")
        llm = ChatGroq(
            api_key=groq_api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=False
        )

        return f"‚úÖ Successfully processed {len(files)} document(s)!\nüìä Created {len(chunks)} text chunks from: {', '.join(file_names)}"

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        return f"‚ùå Error processing documents: {str(e)}"

def answer_question(question: str, chat_history: List) -> Tuple[str, List]:
    """Answer question based on uploaded documents"""
    global qa_chain

    if qa_chain is None:
        response = "‚ùå Please upload and process documents first!"
        chat_history.append((question, response))
        return "", chat_history

    if not question or question.strip() == "":
        return "", chat_history

    try:
        print(f"Question: {question}")
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        print(f"Answer: {answer}")

        chat_history.append((question, answer))
        return "", chat_history

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        response = f"‚ùå Error: {str(e)}"
        chat_history.append((question, response))
        return "", chat_history

def clear_chat():
    """Clear chat history"""
    return []

def reset_system():
    """Reset the entire system"""
    global vector_store, qa_chain, groq_api_key
    vector_store = None
    qa_chain = None
    groq_api_key = None
    return "", "", [], "System reset. Please verify your API key and upload documents again."

# ============ GRADIO INTERFACE ============

def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # ü§ñ RAG System with Groq LLM
            ### Upload your documents and ask questions powered by AI!
            **Supported formats:** PDF, DOCX, TXT, CSV, Excel
            """
        )

        api_status = gr.State("")

        # API Key Section
        gr.Markdown("### üîë Step 1: Verify Your API Key")
        with gr.Row():
            with gr.Column(scale=3):
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key (e.g., gsk_...)",
                    type="password"
                )
            with gr.Column(scale=1):
                verify_btn = gr.Button("‚úÖ Verify Key", variant="primary", size="lg")

        api_status_text = gr.Textbox(label="Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Document Upload Section
        gr.Markdown("### üìÅ Step 2: Upload Your Documents")
        file_upload = gr.Files(
            label="Select Files",
            file_types=[".pdf", ".docx", ".txt", ".csv", ".xlsx", ".xls"],
            file_count="multiple"
        )

        process_btn = gr.Button("üîÑ Process Documents", variant="primary", size="lg")
        process_status = gr.Textbox(label="Processing Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Chat Section
        gr.Markdown("### üí¨ Step 3: Ask Questions")
        chatbot = gr.Chatbot(label="Conversation", height=400)

        with gr.Row():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything about your documents...",
                scale=4,
                show_label=False
            )
            submit_btn = gr.Button("üì§ Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("üóëÔ∏è Clear Chat", size="sm")
            reset_btn = gr.Button("üîÑ Reset All", variant="stop", size="sm")

        gr.Markdown(
            """
            ---
            ### üí° Tips:
            - Get your free API key at [console.groq.com/keys](https://console.groq.com/keys)
            - Upload multiple documents at once
            - Ask specific questions for better answers
            - The AI will search through all uploaded documents
            """
        )

        # Event handlers
        verify_btn.click(
            fn=verify_groq_api_key,
            inputs=[api_key_input],
            outputs=[api_status_text, api_status]
        )

        process_btn.click(
            fn=process_documents,
            inputs=[file_upload, api_status],
            outputs=[process_status]
        )

        submit_btn.click(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        question_input.submit(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot]
        )

        reset_btn.click(
            fn=reset_system,
            outputs=[api_key_input, api_status_text, chatbot, process_status]
        )

    return demo

# ============ LAUNCH APPLICATION ============

print("=" * 60)
print("üöÄ Starting RAG System with Groq LLM...")
print("=" * 60)

try:
    demo = create_interface()
    demo.launch(debug=True, share=True)
except Exception as e:
    print(f"Error launching application: {e}")
    import traceback
    traceback.print_exc()

‚úÖ All packages installed successfully!
üìå Now run CELL 2 to start the application


ModuleNotFoundError: No module named 'langchain.chains'

In [8]:
!pip install -q -U langchain-groq
!pip install -q -U langchain-community
!pip install -q -U langchain-core
!pip install -q -U langchain-text-splitters
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl pandas
!pip install -q gradio

print("‚úÖ All packages installed successfully!")
print("üìå Now run CELL 2 to start the application")

‚úÖ All packages installed successfully!
üìå Now run CELL 2 to start the application


In [9]:
# ============================================================
# CELL 1: INSTALLATION (Run this first!)
# ============================================================

!pip install -q -U langchain-groq
!pip install -q -U langchain-community
!pip install -q -U langchain-core
!pip install -q -U langchain-text-splitters
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl pandas
!pip install -q gradio

print("‚úÖ All packages installed successfully!")
print("üìå Now run CELL 2 to start the application")

# ============================================================
# CELL 2: MAIN APPLICATION (Run after Cell 1 completes)
# ============================================================

import os
import gradio as gr
import PyPDF2
import pandas as pd
from typing import List, Tuple

# LangChain imports
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except:
    from langchain.text_splitter import RecursiveCharacterTextSplitter

try:
    from langchain_core.documents import Document
except:
    from langchain.schema import Document

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

# python-docx import
try:
    import docx
except ImportError:
    print("Warning: python-docx not available")

# ============ GLOBAL VARIABLES ============
vector_store = None
qa_chain = None
groq_api_key = None

# ============ DOCUMENT PROCESSING FUNCTIONS ============

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                extracted = page.extract_text()
                if extracted:
                    text += extracted + "\n"
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
    return text

def extract_text_from_docx(file_path: str) -> str:
    """Extract text from DOCX file"""
    try:
        doc = docx.Document(file_path)
        text = "\n".join([paragraph.text for paragraph in doc.paragraphs if paragraph.text])
        return text
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel file"""
    try:
        df = pd.read_excel(file_path, sheet_name=None)
        text = ""
        for sheet_name, sheet_data in df.items():
            text += f"\n=== Sheet: {sheet_name} ===\n"
            text += sheet_data.to_string(index=False) + "\n"
        return text
    except Exception as e:
        raise Exception(f"Error reading Excel: {str(e)}")

def extract_text_from_csv(file_path: str) -> str:
    """Extract text from CSV file"""
    try:
        df = pd.read_csv(file_path)
        return df.to_string(index=False)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from TXT file"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as file:
                return file.read()
        except Exception as e:
            raise Exception(f"Error reading TXT: {str(e)}")
    except Exception as e:
        raise Exception(f"Error reading TXT: {str(e)}")

def process_uploaded_file(file_path: str) -> str:
    """Process uploaded file based on its extension"""
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        return extract_text_from_docx(file_path)
    elif file_extension in ['.xlsx', '.xls']:
        return extract_text_from_excel(file_path)
    elif file_extension == '.csv':
        return extract_text_from_csv(file_path)
    elif file_extension == '.txt':
        return extract_text_from_txt(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

# ============ RAG CORE FUNCTIONS ============

def verify_groq_api_key(api_key: str) -> Tuple[str, str]:
    """Verify if the Groq API key is valid"""
    if not api_key or api_key.strip() == "":
        return "‚ùå Please enter an API key", ""

    try:
        llm = ChatGroq(
            api_key=api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )
        response = llm.invoke("Say 'OK'")

        global groq_api_key
        groq_api_key = api_key

        return "‚úÖ API Key is Valid! You can now upload documents.", "verified"
    except Exception as e:
        error_msg = str(e)
        if "authentication" in error_msg.lower() or "api" in error_msg.lower():
            return "‚ùå Invalid API Key. Please check your key and try again.", ""
        return f"‚ùå Error: {error_msg}", ""

def create_vector_store(documents: List[Document]) -> FAISS:
    """Create FAISS vector store from documents"""
    print("Creating embeddings...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    print("Building vector store...")
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def process_documents(files, api_status) -> str:
    """Process uploaded documents and create vector store"""
    global vector_store, qa_chain, groq_api_key

    if api_status != "verified":
        return "‚ùå Please verify your Groq API key first!"

    if not files:
        return "‚ùå Please upload at least one document!"

    try:
        all_texts = []
        file_names = []

        print(f"Processing {len(files)} file(s)...")

        for file in files:
            file_path = file.name
            file_name = os.path.basename(file_path)
            print(f"Processing: {file_name}")

            text = process_uploaded_file(file_path)
            if text.strip():
                all_texts.append(text)
                file_names.append(file_name)
            else:
                print(f"Warning: No text extracted from {file_name}")

        if not all_texts:
            return "‚ùå No text could be extracted from the uploaded files!"

        combined_text = "\n\n".join(all_texts)
        print(f"Total text length: {len(combined_text)} characters")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        chunks = text_splitter.split_text(combined_text)
        documents = [Document(page_content=chunk) for chunk in chunks]

        print(f"Created {len(chunks)} chunks")

        vector_store = create_vector_store(documents)

        print("Creating QA chain...")
        llm = ChatGroq(
            api_key=groq_api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=False
        )

        return f"‚úÖ Successfully processed {len(files)} document(s)!\nüìä Created {len(chunks)} text chunks from: {', '.join(file_names)}"

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        return f"‚ùå Error processing documents: {str(e)}"

def answer_question(question: str, chat_history: List) -> Tuple[str, List]:
    """Answer question based on uploaded documents"""
    global qa_chain

    if qa_chain is None:
        response = "‚ùå Please upload and process documents first!"
        chat_history.append((question, response))
        return "", chat_history

    if not question or question.strip() == "":
        return "", chat_history

    try:
        print(f"Question: {question}")
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        print(f"Answer: {answer}")

        chat_history.append((question, answer))
        return "", chat_history

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        response = f"‚ùå Error: {str(e)}"
        chat_history.append((question, response))
        return "", chat_history

def clear_chat():
    """Clear chat history"""
    return []

def reset_system():
    """Reset the entire system"""
    global vector_store, qa_chain, groq_api_key
    vector_store = None
    qa_chain = None
    groq_api_key = None
    return "", "", [], "System reset. Please verify your API key and upload documents again."

# ============ GRADIO INTERFACE ============

def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # ü§ñ RAG System with Groq LLM
            ### Upload your documents and ask questions powered by AI!
            **Supported formats:** PDF, DOCX, TXT, CSV, Excel
            """
        )

        api_status = gr.State("")

        # API Key Section
        gr.Markdown("### üîë Step 1: Verify Your API Key")
        with gr.Row():
            with gr.Column(scale=3):
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key (e.g., gsk_...)",
                    type="password"
                )
            with gr.Column(scale=1):
                verify_btn = gr.Button("‚úÖ Verify Key", variant="primary", size="lg")

        api_status_text = gr.Textbox(label="Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Document Upload Section
        gr.Markdown("### üìÅ Step 2: Upload Your Documents")
        file_upload = gr.Files(
            label="Select Files",
            file_types=[".pdf", ".docx", ".txt", ".csv", ".xlsx", ".xls"],
            file_count="multiple"
        )

        process_btn = gr.Button("üîÑ Process Documents", variant="primary", size="lg")
        process_status = gr.Textbox(label="Processing Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Chat Section
        gr.Markdown("### üí¨ Step 3: Ask Questions")
        chatbot = gr.Chatbot(label="Conversation", height=400)

        with gr.Row():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything about your documents...",
                scale=4,
                show_label=False
            )
            submit_btn = gr.Button("üì§ Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("üóëÔ∏è Clear Chat", size="sm")
            reset_btn = gr.Button("üîÑ Reset All", variant="stop", size="sm")

        gr.Markdown(
            """
            ---
            ### üí° Tips:
            - Get your free API key at [console.groq.com/keys](https://console.groq.com/keys)
            - Upload multiple documents at once
            - Ask specific questions for better answers
            - The AI will search through all uploaded documents
            """
        )

        # Event handlers
        verify_btn.click(
            fn=verify_groq_api_key,
            inputs=[api_key_input],
            outputs=[api_status_text, api_status]
        )

        process_btn.click(
            fn=process_documents,
            inputs=[file_upload, api_status],
            outputs=[process_status]
        )

        submit_btn.click(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        question_input.submit(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot]
        )

        reset_btn.click(
            fn=reset_system,
            outputs=[api_key_input, api_status_text, chatbot, process_status]
        )

    return demo

# ============ LAUNCH APPLICATION ============

print("=" * 60)
print("üöÄ Starting RAG System with Groq LLM...")
print("=" * 60)

try:
    demo = create_interface()
    demo.launch(debug=True, share=True)
except Exception as e:
    print(f"Error launching application: {e}")
    import traceback
    traceback.print_exc()

‚úÖ All packages installed successfully!
üìå Now run CELL 2 to start the application


ModuleNotFoundError: No module named 'langchain.chains'

In [10]:
# ============================================================
# CELL 1: INSTALLATION (Run this first!)
# ============================================================

!pip install -q -U langchain
!pip install -q -U langchain-groq
!pip install -q -U langchain-community
!pip install -q -U langchain-core
!pip install -q -U langchain-text-splitters
!pip install -q faiss-cpu
!pip install -q sentence-transformers
!pip install -q PyPDF2
!pip install -q python-docx
!pip install -q openpyxl pandas
!pip install -q gradio

print("‚úÖ All packages installed successfully!")
print("üìå Now run CELL 2 to start the application")

# ============================================================
# CELL 2: MAIN APPLICATION (Run after Cell 1 completes)
# ============================================================

import os
import gradio as gr
import PyPDF2
import pandas as pd
from typing import List, Tuple

# LangChain imports
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except:
    from langchain.text_splitter import RecursiveCharacterTextSplitter

try:
    from langchain_core.documents import Document
except:
    from langchain.schema import Document

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq

# python-docx import
try:
    import docx
except ImportError:
    print("Warning: python-docx not available")

# ============ GLOBAL VARIABLES ============
vector_store = None
qa_chain = None
groq_api_key = None

# ============ DOCUMENT PROCESSING FUNCTIONS ============

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                extracted = page.extract_text()
                if extracted:
                    text += extracted + "\n"
    except Exception as e:
        raise Exception(f"Error reading PDF: {str(e)}")
    return text

def extract_text_from_docx(file_path: str) -> str:
    """Extract text from DOCX file"""
    try:
        doc = docx.Document(file_path)
        text = "\n".join([paragraph.text for paragraph in doc.paragraphs if paragraph.text])
        return text
    except Exception as e:
        raise Exception(f"Error reading DOCX: {str(e)}")

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from Excel file"""
    try:
        df = pd.read_excel(file_path, sheet_name=None)
        text = ""
        for sheet_name, sheet_data in df.items():
            text += f"\n=== Sheet: {sheet_name} ===\n"
            text += sheet_data.to_string(index=False) + "\n"
        return text
    except Exception as e:
        raise Exception(f"Error reading Excel: {str(e)}")

def extract_text_from_csv(file_path: str) -> str:
    """Extract text from CSV file"""
    try:
        df = pd.read_csv(file_path)
        return df.to_string(index=False)
    except Exception as e:
        raise Exception(f"Error reading CSV: {str(e)}")

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from TXT file"""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin-1') as file:
                return file.read()
        except Exception as e:
            raise Exception(f"Error reading TXT: {str(e)}")
    except Exception as e:
        raise Exception(f"Error reading TXT: {str(e)}")

def process_uploaded_file(file_path: str) -> str:
    """Process uploaded file based on its extension"""
    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension == '.pdf':
        return extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        return extract_text_from_docx(file_path)
    elif file_extension in ['.xlsx', '.xls']:
        return extract_text_from_excel(file_path)
    elif file_extension == '.csv':
        return extract_text_from_csv(file_path)
    elif file_extension == '.txt':
        return extract_text_from_txt(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

# ============ RAG CORE FUNCTIONS ============

def verify_groq_api_key(api_key: str) -> Tuple[str, str]:
    """Verify if the Groq API key is valid"""
    if not api_key or api_key.strip() == "":
        return "‚ùå Please enter an API key", ""

    try:
        llm = ChatGroq(
            api_key=api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )
        response = llm.invoke("Say 'OK'")

        global groq_api_key
        groq_api_key = api_key

        return "‚úÖ API Key is Valid! You can now upload documents.", "verified"
    except Exception as e:
        error_msg = str(e)
        if "authentication" in error_msg.lower() or "api" in error_msg.lower():
            return "‚ùå Invalid API Key. Please check your key and try again.", ""
        return f"‚ùå Error: {error_msg}", ""

def create_vector_store(documents: List[Document]) -> FAISS:
    """Create FAISS vector store from documents"""
    print("Creating embeddings...")
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )
    print("Building vector store...")
    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store

def process_documents(files, api_status) -> str:
    """Process uploaded documents and create vector store"""
    global vector_store, qa_chain, groq_api_key

    if api_status != "verified":
        return "‚ùå Please verify your Groq API key first!"

    if not files:
        return "‚ùå Please upload at least one document!"

    try:
        all_texts = []
        file_names = []

        print(f"Processing {len(files)} file(s)...")

        for file in files:
            file_path = file.name
            file_name = os.path.basename(file_path)
            print(f"Processing: {file_name}")

            text = process_uploaded_file(file_path)
            if text.strip():
                all_texts.append(text)
                file_names.append(file_name)
            else:
                print(f"Warning: No text extracted from {file_name}")

        if not all_texts:
            return "‚ùå No text could be extracted from the uploaded files!"

        combined_text = "\n\n".join(all_texts)
        print(f"Total text length: {len(combined_text)} characters")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len
        )

        chunks = text_splitter.split_text(combined_text)
        documents = [Document(page_content=chunk) for chunk in chunks]

        print(f"Created {len(chunks)} chunks")

        vector_store = create_vector_store(documents)

        print("Creating QA chain...")
        llm = ChatGroq(
            api_key=groq_api_key,
            model="llama-3.3-70b-versatile",
            temperature=0
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=False
        )

        return f"‚úÖ Successfully processed {len(files)} document(s)!\nüìä Created {len(chunks)} text chunks from: {', '.join(file_names)}"

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        return f"‚ùå Error processing documents: {str(e)}"

def answer_question(question: str, chat_history: List) -> Tuple[str, List]:
    """Answer question based on uploaded documents"""
    global qa_chain

    if qa_chain is None:
        response = "‚ùå Please upload and process documents first!"
        chat_history.append((question, response))
        return "", chat_history

    if not question or question.strip() == "":
        return "", chat_history

    try:
        print(f"Question: {question}")
        result = qa_chain.invoke({"query": question})
        answer = result['result']
        print(f"Answer: {answer}")

        chat_history.append((question, answer))
        return "", chat_history

    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        print(f"Error: {error_details}")
        response = f"‚ùå Error: {str(e)}"
        chat_history.append((question, response))
        return "", chat_history

def clear_chat():
    """Clear chat history"""
    return []

def reset_system():
    """Reset the entire system"""
    global vector_store, qa_chain, groq_api_key
    vector_store = None
    qa_chain = None
    groq_api_key = None
    return "", "", [], "System reset. Please verify your API key and upload documents again."

# ============ GRADIO INTERFACE ============

def create_interface():
    """Create Gradio interface"""

    with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # ü§ñ RAG System with Groq LLM
            ### Upload your documents and ask questions powered by AI!
            **Supported formats:** PDF, DOCX, TXT, CSV, Excel
            """
        )

        api_status = gr.State("")

        # API Key Section
        gr.Markdown("### üîë Step 1: Verify Your API Key")
        with gr.Row():
            with gr.Column(scale=3):
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key (e.g., gsk_...)",
                    type="password"
                )
            with gr.Column(scale=1):
                verify_btn = gr.Button("‚úÖ Verify Key", variant="primary", size="lg")

        api_status_text = gr.Textbox(label="Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Document Upload Section
        gr.Markdown("### üìÅ Step 2: Upload Your Documents")
        file_upload = gr.Files(
            label="Select Files",
            file_types=[".pdf", ".docx", ".txt", ".csv", ".xlsx", ".xls"],
            file_count="multiple"
        )

        process_btn = gr.Button("üîÑ Process Documents", variant="primary", size="lg")
        process_status = gr.Textbox(label="Processing Status", interactive=False, show_label=False)

        gr.Markdown("---")

        # Chat Section
        gr.Markdown("### üí¨ Step 3: Ask Questions")
        chatbot = gr.Chatbot(label="Conversation", height=400)

        with gr.Row():
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything about your documents...",
                scale=4,
                show_label=False
            )
            submit_btn = gr.Button("üì§ Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("üóëÔ∏è Clear Chat", size="sm")
            reset_btn = gr.Button("üîÑ Reset All", variant="stop", size="sm")

        gr.Markdown(
            """
            ---
            ### üí° Tips:
            - Get your free API key at [console.groq.com/keys](https://console.groq.com/keys)
            - Upload multiple documents at once
            - Ask specific questions for better answers
            - The AI will search through all uploaded documents
            """
        )

        # Event handlers
        verify_btn.click(
            fn=verify_groq_api_key,
            inputs=[api_key_input],
            outputs=[api_status_text, api_status]
        )

        process_btn.click(
            fn=process_documents,
            inputs=[file_upload, api_status],
            outputs=[process_status]
        )

        submit_btn.click(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        question_input.submit(
            fn=answer_question,
            inputs=[question_input, chatbot],
            outputs=[question_input, chatbot]
        )

        clear_btn.click(
            fn=clear_chat,
            outputs=[chatbot]
        )

        reset_btn.click(
            fn=reset_system,
            outputs=[api_key_input, api_status_text, chatbot, process_status]
        )

    return demo

# ============ LAUNCH APPLICATION ============

print("=" * 60)
print("üöÄ Starting RAG System with Groq LLM...")
print("=" * 60)

try:
    demo = create_interface()
    demo.launch(debug=True, share=True)
except Exception as e:
    print(f"Error launching application: {e}")
    import traceback
    traceback.print_exc()

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/102.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m102.4/102.8 kB[0m [31m6.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m102.8/102.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h‚úÖ All packages installed successfully!
üìå Now run CELL 2 to start the application


ModuleNotFoundError: No module named 'langchain.chains'

In [None]:
# ============================================
# CELL 1: INSTALLATION (Run this first!)
# ============================================
!pip install -q groq chromadb pypdf2 openpyxl pandas gradio sentence-transformers

print("‚úÖ All packages installed successfully!")
print("Now run the next cell with the application code...")

# ============================================
# CELL 2: APPLICATION CODE (Run after installation)
# ============================================

import os
import gradio as gr
from groq import Groq
import chromadb
from chromadb.utils import embedding_functions
import PyPDF2
import pandas as pd
from typing import List

class RAGSystem:
    def __init__(self):
        self.collection = None
        self.groq_client = None
        self.api_key = None
        self.doc_counter = 0

        # Initialize ChromaDB client
        self.chroma_client = chromadb.Client()

        # Initialize embedding function
        self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name="all-MiniLM-L6-v2"
        )

    def verify_api_key(self, api_key: str) -> tuple:
        """Verify if the Groq API key is valid"""
        try:
            client = Groq(api_key=api_key)
            # Test with a simple completion
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": "Hi"}],
                model="llama-3.3-70b-versatile",
                max_tokens=10
            )
            self.groq_client = client
            self.api_key = api_key
            return "‚úÖ API Key verified successfully!", True
        except Exception as e:
            return f"‚ùå API Key verification failed: {str(e)}", False

    def split_text(self, text: str, chunk_size: int = 1000, overlap: int = 200) -> List[str]:
        """Simple text splitter"""
        chunks = []
        start = 0
        text_length = len(text)

        while start < text_length:
            end = start + chunk_size
            chunk = text[start:end]
            chunks.append(chunk)
            start = end - overlap

        return chunks

    def extract_text_from_pdf(self, file_path: str) -> str:
        """Extract text from PDF file"""
        text = ""
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    text += page.extract_text() + "\n"
        except Exception as e:
            print(f"Error reading PDF: {e}")
        return text

    def extract_text_from_excel(self, file_path: str) -> str:
        """Extract text from Excel file"""
        try:
            df = pd.read_excel(file_path)
            return df.to_string()
        except Exception as e:
            print(f"Error reading Excel: {e}")
            return ""

    def extract_text_from_txt(self, file_path: str) -> str:
        """Extract text from text file"""
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read()
        except Exception as e:
            print(f"Error reading text file: {e}")
            return ""

    def process_documents(self, files) -> str:
        """Process uploaded documents and create vector store"""
        if not files:
            return "‚ùå No files uploaded!"

        all_texts = []
        file_names = []

        try:
            for file in files:
                file_path = file.name
                file_ext = os.path.splitext(file_path)[1].lower()
                file_name = os.path.basename(file_path)

                if file_ext == '.pdf':
                    text = self.extract_text_from_pdf(file_path)
                elif file_ext in ['.xlsx', '.xls']:
                    text = self.extract_text_from_excel(file_path)
                elif file_ext == '.txt':
                    text = self.extract_text_from_txt(file_path)
                else:
                    continue

                if text.strip():
                    all_texts.append(text)
                    file_names.append(file_name)

            if not all_texts:
                return "‚ùå No valid text extracted from files!"

            # Combine all texts
            combined_text = "\n\n".join(all_texts)

            # Split text into chunks
            chunks = self.split_text(combined_text)

            # Create or recreate collection
            try:
                self.chroma_client.delete_collection("documents")
            except:
                pass

            self.collection = self.chroma_client.create_collection(
                name="documents",
                embedding_function=self.embedding_function
            )

            # Add documents to collection
            ids = [f"doc_{i}" for i in range(len(chunks))]
            self.collection.add(
                documents=chunks,
                ids=ids
            )

            self.doc_counter = len(chunks)

            return f"‚úÖ Successfully processed {len(files)} file(s) with {len(chunks)} chunks!"

        except Exception as e:
            return f"‚ùå Error processing documents: {str(e)}"

    def query_documents(self, question: str, chat_history) -> tuple:
        """Query the documents using RAG"""
        if not self.groq_client:
            return "‚ùå Please verify your API key first!", chat_history

        if not self.collection or self.doc_counter == 0:
            return "‚ùå Please upload and process documents first!", chat_history

        if not question.strip():
            return "‚ùå Please enter a question!", chat_history

        try:
            # Retrieve relevant documents
            results = self.collection.query(
                query_texts=[question],
                n_results=min(3, self.doc_counter)
            )

            context = "\n\n".join(results['documents'][0])

            # Create prompt with context
            prompt = f"""Based on the following context, answer the question accurately and concisely.

Context:
{context}

Question: {question}

Answer:"""

            # Get response from Groq
            response = self.groq_client.chat.completions.create(
                messages=[
                    {
                        "role": "system",
                        "content": "You are a helpful assistant that answers questions based on the provided context. Always provide accurate answers from the context. If the answer is not in the context, say so."
                    },
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                model="llama-3.3-70b-versatile",
                temperature=0.3,
                max_tokens=1024
            )

            answer = response.choices[0].message.content

            # Update chat history
            chat_history.append((question, answer))

            return "", chat_history

        except Exception as e:
            error_msg = f"‚ùå Error: {str(e)}"
            return error_msg, chat_history

# Initialize RAG system
rag_system = RAGSystem()

# Create Gradio interface
with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# üìö RAG System with Groq LLM")
    gr.Markdown("Upload your documents (PDF, Excel, Text) and ask questions about them!")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### üîë Step 1: API Key")
            api_key_input = gr.Textbox(
                label="Groq API Key",
                type="password",
                placeholder="Enter your Groq API key..."
            )
            verify_btn = gr.Button("Verify API Key", variant="primary")
            api_status = gr.Textbox(label="Status", interactive=False)

            gr.Markdown("### üì§ Step 2: Upload Documents")
            file_upload = gr.File(
                label="Upload Files (PDF, Excel, Text)",
                file_count="multiple",
                file_types=[".pdf", ".xlsx", ".xls", ".txt"]
            )
            process_btn = gr.Button("Process Documents", variant="primary")
            process_status = gr.Textbox(label="Processing Status", interactive=False)

        with gr.Column(scale=2):
            gr.Markdown("### üí¨ Step 3: Ask Questions")
            chatbot = gr.Chatbot(label="Chat History", height=400)
            question_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask a question about your documents...",
                lines=2
            )
            submit_btn = gr.Button("Submit Question", variant="primary")
            clear_btn = gr.Button("Clear Chat")

    gr.Markdown("---")
    gr.Markdown("üí° **Tips:** Make sure to verify your API key and process documents before asking questions!")

    # Event handlers
    verify_btn.click(
        fn=lambda key: rag_system.verify_api_key(key),
        inputs=[api_key_input],
        outputs=[api_status]
    )

    process_btn.click(
        fn=rag_system.process_documents,
        inputs=[file_upload],
        outputs=[process_status]
    )

    submit_btn.click(
        fn=rag_system.query_documents,
        inputs=[question_input, chatbot],
        outputs=[question_input, chatbot]
    )

    question_input.submit(
        fn=rag_system.query_documents,
        inputs=[question_input, chatbot],
        outputs=[question_input, chatbot]
    )

    clear_btn.click(
        fn=lambda: [],
        outputs=[chatbot]
    )

# Launch the app
print("üöÄ Launching Gradio interface...")
demo.launch(debug=True, share=True)

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m67.3/67.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m21.7/21.7 MB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m278.2/278.2 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.0/2.0 MB[0m [31m45.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚î

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  with gr.Blocks(title="RAG System with Groq LLM", theme=gr.themes.Soft()) as demo:
  chatbot = gr.Chatbot(label="Chat History", height=400)
  chatbot = gr.Chatbot(label="Chat History", height=400)


üöÄ Launching Gradio interface...
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://df1627b7e2dfa325b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
