# PersonalRAG

A streamlined RAG system for your personal documents.


In [None]:
# Imports
import os
import glob
from dotenv import load_dotenv
import gradio as gr
import numpy as np


# LangChain imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain


# PDF processing
import pdfplumber

In [None]:
# Complete RAG System Setup
# =========================

# Configuration
MODEL = 'gpt-5-nano'
db_name = 'vector_db'

# Load environment
load_dotenv()

# Every setting read by the embeddings client and the chat client below.
required_vars = [
    'AZURE_OPENAI_ENDPOINT',
    'AZURE_OPENAI_API_KEY',
    'AZURE_OPENAI_API_VERSION',
    'AZURE_OPENAI_EMBEDDING_DEPLOYMENT',
    'AZURE_CHATOPENAI_DEPLOYMENT',
    'AZURE_CHATOPENAI_ENDPOINT',
    'AZURE_CHATOPENAI_API_KEY',
    'AZURE_CHATOPENAI_API_VERSION',
]

# A variable that is unset or empty counts as missing.
missing = [name for name in required_vars if not os.getenv(name)]
env_ok = not missing
if env_ok:
    print("✅ Environment OK")
else:
    print(f"❌ Missing: {missing}")

# Initialize embeddings and LLM (both are set to None when validation failed)
if not env_ok:
    print("❌ Cannot initialize - check environment variables")
    embeddings = None
    llm = None
else:
    embeddings = AzureOpenAIEmbeddings(
        model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    )

    llm = AzureChatOpenAI(
        azure_deployment=os.getenv('AZURE_CHATOPENAI_DEPLOYMENT'),
        azure_endpoint=os.getenv('AZURE_CHATOPENAI_ENDPOINT'),
        api_key=os.getenv('AZURE_CHATOPENAI_API_KEY'),
        api_version=os.getenv('AZURE_CHATOPENAI_API_VERSION'),
    )
    print("✅ Embeddings and LLM initialized")

In [None]:
# Load and Process Documents
# ==========================

# Check PDF processing availability
# PDF_AVAILABLE: at least one of the two PDF libraries could be imported.
# PDFPLUMBER_AVAILABLE: pdfplumber itself could be imported. The conversion
# loop below calls pdfplumber.open(), so PyPDF2 alone is not enough for it.
try:
    import pdfplumber
    PDF_AVAILABLE = True
    PDFPLUMBER_AVAILABLE = True
    print("✅ PDF processing available (pdfplumber)")
except ImportError:
    PDFPLUMBER_AVAILABLE = False
    try:
        import PyPDF2
        PDF_AVAILABLE = True
        print("✅ PDF processing available (PyPDF2)")
    except ImportError:
        PDF_AVAILABLE = False
        print("⚠️ PDF processing not available - install pdfplumber or PyPDF2")

# Auto-convert PDFs to markdown
# Each "<name>.pdf" gets a sibling "<name>.md" unless one already exists.
if PDFPLUMBER_AVAILABLE:
    for root, _dirs, files in os.walk("my-knowledge-worker-data"):
        for file in files:
            if not file.lower().endswith('.pdf'):
                continue

            pdf_path = os.path.join(root, file)
            md_path = os.path.splitext(pdf_path)[0] + '.md'
            if os.path.exists(md_path):
                continue

            # Best effort: one unreadable PDF must not stop the others.
            try:
                with pdfplumber.open(pdf_path) as pdf:
                    # extract_text() may return None for a page; treat as empty.
                    text = "\n\n".join(
                        p.extract_text() or "" for p in pdf.pages)
                if text.strip():
                    with open(md_path, 'w', encoding='utf-8') as f:
                        f.write(f"# {os.path.splitext(file)[0]}\n\n{text}")
                    print(f"✅ Converted: {file}")
                else:
                    # No .md is written, so say why instead of staying silent.
                    print(f"⚠️ No extractable text: {file}")
            except Exception as e:
                print(f"❌ Error: {file} - {e}")
else:
    print("⚠️ Skipping PDF conversion - pdfplumber is not installed")


def load_all_documents():
    """Load all markdown files from my-knowledge-worker-data.

    Every immediate sub-folder of ``my-knowledge-worker-data`` is scanned
    recursively for ``*.md`` files. The sub-folder's name is stored on each
    loaded document as ``metadata["doc_type"]``.

    Returns:
        A list of loaded documents; empty when there are no sub-folders.
    """
    documents = []

    for folder in glob.glob("my-knowledge-worker-data/*"):
        # The glob also matches loose files at the top level; only
        # directories can be handed to DirectoryLoader.
        if not os.path.isdir(folder):
            continue

        doc_type = os.path.basename(folder)
        loader = DirectoryLoader(
            folder,
            glob="**/*.md",
            loader_cls=TextLoader,
            # The markdown files in this project are written as UTF-8, so do
            # not rely on the platform's default encoding when reading them.
            loader_kwargs={"encoding": "utf-8"},
        )
        folder_docs = loader.load()

        for doc in folder_docs:
            doc.metadata["doc_type"] = doc_type
        documents.extend(folder_docs)

    return documents


# Load documents
documents = load_all_documents()
print(f"📄 Loaded {len(documents)} documents")

# Chunking tuned for resume-style sections (especially EDUCATION):
# separators are tried in order - markdown headings, list items, line
# breaks, then sentence ends.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n## ", "\n# ", "\n- ", "\n", ". "],
    chunk_overlap=120,  # overlap for context retention and continuity
    chunk_size=600,  # smaller chunks to keep section details together
)
chunks = text_splitter.split_documents(documents)
print(f"📝 Created {len(chunks)} chunks")

In [None]:
# Always delete and recreate database for fresh start
# ===================================================

import shutil

# Remove any database directory left over from a previous run
if os.path.exists(db_name):
    shutil.rmtree(db_name)
    print(f"🗑️ Deleted existing database: {db_name}")

# Embed every chunk into a brand-new store persisted under db_name
vectorstore = Chroma.from_documents(
    persist_directory=db_name,
    embedding=embeddings,
    documents=chunks,
)
print(
    f"✅ Created fresh database ({vectorstore._collection.count()} documents)")

In [None]:
# Create vector database if needed
# The name is looked up through globals() so this cell also works when the
# rebuild cell was skipped and `vectorstore` has never been assigned; a plain
# `vectorstore is None` check would raise NameError in that case.
if globals().get("vectorstore") is None:
    vectorstore = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=db_name
    )
    print(
        f"✅ Created new database ({vectorstore._collection.count()} documents)")

In [None]:
# Configuration Settings
# Re-declares MODEL and db_name with the same values the setup cell uses.
# In the visible cells, `datetime` is only referenced by the commented-out
# Option 2 below.
from datetime import datetime
import os
MODEL = 'gpt-5-nano'

# Vector Database Configuration
# Exactly one of the options below should assign db_name.

# Option 1: Simple name (current approach)
db_name = 'vector_db'

# Option 2: Timestamped database (uncomment to use)
# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# db_name = f'vector_db_{timestamp}'

# Option 3: Environment variable with fallback (uncomment to use)
# db_name = os.getenv('VECTOR_DB_NAME', 'vector_db')

# Option 4: Full path configuration (uncomment to use)
# db_name = os.path.join('data', 'vector_databases', 'personal_knowledge_db')

print(f"Vector database will be stored as: {db_name}")

In [None]:
# Load environment variables via python-dotenv (repeats the call made in the
# setup cell, so this cell can be run on its own).
load_dotenv()

In [None]:
folders = glob.glob("my-knowledge-worker-data/*")


def add_metadata(doc, doc_type):
    """Tag *doc* with its document type.

    Sets ``doc.metadata["doc_type"]`` to *doc_type* in place and returns the
    same document object.
    """
    doc.metadata["doc_type"] = doc_type
    return doc


text_loader_kwargs = {"encoding": "utf-8"}


documents = []
for folder in folders:
    # The glob also matches loose files; only directories can be handed to
    # DirectoryLoader.
    if not os.path.isdir(folder):
        continue

    doc_type = os.path.basename(folder)
    loader = DirectoryLoader(
        folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs=text_loader_kwargs)
    # Load once per folder: a second load() would only repeat the file reads.
    folder_docs = loader.load()
    documents.extend(add_metadata(doc, doc_type) for doc in folder_docs)

# Chunking tuned for resume-style sections (especially EDUCATION):
# separators are tried in order - markdown headings, list items, line
# breaks, then sentence ends.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n## ", "\n# ", "\n- ", "\n", ". "],
    chunk_overlap=120,  # overlap for context retention and continuity
    chunk_size=600,  # smaller chunks to keep section details together
)
chunks = text_splitter.split_documents(documents)

In [None]:
# Report how many chunks the splitter produced.
print(f"Total number of chunks: {len(chunks)}")

In [None]:
# Distinct doc_type values across all loaded documents
doc_types = {doc.metadata['doc_type'] for doc in documents}
print(f"Document types found: {doc_types}")

In [None]:

# Alternative Embeddings Options
# ================================

# Option 3 (Azure OpenAI) is the active choice below; Options 1 and 2 are
# kept as commented-out alternatives.

# Option 1: HuggingFace Embeddings (Free, Local)
# Uncomment the lines below to use HuggingFace embeddings instead:
# from langchain.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# Option 2: OpenAI Embeddings (if you have OpenAI API key)
# from langchain_openai import OpenAIEmbeddings
# os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
# embeddings = OpenAIEmbeddings(openai_api_key="your-openai-api-key")

# Option 3: Azure OpenAI Embeddings
# Rebinds the module-level `embeddings` from the AZURE_OPENAI_* environment
# variables.
embeddings = AzureOpenAIEmbeddings(
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION")
)
# Usage Instructions:
# 1. If Azure OpenAI embeddings fail, comment out the Azure block above and
#    uncomment the HuggingFace lines (Option 1).
# 2. For HuggingFace, make sure to install: pip install sentence-transformers
# NOTE(review): an earlier version of these instructions referred to
# get_alternative_embeddings(); no such helper is defined in the visible cells.

In [None]:
# Fetch a single stored vector and report its dimensionality

collection = vectorstore._collection
try:
    count = collection.count()
    if count <= 0:
        print("⚠️ No embeddings found in the collection. Run the rebuild cell first.")
    else:
        # One record is enough: only the vector length is of interest.
        first_record = collection.get(limit=1, include=["embeddings"])
        sample_embedding = first_record["embeddings"][0]
        dimensions = len(sample_embedding)
        print(f"The vectors have {dimensions:,} dimensions")
except Exception as e:
    print(f"❌ Error accessing embeddings: {e}")
    print("💡 Make sure to run the rebuild cell (Cell 20) first to create the vector database.")

In [None]:
# Display the sample vector. It is only assigned when the previous cell found
# at least one embedding in the collection.
sample_embedding

In [None]:

# Retriever over the vector store, configured with k=35
retriever = vectorstore.as_retriever(search_kwargs={"k": 35})

# Chat memory exposed to the chain under the "chat_history" key
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

conversation_chain = ConversationalRetrievalChain.from_llm(
    memory=memory,
    retriever=retriever,
    llm=llm,
)

In [None]:
# Custom Prompt Template for Better Responses
# ===========================================

from langchain.prompts import PromptTemplate


# Prompt handed to the chain's document-combining step (it is passed as
# combine_docs_chain_kwargs={"prompt": qa_prompt} further down).
# The template text is sent to the model verbatim, so treat every character
# inside the triple-quoted string as runtime behaviour. It uses exactly three
# placeholders - {context}, {chat_history} and {question} - which must stay
# in sync with input_variables.
# NOTE(review): the "CONTEXT PROCESSING INSTRUCTIONS" section describes input
# labelled CONTEXT:/QUERY:, while the placeholders at the end are labelled
# Context:/Chat History:/Question: - confirm the mismatch is intended.
qa_prompt = PromptTemplate(
    template="""  
    
# IDENTITY & PURPOSE
You are a Personal AI Assistant specialized in answering questions about the user's professional profile, educational background, work experience, projects, and skills. You function as an intelligent retrieval interface over the user's personal knowledge base.

# KNOWLEDGE SOURCE
- Primary Source: User's personal documents (resumes, records, portfolios)
- Access Method: Semantic search retrieval on vector database
- Update Frequency: Documents are current as of user's last update
- Scope Limitation: You answer ONLY from retrieved context, never from general knowledge

# OPERATIONAL CONSTRAINTS

## Constraint 1: Grounding Requirement
Answer exclusively from the provided context. Apply these rules:
- ✓ DO: Extract and present facts directly from context
- ✓ DO: Combine information from multiple context chunks when relevant
- ✗ DON'T: Use external knowledge or training data
- ✗ DON'T: Infer or extrapolate beyond stated facts
- ✗ DON'T: Make assumptions about unstated information

## Constraint 2: Response Protocol
When information is missing or unclear:
- If zero relevant information: "I don't have that information in your documents"
- If partial information: State what exists + acknowledge what's missing
- If outdated possibility: Present available info + suggest user may want to update documents

## Constraint 3: No Hallucination Policy
If you're unsure or the context is ambiguous:
- Default to "information not found" rather than guessing
- Never fabricate dates, numbers, names, or details
- Never blend context facts with general assumptions

# COMMUNICATION STYLE

## Tone: Conversational & Personal
- Use second-person: "You worked at...", "Your degree in...", "You have experience with..."
- Sound like a helpful colleague reviewing documents together
- Be warm but professional
- Avoid robotic phrases: "As an AI", "I cannot", "I'm unable to"

## Structure: Clear & Scannable
- Single facts: Complete sentences with full detail
- Multiple items: Bullet points with parallel structure
- Dates: Use consistent format (MM/YYYY or Year)
- Lists: Most recent first (unless chronological narrative requested)

## Detail Level: Comprehensive & Specific
Always include when available:
- Education: [Institution] - [Degree/Program] - [Year/Duration] - [Grade/Percentage/CGPA]
- Work: [Company] - [Role] - [Duration] - [Key Achievements/Responsibilities]
- Projects: [Name] - [Technologies] - [Description] - [Your Role/Contribution]
- Skills: [Technology/Tool] - [Experience Level/Years] - [Context of Use]

# QUERY CLASSIFICATION & RESPONSE PATTERNS

## Type 1: Factual Lookup
User asks for specific fact (date, grade, company name)
→ Provide direct answer with full context
Example: "You graduated in May 2018 with a B.Tech in Computer Science from ABC University with an 8.5 CGPA."

## Type 2: List Queries
User asks "what skills/projects/jobs..."
→ Bullet-pointed list with details for each item
Example: "Your web development skills include:
• Frontend: React, Vue.js, HTML5/CSS3 - 3 years professional experience
• Backend: Node.js, Express, Django - used in 5+ production projects
• Databases: MongoDB, PostgreSQL - managed databases handling 1M+ records"

## Type 3: Summaries
User asks for overview/summary of section
→ Organized breakdown by category with key highlights
Example: "Here's your professional summary:

**Current Role:** Senior Developer at TechCorp (2021-Present)
**Experience:** 5 years in software development
**Education:** M.S. Computer Science, XYZ University (2019)
**Specialization:** Full-stack development, Cloud architecture, Machine Learning
**Key Achievement:** Led development of ML platform serving 100K+ users"

## Type 4: Comparisons
User asks to compare two things
→ Side-by-side structured comparison
Example: "Comparing your education levels:

**Undergraduate (2014-2018):**
• Institution: ABC College
• Degree: B.Tech Computer Science
• Performance: 8.5/10 CGPA
• Key Courses: Data Structures, Algorithms, Databases

**Graduate (2018-2019):**
• Institution: XYZ University  
• Degree: M.S. Computer Science
• Performance: 3.8/4.0 GPA
• Specialization: Machine Learning, Deep Learning"

## Type 5: Timeline/History
User asks about progression or "when did I..."
→ Chronological narrative or timeline
Example: "Your career progression:
• 2019-2020: Junior Developer at StartupCo - building microservices
• 2020-2021: Software Engineer at MidSizeTech - full-stack development
• 2021-Present: Senior Developer at TechCorp - leading ML initiatives"

## Type 6: Information Not Found
Retrieved context is empty/irrelevant
→ Clear statement of unavailability
Example: "I don't have information about certifications in your documents."

# CONTEXT PROCESSING INSTRUCTIONS

You will receive input in this structure:
```
CONTEXT:
[Retrieved document chunks - may contain multiple segments]

QUERY:
[User's question]
```

Processing Steps:
1. Read all context chunks thoroughly
2. Identify which chunks are relevant to the query
3. Extract all pertinent facts from relevant chunks
4. Synthesize into coherent response following the appropriate pattern
5. Verify response contains only context-grounded information
6. Format according to query type

# EXAMPLES OF COMPLETE INTERACTIONS

**Example 1: Direct Fact**
Context: "John completed B.Tech in Computer Science from Delhi University in 2018 with 85%"
Query: "What was my graduation percentage?"
Response: "Your B.Tech graduation percentage was 85%, which you completed from Delhi University in 2018."

**Example 2: Skills List**
Context: "Proficient in Python (Django, Flask), JavaScript (React, Node.js), worked with AWS and Docker"
Query: "What technical skills do I have?"
Response: "Your technical skills include:
• Python: Django and Flask frameworks
• JavaScript: React for frontend, Node.js for backend
• Cloud: AWS
• DevOps: Docker"

**Example 3: Missing Info**
Context: "Worked at TechCorp as Software Engineer from 2020-2022"
Query: "What was my starting salary?"
Response: "I don't have information about your starting salary in your documents."

**Example 4: Work Summary**
Context: "Software Engineer at TechCorp (2020-2022): Developed ML pipelines, mentored 3 junior developers, increased system performance by 40%"
Query: "Tell me about my TechCorp role"
Response: "You worked as a Software Engineer at TechCorp from 2020 to 2022. Your key contributions included:
• Developing ML pipelines
• Mentoring 3 junior developers
• Improving system performance by 40%"

# QUALITY CHECKS BEFORE RESPONDING
Before sending each response, verify:
- ✓ All facts are from provided context
- ✓ Used "You/Your" perspective
- ✓ Included specific details (dates, numbers, names)
- ✓ Format matches query type
- ✓ No assumptions or external knowledge used
- ✓ Conversational and helpful tone
- ✓ No AI-centric language

Now, answer the user's query using only the retrieved context provided.

Context: {context}
Chat History: {chat_history}
Question: {question}

Answer:""",
    input_variables=["context", "chat_history", "question"]
)

# Rebuild the chain so the document-combining step uses qa_prompt; the
# retriever and memory objects created earlier are reused as-is.
conversation_chain = ConversationalRetrievalChain.from_llm(
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    memory=memory,
    retriever=retriever,
    llm=llm,
)

print("✅ Custom prompt template applied to conversation chain")

In [None]:
# query = "what is my work experience"
# result = conversation_chain.invoke({"question": query})
# answer = result["answer"]
# print("\nAnswer:", answer)

In [None]:
def is_greeting(message):
    """Return True when *message* is, or opens with, a greeting phrase.

    Matching is case-insensitive and ignores surrounding whitespace. A
    greeting only counts when it is the entire message or is followed by a
    non-alphanumeric character, so "hi there!" and "hey, quick question"
    match, while ordinary questions that merely share a prefix ("history of
    my projects", "yoga certifications", "supply chain work") do not.
    """
    greetings = (
        'hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening',
        'greetings', 'howdy', 'sup', 'what\'s up', 'yo', 'good day',
        'hi there', 'hello there', 'hey there', 'good to see you',
    )

    message_lower = message.lower().strip()

    for greeting in greetings:
        if not message_lower.startswith(greeting):
            continue
        # Require a word boundary after the greeting; an empty remainder is
        # the exact-match case.
        remainder = message_lower[len(greeting):]
        if not remainder or not remainder[0].isalnum():
            return True

    return False


def get_greeting_response():
    """Pick one of the canned PersonalRAG welcome messages at random."""
    import random

    canned_replies = (
        "Hello! 👋 I'm PersonalRAG, your AI knowledge assistant. I'm here to help you find information from your documents, projects, and work experience. What would you like to know?",
        "Hi there! 🤖 Welcome to PersonalRAG. I can help you search through your knowledge base and answer questions about your work, projects, and documents. How can I assist you today?",
        "Hey! 😊 Great to see you! I'm PersonalRAG, ready to help you explore your personal knowledge base. Feel free to ask me anything about your documents or projects!",
        "Hello! 🌟 I'm PersonalRAG, your intelligent knowledge assistant. I'm here to help you discover insights from your documents and answer questions about your work experience. What can I help you with?",
    )
    return random.choice(canned_replies)


def chat_messages(message, history):
    """Answer *message* and return the history with the reply appended.

    Greetings are answered with a canned response; everything else is sent to
    ``conversation_chain``. Any exception raised along the way is converted
    into an apology message instead of propagating to the caller.

    Args:
        message: The user's text.
        history: Prior chat entries; ``None`` is treated as an empty history.

    Returns:
        A new list holding the prior entries followed by one
        ``{"role": "assistant", "content": ...}`` dict. The list passed in is
        not modified.
    """
    # Normalise first so the error path can always build its reply:
    # ``None + [...]`` inside the except handler would raise a second,
    # unhandled exception.
    history = list(history or [])

    try:
        if is_greeting(message):
            reply = get_greeting_response()
        else:
            result = conversation_chain.invoke({"question": message})
            reply = result["answer"]
    except Exception as e:
        # UI boundary: surface the problem in the chat rather than crashing.
        reply = f"❌ Sorry, I encountered an error: {str(e)}\n\nPlease try again or rephrase your question."

    return history + [{"role": "assistant", "content": reply}]

In [None]:
# Enhanced Clean Black Theme Interface
# ====================================

# Custom CSS for the black theme, passed to gr.Blocks(css=...) below.
# The string is runtime data: every character inside it is sent to the browser.
# Class names referenced from the Python layout code: .header (HTML banner),
# .chatbot-clean (Chatbot), .input-row / .input-container (textbox row),
# .btn-primary / .btn-secondary (buttons).
# Points worth knowing when editing:
# - text inputs and textareas are pinned to a 48px height;
# - the ".chatbot-clean .user, .user-message" rule appears twice: once at the
#   top level and once more inside the max-width: 768px media query, where it
#   switches white-space from nowrap to pre-line and narrows max-width.
custom_css = """
/* Clean Black Theme */
.gradio-container {
    font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif !important;
    background: #000000 !important;
    min-height: 100vh !important;
}

/* Main container styling */
.main-container {
    background: #111111 !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5) !important;
    border: 1px solid #333333 !important;
}

/* Header styling */
.header {
    background: #000000 !important;
    color: #ffffff !important;
    padding: 32px !important;
    border-radius: 12px 12px 0 0 !important;
    text-align: center !important;
    border-bottom: 2px solid #333333 !important;
}

.header h1 {
    margin: 0 !important;
    font-size: 2.5em !important;
    font-weight: 700 !important;
    letter-spacing: -1px !important;
    color: #ffffff !important;
}

.header p {
    margin: 12px 0 0 0 !important;
    font-size: 1.1em !important;
    opacity: 0.8 !important;
    font-weight: 400 !important;
    color: #cccccc !important;
}

/* Chat interface styling */
.chat-container {
    background: #111111 !important;
    border-radius: 0 0 12px 12px !important;
    padding: 24px !important;
}

/* Clean chatbot message bubbles */
.chatbot-clean .chat-message,
.chatbot-clean .message,
.chatbot-clean .prose {
    background: none !important;
    box-shadow: none !important;
    border: none !important;
    margin: 8px 0 !important;
    padding: 0 !important;
}

.chatbot-clean .user,
.user-message {
    background: #1a1a1a !important;
    color: #fff !important;
    border-radius: 14px !important;
    padding: 12px 22px !important;
    margin-left: auto !important;
    margin-right: 16px !important;
    box-shadow: 0 1px 6px rgba(0,0,0,0.18);
    width: max-content !important;
    min-width: 56px !important;
    max-width: 90vw !important;
    border: 1px solid #232323 !important;
    font-size: 1.05em !important;
    text-align: left !important;
    font-family: inherit !important;
    white-space: nowrap !important;
    overflow-x: auto !important;
}

.chatbot-clean .assistant,
.assistant-message {
    background: #141414 !important;
    color: #f2f2f2 !important;
    border-radius: 14px !important;
    padding: 16px 22px !important;
    margin-right: auto !important;
    margin-left: 8px !important;
    box-shadow: 0 1px 6px rgba(0,0,0,0.13);
    max-width: 75% !important;
    border: 1px solid #232323 !important;
    font-size: 1.05em !important;
    font-family: inherit !important;
}

/* Remove message box styling from container */
.message, .prose {
    background: none !important;
    border: none !important;
    box-shadow: none !important;
}


/* Enhanced Textbox Styling */
.input-container {
    background: #1a1a1a !important;
    border-radius: 12px !important;
    padding: 0 !important;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
    border: 2px solid #333333 !important;
    transition: all 0.3s ease !important;
    position: relative !important;
    overflow: hidden !important;
}

.input-container:focus-within {
    border-color: #ffffff !important;
    box-shadow: 0 0 0 3px rgba(255, 255, 255, 0.1) !important;
    transform: translateY(-1px) !important;
}

/* Text input specific styling */
input[type="text"], textarea {
    color: #ffffff !important;
    background-color: transparent !important;
    border: none !important;
    outline: none !important;
    padding: 14px 20px !important;
    font-size: 16px !important;
    line-height: 1.5 !important;
    width: 100% !important;
    resize: none !important;
    font-family: 'Segoe UI', 'Helvetica Neue', Arial, sans-serif !important;
    height: 48px !important;
    min-height: 48px !important;
    max-height: 48px !important;
}

input[type="text"]::placeholder, textarea::placeholder {
    color: #888888 !important;
    font-style: italic !important;
    opacity: 0.8 !important;
}

input[type="text"]:focus, textarea:focus {
    outline: none !important;
    border: none !important;
    box-shadow: none !important;
}

/* Button styling */
.btn-primary {
    background: linear-gradient(135deg, #ffffff 0%, #f0f0f0 100%) !important;
    border: none !important;
    border-radius: 10px !important;
    padding: 14px 28px !important;
    color: #000000 !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 2px 8px rgba(255, 255, 255, 0.2) !important;
    font-size: 14px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    height: 48px !important;
    min-height: 48px !important;
    max-height: 48px !important;
}

.btn-primary:hover {
    background: linear-gradient(135deg, #f0f0f0 0%, #e0e0e0 100%) !important;
    box-shadow: 0 4px 12px rgba(255, 255, 255, 0.3) !important;
    transform: translateY(-2px) !important;
}

.btn-secondary {
    background: #333333 !important;
    border: 1px solid #555555 !important;
    border-radius: 8px !important;
    padding: 10px 20px !important;
    color: #ffffff !important;
    font-weight: 500 !important;
    transition: all 0.3s ease !important;
    font-size: 13px !important;
}

.btn-secondary:hover {
    background: #444444 !important;
    border-color: #666666 !important;
    transform: translateY(-1px) !important;
}

/* Chatbot styling */
.chatbot {
    background: #0a0a0a !important;
    border: 1px solid #333333 !important;
    border-radius: 12px !important;
    padding: 20px !important;
}

/* Input row styling */
.input-row {
    display: flex !important;
    gap: 12px !important;
    align-items: flex-end !important;
    margin-top: 20px !important;
}

/* Responsive design */
@media (max-width: 768px) {
    .main-container {
        margin: 10px !important;
        border-radius: 12px !important;
    }
    
    .header h1 {
        font-size: 2em !important;
    }
    
    .user-message, .assistant-message {
        margin-left: 5% !important;
        margin-right: 5% !important;
    }
    
    .input-container {
        padding: 0 !important;
    }
    
    input[type="text"], textarea {
        padding: 14px 16px !important;
        font-size: 14px !important;
    }
    
    .chatbot-clean .user,
.user-message {
    background: #1a1a1a !important;
    color: #fff !important;
    border-radius: 20px !important;
    padding: 10px 20px !important;
    margin-left: auto !important;
    margin-right: 16px !important;
    box-shadow: 0 1px 6px rgba(0,0,0,0.18);
    width: max-content !important;
    min-width: 56px !important;
    max-width: 55vw !important;
    border: 1px solid #232323 !important;
    font-size: 1.08em !important;
    text-align: left !important;
    font-family: inherit !important;
    word-break: break-word !important;
    white-space: pre-line !important;
}
}
"""

# Create enhanced interface
def create_enhanced_black_interface():
    """Build the black-themed PersonalRAG chat UI and return the Blocks app.

    The layout is a header banner, a chatbot pane, a textbox with a "Send"
    button and a "Clear Chat" button. Submitting the textbox or clicking
    "Send" routes the text through ``chat_messages``; the chatbot value is
    kept as a list of ``(user, assistant)`` pairs.

    Returns:
        The ``gr.Blocks`` interface, not yet launched.
    """
    welcome_text = "👋 Welcome to PersonalRAG! I'm here to help you explore your knowledge base. What would you like to know?"

    with gr.Blocks(css=custom_css, title="PersonalRAG") as interface:

        # Header
        gr.HTML("""
        <div class="header">
            <h1>PersonalRAG</h1>
            <p>Your Personal Knowledge Assistant</p>
        </div>
        """)

        # Main Chat Area
        with gr.Row():
            with gr.Column(scale=1):
                # Chat interface
                chatbot = gr.Chatbot(
                    height=600,
                    container=True,
                    bubble_full_width=False,
                    show_label=False,
                    elem_classes=["chatbot-clean"]
                )

                # Enhanced Input area with better styling
                with gr.Row(elem_classes="input-row"):
                    msg = gr.Textbox(
                        placeholder="Ask me anything about your documents, projects, or work experience...",
                        lines=2,
                        scale=4,
                        elem_classes="input-container",
                        show_label=False,
                        container=False
                    )
                    submit_btn = gr.Button("Send", elem_classes="btn-primary", scale=1)

                # Control buttons
                with gr.Row():
                    clear_btn = gr.Button("Clear Chat", elem_classes="btn-secondary", size="sm")

        # Event handlers

        def handle_message(message, history):
            """Append one (user, assistant) exchange and clear the textbox.

            Returns the updated history and an empty string for the textbox.
            Blank messages leave the history unchanged.
            """
            # The chatbot value can be empty before anything was displayed.
            history = history or []
            if not message or not message.strip():
                return history, ""

            # Get AI response
            try:
                response_messages = chat_messages(message, history)
                if response_messages:
                    response = response_messages[-1]["content"]
                else:
                    response = "Sorry, I couldn't generate a response."
                history.append((message, response))
            except Exception as e:
                history.append((message, f"Error: {str(e)}"))

            return history, ""

        def clear_chat():
            """Clear the visible chat and the chain's conversation memory.

            Emptying only the widget would leave earlier turns in the
            module-level ``memory`` object, which would keep feeding them to
            the conversation chain.
            """
            # Looked up dynamically so the UI still works if the memory cell
            # has not been run.
            chain_memory = globals().get("memory")
            if chain_memory is not None:
                chain_memory.clear()
            return []

        # Connect event handlers
        msg.submit(handle_message, [msg, chatbot], [chatbot, msg])
        submit_btn.click(handle_message, [msg, chatbot], [chatbot, msg])

        clear_btn.click(clear_chat, outputs=chatbot)

        # Add initial greeting. None (not "") in the user slot marks "no user
        # message" so that only the assistant bubble is rendered.
        interface.load(
            lambda: [(None, welcome_text)],
            outputs=chatbot
        )

    return interface

# Build the UI, then start the Gradio server
interface = create_enhanced_black_interface()
interface.launch(
    quiet=False,
    show_error=True,
    share=False,
    inbrowser=True,
)