# Best before gpu

# rag_engine.py
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import pickle

# Name of the sentence-transformers model passed to SentenceTransformer in setup_rag().
EMB_MODEL = "all-MiniLM-L6-v2"
# Directory where build_or_load_index() stores the FAISS index and pickled chunks.
EMB_DIR = "data/embeddings"
# Create the cache directory at import time; no error if it already exists.
os.makedirs(EMB_DIR, exist_ok=True)

def chunk_text(text, size=400):  # Reduced from 500 to 400 for faster processing
    """Split text into chunks of at most ``size`` whitespace-separated words.

    Args:
        text: The text to split. Runs of whitespace (including newlines) act
            as a single separator and are not preserved in the output.
        size: Maximum number of words per chunk; must be a positive integer.

    Returns:
        A list of chunks, each made of words joined by single spaces. The
        last chunk may hold fewer than ``size`` words. Empty or
        whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``size`` is less than 1. Previously a negative size
            silently produced no chunks and zero raised an unrelated
            ``range()`` error.
    """
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    words = text.split()
    # str.split() never yields empty strings, so every slice below is a
    # non-empty chunk and needs no further filtering.
    return [" ".join(words[i:i + size]) for i in range(0, len(words), size)]

def _index_cache_is_fresh(file_path, cache_files):
    """Return True if all cache files exist and none is older than the source file."""
    if not all(os.path.exists(path) for path in cache_files):
        return False
    try:
        source_mtime = os.path.getmtime(file_path)
    except OSError:
        # The source is missing or unreadable: the cache is all we have, so
        # keep serving it (this matches the behaviour before the freshness check).
        return True
    return all(os.path.getmtime(path) >= source_mtime for path in cache_files)


def build_or_load_index(file_path, emb_model):
    """Return a FAISS index and its text chunks for ``file_path``.

    A cached index/chunk pair stored under ``EMB_DIR`` is reused when it is at
    least as new as the source file. Otherwise the file is read, chunked,
    embedded and indexed, and the result is written back to the cache. The
    modification-time check ensures a source file that is overwritten in place
    (for example a "latest" predictions file) is re-indexed instead of being
    served from a stale cache forever.

    Args:
        file_path: Path of the UTF-8 text file to index. Cache file names are
            derived from its base name without extension.
        emb_model: Object whose ``encode(list_of_str, show_progress_bar=False)``
            returns a 2-D array of embeddings (one row per chunk).

    Returns:
        ``(index, chunks)`` on success, or ``(None, [])`` when the source file
        does not exist or produces no chunks.
    """
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    index_file = os.path.join(EMB_DIR, f"{base_name}_index.faiss")
    chunks_file = os.path.join(EMB_DIR, f"{base_name}_chunks.pkl")

    # Load existing index if available and still current (MUCH FASTER)
    if _index_cache_is_fresh(file_path, (index_file, chunks_file)):
        print(f"‚ö° Loading cached index for {os.path.basename(file_path)}...")
        index = faiss.read_index(index_file)
        # NOTE: pickle.load can execute arbitrary code from a crafted file;
        # only cache files written by this application should live in EMB_DIR.
        with open(chunks_file, "rb") as f:
            chunks = pickle.load(f)
        return index, chunks

    # Build new index (first time, or when the source changed)
    print(f"üî® Building new index for {os.path.basename(file_path)}...")

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        print(f"‚ùå ERROR: {file_path} not found!")
        return None, []

    chunks = chunk_text(text, size=400)

    if not chunks:
        print(f"‚ö†Ô∏è WARNING: No chunks created from {file_path}")
        return None, []

    # Create embeddings (no progress bar for speed). FAISS needs a contiguous
    # float32 matrix, so convert once up front.
    embeddings = np.ascontiguousarray(
        emb_model.encode(chunks, show_progress_bar=False), dtype="float32"
    )

    # Build FAISS index (exact L2 search over all chunk vectors)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Save index and chunks so the next start can skip the embedding step
    faiss.write_index(index, index_file)
    with open(chunks_file, "wb") as f:
        pickle.dump(chunks, f)

    print(f"‚úÖ Index created: {len(chunks)} chunks")
    return index, chunks

def setup_rag():
    """Load the embedding model and prepare the book and prediction indexes.

    Returns:
        A 5-tuple ``(model, book_index, book_chunks, pred_index, pred_chunks)``.
        Each index/chunks pair is whatever ``build_or_load_index`` returned
        for the corresponding file.
    """
    print("ü§ñ Loading embedding model...")
    encoder = SentenceTransformer(EMB_MODEL)

    print("üìö Setting up Book Index...")
    book = build_or_load_index("data/book.txt", encoder)

    print("üìä Setting up Prediction Index...")
    prediction = build_or_load_index("data/predictions/prediction_latest.txt", encoder)

    # Flatten the two (index, chunks) pairs into the single tuple callers unpack.
    return (encoder, *book, *prediction)

def _lookup_chunks(index, chunks, query_emb, k):
    """Return the chunks matching the top-``k`` hits of ``query_emb`` in ``index``."""
    _, indices = index.search(query_emb, k)
    # FAISS pads the result with -1 when the index holds fewer than k vectors.
    # Without the lower bound, -1 would silently select the *last* chunk.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]


def retrieve_context(query, model_emb, book_index=None, book_chunks=None,
                     pred_index=None, pred_chunks=None, k=2):
    """Retrieve relevant context for ``query`` from the supplied indexes.

    Args:
        query: The user's question.
        model_emb: Embedding model; ``encode([query], show_progress_bar=False)``
            must return an array with an ``astype`` method.
        book_index: Index over the book chunks, or None to skip the book.
        book_chunks: Chunk texts aligned with ``book_index`` rows.
        pred_index: Index over the prediction chunks, or None to skip them.
        pred_chunks: Chunk texts aligned with ``pred_index`` rows.
        k: Number of nearest chunks to request from each index.

    Returns:
        The retrieved sections ("Book Context:" and/or "Prediction Data:")
        separated by a blank line, or ``"No context found."`` when nothing
        was retrieved. A source is skipped when its index is None, its chunk
        list is empty, or its search raises.
    """
    sources = (
        ("Book Context:", "book", book_index, book_chunks),
        ("Prediction Data:", "predictions", pred_index, pred_chunks),
    )

    # Encode the query once; the same vector is used for every index.
    query_emb = model_emb.encode([query], show_progress_bar=False).astype('float32')

    context_parts = []
    for heading, label, index, chunks in sources:
        if index is None or not chunks:
            continue
        try:
            retrieved = _lookup_chunks(index, chunks, query_emb, k)
        except Exception as e:
            # Best effort: a failing source must not prevent the other one
            # from contributing context.
            print(f"‚ö†Ô∏è Error retrieving from {label}: {e}")
            continue
        if retrieved:
            context_parts.append(heading + "\n" + "\n".join(retrieved))

    return "\n\n".join(context_parts) if context_parts else "No context found."







    # main.py
from ollama import chat
from rag_engine import setup_rag, retrieve_context

# Initialise the RAG state once, at import time. The five names bound here are
# module-level globals that get_response() reads on every call.
print("üìÑ Loading RAG indexes (book + prediction)...")
model_emb, book_index, book_chunks, pred_index, pred_chunks = setup_rag()
print("‚úÖ RAG system ready!\n")

# --- Intent Detection Keywords ---
# Greetings / small talk. Matched as whole words or phrases, and only on
# short inputs (at most five words).
SIMPLE_CHAT = ["hi", "hii", "hello", "hey", "hyy", "ok", "okay", "thanks", "thank you", 
               "bye", "goodbye", "how are you", "what's up", "wassup"]

# Phrases asking for a recommendation. Matched as substrings.
INVESTMENT_ADVICE = ["should i invest", "what do you suggest", "recommendation", 
                     "what should i do", "invest or not", "buy or sell", "advice"]

# Words asking about future prices. Matched as substrings so that variants
# such as "predictions" or "trending" still match.
PREDICTION_QUERY = ["prediction", "forecast", "price tomorrow", "future price", 
                    "what will be", "trend", "next"]


def detect_intent(user_input):
    """Classify a user message into one of four routing intents.

    Checks run in priority order: small talk, investment advice, prediction,
    and finally the book-QA default.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        One of ``"chat"``, ``"investment"``, ``"prediction"`` or ``"book_qa"``.
    """
    q = user_input.lower().strip()

    # Check for simple greetings/chat. Keywords must match whole words: a raw
    # substring test would treat "this" as "hi" or "book" as "ok" and route
    # short real questions to small talk. Punctuation is replaced by spaces
    # (apostrophes are kept for "what's up") and the text is padded so that
    # " keyword " only matches on word boundaries.
    cleaned = "".join(ch if ch.isalnum() or ch == "'" else " " for ch in q)
    padded = " " + " ".join(cleaned.split()) + " "
    if len(q.split()) <= 5 and any(f" {phrase} " in padded for phrase in SIMPLE_CHAT):
        return "chat"

    # Check for investment advice
    if any(phrase in q for phrase in INVESTMENT_ADVICE):
        return "investment"

    # Check for predictions
    if any(word in q for word in PREDICTION_QUERY):
        return "prediction"

    # Default to book QA (theory questions)
    return "book_qa"


def get_response(user_input):
    """Answer ``user_input`` by routing it according to its detected intent.

    The intent decides which context is retrieved and which prompt is built:
    small talk uses no retrieval, investment advice uses book and prediction
    context, prediction queries use prediction context only, and book QA uses
    book context only. The finished prompt is sent to the ``llama3:latest``
    model as a single user message.

    Returns:
        The text content of the model's reply, or None for an intent that is
        not one of the four handled values.
    """
    intent = detect_intent(user_input)

    if intent == "chat":
        # Greeting / small talk: no retrieval needed.
        prompt = f"Respond briefly and warmly to: {user_input}"

    elif intent == "investment":
        # Recommendation: combine both sources, two chunks from each.
        context = retrieve_context(
            user_input,
            model_emb,
            book_index=book_index,
            book_chunks=book_chunks,
            pred_index=pred_index,
            pred_chunks=pred_chunks,
            k=2,
        )
        prompt = (
            "You're a financial advisor. Give a SHORT recommendation based on:\n\n"
            f"{context}\n\n"
            f"Question: {user_input}\n\n"
            "Answer format:\n"
            "Recommendation: [INVEST/AVOID/HOLD]\n"
            "Reason: [2-3 sentences combining prediction trend + book principle]\n\n"
            "Be concise and direct."
        )

    elif intent == "prediction":
        # Forecast question: only the single best prediction chunk.
        context = retrieve_context(
            user_input,
            model_emb,
            pred_index=pred_index,
            pred_chunks=pred_chunks,
            k=1,
        )
        prompt = (
            "Answer briefly using this prediction data:\n\n"
            f"{context}\n\n"
            f"Question: {user_input}"
        )

    elif intent == "book_qa":
        # Theory question: book context only.
        context = retrieve_context(
            user_input,
            model_emb,
            book_index=book_index,
            book_chunks=book_chunks,
            k=2,
        )
        prompt = (
            "Answer concisely using this book context:\n\n"
            f"{context}\n\n"
            f"Question: {user_input}"
        )

    else:
        return None

    reply = chat(model="llama3:latest", messages=[{"role": "user", "content": prompt}])
    return reply["message"]["content"]









# ui.py
import gradio as gr
import sys
from main import get_response

def chat_interface(message, history):
    """Handle one submitted message and return the updated chat state.

    Args:
        message: Text from the input box. ``None``, empty and whitespace-only
            values are ignored (the clear handler in this file writes ``None``
            into the textbox).
        history: List of ``{"role": ..., "content": ...}`` dicts shown in the
            chatbot, or ``None`` for an empty conversation. The list is
            extended in place.

    Returns:
        ``(history, "")``: the conversation to display and an empty string
        that clears the input box.
    """
    if history is None:
        history = []

    # Nothing to send: leave the conversation untouched and clear the box.
    if not message or not message.strip():
        return history, ""

    # Get response from the bot
    response = get_response(message)

    # Append both turns in the "messages" format
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": response})

    return history, ""

def exit_app():
    """Print a farewell message, then raise ``SystemExit`` with status 0.

    NOTE(review): this is wired to a Gradio button; confirm that raising
    SystemExit from an event handler actually stops the server process.
    """
    farewell = "\nüëã Shutting down chatbot... Goodbye!"
    print(farewell)
    raise SystemExit(0)

# --- Custom CSS for Clean UI ---
# Stylesheet passed to gr.Blocks(css=...). The #chatbot, #input_box and
# #exit_btn selectors correspond to the elem_id values set on the components
# in the layout below; .button-row matches the elem_classes of the second row.
# The footer rule hides the page footer.
# NOTE(review): .message.user and .message.bot use the same background
# (#007bff) while the bot text colour is dark (#202124) - this looks like a
# copy-paste; confirm the intended bot bubble colour.
custom_css = """
#chatbot {
    border-radius: 12px;
    border: 1px solid #e0e0e0;
}

#chatbot .message.user {
    background-color: #007bff !important;
    color: white !important;
    border-radius: 18px !important;
    padding: 10px 15px !important;
    margin: 5px 0 !important;
}

#chatbot .message.bot {
    background-color: #007bff !important;
    color: #202124 !important;
    border-radius: 18px !important;
    padding: 10px 15px !important;
    margin: 5px 0 !important;
}

#input_box {
    border-radius: 24px;
    border: 1px solid #dadce0;
    padding: 10px 20px;
}

.button-row {
    margin-top: 10px;
}

footer {
    display: none !important;
}

#exit_btn {
    background-color: #dc3545 !important;
}
"""

# --- Build Gradio Interface ---
# Layout: title, chat window, input row (textbox + Send), utility row
# (Clear / Exit), usage tips. Event handlers are registered at the end of the
# block, once every component exists.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    
    gr.Markdown(
        """
        # üíº Investment Advisor Chatbot
        ### Powered by RAG + Ollama LLaMA3 | The Intelligent Investor + Live Predictions
        """
    )
    
    # type="messages" means the history is a list of {"role", "content"}
    # dicts, which is the format chat_interface() appends.
    chatbot = gr.Chatbot(
        label="üí¨ Chat",
        type="messages",
        height=500,
        elem_id="chatbot",
        show_copy_button=True
    )
    
    with gr.Row():
        msg_input = gr.Textbox(
            placeholder="Ask me anything about investing, predictions, or financial theory...",
            show_label=False,
            scale=9,
            elem_id="input_box"
        )
        send_btn = gr.Button("Send", variant="primary", scale=1)
    
    with gr.Row(elem_classes="button-row"):
        clear_btn = gr.Button("üóëÔ∏è Clear Chat", variant="secondary", size="sm")
        exit_btn = gr.Button("‚ùå Exit", variant="stop", size="sm", elem_id="exit_btn")
    
    gr.Markdown(
        """
        **Tips:**
        - Ask theory questions: *"What is value investing?"*
        - Get predictions: *"What's the gold price forecast?"*
        - Investment advice: *"Should I invest in gold?"*
        """
    )
    
    # Event handlers
    # Clicking Send and pressing Enter in the textbox do the same thing:
    # send the message, then show the new history and clear the input.
    send_btn.click(
        fn=chat_interface,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    
    msg_input.submit(
        fn=chat_interface,
        inputs=[msg_input, chatbot],
        outputs=[chatbot, msg_input]
    )
    
    # Reset the conversation and the input box. The textbox is cleared with
    # an empty string rather than None so the next submission still delivers
    # a str to chat_interface().
    clear_btn.click(
        fn=lambda: ([], ""),
        outputs=[chatbot, msg_input]
    )
    
    exit_btn.click(
        fn=exit_app,
        inputs=None,
        outputs=None
    )

if __name__ == "__main__":
    # Startup banner, then serve the UI locally on port 7860 without a
    # public share link.
    for banner_line in (
        "üöÄ Launching Investment Advisor Chatbot...",
        "üìç Open in browser: http://localhost:7860",
        "‚ö° Optimized for faster responses!",
    ):
        print(banner_line)
    demo.launch(server_port=7860, share=False)