In [None]:
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
import pandas as pd
from PIL import Image
import base64
from langchain_core.messages import HumanMessage,SystemMessage
import mimetypes
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
# from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
import dotenv
import os
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode
from langgraph.graph import StateGraph, START, END, MessagesState
import requests
import json
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
from typing import Literal
from langgraph.checkpoint.memory import MemorySaver
dotenv.load_dotenv()

In [None]:

model = ChatGoogleGenerativeAI(model="gemini-2.5-flash",temperature = 0)

class UniversalLoader:
    def __init__(self,llm):
        self.llm = llm

    def process_file(self, file_path: str):
        """
        Traffic Controller: Routes files to the correct reader.
        """
        # 1. Get extension and mime type
        _, ext = os.path.splitext(file_path)
        ext = ext.lower()
        mime_type, _ = mimetypes.guess_type(file_path)
        
        # 2. DEFINE CODE EXTENSIONS (Treat these as text)
        code_extensions = {'.py', '.js', '.ts', '.html', '.css', '.java', '.cpp', '.c', '.h', '.sql', '.md', '.json', '.xml', '.yaml', '.yml', '.txt'}

        if ext in code_extensions:
            return self._process_code(file_path, ext)
        
        elif mime_type and "pdf" in mime_type:
            return self._process_pdf(file_path)
        
        elif mime_type and "csv" in mime_type:
            return self._process_csv(file_path)
        
        elif mime_type and "image" in mime_type:
            return self._process_image(file_path)
        
        else:
            return f"Unsupported file type: {mime_type or ext}"
        
    def _process_code(self, file_path, ext):
        """Reads code files and wraps them in markdown."""
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            # Wrap in markdown so LLM knows it's code
            lang_map = {'.py': 'python', '.js': 'javascript', '.ts': 'typescript', '.html': 'html', '.sql': 'sql', '.css': 'css'}
            language = lang_map.get(ext, '')
            return f"```{language}\n{content}\n```"
        except Exception as e:
            return f"Error reading code file: {e}"
        
    def _process_txt(self,file_path):
        with open(file_path,'r') as f:
            return f.read()
        
    def _process_pdf(self,file_path):
        loader = PyPDFLoader(file_path)
        pages = loader.load_and_split()
        result = ""
        for page in pages:
            result += page.page_content + "\n"
        return result
    
    def _process_csv(self,file_path):
        df = pd.read_csv(file_path)
        return df.to_markdown(index=False)
    
    def _process_image(self,file_path):
        try:
            with open(file_path, 'rb') as f:
                    
                image_data = base64.b64encode(f.read()).decode('utf-8')
                
                Prompt = HumanMessage(content = [
                    {"type":"text","text":"Describe the following image in detail."},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
                ])
                response = self.llm.invoke([Prompt])
                return response.content

        except Exception as e:
            return "error processing image: " + str(e)
universalloader = UniversalLoader(model)


In [None]:
#embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
vector_store = FAISS.from_texts(["Nexus Initialized"], embeddings)

def index_files(file_paths):
    """
    Reads files -> Chunks them -> Saves to Vector DB (Locally)
    """
    all_documents = []
    
    for path in file_paths:
        print(f"Loading: {path}...")
        
        # Extract Text using your UniversalLoader
        raw_content = universalloader.process_file(path)
        
        # Convert to Document
        doc = Document(page_content=raw_content, metadata={"source": path})
        all_documents.append(doc)
        
    # Split into chunks
    splits = text_splitter.split_documents(all_documents)
    
    # Add to Vector Store
    if splits:
        vector_store.add_documents(splits)
        print(f"Successfully indexed {len(splits)} chunks locally!")
    else:
        print("No content found to index.")


In [None]:

retriever = vector_store.as_retriever(search_kwargs={"k": 2})
@tool
def retrieve_documents(query: str) -> str:
    """
    Search and retrieve information from internal documents, code, and policies.
    Use this tool when the user asks about specific files or internal knowledge.
    """
    pages = retriever.invoke(query)
    result = ""
    for page in pages:
        result += page.page_content + "\n\n"
    return result

def should_continue(state: MessagesState) -> str:
    messages = state['messages']
    last_message = messages[-1]
    # If the LLM asks for a tool, go to "tools" node
    if last_message.tool_calls:
        return "tools"
    # Otherwise, stop
    return END

# 1. The Approved Tools
tools = [ retrieve_documents]

# 2. Bind Tools to Model
model = model.bind_tools(tools)
tool_node = ToolNode(tools)

# 3. Define the Agent Logic
def call_model(state: MessagesState):
    messages = state['messages']
    response = model.invoke(messages)
    return {"messages": [response]}

# 4. Build the Graph
workflow = StateGraph(MessagesState)
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)

workflow.add_edge(START, "agent")
workflow.add_conditional_edges("agent", should_continue)
workflow.add_edge("tools", "agent")

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

print("Prototype Complete. Ready to migrate to Backend.")

def get_nexus_response(user_prompt: str, files: list = None):
    """
    This function handles the AI logic.
    Currently, it processes text. Later, we will add RAG (File) support here.
    """
    
    try:
        if files and len(files)>0:
            index_files(files)
            file_names = ", ".join([os.path.basename(f) for f in files])
            user_prompt = f"System Note: The user just uploaded these files: {file_names}. \n\nUser Question: {user_prompt}"
        else:
            user_prompt = f"User Question: {user_prompt}"

        system_instruction = SystemMessage(content="""
        You are Nexus, an advanced AI with file-reading capabilities.
        
        CRITICAL RULES:
        1. You have a tool named 'retrieve_documents'.
        2. IF the user asks about "the file", "uploaded documents", or content you don't know:
           YOU MUST USE 'retrieve_documents' to look it up.
        3. DO NOT say "I cannot access files". You HAVE the tool. Use it.
        4. If the tool returns text, assume it is the correct content of the file.
        """)

        config = {"configurable": {"thread_id": "1"}}
        inputs = {"messages": [system_instruction,HumanMessage(content=user_prompt)]}
        result_state = app.invoke(inputs,config=config)
        last_message = result_state['messages'][-1]
        content = last_message.content
       
        if isinstance(content, list):
            text_parts = []
            for part in content:
                # Extract 'text' if it exists, otherwise convert whole part to string
                if isinstance(part, dict):
                    text_parts.append(part.get('text', str(part)))
                else:
                    text_parts.append(str(part))
            return "\n".join(text_parts)
        
        # Case B: Content is already a string
        if isinstance(content, str):
            return content
            
        # Case C: Fallback
        return str(content)
        
    except Exception as e:
        return f"I encountered an error processing your request: {str(e)}"

In [None]:
import os

# 1. Create a dummy file for testing
test_file_path = "secret_plans.txt"
with open(test_file_path, "w") as f:
    f.write("CONFIDENTIAL: The Project Nexus launch date is October 15th, 2025. The admin password is 'BlueSky99'.")

print(f"Created test file at: {os.path.abspath(test_file_path)}")

# 2. Set API Key (Replace with your actual key if .env is not working)
# Make sure .env is in the same folder or parent


Created test file at: d:\centeral folder\Nexus-The-Agentic-RAG-Orchestrator\testing\secret_plans.txt


True

In [None]:

import os
from dotenv import load_dotenv
load_dotenv() 
import mimetypes
import pandas as pd
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
from langgraph.prebuilt import ToolNode
from langgraph.graph import StateGraph, START, END, MessagesState
from langchain_experimental.utilities import PythonREPL
from langgraph.checkpoint.memory import MemorySaver
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

# --- 1. INITIALIZE ---
print("Initializing Model & Vector Store...")
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0) # Use 1.5-flash or 2.0-flash-exp
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
#embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
vector_store = FAISS.from_texts(["Nexus Knowledge Base Initialized"], embeddings)

# --- 2. LOADER & INDEXER ---
class UniversalLoader:
    def process_file(self, file_path):
        _, ext = os.path.splitext(file_path)
        try:
            if ext == ".txt":
                with open(file_path, "r", encoding="utf-8") as f: return f.read()
            elif ext == ".pdf":
                loader = PyPDFLoader(file_path)
                return "\n".join([p.page_content for p in loader.load_and_split()])
            else:
                return "Unsupported file type"
        except Exception as e:
            return f"Error: {e}"

loader = UniversalLoader()

def index_files(file_paths):
    docs = []
    for path in file_paths:
        content = loader.process_file(path)
        docs.append(Document(page_content=content, metadata={"source": path}))
    
    if docs:
        splits = text_splitter.split_documents(docs)
        vector_store.add_documents(splits)
        print(f"‚úÖ Successfully indexed {len(splits)} chunks from {file_paths}")

# --- 3. TOOLS ---
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

@tool
def retrieve_documents(query: str) -> str:
    """Search uploaded files for information."""
    print(f"üîç TOOL CALLED: retrieve_documents('{query}')") 
    docs = retriever.invoke(query)
    if not docs: return "No info found."
    return "\n".join([d.page_content for d in docs])

search_tool = DuckDuckGoSearchRun()
tools = [search_tool, retrieve_documents]

# --- 4. GRAPH ---
model_with_tools = model.bind_tools(tools)
tool_node = ToolNode(tools)

def call_model(state: MessagesState):
    return {"messages": [model_with_tools.invoke(state['messages'])]}

def should_continue(state: MessagesState):
    if state['messages'][-1].tool_calls:
        return "tools"
    return END

workflow = StateGraph(MessagesState)
workflow.add_node("agent", call_model)
workflow.add_node("tools", tool_node)
workflow.add_edge(START, "agent")
workflow.add_conditional_edges("agent", should_continue)
workflow.add_edge("tools", "agent")

app = workflow.compile(checkpointer=MemorySaver())
print("ü§ñ Agent Ready.")

Initializing Model & Vector Store...
ü§ñ Agent Ready.


In [6]:
# --- RUN THE TEST ---

# 1. Force Index the File
index_files(["secret_plans.txt"])

# 2. Define the Prompt
user_query = "What is the admin password for Project Nexus?"

# 3. Inject System Prompt to Force Tool Use
system_msg = SystemMessage(content="""
You are Nexus. 
CRITICAL: You have access to a tool 'retrieve_documents'. 
If the user asks about 'Project Nexus', 'password', or 'launch date', YOU MUST use that tool.
Do not say you cannot access files.
""")

config = {"configurable": {"thread_id": "test_notebook_1"}}
inputs = {"messages": [system_msg, HumanMessage(content=user_query)]}

print(f"\nüí¨ User: {user_query}")
print("-" * 40)

# 4. Run Agent
for event in app.stream(inputs, config=config):
    for key, value in event.items():
        if key == "agent":
            print("ü§ñ Agent Thinking...")
            # Uncomment to see raw thought: print(value['messages'][0].content)
        elif key == "tools":
            print("üõ†Ô∏è Tool Output Received")

# 5. Get Final Answer
final_state = app.get_state(config)
final_response = final_state.values['messages'][-1].content

print("-" * 40)
print(f"‚úÖ Final Answer:\n{final_response}")

‚úÖ Successfully indexed 1 chunks from ['secret_plans.txt']

üí¨ User: What is the admin password for Project Nexus?
----------------------------------------
ü§ñ Agent Thinking...
üîç TOOL CALLED: retrieve_documents('admin password for Project Nexus')
üõ†Ô∏è Tool Output Received
ü§ñ Agent Thinking...
----------------------------------------
‚úÖ Final Answer:
[{'type': 'text', 'text': "The admin password for Project Nexus is 'BlueSky99'.", 'extras': {'signature': 'CuABAXLI2nwLni1wyc8LrUlGHuVZmWEHJXxt60Iz1VagJ8G86+kaaz440G5VYEzdMN/5QPErM5uhlBR1IYsclQJbsSh682Ry8ICYkH9sWDIpuER4KWfclozWZ8IMjUfevwRhqA9hnmECPvTKy1Y5PzjIK3P9+Vs7mb4BaLLork6e+DqJgl0mh24xeeORHwyY4w/88mDxRoU15hy0W1ygAi1qr7PAVzKGHu+ON9aLKkqQmSlyrdkMxcLcvJlwzBE7c/WG6K0G7IvtOeabJES8C5UP0BnfDJWp8ordKz3kyU+8TIo='}}]


In [3]:
import torch
print(torch.__version__)

2.7.0+cu118


In [None]:
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

print("Testing FastEmbed...")
# This line was crashing before. It should work now!
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

print("‚úÖ Success! FastEmbed is working.")

Testing FastEmbed...
‚úÖ Success! FastEmbed is working.
