# In [None]:
import os
from typing import List, Dict, Any
import requests
import streamlit as st

# --- LangChain + OpenAI ---
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain.memory import VectorStoreRetrieverMemory
from langchain_core.documents import Document

# --- CrewAI + Tools ---
from crewai import Agent, Task, Crew, Process
from crewai.tools import tool
from crewai_tools import SerperDevTool

# Prefer credentials that are already present in the environment. The literal
# "mykey" values are placeholders only: they keep the script importable when
# nothing is configured and must never be replaced with real secrets in source.
os.environ.setdefault("OPENAI_API_KEY", "mykey")
os.environ.setdefault("SERPER_API_KEY", "mykey")

# Document Processor
class DocumentProcessor:
    """Split raw text into chunks and index them in a FAISS vector store."""

    def __init__(self, embedding_model: str = "text-embedding-ada-002"):
        """Create the embedding client used when indexing.

        Args:
            embedding_model: Model name passed to ``OpenAIEmbeddings``.
        """
        self.embeddings = OpenAIEmbeddings(model=embedding_model)
        # Set by load_and_process(); stays None until a document is indexed.
        self.vector_store = None

    def load_and_process(self, text_data: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> FAISS:
        """Chunk ``text_data`` and build a FAISS index over the chunks.

        Args:
            text_data: Raw text to index.
            chunk_size: ``chunk_size`` handed to the recursive text splitter.
            chunk_overlap: ``chunk_overlap`` handed to the recursive text splitter.

        Returns:
            The FAISS store that was built; it is also kept on
            ``self.vector_store``.

        Raises:
            ValueError: If ``text_data`` is empty or contains only whitespace.
        """
        # Blank input leaves nothing to embed; reject it up front with a clear
        # message instead of letting the failure surface later in the
        # splitter / index build.
        if not text_data or not text_data.strip():
            raise ValueError("text_data must contain non-whitespace text to index.")

        document = Document(page_content=text_data, metadata={"source": "in-memory-text"})
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        docs = text_splitter.split_documents([document])
        self.vector_store = FAISS.from_documents(docs, self.embeddings)
        return self.vector_store

# RAG Tool
class RAGTool:
    """Wrap a FAISS vector store and return retrieved chunks as plain text."""

    def __init__(self, vector_store: FAISS):
        """Build a retriever over ``vector_store`` that fetches 5 results per query.

        Args:
            vector_store: The FAISS store to search.
        """
        self.retriever = vector_store.as_retriever(search_kwargs={"k": 5})

    def retrieve(self, query: str) -> str:
        """Return the page content of the retrieved documents, newline-joined.

        Args:
            query: Search text passed to the retriever.

        Returns:
            The ``page_content`` of each retrieved document joined with
            ``"\\n"``; an empty string when nothing is retrieved.
        """
        # `invoke` is the supported retriever entry point;
        # `get_relevant_documents` is deprecated in langchain-core.
        docs = self.retriever.invoke(query)
        return "\n".join(doc.page_content for doc in docs)

# Web‐search helper function
def web_search(query: str, timeout: float = 10.0) -> str:
    """Query the DuckDuckGo JSON endpoint and return up to three text snippets.

    Args:
        query: Free-text search query.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``Text`` field of the first three ``RelatedTopics`` entries that
        have one, joined with newlines (empty string if there are none).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(
        "https://api.duckduckgo.com/",
        # Passing params lets requests URL-encode the query; interpolating it
        # into the URL breaks on characters such as '&', '#' or spaces.
        params={"q": query, "format": "json"},
        # Without a timeout a stalled connection would block indefinitely.
        timeout=timeout,
    )
    # Surface HTTP errors explicitly instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()
    results = payload.get("RelatedTopics", [])[:3]
    return "\n".join(r.get("Text", "") for r in results if "Text" in r)

# Shared memory object: a retriever over a FAISS index seeded with a single
# empty string, so a retriever exists before any real document is loaded.
# NOTE(review): building the index presumably requests an embedding at import
# time -- confirm this is acceptable for module import.
_seed_index = FAISS.from_texts([""], OpenAIEmbeddings())
memory = VectorStoreRetrieverMemory(retriever=_seed_index.as_retriever())

# Language model instance handed to each agent.
llm = OpenAI(temperature=0.7)

# Keyword arguments that every agent below is constructed with.
# NOTE(review): `memory` is the LangChain memory object created above; confirm
# the installed CrewAI `Agent` accepts that type for its `memory` argument.
_shared_agent_kwargs = {"llm": llm, "memory": memory, "verbose": True}

# Stage 1: turns the user's question into research subtasks.
planner_agent = Agent(
    role="Query Decomposer",
    goal="Break down user queries into subtasks for research.",
    backstory="A strategic planner who organizes complex questions into manageable parts.",
    **_shared_agent_kwargs,
)

# Serper-backed web search tool given to the research agent.
web_search_tool = SerperDevTool()

# Stage 2: gathers context; the only agent constructed with tools.
research_agent = Agent(
    role="Information Retriever",
    goal="Fetch relevant context using RAG and tools.",
    backstory="A diligent researcher with access to knowledge bases and web tools.",
    tools=[web_search_tool],
    **_shared_agent_kwargs,
)

# Stage 3: drafts an answer from the gathered material.
analyzer_agent = Agent(
    role="Content Synthesizer",
    goal="Synthesize retrieved data into a draft answer.",
    backstory="An analytical expert who creates coherent narratives from facts.",
    **_shared_agent_kwargs,
)

# Stage 4: checks and polishes the draft.
reviewer_agent = Agent(
    role="Quality Assurer",
    goal="Evaluate and refine answers for accuracy and clarity.",
    backstory="A meticulous reviewer ensuring truthfulness and refinement.",
    **_shared_agent_kwargs,
)

# Task creation
def create_tasks(user_query: str, rag_tool: RAGTool) -> List[Task]:
    """Build the plan -> research -> analyze -> review task chain for a query.

    Side effect: registers a "RAGRetriever" tool bound to ``rag_tool`` on the
    module-level ``research_agent``, replacing any tool of that name left over
    from a previous call.

    Args:
        user_query: The question the crew should answer.
        rag_tool: Retriever wrapper that the RAG tool delegates to.

    Returns:
        The four tasks in execution order; each later task receives the
        previous one as context.
    """
    rag_tool_name = "RAGRetriever"

    @tool(rag_tool_name)
    def rag_retrieve_tool(query: str) -> str:
        """Retrieve relevant information from the document vector store."""
        return rag_tool.retrieve(query)

    # The research agent is a module-level singleton, so a plain append would
    # pile up one extra RAG tool per call, each bound to an older rag_tool.
    # Drop earlier registrations (matched by their `name`) in place, then add
    # the current one.
    research_agent.tools[:] = [
        existing
        for existing in research_agent.tools
        if getattr(existing, "name", None) != rag_tool_name
    ]
    research_agent.tools.append(rag_retrieve_tool)

    # NOTE(review): `memory=memory` is passed to every Task unchanged from the
    # original; confirm the installed CrewAI `Task` actually uses this argument.
    plan_task = Task(
        description=f"Break down the query: '{user_query}' into 2-3 subtasks.",
        agent=planner_agent,
        expected_output="A list of 2-3 concise, actionable sub-queries/steps necessary to answer the user query.",
        memory=memory
    )
    research_task = Task(
        description="Use RAG and web search to gather info based on planner's subtasks.",
        agent=research_agent,
        context=[plan_task],
        expected_output="A collection of relevant, verified research notes and snippets categorized by the subtasks.",
        memory=memory
    )
    analyze_task = Task(
        description="Draft an answer synthesizing the retrieved context.",
        agent=analyzer_agent,
        context=[research_task],
        expected_output="A well-structured, easy-to-read draft answer to the user query, incorporating all gathered facts.",
        memory=memory
    )
    review_task = Task(
        description="Evaluate the draft for accuracy and refine it.",
        agent=reviewer_agent,
        context=[analyze_task],
        expected_output="The final, polished answer to the user query, presented in clear paragraphs.",
        memory=memory
    )
    return [plan_task, research_task, analyze_task, review_task]

# Runner
def run_crew(user_query: str, vector_store: FAISS) -> str:
    """Run the four-agent crew sequentially and return its final answer as text.

    Args:
        user_query: The question to answer.
        vector_store: FAISS store the research agent's RAG tool searches.

    Returns:
        The crew's final output converted to ``str``.
    """
    rag_tool = RAGTool(vector_store)
    tasks = create_tasks(user_query, rag_tool)
    # NOTE(review): `max_iterations` and `max_execution_time` are passed through
    # unchanged; confirm the installed CrewAI `Crew` honours these arguments.
    crew = Crew(
        agents=[planner_agent, research_agent, analyzer_agent, reviewer_agent],
        tasks=tasks,
        process=Process.sequential,
        memory=True,
        max_iterations=2,
        max_execution_time=300
    )
    result = crew.kickoff()
    # kickoff() may hand back a result object rather than a plain string
    # (newer CrewAI releases return a CrewOutput). Callers treat the answer as
    # text (e.g. `.lower()`), so honour the declared `-> str` contract here.
    return str(result)

# Evaluation helper
def evaluate_answer(query: str, answer: str, retrieved_docs: List[str]) -> Dict[str, float]:
    """Score an answer with simple lexical heuristics.

    Args:
        query: The original user question.
        answer: The generated answer text.
        retrieved_docs: Snippets retrieved for the query.

    Returns:
        A dict with three float scores:
        ``relevance`` -- fraction of distinct lower-cased query words that also
        appear in the answer (0.0 for an empty query);
        ``accuracy`` -- 1.0 if any non-blank retrieved snippet occurs verbatim
        (case-insensitively, ignoring surrounding whitespace) in the answer,
        otherwise 0.5;
        ``coherence`` -- fixed placeholder value 0.8.
    """
    # Lower-case the answer once instead of once per retrieved snippet.
    answer_lower = answer.lower()

    query_words = set(query.lower().split())
    answer_words = set(answer_lower.split())
    relevance = len(query_words & answer_words) / len(query_words) if query_words else 0.0

    # A blank snippet is a substring of every string, so it must not count as
    # evidence; skip snippets that are empty after stripping.
    snippets = (doc.strip().lower() for doc in retrieved_docs)
    grounded = any(snippet and snippet in answer_lower for snippet in snippets)
    accuracy = 1.0 if grounded else 0.5

    coherence = 0.8
    return {"relevance": relevance, "accuracy": accuracy, "coherence": coherence}

# Demo usage
if __name__ == "__main__":
    # Small in-memory corpus used to exercise the pipeline end to end.
    sample_text = """
    Artificial intelligence (AI) is intelligence demonstrated by machines. The term was coined in 1956. Early AI focused on symbolic reasoning. In the 1980s, expert systems emerged. The 21st century saw deep learning and neural networks. Key figures include Alan Turing and Geoffrey Hinton. AI applications include image recognition and natural language processing.
    """
    processor = DocumentProcessor()
    vector_store = processor.load_and_process(sample_text)
    # Point the shared memory at the freshly indexed document.
    memory.retriever = vector_store.as_retriever()

    user_query = "Explain the evolution of AI from the 1950s to now."
    # The crew result is not guaranteed to be a plain string; normalise it once
    # so the string-based evaluation below cannot fail on a result object.
    final_answer = str(run_crew(user_query, vector_store))

    rag_tool = RAGTool(vector_store)
    # Drop blank lines: an empty snippet matches any answer and would make the
    # accuracy score meaningless.
    retrieved = [
        line for line in rag_tool.retrieve(user_query).split("\n") if line.strip()
    ]
    scores = evaluate_answer(user_query, final_answer, retrieved)

    print("Final Answer:", final_answer)
    print("Evaluation Scores:", scores)