In [43]:
import os
import sys
import json
import asyncio
from typing import Any, Dict, List
from dotenv import load_dotenv

import pandas as pd
from tqdm import tqdm
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer


In [44]:
# ----------------------------
# Load environment variables
# ----------------------------
load_dotenv()  # python-dotenv: reads a local .env file (if any) into the environment
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# ----------------------------
# Build the single GROQ client that main() hands to every agent
# ----------------------------
from groq import Groq

groq_client = Groq(api_key=GROQ_API_KEY)



In [45]:
# ----------------------------
# Agent prompt texts (as constants)
# ----------------------------
# System prompt for AgentManager.parse_to_json, which appends a JSON schema
# section to it before sending.
AGENT_MANAGER_PROMPT = """
You are an experienced senior project manager of an automated machine learning project (AutoML).
You have two main responsibilities:
1. Receive requirements/inquiries from users through a well-structured JSON object.
2. Devise promising, high-quality plans for data scientists, ML research engineers, and MLOps engineers.
""".strip()

# Template for PromptAgent.generate_json: `{json_specification}` is the only
# placeholder and is filled in with str.format().
# NOTE(review): the schema is fenced with ''' rather than Markdown backticks —
# confirm this is intentional.
PROMPT_AGENT_PROMPT = """
You are an assistant project manager in the AutoML development team.
Your task is to parse the user's requirement into a valid JSON format strictly adhering to the given JSON schema.
Your response must contain only the JSON without any additional comment.
# JSON SPECIFICATION SCHEMA #
'''json
{json_specification}
'''
""".strip()

# System prompt for AutoMLAgent.preprocess_data.
DATA_AGENT_PROMPT = """
You are the world’s best data scientist for an AutoML project.
Your responsibilities include:
1. Retrieving or searching for datasets based on user instruction.
2. Preprocessing, augmenting, and visualizing the data to extract useful insights.
""".strip()

# System prompt shared by every ModelAgent method.
MODEL_AGENT_PROMPT = """
You are the world’s best ML research engineer for an AutoML project.
Your responsibilities include:
1. Retrieving well-performing candidate models based on dataset details.
2. Optimizing models via hyperparameter tuning.
3. Profiling models and selecting the best candidates.
""".strip()

# System prompt for OperationsAgent.deploy_model.
OPERATION_AGENT_PROMPT = """
You are the world’s best MLOps engineer for an AutoML project.
Your responsibilities include:
1. Preparing and deploying the model (including model compression or conversion).
2. Building a demo web application (e.g., with Gradio) for the deployed model.
""".strip()


In [46]:
# # ----------------------------
# # Utility: Split long text into overlapping chunks
# # ----------------------------
# def split_text(text: str, max_chunk_length: int = 8000, overlap_ratio: float = 0.1) -> List[str]:
#     if not (0 <= overlap_ratio < 1):
#         raise ValueError("Overlap ratio must be between 0 and 1 (exclusive).")
#     overlap_length = int(max_chunk_length * overlap_ratio)
#     chunks = []
#     start = 0
#     while start < len(text):
#         end = min(start + max_chunk_length, len(text))
#         chunks.append(text[start:end])
#         start += max_chunk_length - overlap_length
#     return chunks

# # ============================
# # COMPONENT: Pipeline State
# # ============================
# class PipelineState:
#     """
#     Manages the workflow state including phase, memory (outputs per step),
#     and provides methods for state persistence and context building.
#     """
#     def __init__(self, phase: str, output: str, message: str = "No message provided."):
#         self.phase = phase
#         self.output = output
#         self.message = message
#         self.memory: List[Dict[str, Any]] = [{}]
#         self.current_step = 0
#         self.score = 0
#         self.finished = False

#         # Define the agent prompt rules
#         self.agent_rules = {
#             "Agent Manager": AGENT_MANAGER_PROMPT,
#             "Prompt Agent": PROMPT_AGENT_PROMPT,
#             "Data Agent": DATA_AGENT_PROMPT,
#             "Model Agent": MODEL_AGENT_PROMPT,
#             "Operations Agent": OPERATION_AGENT_PROMPT
#         }
#         self.agents = list(self.agent_rules.keys())

#         # Directories for saving state outputs
#         self.output_dir = os.path.join("output", self.output)
#         self.dir_name = self.phase.replace(" ", "_")
#         self.restore_dir = os.path.join(self.output_dir, self.dir_name)
#         self.context = ""

#     def __str__(self) -> str:
#         return (f"PipelineState(Phase: {self.phase}, Step: {self.current_step}, "
#                 f"Current Agent: {self.get_current_agent()}, Finished: {self.finished})")

#     def make_context(self) -> None:
#         """Build context information for the current phase."""
#         self.context = f"Output: {self.output}\nPhase: {self.phase}\n"
#         self.context += "Agents in workflow:\n"
#         self.context += "\n".join(f"{i+1}. {agent}" for i, agent in enumerate(self.agents))

#     def update_memory(self, memory_update: Dict[str, Any]) -> None:
#         """Update internal memory with the current agent’s output."""
#         print(f"[State] Updating memory for agent '{self.get_current_agent()}' (Phase: {self.phase}).")
#         if self.memory and isinstance(self.memory[-1], dict):
#             self.memory[-1].update(memory_update)
#         else:
#             self.memory.append(memory_update)

#     def persist_memory(self) -> None:
#         """Persist the memory to a JSON file for record keeping."""
#         memory_path = os.path.join(self.restore_dir, "memory.json")
#         os.makedirs(os.path.dirname(memory_path), exist_ok=True)
#         with open(memory_path, 'w') as f:
#             json.dump(self.memory, f, indent=4)
#         print(f"[State] Memory persisted to {memory_path}")

#     def next_step(self) -> None:
#         """Advance the workflow to the next step."""
#         self.current_step += 1
#         self.memory.append({})

#     def get_current_agent(self) -> str:
#         if self.agents:
#             return self.agents[self.current_step % len(self.agents)]
#         return "N/A"

#     def make_dir(self) -> None:
#         """Create necessary directories for state outputs."""
#         os.makedirs(self.restore_dir, exist_ok=True)


In [66]:
import os
import json
from typing import Any, Dict, List

# ----------------------------
# Utility: Split long text into overlapping chunks
# ----------------------------
def split_text(text: str, max_chunk_length: int = 8000, overlap_ratio: float = 0.1) -> List[str]:
    """
    Split ``text`` into consecutive chunks that overlap at their edges.

    Each chunk holds at most ``max_chunk_length`` characters and begins
    ``max_chunk_length - int(max_chunk_length * overlap_ratio)`` characters
    after the previous one, so neighbouring chunks share the overlap region.

    Args:
        text: The string to split. An empty string yields an empty list.
        max_chunk_length: Maximum number of characters per chunk (must be > 0).
        overlap_ratio: Fraction of ``max_chunk_length`` shared between
            neighbouring chunks; must satisfy ``0 <= overlap_ratio < 1``.

    Returns:
        The chunks in document order. The last chunk is the one that reaches
        the end of ``text``.

    Raises:
        ValueError: If ``max_chunk_length`` is not positive or ``overlap_ratio``
            is outside ``[0, 1)``.
    """
    if max_chunk_length <= 0:
        # A non-positive length would give a zero/negative stride and never terminate.
        raise ValueError("max_chunk_length must be a positive integer.")
    if not (0 <= overlap_ratio < 1):
        raise ValueError("Overlap ratio must be between 0 and 1 (exclusive).")

    overlap_length = int(max_chunk_length * overlap_ratio)
    # Clamp to 1 so float rounding of the overlap can never stall the loop.
    step = max(1, max_chunk_length - overlap_length)

    chunks = []
    start = 0
    while start < len(text):
        end = start + max_chunk_length
        chunks.append(text[start:end])
        if end >= len(text):
            # This chunk already reaches the end of the text; another pass
            # would only emit a tail fully contained in the overlap region.
            break
        start += step
    return chunks

# ============================
# COMPONENT: Pipeline State
# ============================
class PipelineState:
    """
    Tracks the progress of one pipeline phase.

    The state keeps a list of per-step memory dictionaries, the index of the
    current step, and the directory that memory snapshots are written to. It
    can persist the memory as JSON and build a short textual context that
    lists the agents in the workflow.
    """
    def __init__(self, phase: str, output: str, message: str = "No message provided."):
        """
        Args:
            phase: Name of the phase; spaces become underscores in the
                snapshot directory name.
            output: Name of the run; used as the sub-directory of ``output/``.
            message: Free-form message stored on the state.
        """
        self.phase = phase
        self.output = output
        self.message = message
        # One dict per step; the last entry always belongs to the current step.
        self.memory: List[Dict[str, Any]] = [{}]
        self.current_step = 0
        self.score = 0
        self.finished = False

        # Map each agent's display name to its prompt. These are the prompt
        # constants defined earlier in the notebook, not placeholder text.
        self.agent_rules = {
            "Agent Manager": AGENT_MANAGER_PROMPT,
            "Prompt Agent": PROMPT_AGENT_PROMPT,
            "Data Agent": DATA_AGENT_PROMPT,
            "Model Agent": MODEL_AGENT_PROMPT,
            "Operations Agent": OPERATION_AGENT_PROMPT
        }
        self.agents = list(self.agent_rules.keys())

        # Directories for saving state outputs
        self.output_dir = os.path.join("output", self.output)
        self.dir_name = self.phase.replace(" ", "_")
        self.restore_dir = os.path.join(self.output_dir, self.dir_name)
        self.context = ""

    def __str__(self) -> str:
        return (f"PipelineState(Phase: {self.phase}, Step: {self.current_step}, "
                f"Current Agent: {self.get_current_agent()}, Finished: {self.finished})")

    def make_context(self) -> None:
        """Build context information for the current phase and store it in ``self.context``."""
        self.context = f"Output: {self.output}\nPhase: {self.phase}\n"
        self.context += "Agents in workflow:\n"
        self.context += "\n".join(f"{i+1}. {agent}" for i, agent in enumerate(self.agents))

    def update_memory(self, memory_update: Dict[str, Any]) -> None:
        """Merge ``memory_update`` into the memory entry of the current step."""
        print(f"[State] Updating memory for agent '{self.get_current_agent()}' (Phase: {self.phase}).")
        if self.memory and isinstance(self.memory[-1], dict):
            self.memory[-1].update(memory_update)
        else:
            # No usable entry for this step yet: start one with the update itself.
            self.memory.append(memory_update)

    def persist_memory(self) -> None:
        """Write the whole memory list to ``<restore_dir>/memory.json``."""
        memory_path = os.path.join(self.restore_dir, "memory.json")
        os.makedirs(os.path.dirname(memory_path), exist_ok=True)
        # Explicit encoding keeps the snapshot identical across platforms.
        with open(memory_path, 'w', encoding='utf-8') as f:
            json.dump(self.memory, f, indent=4)
        print(f"[State] Memory persisted to {memory_path}")

    def next_step(self) -> None:
        """Advance the workflow to the next step and open an empty memory entry for it."""
        self.current_step += 1
        self.memory.append({})

    def get_current_agent(self) -> str:
        """Return the agent name for the current step, cycling through ``self.agents``."""
        if self.agents:
            return self.agents[self.current_step % len(self.agents)]
        return "N/A"

    def make_dir(self) -> None:
        """Create necessary directories for state outputs."""
        os.makedirs(self.restore_dir, exist_ok=True)


In [48]:
class CSVEmbedder:
    """
    Handles CSV processing by reading data, computing embeddings,
    and storing them in a Chroma DB collection.

    The embedding model is injected and must expose ``encode(...)`` returning
    an object with ``.tolist()``. Batches are encoded with a single call, so
    ``encode`` must accept a list of strings (as SentenceTransformer does) as
    well as a single query string.
    """

    # Serialized rows longer than this many characters are split into chunks.
    MAX_DOCUMENT_CHARS = 8000
    CHUNK_OVERLAP_RATIO = 0.1

    def __init__(self, collection_name: str, db_path: str, embedding_model, cache_size: int = 10_000_000_000):
        """
        Args:
            collection_name: Name of the Chroma collection to open or create.
            db_path: Directory of the persistent Chroma database.
            embedding_model: Object used to embed documents and queries.
            cache_size: Value passed as Chroma's ``chroma_memory_limit_bytes``.

        Raises:
            ValueError: If ``embedding_model`` is ``None``.
        """
        # Validate first so a bad call does not create a database on disk.
        if embedding_model is None:
            raise ValueError("An embedding model must be provided.")
        self.settings = Settings(
            chroma_segment_cache_policy="LRU",
            chroma_memory_limit_bytes=cache_size
        )
        self.client = chromadb.PersistentClient(path=db_path, settings=self.settings)
        self.collection = self.client.get_or_create_collection(
            collection_name, metadata={"hnsw:space": "cosine"}
        )
        self.embedding_model = embedding_model
        # Running count of stored documents; also used to suffix chunk ids.
        self.id_counter = 0

    async def embed_csv(self, csv_file_path: str, batch_size: int = 100) -> None:
        """Embed CSV data in batches (wrapped in a thread for CPU work)."""
        await asyncio.to_thread(self._embed_csv_sync, csv_file_path, batch_size)

    def _embed_csv_sync(self, csv_file_path: str, batch_size: int):
        """
        Read the CSV, serialize each row to JSON, and store it with its embedding.

        Rows are serialized without their ``id`` column. If the CSV has no
        ``id`` column, the row index is used as the id. Serialized rows longer
        than ``MAX_DOCUMENT_CHARS`` are split into overlapping chunks whose
        ids are ``"<row id>_<counter>"``.

        Raises:
            FileNotFoundError: If ``csv_file_path`` does not exist.
        """
        if not os.path.exists(csv_file_path):
            raise FileNotFoundError(f"CSV file not found: {csv_file_path}")

        df = pd.read_csv(csv_file_path)
        if 'id' not in df.columns:
            df['id'] = df.index.astype(str)
        rows = df.to_dict(orient='records')
        doc_name = os.path.basename(csv_file_path)

        batch_ids = []
        batch_documents = []
        batch_metadatas = []

        for row in tqdm(rows, desc="Embedding CSV rows"):
            doc_id = str(row.get('id', self.id_counter))
            row_copy = {k: v for k, v in row.items() if k != 'id'}
            doc_text = json.dumps(row_copy)

            if len(doc_text) > self.MAX_DOCUMENT_CHARS:
                chunks = split_text(
                    doc_text,
                    max_chunk_length=self.MAX_DOCUMENT_CHARS,
                    overlap_ratio=self.CHUNK_OVERLAP_RATIO,
                )
                for chunk in chunks:
                    batch_documents.append(chunk)
                    batch_ids.append(f"{doc_id}_{self.id_counter}")
                    batch_metadatas.append({"doc_name": doc_name})
                    self.id_counter += 1
            else:
                batch_documents.append(doc_text)
                batch_ids.append(doc_id)
                batch_metadatas.append({"doc_name": doc_name})
                self.id_counter += 1

            if len(batch_documents) >= batch_size:
                self._flush_batch(batch_ids, batch_documents, batch_metadatas)
                batch_ids, batch_documents, batch_metadatas = [], [], []

        # Store whatever is left after the last full batch.
        if batch_documents:
            self._flush_batch(batch_ids, batch_documents, batch_metadatas)
        print(f"[CSVEmbedder] Finished embedding CSV: {csv_file_path}")

    def _flush_batch(self, ids: list, documents: list, metadatas: list) -> None:
        """Embed ``documents`` with one ``encode`` call and add the batch to the collection."""
        embeddings = self.embedding_model.encode(documents).tolist()
        self.collection.add(
            documents=documents,
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas
        )

    async def query_collection(self, query: str, n_results: int = 5) -> dict:
        """Query the Chroma collection for context using the query string."""
        def _run_query() -> dict:
            # Encoding is blocking work too, so it runs in the worker thread
            # together with the database lookup.
            query_embedding = self.embedding_model.encode(query).tolist()
            return self.collection.query(
                query_embeddings=query_embedding,
                n_results=n_results,
                include=['documents', 'metadatas', 'distances']
            )

        return await asyncio.to_thread(_run_query)

    async def delete_collection(self) -> None:
        """
        Deletes the entire collection from Chroma DB, effectively cleaning the embedded dataset.
        """
        collection_name = self.collection.name
        # Run the deletion in a worker thread in case the call blocks.
        await asyncio.to_thread(self.client.delete_collection, collection_name)
        print(f"[CSVEmbedder] Collection '{collection_name}' deleted.")


In [62]:

# ============================
# COMPONENT: Agent Base and Specialized Agents
# ============================
class AgentBase:
    """
    Shared foundation for every agent in the pipeline.

    Stores the LLM client together with the agent's role, model name and
    description, and offers one coroutine that forwards a chat request to the
    client from a worker thread.
    """
    def __init__(self, client, role: str, model: str, description: str, **kwargs):
        self.client, self.role = client, role
        self.model, self.description = model, description
        # Extra keyword arguments are replayed on every completion request.
        self.kwargs = kwargs

    async def execute(self, messages: List[Dict[str, str]]) -> Any:
        """Send ``messages`` to the chat-completions endpoint and return its response."""
        create_completion = self.client.chat.completions.create
        completion = await asyncio.to_thread(
            create_completion,
            messages=messages,
            model=self.model,
            **self.kwargs
        )
        return completion

class AgentManager(AgentBase):
    """Agent that answers under the manager prompt extended with a JSON schema."""

    def __init__(self, client, role, model, description, json_schema, **kwargs):
        super().__init__(client, role, model, description, **kwargs)
        # Schema text appended to the system prompt on every request.
        self.json_schema = json_schema

    async def parse_to_json(self, user_input: str) -> str:
        """Return the model's reply to ``user_input`` given the manager prompt and schema."""
        system_prompt = f"{AGENT_MANAGER_PROMPT}\n\n# JSON SPECIFICATION SCHEMA #\n{self.json_schema}"
        conversation = [
            dict(role="system", content=system_prompt),
            dict(role="user", content=user_input),
        ]
        completion = await self.execute(conversation)
        first_choice = completion.choices[0]
        return first_choice.message.content

class PromptAgent(AgentBase):
    """Agent that asks the model to restate a requirement as JSON matching a specification."""

    def __init__(self, client, role, model, description, json_specification, **kwargs):
        super().__init__(client, role, model, description, **kwargs)
        # Substituted into the prompt template on every request.
        self.json_specification = json_specification

    async def generate_json(self, user_input: str) -> str:
        """Return the model's reply to ``user_input`` under the formatted prompt-agent prompt."""
        system_prompt = PROMPT_AGENT_PROMPT.format(json_specification=self.json_specification)
        conversation = [
            dict(role="system", content=system_prompt),
            dict(role="user", content=user_input),
        ]
        completion = await self.execute(conversation)
        first_choice = completion.choices[0]
        return first_choice.message.content

class AutoMLAgent(AgentBase):
    """Data agent: forwards preprocessing instructions to the model under the data prompt."""

    def __init__(self, client, role, model, description, data_path: str = "./data", **kwargs):
        super().__init__(client, role, model, description, **kwargs)
        self.data_path = data_path

    async def preprocess_data(self, instructions: str) -> str:
        """Return the model's reply to the given preprocessing ``instructions``."""
        user_message = f"Instructions: {instructions}"
        conversation = [
            dict(role="system", content=DATA_AGENT_PROMPT),
            dict(role="user", content=user_message),
        ]
        completion = await self.execute(conversation)
        first_choice = completion.choices[0]
        return first_choice.message.content

    # Additional methods (retrieve_dataset, augment_data, visualize_data) can be added here.

class ModelAgent(AgentBase):
    """Model agent: every request is sent under the same model-agent system prompt."""

    async def _ask(self, user_content: str) -> str:
        """Send ``user_content`` under the model-agent prompt and return the reply text."""
        conversation = [
            dict(role="system", content=MODEL_AGENT_PROMPT),
            dict(role="user", content=user_content),
        ]
        completion = await self.execute(conversation)
        first_choice = completion.choices[0]
        return first_choice.message.content

    async def retrieve_models(self, dataset_details: str) -> str:
        """Return the model's reply to a model-retrieval request."""
        return await self._ask(dataset_details)

    async def optimize_model(self, hyperparameter_details: str) -> str:
        """Return the model's reply to a hyperparameter-optimization request."""
        return await self._ask(hyperparameter_details)

    async def profile_models(self, profiling_details: str) -> str:
        """Return the model's reply to a model-profiling request."""
        return await self._ask(profiling_details)

class OperationsAgent(AgentBase):
    """Operations agent: forwards deployment requests under the operations prompt."""

    async def deploy_model(self, deployment_details: str) -> str:
        """Return the model's reply to the given ``deployment_details``."""
        conversation = [
            dict(role="system", content=OPERATION_AGENT_PROMPT),
            dict(role="user", content=deployment_details),
        ]
        completion = await self.execute(conversation)
        first_choice = completion.choices[0]
        return first_choice.message.content

# ============================
# COMPONENT: Pipeline Agent
# ============================
class PipelineAgent:
    """
    Orchestrates the entire pipeline from CSV context retrieval through the
    deployment request.

    Five agents (keys ``"manager"``, ``"prompt"``, ``"automl"``, ``"model"``
    and ``"operations"``) plus one CSV-context lookup are run in sequence.
    Each step's text output is written to its own file in ``dataset_dir``,
    merged into the pipeline state, and finally combined into one result dict.
    """
    def __init__(self, agents: "Dict[str, AgentBase]", state: "PipelineState", csv_embedder: "CSVEmbedder", dataset_dir: str = "data", **kwargs):
        """
        Args:
            agents: Agents keyed by ``"manager"``, ``"prompt"``, ``"automl"``,
                ``"model"`` and ``"operations"``.
            state: Pipeline state that receives every step's output.
            csv_embedder: Embedder queried for dataset context.
            dataset_dir: Directory the per-step output files are written to;
                created if missing.
            **kwargs: Accepted for compatibility and ignored.
        """
        self.agents = agents
        self.state = state
        self.csv_embedder = csv_embedder
        self.dataset_dir = dataset_dir
        os.makedirs(self.dataset_dir, exist_ok=True)
        self.state.make_dir()

    def _record_step(self, key: str, filename: str, label: str, content: str) -> None:
        """Save one step's output to a file, store it in the state, and advance the step."""
        output_path = os.path.join(self.dataset_dir, filename)
        # LLM output routinely contains non-ASCII text; fix the encoding so the
        # write does not depend on the platform default.
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"[Pipeline] {label} saved to: {output_path}")
        self.state.update_memory({key: content})
        self.state.persist_memory()
        self.state.next_step()

    @staticmethod
    def _flatten_documents(query_result) -> List[str]:
        """
        Return the documents of a collection query as a flat list of strings.

        The query result holds one list of documents per query embedding, so
        the nested lists are unpacked; a missing or empty ``"documents"``
        entry yields an empty list.
        """
        nested = (query_result or {}).get("documents") or []
        flat: List[str] = []
        for entry in nested:
            if isinstance(entry, (list, tuple)):
                flat.extend(str(doc) for doc in entry)
            else:
                flat.append(str(entry))
        return flat

    async def run_pipeline(self, preprocessing_input: str, model_request: str, deployment_details: str) -> dict:
        """
        Run all pipeline steps and return their outputs.

        Args:
            preprocessing_input: The user's requirement; sent to the manager
                agent and used as the query for dataset context.
            model_request: Request sent to the model agent.
            deployment_details: Request sent to the operations agent.

        Returns:
            A dict with one entry per step plus ``"aggregated_output"``, a
            single string containing every step's output under a heading.

        Raises:
            Exception: Any error from a step is recorded in the state under
                ``"error"``, persisted, and then re-raised unchanged.
        """
        try:
            # --- Step 0: Use Manager Agent to parse initial requirements ---
            print("[Pipeline] Parsing user requirements using AgentManager...")
            manager_output = await self.agents["manager"].parse_to_json(preprocessing_input)
            self._record_step("manager_output", "manager_output.md", "Manager output", manager_output)

            # --- Step 1: Use Prompt Agent to refine the requirements ---
            print("[Pipeline] Refining requirements using PromptAgent...")
            prompt_output = await self.agents["prompt"].generate_json(manager_output)
            self._record_step("prompt_output", "prompt_output.md", "Prompt output", prompt_output)

            # --- Step 2: Query CSV embedder for context ---
            print("[Pipeline] Querying embedded CSV data for context...")
            memory_results = await self.csv_embedder.query_collection(preprocessing_input, n_results=5)
            # One document per line, rather than the repr of the nested result list.
            memory_context = "\n".join(self._flatten_documents(memory_results))
            self._record_step("csv_context", "csv_context.md", "CSV context", memory_context)

            # --- Step 3: Data Preprocessing via AutoMLAgent ---
            # Combine the refined requirements with CSV context.
            combined_preprocessing_input = f"{prompt_output}\n\nRelevant dataset context:\n{memory_context}"
            preprocessed_data = await self.agents["automl"].preprocess_data(combined_preprocessing_input)
            self._record_step("preprocessed_data", "preprocessed_data.md", "Preprocessed data", preprocessed_data)

            # --- Step 4: Model Retrieval via ModelAgent ---
            combined_model_request = f"{model_request}\n\nRelevant dataset context:\n{memory_context}"
            model_list = await self.agents["model"].retrieve_models(combined_model_request)
            self._record_step("model_list", "model_list.md", "Model list", model_list)

            # --- Step 5: Model Deployment via OperationsAgent ---
            combined_deployment_details = f"{deployment_details}\n\nRelevant dataset context:\n{memory_context}"
            deployment_output = await self.agents["operations"].deploy_model(combined_deployment_details)
            self._record_step("deployment_output", "deployment_output.md", "Deployment output", deployment_output)

            # --- Aggregate all outputs into a single final result ---
            final_result = {
                "manager_output": manager_output,
                "prompt_output": prompt_output,
                "csv_context": memory_context,
                "preprocessed_data": preprocessed_data,
                "model_list": model_list,
                "deployment_output": deployment_output,
                "aggregated_output": (
                    "Full Pipeline Result:\n\n"
                    "===== Manager Output =====\n" + manager_output + "\n\n" +
                    "===== Prompt Output =====\n" + prompt_output + "\n\n" +
                    "===== CSV Context =====\n" + memory_context + "\n\n" +
                    "===== Preprocessed Data =====\n" + preprocessed_data + "\n\n" +
                    "===== Model List =====\n" + model_list + "\n\n" +
                    "===== Deployment Output =====\n" + deployment_output
                )
            }
            return final_result

        except Exception as e:
            error_message = f"An error occurred in the pipeline: {e}"
            print("[Pipeline]", error_message)
            self.state.update_memory({"error": error_message})
            self.state.persist_memory()
            # Bare raise keeps the original traceback intact.
            raise


In [63]:
# !pip install nest_asyncio


In [64]:
# ============================
# MAIN: Initialization & Execution
# ============================
async def main():
    """
    Build every pipeline component, run the pipeline once, and print the result.

    The heart-disease CSV is embedded into a Chroma collection, five agents
    sharing the module-level GROQ client are created, and the pipeline is run
    on a sample request. The collection is deleted afterwards even when the
    run fails, so a later run does not add the same row ids to a leftover
    collection.
    """
    # Every agent in this demo uses the same hosted model.
    llm_model = "llama-3.3-70b-versatile"

    # --- Define a JSON schema for agents that require it ---
    JSON_SCHEMA = """{
    "task": "string",
    "priority": "string",
    "deadline": "string",
    "resources": [
        {
            "type": "string",
            "quantity": "integer"
        }
    ]
}"""

    # --- Initialize Pipeline State ---
    state = PipelineState(phase="Model Development", output="MyOutput")
    state.make_context()

    # --- Initialize the Embedding Model and CSV Embedder ---
    embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
    csv_embedder = CSVEmbedder(
        collection_name="auto_ml_memory",
        db_path="data/chromadb",
        embedding_model=embedding_model
    )

    try:
        # Embed the CSV so the pipeline can query it for dataset context.
        await csv_embedder.embed_csv("data/heart.csv")

        # --- Instantiate Agents (dependencies injected) ---
        agents = {
            "manager": AgentManager(
                client=groq_client,
                role="manager",
                model=llm_model,
                description="Assistant project manager for parsing user requirements.",
                json_schema=JSON_SCHEMA,
                stream=False
            ),
            "prompt": PromptAgent(
                client=groq_client,
                role="prompt_parser",
                model=llm_model,
                description="Assistant project manager for JSON parsing.",
                json_specification=JSON_SCHEMA,
                stream=False
            ),
            "automl": AutoMLAgent(
                client=groq_client,
                role="data_scientist",
                model=llm_model,
                description="Automated ML agent for dataset preprocessing and augmentation.",
                data_path="data",
                stream=False
            ),
            "model": ModelAgent(
                client=groq_client,
                role="ml_researcher",
                model=llm_model,
                description="ML research agent for model optimization and profiling.",
                stream=False
            ),
            "operations": OperationsAgent(
                client=groq_client,
                role="mlops",
                model=llm_model,
                description="MLOps agent for model deployment.",
                stream=False
            )
        }

        # --- Initialize and run the Pipeline Agent ---
        pipeline = PipelineAgent(agents=agents, state=state, csv_embedder=csv_embedder, dataset_dir="data")

        # --- Define sample inputs ---
        # Adjacent literals are concatenated, so each piece ends with a space.
        preprocessing_input = (
            "I have uploaded the dataset obtained from UCI Machine learning, "
            "which relates to detect the heart disease of patients based on various features of the patients. "
            "Develop a model with at least 90 percent accuracy. "
        )
        model_request = "Find the top 3 models for classifying this dataset."
        deployment_details = "Deploy the selected model as a web application."

        # --- Run the pipeline ---
        results = await pipeline.run_pipeline(preprocessing_input, model_request, deployment_details)
        print("Pipeline execution completed. Results:")
        print(json.dumps(results, indent=4))
    finally:
        # Always drop the collection, including after a failed run.
        await csv_embedder.delete_collection()


# if __name__ == "__main__":
#     asyncio.run(main())

In [65]:
import nest_asyncio
import asyncio

# Must run before asyncio.run() below. Presumably this is needed because the
# notebook kernel already has a running event loop — confirm against the
# nest_asyncio documentation.
nest_asyncio.apply()

# Run the main coroutine to completion; its progress is printed as cell output.
asyncio.run(main())


Embedding CSV rows: 100%|██████████| 1025/1025 [00:12<00:00, 82.12it/s]


[CSVEmbedder] Finished embedding CSV: data/heart.csv
[Pipeline] Parsing user requirements using AgentManager...
[Pipeline] Manager output saved to: data/manager_output.md
[State] Updating memory for agent 'Agent Manager' (Phase: Model Development).
[State] Memory persisted to output/MyOutput/Model_Development/memory.json
[Pipeline] Refining requirements using PromptAgent...
[Pipeline] Prompt output saved to: data/prompt_output.md
[State] Updating memory for agent 'Prompt Agent' (Phase: Model Development).
[State] Memory persisted to output/MyOutput/Model_Development/memory.json
[Pipeline] Querying embedded CSV data for context...
[Pipeline] CSV context saved to: data/csv_context.md
[State] Updating memory for agent 'Data Agent' (Phase: Model Development).
[State] Memory persisted to output/MyOutput/Model_Development/memory.json
[Pipeline] Preprocessed data saved to: data/preprocessed_data.md
[State] Updating memory for agent 'Model Agent' (Phase: Model Development).
[State] Memory pers