In [4]:
import os
from groq import Groq
import sys
from dotenv import load_dotenv

sys.path.insert(1, "source")

# dotenv_path = os.path.join(os.path.dirname(__file__), ".env")
# load_dotenv(dotenv_path)

from prompts.agent_prompts import (
    agent_manager_prompt,
    data_agent_prompt,
    model_agent_prompt,
    prompt_agent,
    operation_agent_prompt,
)


In [8]:
# Read the Groq credential from the environment once and reuse it for the client.
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)
# Do not print or display `api_key` in a cell: notebook outputs are saved with the file.

In [15]:
#########################
# 1. AGENT BASE CLASSES #
#########################

class AgentBase:
    """Common base for every LLM-backed agent in this notebook.

    Holds the agent's identity (``role``, ``model``, ``description``) plus any
    extra keyword arguments, which are forwarded unchanged on every chat
    completion request.
    """

    def __init__(self, role, model, description, **kwargs):
        """Store the agent's identity and its extra request options."""
        self.role, self.model, self.description = role, model, description
        # Extra request options (e.g. ``stream``) replayed on each `execute` call.
        self.kwargs = kwargs

    def execute(self, messages):
        """Send ``messages`` through the module-level ``client``.

        Args:
            messages: Chat messages passed as the ``messages`` argument.

        Returns:
            Whatever ``client.chat.completions.create`` returns, unmodified.
        """
        create_completion = client.chat.completions.create
        return create_completion(messages=messages, model=self.model, **self.kwargs)

In [16]:
# ----------------------------
# Manager Agent (inherits from AgentBase)
# ----------------------------
class AgentManager(AgentBase):
    """Agent that asks the model to express user input as JSON for a given schema."""

    def __init__(self, role, model, description, json_schema, **kwargs):
        """Store ``json_schema`` (text embedded in the system prompt) on top of the base fields."""
        super().__init__(role, model, description, **kwargs)
        self.json_schema = json_schema

    def parse_to_json(self, user_input):
        """Return the model's reply to ``user_input`` under the manager prompt.

        The system message is ``agent_manager_prompt`` (stripped) followed by the
        schema text. The return value is the content of the first completion
        choice; it is not parsed or validated here.
        """
        system_prompt = f"""
{agent_manager_prompt.strip()}

# JSON SPECIFICATION SCHEMA #
{self.json_schema}
"""
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ]
        completion = self.execute(conversation)
        return completion.choices[0].message.content


In [17]:

# ----------------------------
# Prompt Agent (inherits from AgentBase)
# ----------------------------
class PromptAgent(AgentBase):
    """Agent that asks the model for JSON conforming to a stored specification."""

    def __init__(self, role, model, description, json_specification, **kwargs):
        """Store ``json_specification`` (text embedded in the system prompt) on top of the base fields."""
        super().__init__(role, model, description, **kwargs)
        self.json_specification = json_specification

    def generate_json(self, user_input):
        """Return the model's reply to ``user_input`` under the prompt-agent prompt.

        The system message is ``prompt_agent`` (stripped) followed by the
        specification wrapped in ``'''json`` / ``'''`` delimiters. The return
        value is the content of the first completion choice; it is not parsed
        or validated here.
        """
        system_prompt = f"""
{prompt_agent.strip()}

# JSON SPECIFICATION SCHEMA #
'''json
{self.json_specification}
'''
"""
        conversation = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ]
        completion = self.execute(conversation)
        return completion.choices[0].message.content



In [18]:
# ----------------------------
# AutoML Agent (inherits from AgentBase)
# ----------------------------
class AutoMLAgent(AgentBase):
    """Data-focused agent: dataset retrieval, preprocessing, augmentation, visualization.

    Every method sends ``data_agent_prompt`` (stripped) as the system message
    and works with the text content of the first completion choice.
    """

    def __init__(self, role, model, description, data_path="./data", **kwargs):
        """Store ``data_path``, the directory used by `retrieve_dataset`."""
        super().__init__(role, model, description, **kwargs)
        self.data_path = data_path

    def _ask(self, user_content):
        """Run one system+user exchange and return the reply text."""
        messages = [
            {"role": "system", "content": data_agent_prompt.strip()},
            {"role": "user", "content": user_content},
        ]
        response = self.execute(messages)
        return response.choices[0].message.content

    def retrieve_dataset(self, query):
        """Return the path of the dataset file, creating a placeholder if absent.

        If ``renttherunway_cleaned.csv`` already exists under ``data_path`` it is
        returned untouched: the previous implementation always overwrote it with
        the model's chat reply, which destroyed any real dataset stored there.
        Otherwise the model's reply to ``query`` is written to that path
        (placeholder implementation) and the path is returned.

        Args:
            query: User message describing the dataset to retrieve.

        Returns:
            Path to the dataset file inside ``data_path``.
        """
        dataset_path = os.path.join(self.data_path, "renttherunway_cleaned.csv")
        if os.path.exists(dataset_path):
            # Never clobber an existing dataset with LLM-generated text.
            return dataset_path

        content = self._ask(query)
        if self.data_path:
            # The target directory may not exist yet on a fresh checkout.
            os.makedirs(self.data_path, exist_ok=True)
        # Explicit UTF-8: model output can contain characters that the platform
        # default encoding cannot represent.
        with open(dataset_path, "w", encoding="utf-8") as file:
            file.write(content)
        return dataset_path

    def preprocess_data(self, instructions):
        """Return the model's reply to ``"Instructions: <instructions>"``."""
        return self._ask(f"Instructions: {instructions}")

    def augment_data(self, augmentation_details):
        """Return the model's reply to ``"Augmentation Details: <augmentation_details>"``."""
        return self._ask(f"Augmentation Details: {augmentation_details}")

    def visualize_data(self, visualization_request):
        """Return the model's reply to ``visualization_request`` sent as-is."""
        return self._ask(visualization_request)



In [19]:

# ----------------------------
# Model Agent (inherits from AgentBase)
# ----------------------------
class ModelAgent(AgentBase):
    """Model-focused agent: candidate retrieval, hyperparameter optimization, profiling.

    Each public method sends ``model_agent_prompt`` (stripped) as the system
    message, the caller's text as the user message, and returns the content of
    the first completion choice.
    """

    def __init__(self, role, model, description, **kwargs):
        """Forward everything to `AgentBase`; no extra state is kept."""
        super().__init__(role, model, description, **kwargs)

    def _ask(self, user_content):
        """Run one system+user exchange and return the reply text."""
        conversation = [
            {"role": "system", "content": model_agent_prompt.strip()},
            {"role": "user", "content": user_content},
        ]
        completion = self.execute(conversation)
        return completion.choices[0].message.content

    def retrieve_models(self, dataset_details):
        """Return the model's reply to ``dataset_details``."""
        return self._ask(dataset_details)

    def optimize_model(self, hyperparameter_details):
        """Return the model's reply to ``hyperparameter_details``."""
        return self._ask(hyperparameter_details)

    def profile_models(self, profiling_details):
        """Return the model's reply to ``profiling_details``."""
        return self._ask(profiling_details)



In [20]:
# ----------------------------
# Operations Agent (inherits from AgentBase)
# ----------------------------
class OperationsAgent(AgentBase):
    """Operations-focused agent that queries the model about deployment."""

    def __init__(self, role, model, description, **kwargs):
        """Forward everything to `AgentBase`; no extra state is kept."""
        super().__init__(role, model, description, **kwargs)

    def deploy_model(self, deployment_details):
        """Return the model's reply to ``deployment_details``.

        ``operation_agent_prompt`` (stripped) is the system message. The result
        is the text content of the first completion choice; nothing is deployed
        by this method itself.
        """
        system_message = {"role": "system", "content": operation_agent_prompt.strip()}
        user_message = {"role": "user", "content": deployment_details}
        completion = self.execute([system_message, user_message])
        return completion.choices[0].message.content


In [21]:
#################################
# 2. AGENT INSTANTIATION SETUP  #
#################################

# Schema text embedded verbatim into the manager and prompt-parser system prompts.
# NOTE(review): the literal begins with a bare `json` line, so that token is sent
# to the model as part of the schema text — confirm this is intended.
JSON_SCHEMA = """json
{
    "task": "string",
    "priority": "string",
    "deadline": "string",
    "resources": [
        {
            "type": "string",
            "quantity": "integer"
        }
    ]
}
"""

# Every agent uses the same model and the same non-streaming request option.
_LLM_DEFAULTS = {"model": "llama-3.3-70b-versatile", "stream": False}

manager_agent = AgentManager(
    role="manager",
    description="Assistant project manager for parsing user requirements into JSON.",
    json_schema=JSON_SCHEMA,
    **_LLM_DEFAULTS,
)

prompt_parser_agent = PromptAgent(
    role="prompt_parser",
    description="Assistant project manager for JSON parsing.",
    json_specification=JSON_SCHEMA,
    **_LLM_DEFAULTS,
)

automl_agent = AutoMLAgent(
    role="data_scientist",
    description="Automated machine learning agent for dataset retrieval, preprocessing, augmentation, and visualization.",
    data_path="data",
    **_LLM_DEFAULTS,
)

model_agent = ModelAgent(
    role="ml_researcher",
    description="Machine learning research agent for model optimization and profiling.",
    **_LLM_DEFAULTS,
)

operations_agent = OperationsAgent(
    role="mlops",
    description="MLOps agent for deployment and application development.",
    **_LLM_DEFAULTS,
)

# Registry of all agents, keyed by the short names the pipeline looks up.
agents = dict(
    manager=manager_agent,
    prompt=prompt_parser_agent,
    automl=automl_agent,
    model=model_agent,
    operations=operations_agent,
)

In [25]:
from source.state import State
from source.memory import CSVEmbeddingManager

In [22]:

#################################
# 3. PIPELINE AGENT DEFINITION  #
#################################

class PipelineAgent(AgentBase):
    """
    Orchestrates the data-to-deployment pipeline by calling the agents in sequence.

    The pipeline looks up the ``"automl"``, ``"model"`` and ``"operations"``
    entries of ``agents``, writes each agent's text output to a Markdown file in
    ``dataset_dir``, and, when a ``state`` object is supplied, records progress
    on it between steps.
    """

    def __init__(self, agents, state: "State | None" = None,
                 memory_manager: "CSVEmbeddingManager | None" = None,
                 dataset_dir="data", **kwargs):
        """
        Args:
            agents: Mapping of agent name to agent instance.
            state: Optional state tracker. When given, `run_pipeline` calls its
                ``make_dir``, ``update_memory``, ``persist_memory``,
                ``generate_rules`` and ``next_step`` methods. When ``None``
                (the default) all state bookkeeping is skipped, so the pipeline
                can be built with just ``agents`` and ``dataset_dir``.
            memory_manager: Optional embedding manager. It is stored but not
                used by `run_pipeline` yet.
            dataset_dir: Directory that receives the generated Markdown files.
            **kwargs: Forwarded to `AgentBase`.
        """
        # `run_pipeline` never calls `self.execute`, hence the placeholder model name.
        super().__init__(role="pipeline", model="n/a", description="Pipeline to orchestrate all agents", **kwargs)
        self.agents = agents
        self.state = state
        self.memory_manager = memory_manager
        self.dataset_dir = dataset_dir

    def _save_output(self, filename, content):
        """Write ``content`` to ``dataset_dir/filename`` as UTF-8 and return the path."""
        path = os.path.join(self.dataset_dir, filename)
        # Explicit UTF-8: model output can contain characters that the platform
        # default encoding cannot represent.
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return path

    def run_pipeline(self, preprocessing_input, model_request, deployment_details):
        """
        Executes a full pipeline:
          1. Preprocess data.
          2. Retrieve candidate models.
          3. Deploy the selected model.

        Args:
            preprocessing_input: Text passed to the AutoML agent's ``preprocess_data``.
            model_request: Text passed to the Model agent's ``retrieve_models``.
            deployment_details: Text passed to the Operations agent's ``deploy_model``.

        Returns:
            dict with the keys ``"preprocessed_data"``, ``"model_list"`` and
            ``"deployment_output"``, each holding the corresponding agent reply.
        """
        has_state = self.state is not None

        os.makedirs(self.dataset_dir, exist_ok=True)
        if has_state:
            self.state.make_dir()

        # 1. Preprocess the dataset using the AutoML agent
        preprocessed_data = self.agents["automl"].preprocess_data(preprocessing_input)
        preprocessed_path = self._save_output("preprocessed_data.md", preprocessed_data)
        print(f"Preprocessed data saved to: {preprocessed_path}")

        if has_state:
            # Record the preprocessing result and save memory to disk.
            self.state.update_memory({"preprocessing": preprocessed_data})
            self.state.persist_memory()
            # TODO: update the CSV embedding via `memory_manager` once
            # preprocessing produces a CSV file.

            # Write the current agent rules, then advance to the next step.
            rules = self.state.generate_rules()
            print("[Pipeline] Current agent rules saved:\n", rules)
            self.state.next_step()

        # 2. Retrieve candidate models using the Model agent
        model_list = self.agents["model"].retrieve_models(model_request)
        model_list_path = self._save_output("model_list.md", model_list)
        print(f"Model list saved to: {model_list_path}")

        if has_state:
            # Record the model retrieval result and advance.
            self.state.update_memory({"model_list": model_list})
            self.state.persist_memory()
            self.state.next_step()

        # 3. Deploy the model using the Operations agent
        deployment_output = self.agents["operations"].deploy_model(deployment_details)
        deployment_output_path = self._save_output("deployment_output.md", deployment_output)
        print(f"Deployment output saved to: {deployment_output_path}")

        return {
            "preprocessed_data": preprocessed_data,
            "model_list": model_list,
            "deployment_output": deployment_output
        }


In [24]:
# Sample inputs, one per pipeline stage.
preprocessing_input = (
    "I have uploaded the dataset obtained from Rent the Runway, "
    "which relates to fit fiber clothing for women. Develop a model with at least 90 percent F1 score. "
    "The target variable is fit."
)
model_request = "Find the top 3 models for classifying this dataset."
deployment_details = "Deploy the selected model as a web application."

# Build the orchestrator from the agent registry and the output directory.
# NOTE(review): no `state` / `memory_manager` is passed here, so this call only
# succeeds if PipelineAgent.__init__ treats both as optional — verify against the
# class definition before a Restart & Run All.
pipeline = PipelineAgent(agents=agents, dataset_dir="data")

# Run all three stages and keep the per-stage outputs.
results = pipeline.run_pipeline(
    preprocessing_input=preprocessing_input,
    model_request=model_request,
    deployment_details=deployment_details,
)

# Dump the collected outputs for inspection.
print("Pipeline execution completed. Results:")
print(results)

Preprocessed data saved to: data/preprocessed_data.md
Model list saved to: data/model_list.md
Deployment output saved to: data/deployment_output.md
Pipeline execution completed. Results:
