In [1]:
import pandas as pd
import re
import yaml
import sqlparse
import os
import pandas as pd
import numpy as np
import requests
from IPython.display import display, Markdown

from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

In [2]:
def add_repo_root_path():
    """Make the parent of the current working directory importable.

    Resolves ``..`` against ``os.getcwd()`` to an absolute path and appends
    it to ``sys.path`` unless that exact entry is already present.
    Returns ``None``.
    """
    import os
    import sys

    parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
    # Guard clause: calling this repeatedly must not add duplicate entries.
    if parent_dir in sys.path:
        return
    sys.path.append(parent_dir)
        
# The parent directory must be on sys.path before the `src` package can be
# imported, so the helper is called first and the imports follow it.
add_repo_root_path()
from src import generate_knowledge
from src import create_rag_db
from src import llm_chain_tools

In [3]:
# Notebook-wide pandas display limits: up to 100 rows and 50 columns per frame.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 50
# Optional tweaks, left disabled:
#pd.set_option('display.width', None)
#pd.set_option('display.max_colwidth', 10)

### INIT

In [4]:
# Called before importing `openai_setup`.
# NOTE(review): presumably this extends sys.path so that `openai_setup` can be
# found — confirm against src/generate_knowledge.
generate_knowledge.add_repo_root_path()
import openai_setup

# Credentials and identifiers are read from the `openai_setup.conf` mapping
# rather than being hard-coded in the notebook.
OPENAI_API_KEY = openai_setup.conf['key']
# NOTE(review): OPENAI_PROJECT is not referenced by any other visible cell.
OPENAI_PROJECT = openai_setup.conf['project']
OPENAI_ORGANIZATION = openai_setup.conf['organization']
DEFAULT_LLM_MODEL = "gpt-4o-mini"

# `os` is already imported in the first cell; this re-import is redundant but harmless.
import os
# Export the key and default model name as environment variables.
# NOTE(review): presumably read by libraries that are not given the key
# explicitly (e.g. crewai) — confirm.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ['OPENAI_MODEL_NAME'] = DEFAULT_LLM_MODEL


In [5]:
# Embedding client (model: text-embedding-ada-002), authenticated with the key loaded above.
langchain_openai_embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_API_KEY,
    model="text-embedding-ada-002",
)

# Chat model client: default model, low temperature (0.1), explicit key and organization.
langchain_openai_llm = ChatOpenAI(
    model=DEFAULT_LLM_MODEL,
    temperature=0.1,
    openai_api_key=OPENAI_API_KEY,
    openai_organization=OPENAI_ORGANIZATION,
)

### TESTS

#### Agents

In [16]:
# Locations of the YAML definitions, keyed by what each file configures.
files = {
    'agents': '../config/agents.yml',
    'tasks': '../config/tasks.yml'
}

# Parse each YAML file into a plain Python structure, keyed like `files`.
configs = {}
for config_type, file_path in files.items():
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # locale default (which would otherwise be used by open()).
    with open(file_path, 'r', encoding='utf-8') as file:
        # safe_load only builds standard Python objects from the YAML content.
        configs[config_type] = yaml.safe_load(file)

# Expose each parsed document under its own name for the cells below.
agents_config = configs['agents']
tasks_config = configs['tasks']

# Echo the parsed configuration so the loaded values are visible in the output.
print(agents_config)
print(tasks_config)

{'interpretation_agent': {'role': 'Request Interpreter\n', 'goal': 'Interpret user requests related to dbt projects and translate them into actionable decisions. Use expertise in dbt, data modeling, and analytics engineering to determine the type of action required.\n', 'backstory': "You specialize in analyzing requests to identify whether the action involves adding a field, modifying an existing model, or retrieving specific information. Your goal is to provide concise and actionable outputs tailored to the user's needs.\n", 'verbose': True, 'allow_delegation': False}, 'evaluation_agent': {'role': 'Evaluation Specialist\n', 'goal': 'Evaluate user requests related to dbt projects and provide concise, actionable insights and steps required to address the request. Leverage expertise in data modeling, dbt project structure, and dependency analysis to ensure accurate evaluations.\n', 'backstory': 'You specialize in analyzing interpreted requests and breaking them down into specific, action

In [7]:
from crewai import Agent, Task, Crew

In [None]:

# Agents: one per stage, each configured from its entry in agents_config.
interpretation_agent = Agent(config=agents_config['interpretation_agent'])
evaluation_agent = Agent(config=agents_config['evaluation_agent'])
lineage_agent = Agent(config=agents_config['lineage_agent'])
plan_agent = Agent(config=agents_config['plan_agent'])

# Tasks: each configured from tasks_config and bound to the agent of the same stage.
interpretation_task = Task(config=tasks_config['interpretation_task'], agent=interpretation_agent)
evaluation_task = Task(config=tasks_config['evaluation_task'], agent=evaluation_agent)
lineage_task = Task(config=tasks_config['lineage_task'], agent=lineage_agent)
plan_task = Task(config=tasks_config['plan_task'], agent=plan_agent)

In [9]:
# Only the interpretation and evaluation stages are wired into this crew;
# the lineage and plan agents/tasks defined above are not part of it.
crew = Crew(
    agents=[interpretation_agent, evaluation_agent],
    tasks=[interpretation_task, evaluation_task],
    verbose=True,
)

In [10]:
# Free-text request handed to the crew under the 'request' input key.
user_input = 'Give me all the information about the models related with customers'
inputs = {'request': user_input}

# Execute the crew with those inputs and keep its output for inspection.
result = crew.kickoff(inputs=inputs)

[1m[95m# Agent:[00m [1m[92mRequest Interpreter[00m
[95m## Task:[00m [92mEvaluate the user's request: Give me all the information about the models related with customers and determine the required action: - Adding a field: Identify where the field is currently available (if provided),
  determine how to propagate it through necessary models or transformations,
  and assess the impact on related models and dependencies.
- Modifying an existing model: Identify specific changes needed, evaluate the
  impact on structure, relationships, and downstream dependencies.
- Retrieving specific information: Identify models containing relevant data,
  analyze relationships, and determine queries or transformations needed.

Reflect on the request to generate a concise plan for the approach and provide a clear summary of the required action and its implications.
[00m


[1m[95m# Agent:[00m [1m[92mRequest Interpreter[00m
[95m## Final Answer:[00m [92m
RETRIEVE_INFO - The work required 

#### Flows

In [11]:
# NOTE(review): presumably needed so the flow started below can run inside the
# notebook's already-running event loop — confirm before removing.
import nest_asyncio
nest_asyncio.apply()

In [12]:
from crewai import Flow
from crewai.flow.flow import listen, start

class dbtChatFlow(Flow):
    """Two-step flow: interpret the user's prompt, then evaluate the outcome.

    State keys:
        user_input: read by ``interpret_prompt``; supplied by the caller via
            ``kickoff(inputs={"user_input": ...})``.
        interpretation_result: written by ``interpret_prompt``.
        evaluation_result: written by ``evaluate_interpretation``.

    Depends on the module-level ``crew`` defined earlier in the notebook.
    """

    @start()
    def interpret_prompt(self):
        """Run the module-level ``crew`` on the prompt stored in the flow state.

        Returns:
            The value returned by ``crew.kickoff``; it is also stored in the
            state under ``"interpretation_result"``.

        Raises:
            KeyError: if ``"user_input"`` was not provided to the flow.
        """
        user_prompt = self.state["user_input"]
        print(user_prompt)
        interpretation_result = crew.kickoff(inputs={'request': user_prompt})
        self.state["interpretation_result"] = interpretation_result
        return interpretation_result

    # Listen on the upstream step itself. The previous condition was a lambda
    # over the state, which is not a step of this flow, so this listener was
    # never triggered.
    @listen(interpret_prompt)
    def evaluate_interpretation(self):
        """Run the evaluation agent on the stored interpretation result.

        The second agent/task pair of the module-level ``crew`` is executed
        through a dedicated single-task ``Crew``, the same way agents are
        driven everywhere else in this notebook.

        NOTE(review): ``crew`` as defined in this notebook already contains
        the evaluation task, so this step evaluates that crew's final output
        a second time — confirm that is intended.

        Returns:
            The value returned by the evaluation crew's ``kickoff``; it is
            also stored in the state under ``"evaluation_result"``.
        """
        interpretation_result = self.state.get("interpretation_result")
        evaluation_crew = Crew(
            agents=[crew.agents[1]],
            tasks=[crew.tasks[1]],
            verbose=True,
        )
        # Inputs are interpolated into the task text, so pass the result as a string.
        evaluation_result = evaluation_crew.kickoff(
            inputs={"request": str(interpretation_result)}
        )
        self.state["evaluation_result"] = evaluation_result
        return evaluation_result

# Create the flow instance; nothing runs until flow.kickoff(...) is called.
flow = dbtChatFlow()
# Left disabled.
#flow.plot()

In [13]:
# Same request text as the direct crew run earlier in the notebook.
user_input = 'Give me all the information about the models related with customers'
# The inputs provide the "user_input" state key that interpret_prompt reads.
# Kept as the last expression so the returned value is displayed as cell output.
flow.kickoff(inputs={"user_input": user_input})

Give me all the information about the models related with customers
[1m[95m# Agent:[00m [1m[92mRequest Interpreter[00m
[95m## Task:[00m [92mEvaluate the user's request: Give me all the information about the models related with customers and determine the required action: - Adding a field: Identify where the field is currently available (if provided),
  determine how to propagate it through necessary models or transformations,
  and assess the impact on related models and dependencies.
- Modifying an existing model: Identify specific changes needed, evaluate the
  impact on structure, relationships, and downstream dependencies.
- Retrieving specific information: Identify models containing relevant data,
  analyze relationships, and determine queries or transformations needed.

Reflect on the request to generate a concise plan for the approach and provide a clear summary of the required action and its implications.
[00m


[1m[95m# Agent:[00m [1m[92mRequest Interpreter[00m

CrewOutput(raw="To fulfill the user request for all information about models related to customers, follow these high-level actionable steps:\n\n1. **Identify Target Models or Files**: \n   - Locate and list all dbt models that contain customer-related data. This could include models directly named 'customers' as well as any associated models such as 'customer_orders', 'customer_segments', or 'customer_addresses'.\n\n2. **Analyze Dependencies and Relationships**: \n   - Examine upstream and downstream dependencies for each identified model using dbt's lineage graph. Document which models feed into customer-related models and which models derive data from them.\n\n3. **Field Existence Evaluation**: \n   - Check the schema of each customer-related model to ensure that all necessary fields (e.g., customer ID, name, contact information) are present and correctly defined.\n\n4. **Assess Documentation Needs**: \n   - Review existing documentation for the identified models. Identify gaps in do

### CREATE AGENT CHAIN

#### Example

In [None]:
from crewai import Flow
from crewai.flow.flow import listen, start, and_, or_, router

class SalesPipeline(Flow):
  """Example flow: fetch leads, score them, filter, then route by lead count.

  NOTE(review): ``lead_scoring_crew`` and ``email_writing_crew`` are not
  defined in any visible cell of this notebook; they must exist before this
  flow is run.
  """
    
  @start()
  def fetch_leads(self):
    """Entry step: return a hard-coded list containing one mock lead record."""
    # Pull our leads from the database
    # This is a mock, in a real-world scenario, this is where you would
    # fetch leads from a database
    leads = [
      {
        "lead_data": {
          "name": "João Moura",
          "job_title": "Director of Engineering",
          "company": "Clearbit",
          "email": "joao@clearbit.com",
          "use_case": "Using AI Agent to do better data enrichment."
        },
      },
    ]
    return leads

  @listen(fetch_leads)
  def score_leads(self, leads):
    """Score the leads with ``lead_scoring_crew.kickoff_for_each``.

    The scores are stored in the state under ``"score_crews_results"`` and
    also returned.
    """
    scores = lead_scoring_crew.kickoff_for_each(leads)
    self.state["score_crews_results"] = scores
    return scores

  @listen(score_leads)
  def store_leads_score(self, scores):
    """Placeholder for persisting scores; currently returns them unchanged."""
    # Here we would store the scores in the database
    return scores

  @listen(score_leads)
  def filter_leads(self, scores):
    """Keep only entries whose ``score['lead_score'].score`` is above 70."""
    return [score for score in scores if score['lead_score'].score > 70]

  @listen(and_(filter_leads, store_leads_score))
  def log_leads(self, leads):
    """Print the leads; listens on ``and_(filter_leads, store_leads_score)``."""
    print(f"Leads: {leads}")

  # NOTE(review): confirm that the installed crewai version's `router`
  # decorator accepts the `paths=` keyword.
  @router(filter_leads, paths=["high", "medium", "low"])
  def count_leads(self, scores):
    """Return a route label based on how many entries were received.

    More than 10 entries -> 'high'; more than 5 -> 'medium'; otherwise 'low'.
    """
    if len(scores) > 10:
      return 'high'
    elif len(scores) > 5:
      return 'medium'
    else:
      return 'low'

  @listen('high')
  def store_in_salesforce(self, leads):
    """Listener for the 'high' route; currently returns the leads unchanged."""
    return leads

  @listen('medium')
  def send_to_sales_team(self, leads):
    """Listener for the 'medium' route; currently returns the leads unchanged."""
    return leads

  @listen('low')
  def write_email(self, leads):
    """Listener for the 'low' route: draft emails for the leads.

    Each lead is converted with ``to_dict()`` and the resulting list is
    passed to ``email_writing_crew.kickoff_for_each``.
    """
    scored_leads = [lead.to_dict() for lead in leads]
    emails = email_writing_crew.kickoff_for_each(scored_leads)
    return emails

  @listen(write_email)
  def send_email(self, emails):
    """Placeholder for sending the emails; currently returns them unchanged."""
    # Here we would send the emails to the leads
    return emails