In [1]:
import sys
import os

# Make the parent directory importable so that project packages living next to
# this notebook's folder (e.g. `embed_docsite`) can be imported below.
# NOTE(review): "../" is resolved against the kernel's current working
# directory, so this only works when the notebook is launched from its own folder.
parent_dir = os.path.abspath("../")
print(parent_dir)
# Guard the append so re-running this cell does not stack duplicate entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)


/Users/hanna/openfn/ai_experiments/apollo/services


In [2]:
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
import os
import json
from dotenv import load_dotenv
import anthropic

# Load variables from a local .env file into the process environment, then
# build the shared Anthropic client used by every LLM call in this notebook.
# The key is read from the environment and never hard-coded; it is None when
# ANTHROPIC_API_KEY is not set.
load_dotenv()
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

In [6]:
from embed_docsite.github_utils import get_docs
import re


In [7]:
# Fetch the adaptor documentation through the project's GitHub helper.
# Later cells index the result as docs[0]["docs"], i.e. a sequence of dicts
# carrying at least a "docs" field (the logged sample also shows a "name" field).
# NOTE(review): this performs network access on every run — consider loading
# a cached copy instead when iterating on the cells below.
docs = get_docs("adaptor_docs")

INFO:GitHubUtils:Fetched 59 URLs from GitHub for https://api.github.com/repos/OpenFn/docs/contents/adaptors
INFO:GitHubUtils:Downloaded and processed 59 files from GitHub
INFO:GitHubUtils:{'name': 'asana.md', 'docs': '---\ntitle: Asana Adaptor\n---\n\n## About Asana\n\n[Asana](https://app.asana.com/) is a web-based project management tool that helps teams organize, plan, collaborate, and execute tasks. \n\n## Integration Options\n\nAsana supports 2 primary integration options:\n\n1. Rest API: Asana has an available REST API that enables external services like OpenFn to pull data from Asana, or push data from external apps to Asana. This option is suited for scheduled, bulk syncs or workflows that must update data in Asana with external information. See [functions](/adaptors/packages/asana-docs) for more on how to use this adaptor to work with the API.\n\n2. Webhook: Asana also has a [Webhook or Data Forwarding](https://developers.asana.com/docs/webhooks-guide) to push data from Asana t

In [11]:
import pickle

# Cache the fetched adaptor docs on disk so they can be reloaded without
# hitting GitHub again.
# NOTE(review): the target is an absolute, user-specific path; it will fail on
# any other machine — consider a configurable data directory.
with open('/Users/hanna/openfn/ai_experiments/data/adaptor_docs.pkl', mode='wb') as file:
    pickle.dump(docs, file)

In [13]:
# Smoke-test the two text helpers on the first fetched document: strip HTML,
# then split into header/code-block chunks, and display the resulting list.
# NOTE(review): clean_html and split_by_headers are defined in a cell that
# appears *after* this one, so a fresh "Run All" fails here with NameError.
d = split_by_headers(clean_html(docs[0]["docs"]))
d

['---\ntitle: Asana Adaptor\n---',
 '## About Asana\n\n[Asana](https://app.asana.com/) is a web-based project management tool that helps teams organize, plan, collaborate, and execute tasks.',
 '## Integration Options\n\nAsana supports 2 primary integration options:\n\n1. Rest API: Asana has an available REST API that enables external services like OpenFn to pull data from Asana, or push data from external apps to Asana. This option is suited for scheduled, bulk syncs or workflows that must update data in Asana with external information. See [functions](/adaptors/packages/asana-docs) for more on how to use this adaptor to work with the API.\n\n2. Webhook: Asana also has a [Webhook or Data Forwarding](https://developers.asana.com/docs/webhooks-guide) to push data from Asana to external systems. This option is suited for real-time, event-based data integration. Check out the Asana [developer documentation](/adaptors/packages/asana-docs) to learn how to set up a webhook to push data to Op

In [12]:

def clean_html(text):
    """Remove HTML tags while preserving essential formatting.

    Conversions applied, in order:
      * ``<p>`` / ``</p>``           -> newline
      * ``<code>`` / ``</code>``     -> backtick
      * ``<strong>`` / ``</strong>`` -> ``**``
      * any other ``<...>`` span     -> removed

    The three formatting rules match tag names case-insensitively and accept
    attributes (``<p class="x">``, ``<CODE>``). Previously such tags fell
    through to the catch-all rule and their formatting was silently lost.

    Args:
        text: String that may contain HTML markup.

    Returns:
        The converted text with leading/trailing whitespace stripped.
    """
    # Optional attribute section: whitespace followed by anything up to '>'.
    # Requiring whitespace after the name keeps `<p...>` from matching `<pre>`.
    attrs = r'(?:\s[^>]*)?'
    text = re.sub(r'</?p' + attrs + r'>', '\n', text, flags=re.IGNORECASE)  # paragraphs -> newlines
    text = re.sub(r'</?code' + attrs + r'>', '`', text, flags=re.IGNORECASE)  # inline code -> backticks
    text = re.sub(r'</?strong' + attrs + r'>', '**', text, flags=re.IGNORECASE)  # bold -> **
    # Catch-all: drops every remaining `<...>` span. Note this also removes
    # non-HTML angle-bracket text such as Markdown autolinks (`<https://...>`).
    text = re.sub(r'<[^>]+>', '', text)

    return text.strip()

def split_by_headers(text):
    """Split text into chunks based on Markdown headers and fenced code blocks.

    A new chunk starts at every header line (one or more ``#`` followed by
    whitespace) and at the opening fence of every fenced code block. Text that
    follows a code block stays in that block's chunk until the next boundary.

    Lines inside a fenced block are never treated as boundaries. The previous
    lookahead regex split on ``# comment`` lines inside code and mistook a
    closing fence for an opening one whenever another fence followed, which
    tore code blocks apart.

    Args:
        text: Markdown text.

    Returns:
        List of non-empty, whitespace-stripped chunks in document order.
    """
    lines = text.split('\n')
    last_index = len(lines) - 1

    # Pair fence lines as (open, close). An unmatched trailing fence is treated
    # as ordinary text, mirroring the original pattern, which required a closing
    # fence before it would split.
    fence_lines = [i for i, line in enumerate(lines) if line.startswith('```')]
    closing_for = dict(zip(fence_lines[0::2], fence_lines[1::2]))

    chunks = []
    current = []
    fence_end = -1  # index of the closing fence of the block we are inside
    for i, line in enumerate(lines):
        if i > fence_end:  # outside any fenced code block
            if i in closing_for:
                is_boundary = True
                fence_end = closing_for[i]
            else:
                # Re-attach the newline so a bare "#" line followed by another
                # line still counts as a header, as `^#+\s` did under MULTILINE.
                probe = line if i == last_index else line + '\n'
                is_boundary = re.match(r'#+\s', probe) is not None
            if is_boundary and current:
                chunks.append('\n'.join(current))
                current = []
        current.append(line)
    if current:
        chunks.append('\n'.join(current))

    return [chunk.strip() for chunk in chunks if chunk.strip()]

def get_overview(json_data):
    """Clean the ``docs`` text of each entry and record the entry by name.

    For every element of ``json_data`` that is a dict with both ``"docs"`` and
    ``"name"`` keys, the docs value is JSON-decoded when it parses as JSON,
    passed through ``clean_html``, written back into the entry (the input dict
    is mutated in place) and the entry is stored in ``metadata_dict`` under its
    name. Elements that do not match are skipped.

    NOTE(review): ``metadata_dict`` is neither a parameter nor defined in the
    visible notebook; it must exist as a global before this is called,
    otherwise the assignment raises NameError — confirm where it comes from.
    NOTE(review): ``splits`` is computed but never used and the function has no
    return statement (it returns None) — this looks unfinished.

    Args:
        json_data: Iterable of entries; presumably the list returned by
            ``get_docs`` — TODO confirm.
    """
    for item in json_data:
        if isinstance(item, dict) and "docs" in item and "name" in item:
            
            docs = item["docs"]
            name = item["name"]

            # Decode JSON string; when the text is not valid JSON it is used as-is.
            # NOTE(review): if the text decodes to a non-string JSON value,
            # clean_html below will fail on it — verify the input format.
            try:
                docs = json.loads(docs)
            except json.JSONDecodeError:
                pass
            
            docs = clean_html(docs)

            # Save all fields for adding to metadata later
            item["docs"] = docs # replace docs with cleaned text
            metadata_dict[name] = item

            # Split by headers, and where needed, sentences
            # (only the header split is implemented here; the result is unused)
            splits = split_by_headers(docs)

In [4]:
# compile list of adaptors with descriptions (offline)

# System prompt: instructs the model to answer with "<adaptor name>: <2-3 sentence description>".
describe_adaptor_system_prompt = """
You are an assistant for writing brief descriptions of adaptors offered by OpenFn, a workflow generation platform.
The summary will be used to help select appropriate adaptors for clients' desriptions of their automation tasks.
Relevant information might therefore include e.g. the purpose of the adaptor and the data formats it uses.
You will be given the name of an adaptor and the overview section from its documentation.
Answer with nothing but the name of the adaptor followed by a colon and a 2-3 sentence description.
"""

# User-turn template; both placeholders must be supplied to str.format().
describe_adaptor_user_prompt = """The adaptor to describe: "{adaptor_name}" \n The adaptor documentation: {documentation} """

def describe_adaptor(user_question, documentation=""):
    """Ask the model for a short description of one adaptor.

    The template above expects ``adaptor_name`` and ``documentation``, but the
    previous implementation formatted it with ``user_question=...`` and
    therefore raised ``KeyError`` on every call. Both placeholders are now
    filled.

    Args:
        user_question: Name of the adaptor to describe. The parameter keeps its
            original name so existing positional/keyword callers still work.
        documentation: Overview text from the adaptor's documentation. Defaults
            to an empty string so single-argument calls remain valid.

    Returns:
        The text of the first content block of the model's reply.
    """
    user_text = describe_adaptor_user_prompt.format(
        adaptor_name=user_question,
        documentation=documentation,
    )
    message = client.messages.create(
        model="claude-3-7-sonnet-20250219", 
        max_tokens=1000,
        temperature=0,  # deterministic output for an offline, repeatable listing
        system=describe_adaptor_system_prompt,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": user_text
                    }
                ]
            }
        ]
    )
    return message.content[0].text

In [None]:
# Empty placeholder for the adaptor summaries (the string holds only two newlines).
# NOTE(review): presumably meant to be filled with describe_adaptor() output —
# TODO confirm. The next cell reassigns this name with a hand-written list.
adaptor_summaries = """

"""

In [3]:
# One line per adaptor: "<package spec>: <short purpose>".
# NOTE(review): the same list is pasted verbatim into
# get_info_gen_yaml_system_prompt further down, so the two copies have to be
# kept in sync by hand; this variable is not referenced by the live prompt.
adaptor_summaries = """
@openfn/language-asana@latest: For interacting with Asana project management platform
@openfn/language-azure-storage@latest: For Azure Storage operations
@openfn/language-beyonic@latest: For mobile money payments with Beyonic
@openfn/language-bigquery@latest: For Google BigQuery database operations
@openfn/language-cartodb@latest: For CartoDB spatial database operations
@openfn/language-chatgpt@latest: For interacting with ChatGPT API
@openfn/language-cht@latest: For Community Health Toolkit integration
@openfn/language-claude@latest: For interacting with Claude AI
@openfn/language-collections@latest: For working with data collections
@openfn/language-commcare@latest: For interacting with CommCare
@openfn/language-common@latest: For basic data transformation operations
@openfn/language-dhis2@latest: For DHIS2 health information systems
@openfn/language-divoc@latest: For Digital Infrastructure for Vaccination Open Credentialing
@openfn/language-dynamics@latest: For Microsoft Dynamics 365 operations
@openfn/language-facebook@latest: For Facebook platform integration
@openfn/language-fhir@latest: For Fast Healthcare Interoperability Resources
@openfn/language-fhir-4@latest: For FHIR version 4 specific operations
@openfn/language-fhir-fr@latest: For French implementation of FHIR
@openfn/language-fhir-jembi@latest: For Jembi Health Systems FHIR implementation
@openfn/language-fhir-ndr-et@latest: For Ethiopia National Data Repository FHIR implementation
@openfn/language-ghana-bdr@latest: For Ghana Birth and Death Registry
@openfn/language-ghana-nia@latest: For Ghana National Identification Authority
@openfn/language-gmail@latest: For Gmail email operations
@openfn/language-godata@latest: For WHO Go.Data outbreak management
@openfn/language-googledrive@latest: For Google Drive operations
@openfn/language-googlehealthcare@latest: For Google Healthcare API
@openfn/language-googlesheets@latest: For Google Sheets operations
@openfn/language-hive@latest: For Apache Hive data warehouse
@openfn/language-http@latest: For making HTTP requests
@openfn/language-hubtel@latest: For Hubtel messaging platform
@openfn/language-intuit@latest: For QuickBooks and Intuit services
@openfn/language-khanacademy@latest: For Khan Academy integration
@openfn/language-kobotoolbox@latest: For KoboToolbox data collection
@openfn/language-magpi@latest: For Magpi mobile data collection
@openfn/language-mailchimp@latest: For MailChimp email marketing
@openfn/language-mailgun@latest: For Mailgun email services
@openfn/language-maximo@latest: For IBM Maximo asset management
@openfn/language-medicmobile@latest: For Medic Mobile health platform
@openfn/language-mogli@latest: For Mogli SMS Salesforce app
@openfn/language-mojatax@latest: For digital tax platforms
@openfn/language-mongodb@latest: For MongoDB database operations
@openfn/language-mpesa@latest: For M-Pesa mobile payment service
@openfn/language-msgraph@latest: For Microsoft Graph API
@openfn/language-mssql@latest: For Microsoft SQL Server operations
@openfn/language-msupply@latest: For mSupply inventory management
@openfn/language-mysql@latest: For MySQL database operations
@openfn/language-nexmo@latest: For Nexmo/Vonage communications API
@openfn/language-ocl@latest: For Open Concept Lab terminology services
@openfn/language-odk@latest: For Open Data Kit data collection
@openfn/language-odoo@latest: For Odoo ERP system
@openfn/language-openboxes@latest: For OpenBoxes supply chain management
@openfn/language-openfn@latest: For OpenFn platform operations
@openfn/language-openhim@latest: For OpenHIM interoperability layer
@openfn/language-openimis@latest: For OpenIMIS insurance management
@openfn/language-openlmis@latest: For OpenLMIS logistics management
@openfn/language-openmrs@latest: For OpenMRS medical record system
@openfn/language-openspp@latest: For Open Social Protection Platform
@openfn/language-pesapal@latest: For PesaPal payment gateway
@openfn/language-postgresql@latest: For PostgreSQL database operations
@openfn/language-primero@latest: For Primero child protection information management
@openfn/language-progres@latest: For UNHCR ProGres refugee management
@openfn/language-rapidpro@latest: For RapidPro messaging platform
@openfn/language-redis@latest: For Redis database operations
@openfn/language-resourcemap@latest: For Resource Map geospatial platform
@openfn/language-salesforce@latest: For Salesforce CRM operations
@openfn/language-satusehat@latest: For Indonesia's SatuSehat health platform
@openfn/language-senaite@latest: For SENAITE laboratory information management
@openfn/language-sftp@latest: For secure file transfer protocol operations
@openfn/language-smpp@latest: For Short Message Peer-to-Peer protocol
@openfn/language-surveycto@latest: For SurveyCTO data collection
@openfn/language-telerivet@latest: For Telerivet SMS platform
@openfn/language-template@latest: For template operations
@openfn/language-testing@latest: For testing OpenFn adaptors
@openfn/language-twilio@latest: For Twilio communications API
@openfn/language-varo@latest: For Varo financial services
@openfn/language-vtiger@latest: For VTiger CRM operations
@openfn/language-wigal-sms@latest: For Wigal SMS gateway
@openfn/language-zoho@latest: For Zoho CRM and business applications"""

In [None]:
# get_info_gen_yaml_system_prompt = """
# You are an expert assistant for the OpenFn workflow automation platform.
# Your task is to talk to a client with the goal of converting their description of a workflow into an OpenFn workflow YAML.
# You should produce properly structured YAML files that define workflow jobs, triggers, and connections.
# This might be an iterative process, where you adjust a previous YAML according to the user's instructions.
# If necessary, you can ask the user for clarification instead of producing a YAML. You should ask for more details if it is not possible to determine what kind of data and databases/services they are using.
# Do not produce a YAML unnecessarily; if the user does not otherwise appear to want a new YAML (and is instead e.g. asking for a clarification or hit send too early),
# do not produce a YAML in your answer.
# Be as brief as possible in your answers.

# ## Your Task

# Given a text description of a workflow process, you will:
# 1. Identify distinct jobs/steps in the workflow
# 2. Determine appropriate adaptors for each job
# 3. Set up proper trigger mechanisms (webhook or cron)
# 4. Create the connections (edges) between jobs
# 5. Generate a valid project.yaml file that follows OpenFn's structure

# ## OpenFn Project.yaml Structure

# A valid project.yaml must be enclosed in ``` and follow this structure:
# ```
# name: open-project
# jobs:
#   job-one:
#     name: First Job
#     adaptor: "@openfn/language-common@latest"
#     body: "| // Add operations here"
#   job-two:
#     name: Second Job
#     adaptor: "@openfn/language-http@latest"
#     body: "| // Add operations here"
# triggers:
#   # Choose one trigger type and remove the other
#   cron:  # For scheduled jobs
#     type: cron
#     cron_expression: 0 0 * * *  # Format: minute hour day month weekday
#     enabled: false
#   # OR
#   webhook:  # For event-based jobs
#     type: webhook
#     enabled: false
# edges:
#   daily-trigger->job-one:
#     source_trigger: daily-trigger
#     target_job: job-one
#     condition_type: always
#     enabled: true
#   job-one->job-two:
#     source_job: job-one
#     target_job: job-two
#     condition_type: on_job_success
#     enabled: true
# ```

# ## Adaptor Knowledge

# Here is a list of available OpenFn adaptors:
# {adaptor_summaries}

# ## Trigger Types

# - **Webhook**: Use for event-based triggers (default if not specified)
# - **Cron**: Use for time-based schedules
# The trigger should be set to enabled: false by default.

# ## Rules for Job Identification

# 1. Each distinct action should become its own job
# 2. Jobs should have clear, descriptive names
# 3. Jobs should be connected in a logical sequence
# 4. Choose the most specific adaptor available for each operation
# 5. When in doubt about an adaptor, use `@openfn/language-common@latest`
# 6. Job IDs should be derived from their names, replacing spaces with hyphens

# ## Rules for Edge Creation

# 1. The first job should always connect to the trigger
# 2. Each subsequent job should connect to the previous job with one condition_type: on_job_success, on_job_failure, always or js_expression (for the latter, also add a condition_expression in quotes e.g. "!state.error")
# 3. For branching workflows, create conditional edges as appropriate
# 4. Edges should be enabled by default

# ## Example Conversion

# For the input:
# "Fetch visits from commare once a day. For each visitor with an IHS number, create a FHIR Encounter in Satusehat. Otherwise, lookup the number in satusehat and then create an encounter"

# The output should be:
# Your reasoning (max ~4 sentences).

# ```
# name: Daily CommCare to Satusehat Encounter Sync
# jobs:
#   Fetch-visits-from-CommCare:
#     name: Fetch visits from CommCare
#     adaptor: "@openfn/language-commcare@latest"
#     body: "| // Add operations here"
#   Create-FHIR-Encounter-for-visitors-with-IHS-number:
#     name: Create FHIR Encounter for visitors with IHS number
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
#   Lookup-IHS-number-in-Satusehat:
#     name: Lookup IHS number in Satusehat
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
#   Create-FHIR-Encounter-after-IHS-lookup:
#     name: Create FHIR Encounter after IHS lookup
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
# triggers:
#   cron:
#     type: cron
#     cron_expression: 0 0 * * *
#     enabled: false
# edges:
#   cron->Fetch-visits-from-CommCare:
#     source_trigger: cron
#     target_job: Fetch-visits-from-CommCare
#     condition_type: always
#     enabled: true
#   Fetch-visits-from-CommCare->Create-FHIR-Encounter-for-visitors-with-IHS-number:
#     source_job: Fetch-visits-from-CommCare
#     target_job: Create-FHIR-Encounter-for-visitors-with-IHS-number
#     condition_type: on_job_success
#     enabled: true
#   Fetch-visits-from-CommCare->Lookup-IHS-number-in-Satusehat:
#     source_job: Fetch-visits-from-CommCare
#     target_job: Lookup-IHS-number-in-Satusehat
#     condition_type: on_job_success
#     enabled: true
#   Lookup-IHS-number-in-Satusehat->Create-FHIR-Encounter-after-IHS-lookup:
#     source_job: Lookup-IHS-number-in-Satusehat
#     target_job: Create-FHIR-Encounter-after-IHS-lookup
#     condition_type: on_job_success
#     enabled: true
# ```

# ## Output Format

# A) 
# A conversational turn responding to the user (2-4 sentences).

# or

# B)
# In a few sentences (max. as many sentences as there are jobs in the workflow), explain your reasoning and, if relevant, aspects of the workflow that should be reviewed (e.g. to consider alternative approaches).
# After a blank line, provide the output as a proper YAML file that follows the structure above.
# """

All-in-one prompt: ask the user for missing information and generate the YAML in a single call.

In [None]:
# System prompt for the single-call flow: the model either asks a clarifying
# question or returns a workflow YAML, always as JSON with "text" and "yaml" keys.
#
# The literal is a *raw* string on purpose. The JSON example under
# "Example Conversation" contains \n and \" escapes; in a normal string Python
# would expand them, so the model was shown an example with bare newlines and
# unescaped quotes inside a JSON string, i.e. invalid JSON. As a raw string the
# example reaches the model exactly as valid, parseable JSON.
#
# The prompt contains literal braces (the JSON example), so it must not be
# passed through str.format().
# NOTE(review): the structure template's edges reference `daily-trigger`, which
# is not one of the trigger keys (`cron` / `webhook`) it defines — confirm
# whether that is intended.
get_info_gen_yaml_system_prompt = r"""
You are an expert assistant for the OpenFn workflow automation platform.
Your task is to talk to a client with the goal of converting their description of a workflow into an OpenFn workflow YAML.
You should produce properly structured YAML files that define workflow jobs, triggers, and connections.
This might be an iterative process, where you adjust a previous YAML according to the user's instructions.
If necessary, you can ask the user for clarification instead of producing a YAML. You should ask for more details if it is not possible to determine what kind of data and databases/services they are using.
Do not produce a YAML unnecessarily; if the user does not otherwise appear to want a new YAML (and is instead e.g. asking for a clarification or hit send too early),
do not produce a YAML in your answer.
Be as brief as possible in your answers.

## Your Task

Given a text description of a workflow process, you will:
1. Identify distinct jobs/steps in the workflow
2. Determine appropriate adaptors for each job
3. Set up proper trigger mechanisms (webhook or cron)
4. Create the connections (edges) between jobs
5. Generate a valid project.yaml file that follows OpenFn's structure

## OpenFn Project.yaml Structure

A valid project.yaml must follow this structure:
```
name: open-project
jobs:
  job-one:
    name: First Job
    adaptor: "@openfn/language-common@latest"
    body: "| // Add operations here"
  job-two:
    name: Second Job
    adaptor: "@openfn/language-http@latest"
    body: "| // Add operations here"
triggers:
  # Choose one trigger type and remove the other
  cron:  # For scheduled jobs
    type: cron
    cron_expression: 0 0 * * *  # Format: minute hour day month weekday
    enabled: false
  # OR
  webhook:  # For event-based jobs
    type: webhook
    enabled: false
edges:
  daily-trigger->job-one:
    source_trigger: daily-trigger
    target_job: job-one
    condition_type: always
    enabled: true
  job-one->job-two:
    source_job: job-one
    target_job: job-two
    condition_type: on_job_success
    enabled: true
```

## Adaptor Knowledge

Here is a list of available OpenFn adaptors:
@openfn/language-asana@latest: For interacting with Asana project management platform
@openfn/language-azure-storage@latest: For Azure Storage operations
@openfn/language-beyonic@latest: For mobile money payments with Beyonic
@openfn/language-bigquery@latest: For Google BigQuery database operations
@openfn/language-cartodb@latest: For CartoDB spatial database operations
@openfn/language-chatgpt@latest: For interacting with ChatGPT API
@openfn/language-cht@latest: For Community Health Toolkit integration
@openfn/language-claude@latest: For interacting with Claude AI
@openfn/language-collections@latest: For working with data collections
@openfn/language-commcare@latest: For interacting with CommCare
@openfn/language-common@latest: For basic data transformation operations
@openfn/language-dhis2@latest: For DHIS2 health information systems
@openfn/language-divoc@latest: For Digital Infrastructure for Vaccination Open Credentialing
@openfn/language-dynamics@latest: For Microsoft Dynamics 365 operations
@openfn/language-facebook@latest: For Facebook platform integration
@openfn/language-fhir@latest: For Fast Healthcare Interoperability Resources
@openfn/language-fhir-4@latest: For FHIR version 4 specific operations
@openfn/language-fhir-fr@latest: For French implementation of FHIR
@openfn/language-fhir-jembi@latest: For Jembi Health Systems FHIR implementation
@openfn/language-fhir-ndr-et@latest: For Ethiopia National Data Repository FHIR implementation
@openfn/language-ghana-bdr@latest: For Ghana Birth and Death Registry
@openfn/language-ghana-nia@latest: For Ghana National Identification Authority
@openfn/language-gmail@latest: For Gmail email operations
@openfn/language-godata@latest: For WHO Go.Data outbreak management
@openfn/language-googledrive@latest: For Google Drive operations
@openfn/language-googlehealthcare@latest: For Google Healthcare API
@openfn/language-googlesheets@latest: For Google Sheets operations
@openfn/language-hive@latest: For Apache Hive data warehouse
@openfn/language-http@latest: For making HTTP requests
@openfn/language-hubtel@latest: For Hubtel messaging platform
@openfn/language-intuit@latest: For QuickBooks and Intuit services
@openfn/language-khanacademy@latest: For Khan Academy integration
@openfn/language-kobotoolbox@latest: For KoboToolbox data collection
@openfn/language-magpi@latest: For Magpi mobile data collection
@openfn/language-mailchimp@latest: For MailChimp email marketing
@openfn/language-mailgun@latest: For Mailgun email services
@openfn/language-maximo@latest: For IBM Maximo asset management
@openfn/language-medicmobile@latest: For Medic Mobile health platform
@openfn/language-mogli@latest: For Mogli SMS Salesforce app
@openfn/language-mojatax@latest: For digital tax platforms
@openfn/language-mongodb@latest: For MongoDB database operations
@openfn/language-mpesa@latest: For M-Pesa mobile payment service
@openfn/language-msgraph@latest: For Microsoft Graph API
@openfn/language-mssql@latest: For Microsoft SQL Server operations
@openfn/language-msupply@latest: For mSupply inventory management
@openfn/language-mysql@latest: For MySQL database operations
@openfn/language-nexmo@latest: For Nexmo/Vonage communications API
@openfn/language-ocl@latest: For Open Concept Lab terminology services
@openfn/language-odk@latest: For Open Data Kit data collection
@openfn/language-odoo@latest: For Odoo ERP system
@openfn/language-openboxes@latest: For OpenBoxes supply chain management
@openfn/language-openfn@latest: For OpenFn platform operations
@openfn/language-openhim@latest: For OpenHIM interoperability layer
@openfn/language-openimis@latest: For OpenIMIS insurance management
@openfn/language-openlmis@latest: For OpenLMIS logistics management
@openfn/language-openmrs@latest: For OpenMRS medical record system
@openfn/language-openspp@latest: For Open Social Protection Platform
@openfn/language-pesapal@latest: For PesaPal payment gateway
@openfn/language-postgresql@latest: For PostgreSQL database operations
@openfn/language-primero@latest: For Primero child protection information management
@openfn/language-progres@latest: For UNHCR ProGres refugee management
@openfn/language-rapidpro@latest: For RapidPro messaging platform
@openfn/language-redis@latest: For Redis database operations
@openfn/language-resourcemap@latest: For Resource Map geospatial platform
@openfn/language-salesforce@latest: For Salesforce CRM operations
@openfn/language-satusehat@latest: For Indonesia's SatuSehat health platform
@openfn/language-senaite@latest: For SENAITE laboratory information management
@openfn/language-sftp@latest: For secure file transfer protocol operations
@openfn/language-smpp@latest: For Short Message Peer-to-Peer protocol
@openfn/language-surveycto@latest: For SurveyCTO data collection
@openfn/language-telerivet@latest: For Telerivet SMS platform
@openfn/language-template@latest: For template operations
@openfn/language-testing@latest: For testing OpenFn adaptors
@openfn/language-twilio@latest: For Twilio communications API
@openfn/language-varo@latest: For Varo financial services
@openfn/language-vtiger@latest: For VTiger CRM operations
@openfn/language-wigal-sms@latest: For Wigal SMS gateway
@openfn/language-zoho@latest: For Zoho CRM and business applications

## Trigger Types

- **Webhook**: Use for event-based triggers (default if not specified)
- **Cron**: Use for time-based schedules
The trigger should be set to enabled: false by default.

## Rules for Job Identification

1. Each distinct action should become its own job
2. Jobs should have clear, descriptive names
3. Jobs should be connected in a logical sequence
4. Choose the most specific adaptor available for each operation
5. When in doubt about an adaptor, use `@openfn/language-common@latest`
6. Job IDs should be derived from their names, replacing spaces with hyphens

## Rules for Edge Creation

1. The first job should always connect to the trigger
2. Each subsequent job should connect to the previous job with one condition_type: on_job_success, on_job_failure, always or js_expression (for the latter, also add a condition_expression in quotes e.g. "!state.error")
3. For branching workflows, create conditional edges as appropriate
4. Edges should be enabled by default

## Example Conversation

User's conversation turn:
"Fetch visits from commare once a day. For each visitor with an IHS number, create a FHIR Encounter in Satusehat. Otherwise, lookup the number in satusehat and then create an encounter"

The output should be:
{
  "text": "Your reasoning (max ~4 sentences).",
  "yaml": "name: Daily CommCare to Satusehat Encounter Sync\njobs:\n  Fetch-visits-from-CommCare:\n    name: Fetch visits from CommCare\n    adaptor: \"@openfn/language-commcare@latest\"\n    body: \"| // Add operations here\"\n  Create-FHIR-Encounter-for-visitors-with-IHS-number:\n    name: Create FHIR Encounter for visitors with IHS number\n    adaptor: \"@openfn/language-satusehat@latest\"\n    body: \"| // Add operations here\"\n  Lookup-IHS-number-in-Satusehat:\n    name: Lookup IHS number in Satusehat\n    adaptor: \"@openfn/language-satusehat@latest\"\n    body: \"| // Add operations here\"\n  Create-FHIR-Encounter-after-IHS-lookup:\n    name: Create FHIR Encounter after IHS lookup\n    adaptor: \"@openfn/language-satusehat@latest\"\n    body: \"| // Add operations here\"\ntriggers:\n  cron:\n    type: cron\n    cron_expression: 0 0 * * *\n    enabled: false\nedges:\n  cron->Fetch-visits-from-CommCare:\n    source_trigger: cron\n    target_job: Fetch-visits-from-CommCare\n    condition_type: always\n    enabled: true\n  Fetch-visits-from-CommCare->Create-FHIR-Encounter-for-visitors-with-IHS-number:\n    source_job: Fetch-visits-from-CommCare\n    target_job: Create-FHIR-Encounter-for-visitors-with-IHS-number\n    condition_type: on_job_success\n    enabled: true\n  Fetch-visits-from-CommCare->Lookup-IHS-number-in-Satusehat:\n    source_job: Fetch-visits-from-CommCare\n    target_job: Lookup-IHS-number-in-Satusehat\n    condition_type: on_job_success\n    enabled: true\n  Lookup-IHS-number-in-Satusehat->Create-FHIR-Encounter-after-IHS-lookup:\n    source_job: Lookup-IHS-number-in-Satusehat\n    target_job: Create-FHIR-Encounter-after-IHS-lookup\n    condition_type: on_job_success\n    enabled: true"
}

## Output Format

You must respond in JSON format with two fields: "text" and "yaml". 
"text" for all explanation, and "yaml" for the YAML block.

You can either
A) answer with JUST a conversational turn responding to the user (2-4 sentences) in the "text" key and leave the "yaml" key as null,

or 

B) answer with BOTH the "text" key and the "yaml" key.
In this case, you should provide a few sentences in the "text" key (max. as many sentences as there are jobs in the workflow) to explain your reasoning. 
If relevant, you can note aspects of the workflow that should be reviewed, e.g. to add logging steps, data verification, or additional branches for cases with incomplete info.
In the "yaml" key, provide a proper YAML file that follows the structure above.


The user's latest message and prior conversation are provided below. Generate your response accordingly.
"""

# {existing_yaml}


In [5]:
# all in one go - ask for info and gen in one prompt

# get_info_gen_yaml_system_prompt_formatted = get_info_gen_yaml_system_prompt.format(existing_yaml=" ")

# Template for the user turn; the task description is inserted in quotes.
get_info_gen_yaml_user_prompt = """The user's automation task is as follows: "{user_question}" """

def get_info_and_gen_yaml(user_question):
    """Send one task description to the model and return its raw reply text.

    The system prompt asks for a JSON object with "text" and "yaml" keys, but
    this function does not parse it: callers receive the unparsed string from
    the first content block of the response.

    Args:
        user_question: The user's description of the automation task.

    Returns:
        The ``text`` attribute of the first content block of the reply.
    """
    rendered_task = get_info_gen_yaml_user_prompt.format(user_question=user_question)
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": rendered_task}],
    }
    response = client.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1000,
        temperature=0,
        system=get_info_gen_yaml_system_prompt,
        messages=[user_turn],
    )
    first_block = response.content[0]
    return first_block.text

In [6]:
# Sample task description sent through the single-call flow.
# The commented-out line below is a vaguer variant of the same request.
user_question = "Whenever fridge statistics are send to you, parse and aggregate the data and upload to a collection in redis."
# user_question = "i have fridge stats that should be analysed and then uploaded"

# `answer` is the model's raw reply string; the bare expression displays it.
answer = get_info_and_gen_yaml(user_question)
answer

'{"text":"I\'ll create a workflow that uses a webhook trigger to receive fridge statistics, processes the data, and stores it in Redis. This requires two main jobs: one to parse/aggregate the data and another to upload to Redis.","yaml":"name: Fridge-Statistics-Processing\\njobs:\\n  parse-and-aggregate-data:\\n    name: Parse and Aggregate Fridge Data\\n    adaptor: \\"@openfn/language-common@latest\\"\\n    body: \\"| // Add data parsing and aggregation operations here\\"\\n  upload-to-redis:\\n    name: Upload to Redis Collection\\n    adaptor: \\"@openfn/language-redis@latest\\"\\n    body: \\"| // Add Redis upload operations here\\"\\ntriggers:\\n  webhook:\\n    type: webhook\\n    enabled: false\\nedges:\\n  webhook->parse-and-aggregate-data:\\n    source_trigger: webhook\\n    target_job: parse-and-aggregate-data\\n    condition_type: always\\n    enabled: true\\n  parse-and-aggregate-data->upload-to-redis:\\n    source_job: parse-and-aggregate-data\\n    target_job: upload-to-

In [11]:
# Second sample: a branching task (records with and without a patient IHS).
# The commented-out line is a shorter phrasing of a similar request.
# NOTE: this overwrites `user_question` and `answer` from the previous cell.
# user_question = "When we get a patient visit we need to process the notes from it. But sometimes there's no patient IHS in the record yet so we need to try and get that."
user_question = "We need to extract daily patient visit info from commcare forms to trigger another workflow (that we can look at later). But sometimes there's no patient IHS in the record yet so we need to try and get that from Satusehat and add it to commcare for the next time."


answer = get_info_and_gen_yaml(user_question)
answer

'{"text": "I\'ll help you create a workflow to extract patient visit info from CommCare and handle cases where patient IHS is missing. This workflow will fetch daily CommCare data, check for IHS numbers, and for records without IHS, query Satusehat to find and update the CommCare record.", "yaml": "name: CommCare-Patient-Visit-Extraction\\njobs:\\n  fetch-daily-patient-visits:\\n    name: Fetch Daily Patient Visits from CommCare\\n    adaptor: \\"@openfn/language-commcare@latest\\"\\n    body: \\"| // Add operations to fetch patient visit forms\\"\\n  process-records-with-ihs:\\n    name: Process Records with IHS Number\\n    adaptor: \\"@openfn/language-common@latest\\"\\n    body: \\"| // Filter and process records that already have IHS numbers\\"\\n  lookup-missing-ihs-in-satusehat:\\n    name: Lookup Missing IHS in Satusehat\\n    adaptor: \\"@openfn/language-satusehat@latest\\"\\n    body: \\"| // Query Satusehat API to find IHS for patients without one\\"\\n  update-commcare-with

In [12]:
# Parse the reply as JSON and print its "yaml" field so newlines render as
# line breaks. json.loads raises if the reply is not valid JSON, and the lookup
# raises KeyError if the "yaml" key is missing.
print(json.loads(answer)["yaml"])

name: CommCare-Patient-Visit-Extraction
jobs:
  fetch-daily-patient-visits:
    name: Fetch Daily Patient Visits from CommCare
    adaptor: "@openfn/language-commcare@latest"
    body: "| // Add operations to fetch patient visit forms"
  process-records-with-ihs:
    name: Process Records with IHS Number
    adaptor: "@openfn/language-common@latest"
    body: "| // Filter and process records that already have IHS numbers"
  lookup-missing-ihs-in-satusehat:
    name: Lookup Missing IHS in Satusehat
    adaptor: "@openfn/language-satusehat@latest"
    body: "| // Query Satusehat API to find IHS for patients without one"
  update-commcare-with-ihs:
    name: Update CommCare with Found IHS Numbers
    adaptor: "@openfn/language-commcare@latest"
    body: "| // Update CommCare records with IHS numbers found in Satusehat"
triggers:
  cron:
    type: cron
    cron_expression: 0 0 * * *  # Run daily at midnight
    enabled: false
edges:
  cron->fetch-daily-patient-visits:
    source_trigger: c

In [13]:
import yaml

In [24]:
# Split `answer` into two parts; extract_text_and_yaml is defined outside this section.
# NOTE(review): presumably returns (explanation text, YAML string) — confirm against its definition.
output_text, output_yaml = extract_text_and_yaml(answer)
print(output_text)
# Bare last expression: the notebook displays the repr of output_yaml.
output_yaml

This workflow involves receiving fridge statistics via webhook, processing the data, and storing it in Redis. I'll use language-http for receiving data, language-common for processing, and language-redis for storage.


'name: Fridge-Statistics-Processing\njobs:\n  receive-fridge-statistics:\n    name: Receive Fridge Statistics\n    adaptor: "@openfn/language-http@latest"\n    body: "| // Parse incoming webhook data"\n  process-and-aggregate:\n    name: Process and Aggregate Data\n    adaptor: "@openfn/language-common@latest"\n    body: "| // Aggregate and transform the fridge statistics"\n  upload-to-redis:\n    name: Upload to Redis Collection\n    adaptor: "@openfn/language-redis@latest"\n    body: "| // Store the processed data in Redis collection"\ntriggers:\n  webhook:\n    type: webhook\n    enabled: false\nedges:\n  webhook->receive-fridge-statistics:\n    source_trigger: webhook\n    target_job: receive-fridge-statistics\n    condition_type: always\n    enabled: true\n  receive-fridge-statistics->process-and-aggregate:\n    source_job: receive-fridge-statistics\n    target_job: process-and-aggregate\n    condition_type: on_job_success\n    enabled: true\n  process-and-aggregate->upload-to-red

In [None]:
# Decode literal escape sequences (a backslash followed by "n", etc.) into real characters.
# Encoding with latin-1 + backslashreplace keeps non-ASCII characters intact:
# a plain .encode() produces UTF-8 bytes, which unicode_escape then reads back
# byte-by-byte as Latin-1 and turns into mojibake.
formatted_str = output_yaml.encode("latin-1", "backslashreplace").decode("unicode_escape")

try:
    yaml_data = yaml.safe_load(formatted_str)
    # Convert back to a pretty YAML string; sort_keys=False keeps the original key order
    pretty_yaml = yaml.dump(yaml_data, sort_keys=False)
except yaml.YAMLError as e:
    # Single formatted message (not a tuple of args) and keep the original error chained
    raise ValueError(f"Invalid YAML: {e}") from e
print(pretty_yaml)

name: Fridge-Statistics-Processing
jobs:
  receive-fridge-statistics:
    name: Receive Fridge Statistics
    adaptor: '@openfn/language-http@latest'
    body: '| // Parse incoming webhook data'
  process-and-aggregate:
    name: Process and Aggregate Data
    adaptor: '@openfn/language-common@latest'
    body: '| // Aggregate and transform the fridge statistics'
  upload-to-redis:
    name: Upload to Redis Collection
    adaptor: '@openfn/language-redis@latest'
    body: '| // Store the processed data in Redis collection'
triggers:
  webhook:
    type: webhook
    enabled: false
edges:
  webhook->receive-fridge-statistics:
    source_trigger: webhook
    target_job: receive-fridge-statistics
    condition_type: always
    enabled: true
  receive-fridge-statistics->process-and-aggregate:
    source_job: receive-fridge-statistics
    target_job: process-and-aggregate
    condition_type: on_job_success
    enabled: true
  process-and-aggregate->upload-to-redis:
    source_job: process-an


examples

'i have fridge stats that should be analysed and then uploaded'
--> 'I need more details to create an effective workflow. What system are the fridge stats coming from? What kind of analysis needs to be performed? Where should the results be uploaded to? With this information, I can design an appropriate workflow with the right adaptors and connections.'

'Whenever fridge statistics are send to you, parse and aggregate the data and upload to a collection in redis.'
--> 'This workflow involves receiving fridge statistics via webhook, processing the data, and storing it in Redis. I\'ll create a two-job workflow with appropriate adaptors.\n\n```yaml\nname: Fridge-Statistics-Processing\njobs:\n  parse-and-aggregate-fridge-data:\n    name: Parse and Aggregate Fridge Data\n    adaptor: "@openfn/language-common@latest"\n    body: "| // Add operations to parse and aggregate the fridge statistics"\n  upload-to-redis-collection:\n    name: Upload to Redis Collection\n    adaptor: "@openfn/language-redis@latest"\n    body: "| // Add operations to store the processed data in Redis"\ntriggers:\n  webhook:\n    type: webhook\n    enabled: false\nedges:\n  webhook->parse-and-aggregate-fridge-data:\n    source_trigger: webhook\n    target_job: parse-and-aggregate-fridge-data\n    condition_type: always\n    enabled: true\n  parse-and-aggregate-fridge-data->upload-to-redis-collection:\n    source_job: parse-and-aggregate-fridge-data\n    target_job: upload-to-redis-collection\n    condition_type: on_job_success\n    enabled: true\n```'



"When we get a patient visit we need to process the notes from it. But sometimes there's no patient IHS in the record yet so we need to try and get that."
'{"text": "I need more information to create a proper workflow. What system are you receiving patient visits from? Where are you storing the patient data? What does \'process the notes\' involve specifically? And where/how do you look up the IHS when it\'s missing?", "yaml": null}'

"We need to extract daily patient visit info from commcare forms to trigger another workflow (that we can look at later). But sometimes there's no patient IHS in the record yet so we need to try and get that from Satusehat and add it to commcare for the next time."
'{"text": "I\'ll help you create a workflow to extract patient visit info from CommCare and handle cases where patient IHS is missing. This workflow will fetch daily CommCare data, check for IHS numbers, and for records without IHS, query Satusehat to find and update the CommCare record.", "yaml": "name: CommCare-Patient-Visit-Extraction\\njobs:\\n  fetch-daily-patient-visits:\\n    name: Fetch Daily Patient Visits from CommCare\\n    adaptor: \\"@openfn/language-commcare@latest\\"\\n    body: \\"| // Add operations to fetch patient visit forms\\"\\n  process-records-with-ihs:\\n    name: Process Records with IHS Number\\n    adaptor: \\"@openfn/language-common@latest\\"\\n    body: \\"| // Filter and process records that already have IHS numbers\\"\\n  lookup-missing-ihs-in-satusehat:\\n    name: Lookup Missing IHS in Satusehat\\n    adaptor: \\"@openfn/language-satusehat@latest\\"\\n    body: \\"| // Query Satusehat API to find IHS for patients without one\\"\\n  update-commcare-with-ihs:\\n    name: Update CommCare with Found IHS Numbers\\n    adaptor: \\"@openfn/language-commcare@latest\\"\\n    body: \\"| // Update CommCare records with IHS numbers found in Satusehat\\"\\ntriggers:\\n  cron:\\n    type: cron\\n    cron_expression: 0 0 * * *  # Run daily at midnight\\n    enabled: false\\nedges:\\n  cron->fetch-daily-patient-visits:\\n    source_trigger: cron\\n    target_job: fetch-daily-patient-visits\\n    condition_type: always\\n    enabled: true\\n  fetch-daily-patient-visits->process-records-with-ihs:\\n    source_job: fetch-daily-patient-visits\\n    target_job: process-records-with-ihs\\n    condition_type: on_job_success\\n    enabled: true\\n  fetch-daily-patient-visits->lookup-missing-ihs-in-satusehat:\\n    source_job: fetch-daily-patient-visits\\n    target_job: lookup-missing-ihs-in-satusehat\\n    condition_type: on_job_success\\n    enabled: true\\n  lookup-missing-ihs-in-satusehat->update-commcare-with-ihs:\\n    source_job: 
lookup-missing-ihs-in-satusehat\\n    target_job: update-commcare-with-ihs\\n    condition_type: on_job_success\\n    enabled: true"}'


Separate convo + yaml calls

Have two slightly different system prompts: one for first-time generation (explain your reasoning in 2 sentences) and one for subsequent edits (explain what you changed and why).

In [None]:
# gen_yaml_system_prompt = """
# You are an expert assistant for the OpenFn workflow automation platform. Your task is to convert natural language descriptions of workflows into properly structured YAML files that define workflow jobs, triggers, and connections.

# ## Your Task

# Given a text description of a workflow process, you will:
# 1. Identify distinct jobs/steps in the workflow
# 2. Determine appropriate adaptors for each job
# 3. Set up proper trigger mechanisms (webhook or cron)
# 4. Create the connections (edges) between jobs
# 5. Generate a valid project.yaml file that follows OpenFn's structure

# ## OpenFn Project.yaml Structure

# A valid project.yaml must follow this structure:
# ```yaml
# name: open-project
# jobs:
#   job-one:
#     name: First Job
#     adaptor: "@openfn/language-common@latest"
#     body: "| // Add operations here"
#   job-two:
#     name: Second Job
#     adaptor: "@openfn/language-http@latest"
#     body: "| // Add operations here"
# triggers:
#   # Choose one trigger type and remove the other
#   cron:  # For scheduled jobs
#     type: cron
#     cron_expression: 0 0 * * *  # Format: minute hour day month weekday
#     enabled: false
#   # OR
#   webhook:  # For event-based jobs
#     type: webhook
#     enabled: false
# edges:
#   cron->job-one:
#     source_trigger: cron
#     target_job: job-one
#     condition_type: always
#     enabled: true
#   job-one->job-two:
#     source_job: job-one
#     target_job: job-two
#     condition_type: on_job_success
#     enabled: true
# ```

# ## Adaptor Knowledge

# Here is a list of available OpenFn adaptors:
# {adaptor_summaries}

# ## Trigger Types

# - **Webhook**: Use for event-based triggers (default if not specified)
# - **Cron**: Use for time-based schedules
# The trigger should be set to enabled: false by default.

# ## Rules for Job Identification

# 1. Each distinct action should become its own job
# 2. Jobs should have clear, descriptive names
# 3. Jobs should be connected in a logical sequence
# 4. Choose the most specific adaptor available for each operation
# 5. When in doubt about an adaptor, use `@openfn/language-common@latest`
# 6. Job IDs should be derived from their names, replacing spaces with hyphens

# ## Rules for Edge Creation

# 1. The first job should always connect to the trigger
# 2. Each subsequent job should connect to the previous job with one condition_type: on_job_success, on_job_failure, always or js_expression (for the latter, also add a condition_expression in quotes e.g. "!state.error")
# 3. For branching workflows, create conditional edges as appropriate
# 4. Edges should be enabled by default

# ## Example Conversion

# For the input:
# "Fetch visits from commare once a day. For each visitor with an IHS number, create a FHIR Encounter in Satusehat. Otherwise, lookup the number in satusehat and then create an encounter"

# The output should be:
# Your reasoning (2-5 sentences).

# ```yaml
# name: Daily CommCare to Satusehat Encounter Sync
# jobs:
#   Fetch-visits-from-CommCare:
#     name: Fetch visits from CommCare
#     adaptor: "@openfn/language-commcare@latest"
#     body: "| // Add operations here"
#   Create-FHIR-Encounter-for-visitors-with-IHS-number:
#     name: Create FHIR Encounter for visitors with IHS number
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
#   Lookup-IHS-number-in-Satusehat:
#     name: Lookup IHS number in Satusehat
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
#   Create-FHIR-Encounter-after-IHS-lookup:
#     name: Create FHIR Encounter after IHS lookup
#     adaptor: "@openfn/language-satusehat@latest"
#     body: "| // Add operations here"
# triggers:
#   cron:
#     type: cron
#     cron_expression: 0 0 * * *
#     enabled: false
# edges:
#   cron->Fetch-visits-from-CommCare:
#     source_trigger: cron
#     target_job: Fetch-visits-from-CommCare
#     condition_type: always
#     enabled: true
#   Fetch-visits-from-CommCare->Create-FHIR-Encounter-for-visitors-with-IHS-number:
#     source_job: Fetch-visits-from-CommCare
#     target_job: Create-FHIR-Encounter-for-visitors-with-IHS-number
#     condition_type: on_job_success
#     enabled: true
#   Fetch-visits-from-CommCare->Lookup-IHS-number-in-Satusehat:
#     source_job: Fetch-visits-from-CommCare
#     target_job: Lookup-IHS-number-in-Satusehat
#     condition_type: on_job_success
#     enabled: true
#   Lookup-IHS-number-in-Satusehat->Create-FHIR-Encounter-after-IHS-lookup:
#     source_job: Lookup-IHS-number-in-Satusehat
#     target_job: Create-FHIR-Encounter-after-IHS-lookup
#     condition_type: on_job_success
#     enabled: true
# ```

# ## Output Format

# In 2-5 sentences, explain your reasoning and, if relevant, aspects of the workflow that should be reviewed (e.g. to consider alternative approaches).
# After a blank line, provide the output as a proper YAML file that follows the structure above.
# """

In [None]:
# Fill the {adaptor_summaries} placeholder of the system prompt template.
# NOTE(review): in this part of the notebook gen_yaml_system_prompt only appears inside a
# fully commented-out cell — confirm it is defined before this cell runs on a fresh kernel.
gen_yaml_system_prompt_formatted = gen_yaml_system_prompt.format(adaptor_summaries=adaptor_summaries)


In [13]:

generate_yaml_user_prompt = """The user's automation task is as follows: "{user_question}" """

def generate_yaml(
    user_question,
    *,
    model="claude-3-7-sonnet-20250219",
    max_tokens=1000,
    temperature=0,
):
    """Send the user's task description to the model and return its reply text.

    The system prompt is read from the notebook-level
    ``gen_yaml_system_prompt_formatted`` and the request is sent through the
    notebook-level ``client``.

    Args:
        user_question: Free-text description of the automation task; it is
            inserted into ``generate_yaml_user_prompt``.
        model: Model identifier passed to ``client.messages.create``.
        max_tokens: Value passed as ``max_tokens``; the upper bound on the
            length of the generated reply.
        temperature: Sampling temperature passed to the API.

    Returns:
        The ``text`` of the first content block of the response.
    """
    user_prompt = generate_yaml_user_prompt.format(user_question=user_question)
    message = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        system=gen_yaml_system_prompt_formatted,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": user_prompt}],
            }
        ],
    )
    # Only the first content block is used.
    return message.content[0].text

In [14]:
q = "Whenever fridge statistics are send to you, parse and aggregate the data and upload to a collection in redis."

answer = generate_yaml(q)
answer

'I\'ll create a workflow that handles incoming fridge statistics via a webhook trigger, processes the data, and stores it in Redis. This is a straightforward two-step process: first receiving and processing the data, then storing it in Redis. I\'m using a webhook trigger since the description mentions "whenever" data is sent, suggesting an event-based rather than scheduled approach.\n\n```yaml\nname: Fridge-Statistics-Processing\njobs:\n  parse-and-aggregate-fridge-data:\n    name: Parse and Aggregate Fridge Data\n    adaptor: "@openfn/language-common@latest"\n    body: "| // Add data parsing and aggregation operations here"\n  upload-to-redis-collection:\n    name: Upload to Redis Collection\n    adaptor: "@openfn/language-redis@latest"\n    body: "| // Add Redis collection upload operations here"\ntriggers:\n  webhook:\n    type: webhook\n    enabled: false\nedges:\n  webhook->parse-and-aggregate-fridge-data:\n    source_trigger: webhook\n    target_job: parse-and-aggregate-fridge-da

In [None]:

decide_gen_user_prompt = """The user's automation task is as follows: "{user_question}" """

# NOTE(review): this cell defines generate_yaml a second time, so running it replaces the
# definition from the earlier cell. Rename it if a separate "decide" step is intended.
def generate_yaml(
    user_question,
    *,
    model="claude-3-7-sonnet-20250219",
    max_tokens=1000,
    temperature=0,
):
    """Send the user's task description to the model and return its reply text.

    Uses this cell's own ``decide_gen_user_prompt`` template, so the cell does
    not depend on a prompt variable defined in another cell. The system prompt
    is read from the notebook-level ``gen_yaml_system_prompt_formatted`` and
    the request is sent through the notebook-level ``client``.

    Args:
        user_question: Free-text description of the automation task; it is
            inserted into ``decide_gen_user_prompt``.
        model: Model identifier passed to ``client.messages.create``.
        max_tokens: Value passed as ``max_tokens``; the upper bound on the
            length of the generated reply.
        temperature: Sampling temperature passed to the API.

    Returns:
        The ``text`` of the first content block of the response.
    """
    user_prompt = decide_gen_user_prompt.format(user_question=user_question)
    message = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        system=gen_yaml_system_prompt_formatted,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": user_prompt}],
            }
        ],
    )
    # Only the first content block is used.
    return message.content[0].text

In [3]:
def remove_credential_lines(input_file, output_file, encoding="utf-8"):
    """Copy a text file, dropping every line that starts with "credential:".

    A line is dropped when its content, after stripping surrounding
    whitespace, starts with ``credential:``; all other lines are written
    unchanged and in order. A two-line summary is printed at the end.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write (overwritten if it exists).
        encoding: Text encoding used for both files. Explicit so the result
            does not depend on the platform's default encoding.

    Returns:
        None.
    """
    with open(input_file, "r", encoding=encoding) as source:
        lines = source.readlines()

    # Match on the stripped line so indented "credential:" keys are caught too.
    filtered_lines = [line for line in lines if not line.strip().startswith("credential:")]

    with open(output_file, "w", encoding=encoding) as target:
        target.writelines(filtered_lines)

    removed_count = len(lines) - len(filtered_lines)
    print(f"Processed {len(lines)} lines, removed {removed_count} credential lines.")
    print(f"Results saved to {output_file}")

# Usage
# Build the paths from parent_dir (set in the first cell) instead of a
# user-specific absolute path, so the cell also runs from other checkouts.
input_file = os.path.join(parent_dir, "tmp", "asri-satusehat-prod-abbrev.yaml")
output_file = os.path.join(parent_dir, "tmp", "asri-satusehat-prod-abbrev2.yaml")
remove_credential_lines(input_file, output_file)

Processed 1245 lines, removed 87 credential lines.
Results saved to /Users/hanna/openfn/ai_experiments/apollo/services/tmp/asri-satusehat-prod-abbrev2.yaml
