In [1]:
# Install dependencies (run once per environment).
# %pip installs into the environment of the running kernel; restart the kernel
# afterwards if pip reports that updated packages need it.
# NOTE(review): versions are unpinned, so a fresh install may pull newer releases
# than the ones this notebook was last run with — consider pinning.
%pip install -q dspy faiss-cpu python-dotenv pandas

Note: you may need to restart the kernel to use updated packages.


In [6]:
# Basic imports and environment setup
import os
import dspy
from dotenv import load_dotenv

# Load API keys from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a clear message: os.getenv returns None for a missing variable,
# and that would otherwise only surface on the first LM or embedding call.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it before running this notebook."
    )

# Choose models similar to other notebooks.
# NOTE(review): temperature=1 and max_tokens=16000 look like the settings DSPy
# expects for OpenAI reasoning models — confirm before changing them.
lm = dspy.LM("openai/gpt-5-mini", api_key=openai_api_key, temperature=1, max_tokens=16000)
embedder = dspy.Embedder("openai/text-embedding-3-large", api_key=openai_api_key)

# Make `lm` the default language model for every DSPy module in this notebook.
dspy.configure(lm=lm)

print("DSPy configured. Ready to build RAG blog writer.")


DSPy configured. Ready to build RAG blog writer.


In [9]:
# Small example marketing corpus (swap in your own documents).
# Each (id, text) pair is a short snippet of prior company writing that the
# agent consults through vector search.
_SNIPPETS = (
    ("company_vision_1", "Nimbus is the revenue automation platform for RevOps and Data teams. We replace brittle spreadsheets with governed, AI-assisted workflows."),
    ("pricing_tiers_1", "Pricing: Starter $99/mo up to 5 seats; Pro $499/mo up to 25 seats; Scale $1,999/mo unlimited seats with SSO and SAML."),
    ("compliance_1", "Compliance: SOC 2 Type II and ISO 27001 certified. GDPR compliant. HIPAA not supported."),
    ("data_residency_1", "Data residency: EU customers can pin data to Frankfurt (eu-central-1). Default region us-east-1."),
    ("sla_support_1", "SLA: 99.9% uptime. Support first response under 4 business hours; Scale gets 30-minute critical SLA."),
    ("support_channels_1", "Support: Private Slack 9-5 PT on weekdays; 24/7 on-call for P1 incidents via PagerDuty."),
    ("integrations_crm_1", "Integrations: Native connectors for Salesforce and HubSpot including bidirectional sync and custom objects."),
    ("integrations_warehouse_1", "Warehouses: Snowflake and BigQuery supported; Redshift in private beta."),
    ("integrations_streaming_1", "Streaming: Kafka and Segment sources supported; exactly-once event delivery with idempotency keys."),
    ("personas_1", "Personas: RevOps needs pipeline visibility; Data Engineering needs reliable ingestion; Marketing Ops needs attribution sanity."),
    ("brand_voice_1", "Voice: practical, no-hype, crisp verbs, short sentences. Avoid exclamation marks."),
    ("style_guide_1", "Style: use US English, Oxford comma, and sentence case for headings."),
    ("product_features_1", "Features: Rules Engine, Playbooks, and Workflows. Rules Engine executes row-level policies with audit logs."),
    ("security_1", "Security: PII redaction enabled by default; customer-managed keys available on Scale."),
    ("programs_migration_1", "Concierge Migration: free one-time program up to 20 hours; includes schema mapping and QA."),
    ("event_growth_summit_1", "Growth Summit SF: Oct 14-16; booth B12; CEO Maya Chen keynote Oct 15 at 10:00am."),
    ("offer_code_1", "Promo: BUILD25 gives 25% off the first year for contracts signed before Dec 31."),
    ("case_study_1", "Case study: Acme Logistics increased lead-to-opportunity by 23% and cut churn 12% after adopting Nimbus."),
    ("limits_api_1", "API limits: 600 requests/min per org and 10 requests/sec per user. 429 means back off."),
    ("data_retention_1", "Data retention: logs stored 30 days by default; retention can be extended on Scale."),
    ("roadmap_1", "Roadmap: AI Forecasting open beta in Q4; Redwood Plugin GA in Q1."),
    ("naming_1", "Naming: use 'Nimbus' in external copy; avoid the internal codename 'AcmeCloud'."),
    ("billing_1", "Billing: Annual contracts only; invoices net-30; procurement often requests a security questionnaire."),
)

# One {"id", "text"} record per snippet, in the order listed above.
example_docs = [{"id": doc_id, "text": doc_text} for doc_id, doc_text in _SNIPPETS]

# Build FAISS index using DSPy Embeddings tool
import numpy as np
import faiss

# Embed every snippet in a single batch call.
texts = [doc["text"] for doc in example_docs]
embeddings = embedder(texts)

# FAISS works on float32 matrices; one row per snippet.
X = np.array(embeddings, dtype="float32")

# Flat L2 index over the raw (un-normalized) embeddings.
# Note: faiss_search queries `index_cosine`, not this index.
index = faiss.IndexFlatL2(X.shape[-1])
index.add(X)

# Scale each row to unit length so that nearest-neighbor order under L2 matches
# the order under cosine similarity. The tiny epsilon guards against dividing
# by a zero norm.
_row_norms = np.linalg.norm(X, axis=1, keepdims=True)
X_norm = X / (_row_norms + 1e-12)
index_cosine = faiss.IndexFlatL2(X_norm.shape[-1])
index_cosine.add(X_norm)

# Row position in the index -> document id.
id_lookup = {pos: doc["id"] for pos, doc in enumerate(example_docs)}

def faiss_search(query: str, k: int = 3) -> list[dict]:
    """
    Return up to `k` corpus snippets closest to `query` in embedding space.

    Args:
        query: Free-text search query; it is embedded with `embedder`.
        k: Number of neighbors requested from the index.

    Returns:
        A list of dicts with keys "id", "text", and "score", in the order the
        index returns them. Slots FAISS marks with -1 (no neighbor) are skipped.
    """
    query_vec = np.array(embedder([query])[0]).astype("float32")
    # Same unit-length scaling that was applied to the indexed rows.
    query_vec = query_vec / (np.linalg.norm(query_vec) + 1e-12)
    dists, positions = index_cosine.search(query_vec.reshape(1, -1), k)
    return [
        {
            "id": id_lookup[pos],
            "text": example_docs[pos]["text"],
            # Maps distance to a similarity-style score (higher is closer).
            # NOTE(review): if the index reports squared L2 distances on unit
            # vectors, this value is the cosine similarity — confirm.
            "score": float(1.0 - dist/2),
        }
        for pos, dist in zip(positions[0], dists[0])
        if pos != -1
    ]

# Smoke-test the retriever with one long, multi-topic request.
example_query = "Write a launch email for Nimbus Scale customers in the EU that mentions SOC 2 and ISO 27001, EU data residency in Frankfurt, private Slack support hours, Salesforce and Snowflake integrations, the Concierge Migration program, Growth Summit details, case study results, and include code BUILD25 in a practical, no-hype tone."

print("FAISS index ready. Example query:")
print(faiss_search(example_query))


FAISS index ready. Example query:
[{'id': 'case_study_1', 'text': 'Case study: Acme Logistics increased lead-to-opportunity by 23% and cut churn 12% after adopting Nimbus.', 'score': 0.46519339084625244}, {'id': 'naming_1', 'text': "Naming: use 'Nimbus' in external copy; avoid the internal codename 'AcmeCloud'.", 'score': 0.4582504630088806}, {'id': 'company_vision_1', 'text': 'Nimbus is the revenue automation platform for RevOps and Data teams. We replace brittle spreadsheets with governed, AI-assisted workflows.', 'score': 0.4543600082397461}]


In [10]:
# DSPy signatures for outline and section writing
# Keep signatures simple for readability by interns
# Signature for drafting a blog outline from a topic.
# The class docstring and the field descriptions are prompt text that DSPy sends
# to the model, so rewording them changes model behavior.
# NOTE(review): the instruction asks for a "multi-level" outline, but `outline`
# is a flat list of section titles — confirm which is intended.
class OutlineSignature(dspy.Signature):
    """
    Create a clear, multi-level outline for a marketing blog post.
    """
    # Input: the subject of the post.
    topic: str = dspy.InputField(description="Blog topic")
    # Output: section titles in reading order.
    outline: list[str] = dspy.OutputField(description="Ordered list of section titles")

# Signature for writing one section of the post, grounded in retrieved snippets.
# The class docstring and the field descriptions are prompt text that DSPy sends
# to the model, so rewording them changes model behavior.
class SectionSignature(dspy.Signature):
    """
    Write a focused section with context from prior company writing.
    """
    # Input: the subject of the whole post.
    topic: str = dspy.InputField(description="Blog topic")
    # Input: title of the section this call should produce.
    section_title: str = dspy.InputField(description="Which section to write")
    # Input: snippet texts supplied by the caller (the tools pass vector-search hits).
    retrieved_context: list[str] = dspy.InputField(description="Relevant snippets from prior posts")
    # Output: the section text.
    draft: str = dspy.OutputField(description="Section draft (3–6 paragraphs, concise)")

# One plain dspy.Predict module per signature.
# `section_writer` is called by the section tools; `outline_generator` is not
# referenced by any tool in the visible cells.
section_writer = dspy.Predict(SectionSignature)
outline_generator = dspy.Predict(OutlineSignature)

print("Signatures and base modules ready.")


Signatures and base modules ready.


In [12]:
# ReAct-style tools encoded as simple Python callables the agent can invoke
# Each tool returns a dict so the agent can keep state simple

def tool_search_context(query: str, k: int = 4) -> dict:
    """
    Tool: vector search in FAISS for prior snippets related to the query.
    """
    # Thin wrapper over faiss_search; the "tool" key labels which tool produced
    # the payload. The docstring above is left as-is because it doubles as the
    # description of the tool.
    return {"tool": "search_context", "results": faiss_search(query, k=k)}

# Shared, in-memory working state that the tools read and mutate:
#   topic    - blog topic (starts as None)
#   outline  - ordered section titles
#   sections - mapping of section_title -> draft text
blog_state = dict(topic=None, outline=[], sections={})


def tool_change_outline(new_outline: list[str]) -> dict:
    """
    Tool: replace the current outline with a new one (single happy path).
    """
    # Keep a shallow copy so later changes to the caller's list cannot alter the
    # stored outline. Existing section drafts are left untouched.
    stored_outline = [*new_outline]
    blog_state["outline"] = stored_outline
    return {"tool": "change_outline", "outline": stored_outline}


def tool_write_section(topic: str, section_title: str) -> dict:
    """
    Tool: write a fresh section using retrieved context.
    """
    # Retrieve snippets for "<topic> <section_title>" with faiss_search's default k.
    snippets = [hit["text"] for hit in faiss_search(f"{topic} {section_title}")]
    draft = section_writer(
        topic=topic,
        section_title=section_title,
        retrieved_context=snippets,
    ).draft
    # Stored under the exact title; an earlier draft with the same title is replaced.
    blog_state["sections"][section_title] = draft
    return {"tool": "write_section", "section_title": section_title, "draft": draft}


def tool_edit_section(topic: str, section_title: str, instruction: str) -> dict:
    """
    Tool: revise an existing section according to an edit instruction.

    The current draft and the instruction are both given to the model, together
    with snippets retrieved for the topic, section title, and instruction. If no
    draft exists yet for `section_title`, the section is written from scratch
    under the same instruction.

    Args:
        topic: Blog topic.
        section_title: Title of the section to edit (key in blog_state["sections"]).
        instruction: What to change in the current draft.

    Returns:
        Dict with keys "tool", "section_title", and "draft" (the revised text).
    """
    existing = blog_state["sections"].get(section_title, "")
    ctx_hits = faiss_search(f"{topic} {section_title} {instruction}")
    ctx_texts = [h["text"] for h in ctx_hits]

    # Use a dedicated edit signature so the model actually sees the instruction
    # and the current draft as separate fields. Reusing the section-writing
    # signature would drop the instruction and pass the draft off as just
    # another retrieved snippet.
    edit_signature = dspy.Signature(
        "topic: str, section_title: str, current_draft: str, instruction: str, "
        "retrieved_context: list[str] -> draft: str",
        instructions=(
            "Revise the current draft of a blog section according to the edit instruction. "
            "Apply the instruction, keep everything it does not ask to change, and stay "
            "consistent with the retrieved company context."
        ),
    )
    pred = dspy.Predict(edit_signature)(
        topic=topic,
        section_title=section_title,
        current_draft=existing,
        instruction=instruction,
        retrieved_context=ctx_texts,
    )
    blog_state["sections"][section_title] = pred.draft
    return {"tool": "edit_section", "section_title": section_title, "draft": pred.draft}

# Status message listing the four tools defined in this cell (names shown without the `tool_` prefix).
print("Tools ready: search_context, change_outline, write_section, edit_section")


Tools ready: search_context, change_outline, write_section, edit_section


In [13]:
# ReAct agent using DSPy (preferred)
# Tools must have clear docstrings and type hints
from typing import Any


def tool_assemble_blog() -> dict:
    """
    Tool: assemble the blog post from the current outline and written sections.

    Sections are emitted in outline order, each under a "# <title>" heading.
    Only drafts stored under a title that matches an outline entry exactly are
    included.

    Returns:
        Dict with keys "tool", "final_blog" (the assembled text), and
        "missing_sections" (outline titles that have no draft yet; an empty
        list means every outlined section has been written).
    """
    drafts = blog_state["sections"]
    parts = []
    missing = []
    for title in blog_state["outline"]:
        body = drafts.get(title, "")
        if not body:
            # Keep the heading in the text, but report the gap so the caller can
            # write the section instead of shipping an empty heading unnoticed.
            missing.append(title)
        parts.append(f"# {title}\n\n{body}")
    final = "\n\n".join(parts).strip()
    return {"tool": "assemble_blog", "final_blog": final, "missing_sections": missing}


# Top-level signature for the blog-writing agent.
# The class docstring is sent to the model as the task objective, so rewording
# it changes agent behavior.
class BlogAgentSignature(dspy.Signature):
    """
    You are a marketing blog writer. Given `topic`, create an outline, write each
    section using prior company writing as context (via vector search), optionally
    edit sections for continuity, and finish with a coherent draft.
    Use only the available tools. When finished, return the complete blog draft
    in `process_result`.
    """
    # Input: what the post should cover.
    topic: str = dspy.InputField(description="Blog topic to write about")
    # Output: the agent's summary of its plan and why it acted as it did.
    reasoning: str = dspy.OutputField(description="High-level plan and justification of actions")
    # Output: the finished post text.
    process_result: str = dspy.OutputField(description="Final blog draft text")

# Tools exposed to the agent: retrieval, outline management, section writing
# and editing, and final assembly.
agent_tools = [
    tool_search_context,
    tool_change_outline,
    tool_write_section,
    tool_edit_section,
    tool_assemble_blog,
]

# ReAct agent that works toward BlogAgentSignature's outputs by calling these tools.
react_agent = dspy.ReAct(BlogAgentSignature, tools=agent_tools)

print("ReAct agent ready.")


ReAct agent ready.


In [14]:
# Demo: drive the DSPy ReAct agent end-to-end
# The agent chooses which tools to call and returns the finished post in process_result.

demo_topic = "Launch blog for Nimbus Scale EU customers covering SOC 2, ISO 27001, Frankfurt data residency, private Slack support hours, Salesforce and Snowflake integrations, Concierge Migration, Growth Summit SF details, Acme Logistics case study results, and promo code BUILD25, in a practical, no-hype tone."

# Reset every field of the shared state so a re-run starts clean. The topic is
# recorded as well, so blog_state fully describes this run.
blog_state["topic"] = demo_topic
blog_state["outline"] = []
blog_state["sections"] = {}

react_result = react_agent(topic=demo_topic)

print("Reasoning:\n", react_result.reasoning)
print("\nOutline:")
# The outline is read from blog_state, i.e. whatever the agent last stored
# through tool_change_outline.
for i, s in enumerate(blog_state["outline"], 1):
    print(f"{i}. {s}")

print("\nFinal Blog Draft:\n")
print(react_result.process_result)


Reasoning:
 I created a practical, no-hype launch blog for Nimbus Scale EU customers by: (1) defining a clear outline covering all requested topics, (2) retrieving company-sourced facts (SOC 2, ISO 27001, Frankfurt region, integrations, Concierge Migration, private Slack hours, Acme Logistics results, BUILD25) to ground each section, and (3) drafting each section with concrete steps, timelines, and calls to action. The result is a single coherent draft that explains what changed, what it means for customers, and how to get started with minimal friction.

Outline:
1. Introduction: Nimbus Scale launches in EU — what this means for customers
2. Security & Compliance: SOC 2 and ISO 27001 — what we’ve completed and what it means for you
3. Data residency in Frankfurt: how it works and who it helps
4. Integrations: Salesforce & Snowflake — setup, benefits, and typical use cases
5. Concierge Migration: hands-on migration support and timeline
6. Private Slack support hours: what’s included and

In [15]:
# Step-by-step record of the agent run: the thought, tool name, and tool arguments for each step.
react_result.trajectory

{'thought_0': "I'll create a clear, practical outline covering all requested topics (security/compliance, Frankfurt data residency, integrations, migration help, support, events, case study, and promo). Once the outline is set, I'll write sections one by one using company context as needed.",
 'tool_name_0': 'tool_change_outline',
 'tool_args_0': {'new_outline': ['Introduction: Nimbus Scale launches in EU — what this means for customers',
   'Security & Compliance: SOC 2 and ISO 27001 — what we’ve completed and what it means for you',
   'Data residency in Frankfurt: how it works and who it helps',
   'Integrations: Salesforce & Snowflake — setup, benefits, and typical use cases',
   'Concierge Migration: hands-on migration support and timeline',
   'Private Slack support hours: what’s included and how to access',
   'Growth Summit SF: invitation, agenda highlights, and who should attend',
   'Customer spotlight — Acme Logistics: results and learnings',
   'Offer: promo code BUILD25 — 

In [16]:
# Print the last recorded LM call (n=1), including its system and user messages.
lm.inspect_history(n=1)





[34m[2025-10-03T18:37:18.958574][0m

[31mSystem message:[0m

Your input fields are:
1. `topic` (str): Blog topic to write about
2. `trajectory` (str):
Your output fields are:
1. `reasoning` (str): High-level plan and justification of actions
2. `process_result` (str): Final blog draft text
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## topic ## ]]
{topic}

[[ ## trajectory ## ]]
{trajectory}

[[ ## reasoning ## ]]
{reasoning}

[[ ## process_result ## ]]
{process_result}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        You are a marketing blog writer. Given `topic`, create an outline, write each
        section using prior company writing as context (via vector search), optionally
        edit sections for continuity, and finish with a coherent draft.
        Use only the available tools. When finished, return the complete blog draft
        in `process_result`.


[31mUser message:[0m

[[