In [15]:
# Install dependencies (run once per environment).
# `%pip` is used rather than `!pip` so packages land in the environment of the running kernel.
# NOTE(review): versions are unpinned, so a fresh environment may resolve different
# releases than the ones this notebook was written against; consider pinning.
# NOTE(review): pandas is installed here but is not imported in the cells visible in this notebook.
%pip install -q dspy faiss-cpu python-dotenv pandas

Note: you may need to restart the kernel to use updated packages.


In [16]:
# Basic imports and environment setup
import os
import dspy
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
load_dotenv()

# Read the key once and fail fast with an actionable message. Without this check a
# missing key is handed to the clients as None and only surfaces later, as an
# authentication error on the first embedding or LM request.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your environment or to a .env file, "
        "then re-run this cell."
    )

# Chat model used for generation and embedding model used for retrieval.
# NOTE(review): temperature=1 and max_tokens=16000 look like the settings DSPy expects
# for OpenAI reasoning-style models; confirm before lowering either value.
lm = dspy.LM("openai/gpt-5-mini", api_key=OPENAI_API_KEY, temperature=1, max_tokens=16000)
embedder = dspy.Embedder("openai/text-embedding-3-large", api_key=OPENAI_API_KEY)

# Configure DSPy default LM so modules created later pick it up
dspy.configure(lm=lm)

print("DSPy configured. Ready to build RAG blog writer.")


DSPy configured. Ready to build RAG blog writer.


In [17]:
# Small example marketing corpus (swap in your own documents).
# Each entry is one snippet of prior writing that the agent can retrieve via vector search:
#   "id"   - stable identifier for the snippet
#   "text" - the snippet content that gets embedded and returned as context
example_docs = [
    {
        "id": "brand_strategy_1",
        "text": (
            "Brand positioning clarifies who you serve, what you offer, and why you are different. "
            "Strong positioning guides messaging consistency across channels."
        ),
    },
    {
        "id": "seo_basics_1",
        "text": "Effective SEO content targets search intent, uses clear structure, and earns trust through expertise and internal links.",
    },
    {
        "id": "email_nurture_1",
        "text": "Email nurture sequences should deliver value before pitching, segment by behavior, and align topics with buyer stage.",
    },
    {
        "id": "product_marketing_1",
        "text": "Great product marketing connects features to outcomes, arms sales with crisp narratives, and maintains a feedback loop with customers.",
    },
    {
        "id": "distribution_1",
        "text": (
            "Content distribution multiplies ROI. "
            "Repurpose flagship assets into shorts, carousels, and newsletters with channel-native hooks."
        ),
    },
]

# Embed the corpus with the DSPy embedder and load the vectors into FAISS
import numpy as np
import faiss

# One embedding per document, in corpus order (row i belongs to example_docs[i])
texts = [doc["text"] for doc in example_docs]
embeddings = embedder(texts)

# FAISS works on float32 matrices
X = np.array(embeddings, dtype="float32")

# Flat (exact) L2 index over the raw embeddings.
# NOTE(review): `index` is not referenced by the other cells visible in this notebook;
# searches go through `index_cosine` below.
index = faiss.IndexFlatL2(X.shape[1])
index.add(X)

# Second index over unit-length rows. For unit vectors, ordering by L2 distance is the
# same as ordering by cosine similarity, so nearest neighbours here are the most
# cosine-similar documents. The small epsilon guards against dividing by a zero norm.
X_norm = X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-12)
index_cosine = faiss.IndexFlatL2(X_norm.shape[1])
index_cosine.add(X_norm)

# Row position in the index -> document id
id_lookup = {pos: doc["id"] for pos, doc in enumerate(example_docs)}

def faiss_search(query: str, k: int = 3) -> list[dict]:
    """
    Vector search helper that returns the top-k most similar prior snippets.

    The query is embedded, scaled to unit length, and searched against
    `index_cosine`, which holds the unit-length document vectors.

    Args:
        query: Free-text search query.
        k: Maximum number of hits to return. Values larger than the number of
            indexed documents are clamped; values below 1 yield an empty list.

    Returns:
        A list of dicts, best match first, each with keys "id", "text" and
        "score". An empty list is returned for a blank query, a non-positive
        `k`, or an empty index.
    """
    # Validate before touching the embedder or the index: both arguments can be
    # chosen by the agent, and a blank query or k <= 0 is not a meaningful search.
    if k < 1 or not query or not query.strip():
        return []

    # Never ask FAISS for more neighbours than it holds.
    k = min(k, index_cosine.ntotal)
    if k == 0:
        return []

    q_emb = np.array(embedder([query])[0]).astype("float32")
    q_emb = q_emb / (np.linalg.norm(q_emb) + 1e-12)
    distances, indices = index_cosine.search(q_emb.reshape(1, -1), k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        # FAISS uses -1 to mark "no neighbour found" slots.
        if idx == -1:
            continue
        idx = int(idx)  # plain int for dict/list lookups
        results.append({
            "id": id_lookup[idx],
            "text": example_docs[idx]["text"],
            # IndexFlatL2 reports squared L2 distance; for unit vectors that is
            # 2 - 2*cos, so 1 - dist/2 is the cosine similarity.
            "score": float(1.0 - dist / 2),
        })
    return results

print("FAISS index ready. Example query:")
print(faiss_search("write an email nurture welcome series"))


FAISS index ready. Example query:
[{'id': 'email_nurture_1', 'text': 'Email nurture sequences should deliver value before pitching, segment by behavior, and align topics with buyer stage.', 'score': 0.5652014017105103}, {'id': 'distribution_1', 'text': 'Content distribution multiplies ROI. Repurpose flagship assets into shorts, carousels, and newsletters with channel-native hooks.', 'score': 0.2681694030761719}, {'id': 'product_marketing_1', 'text': 'Great product marketing connects features to outcomes, arms sales with crisp narratives, and maintains a feedback loop with customers.', 'score': 0.23627716302871704}]


In [18]:
# DSPy signatures for outline and section writing.
# Kept deliberately small so they are easy to read.
# The class docstring and the field `description` strings are prompt text, not just
# documentation: editing them changes what the model is asked to do.
# NOTE(review): the docstring asks for a "multi-level" outline, while the output field
# is typed as a flat list of section titles.
class OutlineSignature(dspy.Signature):
    """
    Create a clear, multi-level outline for a marketing blog post.
    """
    # Input: subject of the post
    topic: str = dspy.InputField(description="Blog topic")
    # Output: section titles in reading order
    outline: list[str] = dspy.OutputField(description="Ordered list of section titles")

# Signature for drafting one section of the post from retrieved snippets.
# The docstring and `description` strings below are prompt text; change them with care.
class SectionSignature(dspy.Signature):
    """
    Write a focused section with context from prior company writing.
    """
    # Input: subject of the whole post
    topic: str = dspy.InputField(description="Blog topic")
    # Input: title of the section being written
    section_title: str = dspy.InputField(description="Which section to write")
    # Input: snippet texts supplied by the caller (the tools pass FAISS search hits)
    retrieved_context: list[str] = dspy.InputField(description="Relevant snippets from prior posts")
    # Output: the section text
    draft: str = dspy.OutputField(description="Section draft (3–6 paragraphs, concise)")

# Simple modules: one `dspy.Predict` wrapper per signature.
# NOTE(review): `outline_generator` is not called in the cells visible in this notebook;
# `section_writer` is the one used by the section tools.
outline_generator = dspy.Predict(OutlineSignature)
section_writer = dspy.Predict(SectionSignature)

print("Signatures and base modules ready.")


Signatures and base modules ready.


In [19]:
# ReAct-style tools encoded as simple Python callables the agent can invoke
# Each tool returns a dict so the agent can keep state simple

def tool_search_context(query: str, k: int = 4) -> dict:
    """
    Tool: vector search in FAISS for prior snippets related to the query.
    """
    # Docstring intentionally unchanged: it is the description the agent sees for this tool.
    # The hits are wrapped together with the tool name so observations are self-describing.
    return {"tool": "search_context", "results": faiss_search(query, k=k)}

# Shared in-memory scratchpad that the tools read and write while a post is drafted
blog_state = dict(
    topic=None,
    outline=[],   # section titles, in order
    sections={},  # section_title -> draft text
)


def tool_change_outline(new_outline: list[str]) -> dict:
    """
    Tool: replace the current outline with a new one (single happy path).
    """
    # Docstring intentionally unchanged: it is the description the agent sees for this tool.
    # Store a copy so later changes to the caller's list do not alter the saved outline.
    replacement = [*new_outline]
    blog_state["outline"] = replacement
    return {"tool": "change_outline", "outline": replacement}


def tool_write_section(topic: str, section_title: str) -> dict:
    """
    Tool: write a fresh section using retrieved context.
    """
    # Docstring intentionally unchanged: it is the description the agent sees for this tool.
    # Retrieve supporting snippets using topic and title together as the query.
    snippets = [hit["text"] for hit in faiss_search(f"{topic} {section_title}")]
    draft = section_writer(
        topic=topic,
        section_title=section_title,
        retrieved_context=snippets,
    ).draft
    # Keep the draft under its title so the edit/assemble tools can find it later.
    blog_state["sections"][section_title] = draft
    return {"tool": "write_section", "section_title": section_title, "draft": draft}


class _EditSectionSignature(dspy.Signature):
    """
    Revise an existing blog section according to an edit instruction, using
    context from prior company writing. Keep what already works and change
    only what the instruction requires.
    """
    topic: str = dspy.InputField(description="Blog topic")
    section_title: str = dspy.InputField(description="Which section to revise")
    current_draft: str = dspy.InputField(description="Existing section text to revise (may be empty)")
    instruction: str = dspy.InputField(description="What to change in the section")
    retrieved_context: list[str] = dspy.InputField(description="Relevant snippets from prior posts")
    draft: str = dspy.OutputField(description="Revised section draft")


# Dedicated predictor for edits. The section-writing signature has no field for an
# edit instruction or for the current draft, so reusing it would hide both from the model.
_section_editor = dspy.Predict(_EditSectionSignature)


def tool_edit_section(topic: str, section_title: str, instruction: str) -> dict:
    """
    Tool: light edit of an existing section. Applies `instruction` to the current
    draft of `section_title`, using retrieved context, and saves the revised draft.
    """
    # An unknown title yields an empty current draft; the model then writes the
    # section from the instruction and context, and the result is stored as usual.
    existing = blog_state["sections"].get(section_title, "")

    # The instruction is part of the retrieval query so the snippets match the requested change.
    ctx_hits = faiss_search(f"{topic} {section_title} {instruction}")
    ctx_texts = [h["text"] for h in ctx_hits]

    # Pass the draft and the instruction as their own inputs so the model actually
    # sees what to change, instead of mixing the draft in with retrieved snippets.
    pred = _section_editor(
        topic=topic,
        section_title=section_title,
        current_draft=existing,
        instruction=instruction,
        retrieved_context=ctx_texts,
    )
    blog_state["sections"][section_title] = pred.draft
    return {"tool": "edit_section", "section_title": section_title, "draft": pred.draft}

print("Tools ready: search_context, change_outline, write_section, edit_section")


Tools ready: search_context, change_outline, write_section, edit_section


In [None]:
# ReAct agent using DSPy (preferred)
# Tools must have clear docstrings and type hints
from typing import Any


def tool_assemble_blog() -> dict:
    """
    Assemble the final blog post from the current outline and written sections.
    Returns a dict with "final_blog" (the assembled text), "missing_sections"
    (outline titles that have no draft yet) and "unused_sections" (drafted
    titles that are not in the outline). If either list is non-empty, fix the
    outline or write the missing sections and assemble again.
    """
    outline = blog_state["outline"]
    sections = blog_state["sections"]

    parts = []
    missing = []
    for title in outline:
        body = sections.get(title, "")
        if not body:
            # Report it instead of silently shipping an empty heading.
            missing.append(title)
        parts.append(f"# {title}\n\n{body}")
    final = "\n\n".join(parts).strip()

    # Sections are looked up by exact title, so a draft stored under a title that
    # is not in the outline would otherwise be dropped without notice.
    unused = [title for title in sections if title not in outline]

    return {
        "tool": "assemble_blog",
        "final_blog": final,
        "missing_sections": missing,
        "unused_sections": unused,
    }


# Task signature handed to the ReAct agent: one input (`topic`) and two text outputs.
# The docstring and the `description` strings are prompt text; editing them changes agent behavior.
# NOTE(review): the output field is named `reasoning`; confirm this does not clash with
# any reasoning field the agent's own modules add internally.
class BlogAgentSignature(dspy.Signature):
    """
    You are a marketing blog writer. Given `topic`, create an outline, write each
    section using prior company writing as context (via vector search), optionally
    edit sections for continuity, and finish with a coherent draft.
    Use only the available tools. When finished, return the complete blog draft
    in `process_result`.
    """
    # Input: subject of the post
    topic: str = dspy.InputField(description="Blog topic to write about")
    # Output: the agent's plan and rationale
    reasoning: str = dspy.OutputField(description="High-level plan and justification of actions")
    # Output: the finished post as plain text
    process_result: str = dspy.OutputField(description="Final blog draft text")

# Tools the agent may call, gathered in one place before wiring up the agent
agent_tools = [
    tool_search_context,
    tool_change_outline,
    tool_write_section,
    tool_edit_section,
    tool_assemble_blog,
]

# ReAct agent that works toward BlogAgentSignature by calling the tools above
react_agent = dspy.ReAct(BlogAgentSignature, tools=agent_tools)

print("ReAct agent ready.")


ReAct agent ready.


In [None]:
# Demo: run the DSPy ReAct agent end-to-end on one topic.
# The agent decides which tools to call; its final text comes back in `process_result`.

# Start from an empty outline and no drafted sections so earlier runs do not leak in
blog_state.update(outline=[], sections={})

react_result = react_agent(topic="How to create a buyer-journey aligned content strategy")

print("Reasoning:\n", react_result.reasoning)

# Outline as left in shared state by the agent's tool calls
print("\nOutline:")
for position, heading in enumerate(blog_state["outline"], start=1):
    print(f"{position}. {heading}")

print("\nFinal Blog Draft:\n")
print(react_result.process_result)


TypeError: ReAct._call_with_potential_trajectory_truncation() got multiple values for argument 'trajectory'