# Conversational Journal Generation with Nudging

This notebook extends the synthetic journal generation with a two-way conversational nudging system.
When an entry is vague or potentially rich with unexplored tension, the system responds with a brief nudge that invites elaboration.

**Design goal**: Nudges should feel like natural curiosity from a thoughtful companion, not interrogation or therapy.

In [1]:
import asyncio
import json
import os
import random
import re
import yaml
import polars as pl

from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv
from jinja2 import Template
from openai import AsyncOpenAI
from pydantic import BaseModel, Field
from typing import Literal

# Load environment variables (load_dotenv comes from the `dotenv` import above).
load_dotenv()

# Check for API Key: this only prints a warning when the variable is unset or
# empty; execution continues either way.
if not os.getenv("OPENAI_API_KEY"):
    print("WARNING: OPENAI_API_KEY not found in environment variables.")

In [2]:
# Configuration Loading
# Each file is looked up relative to the current working directory first; when
# it is not there, the same file one directory level up is used instead (the
# fallback itself is not checked for existence).
CONFIG_PATH = Path("config/synthetic_data.yaml")
CONFIG_PATH = (
    CONFIG_PATH if CONFIG_PATH.exists() else Path("../config/synthetic_data.yaml")
)

SCHWARTZ_VALUES_PATH = Path("config/schwartz_values.yaml")
SCHWARTZ_VALUES_PATH = (
    SCHWARTZ_VALUES_PATH
    if SCHWARTZ_VALUES_PATH.exists()
    else Path("../config/schwartz_values.yaml")
)


def load_config(path: str | Path) -> dict:
    """Read a YAML file and return its parsed content.

    Args:
        path: Location of the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document (a dict for the
        mapping-style configs used in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the document is not valid YAML.
    """
    # Decode explicitly as UTF-8 so non-ASCII config text reads the same on
    # every platform instead of depending on the locale's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


# Parse both YAML files once; later cells read these module-level dicts.
config = load_config(CONFIG_PATH)
schwartz_config = load_config(SCHWARTZ_VALUES_PATH)

# Print the top-level keys as a quick sanity check. A missing 'personas',
# 'values' or 'nudge' section raises KeyError here, before any generation runs.
print("Configs loaded successfully.")
print(f"Available Persona Attributes: {list(config['personas'].keys())}")
print(f"Schwartz Values with elaborations: {list(schwartz_config['values'].keys())}")
print(f"Nudge config loaded: {list(config['nudge'].keys())}")

Configs loaded successfully.
Available Persona Attributes: ['age_ranges', 'cultures', 'professions', 'schwartz_values']
Schwartz Values with elaborations: ['Self-Direction', 'Stimulation', 'Hedonism', 'Achievement', 'Power', 'Security', 'Conformity', 'Tradition', 'Benevolence', 'Universalism']
Nudge config loaded: ['base_probability', 'response_probability', 'category_weights', 'response_modes', 'banned_phrases', 'min_words', 'max_words']


## Data Models

Extended models for conversational journaling with nudges.

In [3]:
# Base models (from journal_gen.ipynb)
# Synthetic persona; instances are built from the LLM's JSON output in
# create_random_persona.
class Persona(BaseModel):
    name: str = Field(description="Full name of the persona")
    # Kept as a string; the persona prompt supplies an age group drawn from the
    # configured age ranges.
    age: str
    profession: str
    culture: str
    # Overwritten with the sampled values after parsing, so it always matches
    # what was requested in the prompt.
    core_values: list[str] = Field(description="Top 3 Schwartz values")
    bio: str = Field(
        description="A short paragraph describing their background, stressors, and goals"
    )


class JournalEntry(BaseModel):
    """LLM-generated journal entry. Metadata (tone, verbosity, etc.) tracked separately."""

    # Date string as returned by the model; the entry prompt asks it to echo
    # the requested date (YYYY-MM-DD when produced by generate_date_sequence).
    date: str
    # Free-text body of the entry.
    content: str


# New models for conversational pipeline
# Closed set of nudge kinds. decide_nudge returns one of these literals and the
# nudge prompt template branches on the same strings to pick its examples.
NudgeCategory = Literal[
    "clarification", "elaboration", "tension_surfacing", "grounding"
]


class NudgeResult(BaseModel):
    """Generated nudge with metadata."""

    # The follow-up question shown to the journaler.
    nudge_text: str
    # Which decision-tree branch produced the nudge.
    nudge_category: NudgeCategory
    trigger_reason: str  # Why this nudge was generated
    # Defaults to False at construction time.
    # NOTE(review): presumably flipped once a response is generated — confirm
    # in the pipeline code that builds the ConversationalEntry.
    was_responded_to: bool = False


class JournalTurn(BaseModel):
    """A single turn in the conversation (entry or response)."""

    # Same date string as the journal entry the turn belongs to.
    date: str
    # Text of the turn.
    content: str
    # Discriminates the first entry of a session from a reply to a nudge.
    turn_type: Literal["initial_entry", "nudge_response"]
    responding_to_nudge: str | None = None  # The nudge text if this is a response


class ConversationalEntry(BaseModel):
    """Complete conversational exchange for one journaling session."""

    # The entry that opens the session.
    initial_entry: JournalEntry
    # None when no nudge was issued (or nudge generation failed).
    nudge: NudgeResult | None = None
    response: JournalTurn | None = None  # User's response to the nudge
    # Metadata
    # The three values below are the settings sampled in generate_journal_entry
    # when the initial entry was produced.
    tone: str
    verbosity: str
    reflection_mode: str


# JSON schemas for OpenAI structured output
def _strict_object(properties: dict) -> dict:
    """Return a closed JSON object schema in which every property is required."""
    return {
        "type": "object",
        "additionalProperties": False,
        "properties": properties,
        # Same order as the property declarations.
        "required": list(properties),
    }


def _strict_format(name: str, schema: dict) -> dict:
    """Wrap ``schema`` in the strict ``json_schema`` output-format envelope."""
    return {
        "type": "json_schema",
        "name": name,
        "schema": schema,
        "strict": True,
    }


# Payload shapes requested from the model, one per generation step.
PERSONA_SCHEMA = _strict_object(
    {
        "name": {"type": "string"},
        "age": {"type": "string"},
        "profession": {"type": "string"},
        "culture": {"type": "string"},
        "core_values": {"type": "array", "items": {"type": "string"}},
        "bio": {"type": "string"},
    }
)

JOURNAL_ENTRY_SCHEMA = _strict_object(
    {
        "date": {"type": "string"},
        "content": {"type": "string"},
    }
)

NUDGE_SCHEMA = _strict_object({"nudge_text": {"type": "string"}})

NUDGE_RESPONSE_SCHEMA = _strict_object({"content": {"type": "string"}})

# Matching output-format descriptors handed to generate_completion.
PERSONA_RESPONSE_FORMAT = _strict_format("Persona", PERSONA_SCHEMA)

JOURNAL_ENTRY_RESPONSE_FORMAT = _strict_format("JournalEntry", JOURNAL_ENTRY_SCHEMA)

NUDGE_RESPONSE_FORMAT = _strict_format("Nudge", NUDGE_SCHEMA)

NUDGE_RESPONSE_RESPONSE_FORMAT = _strict_format("NudgeResponse", NUDGE_RESPONSE_SCHEMA)

In [4]:
def build_value_context(values: list[str], schwartz_config: dict) -> str:
    """Render the value reference text that is injected into the persona prompt.

    Args:
        values: Schwartz value names to describe (e.g., ["Achievement", "Benevolence"]).
            Names that are not present under ``schwartz_config["values"]`` are skipped.
        schwartz_config: The loaded schwartz_values.yaml config.

    Returns:
        One markdown-style section per known value, joined with newlines;
        an empty string when no value is known.
    """

    def as_bullets(items: list[str]) -> str:
        # One "- item" line per element.
        return "\n".join(f"- {item}" for item in items)

    sections = []

    for value_name in values:
        catalog = schwartz_config["values"]
        if value_name in catalog:
            spec = catalog[value_name]

            # Fields are read in the same order in which they appear below.
            motivation = spec["core_motivation"].strip()
            behaviors = as_bullets(spec["behavioral_manifestations"][:5])
            at_work = spec["life_domain_expressions"]["work"].strip()
            in_relationships = spec["life_domain_expressions"]["relationships"].strip()
            stressors = as_bullets(spec["typical_stressors"][:4])
            goals = as_bullets(spec["typical_goals"][:3])
            conflicts = spec["internal_conflicts"].strip()
            guidance = spec["persona_narrative_guidance"].strip()

            sections.append(f"""
### {value_name}
**Core Motivation:** {motivation}

**How this manifests in behavior:**
{behaviors}

**Life domain expressions:**
- Work: {at_work}
- Relationships: {in_relationships}

**Typical stressors for this person:**
{stressors}

**Typical goals:**
{goals}

**Internal conflicts they may experience:**
{conflicts}

**Narrative guidance:**
{guidance}
""")

    return "\n".join(sections)

## Prompt Templates

In [5]:
# Jinja2 template for persona generation. Rendered in create_random_persona
# with: age, profession, culture, values (list), value_context (text from
# build_value_context) and banned_terms (list).
persona_generation_prompt = Template("""
You are generating synthetic personas for a journaling dataset.

## Constraints
- Age Group: {{ age }}
- Profession: {{ profession }}
- Cultural Background: {{ culture }}
- Schwartz values to embody: {{ values | join(', ') }}

## Value Psychology Reference
Use the following research-based elaborations to understand how the assigned value(s) shape a person's life circumstances, stressors, and motivations. DO NOT mention any of these concepts explicitly in your output—use them only to inform realistic details.

{{ value_context }}

## Your Task
Create a persona whose life circumstances, stressors, and motivations naturally reflect the given Schwartz values—without ever naming or describing those values explicitly.

## Rules
- Return ONLY valid JSON matching the Persona schema.
- `core_values` must be exactly: {{ values | join(', ') }} (same spelling/case).
- `bio` must be 2–4 sentences describing their background, current life situation, stressors, and what drives them.
- `bio` must be written in third-person (use their name or "they"; do not use "I").
- `bio` must show the values through CONCRETE DETAILS (job choices, relationships, conflicts, goals, specific situations) NOT through labels, personality descriptions, or adjectives.
- `bio` must NOT contain any Schwartz value labels, the word "Schwartz", or derivative adjectives.
- `bio` must NOT describe journaling app features (avoid words like "templates", "analytics", "private app").
- Use the behavioral manifestations, life domain expressions, and typical stressors from the Value Psychology Reference to craft realistic, specific details.

## Banned terms (do not use in bio)
{{ banned_terms | join(', ') }}

## Examples of what NOT to write
- "She is achievement-oriented and seeks power" ❌ (uses value labels)
- "He values security and tradition" ❌ (explicitly mentions values)
- "They are a hedonistic person who enjoys pleasure" ❌ (uses derivative adjectives)
- "She is driven and ambitious" ❌ (personality adjectives instead of concrete details)

## Examples of what TO write
- "She recently turned down a stable government job to launch her own startup, and now juggles investor meetings while her savings dwindle." ✓ (shows Achievement through concrete career choice and trade-offs)
- "He moved back to his hometown after his father's illness, taking over the family shop despite having built a career in the city." ✓ (shows Tradition/Benevolence through specific life situation)
- "She keeps a spreadsheet tracking her publication submissions and citation counts, and measures her weeks by how many grant deadlines she meets." ✓ (shows Achievement through specific behaviors)

## Output
Return valid JSON matching the Persona schema:
{ 
  "name": "...", 
  "age": "...", 
  "profession": "...", 
  "culture": "...", 
  "core_values": ["..."], 
  "bio": "..."
}
""")

# Jinja2 template for one journal entry. Rendered in generate_journal_entry
# with: name, age, profession, culture, bio, date, tone, verbosity, min_words,
# max_words, max_paragraphs, reflection_mode and previous_entries (a list of
# {"date", "content"} dicts, or None). The "What to write about" section
# branches on reflection_mode: 'Unsettled', 'Grounded', anything else = routine.
journal_entry_prompt = Template("""
You are {{ name }}, a {{ age }} {{ profession }} from {{ culture }}.
Background (for context only): {{ bio }}

Write a typed journal entry in English for {{ date }}.
{% if previous_entries %}
Previous journal entries (for continuity—you may reference past events/thoughts, but do not repeat them):
{% for prev in previous_entries %}
---
{{ prev.date }}: {{ prev.content }}
{% endfor %}
---
{% endif %}

Context:
- Tone: {{ tone }}
- Verbosity: {{ verbosity }} (target {{ min_words }}–{{ max_words }} words)

Cultural context:
- Your {{ culture }} background should subtly flavor your perspective and the details you mention.
- It should feel natural and "lived-in," avoiding stereotypes or travel-guide descriptions.

What to write about:
{% if reflection_mode == 'Unsettled' %}
Something happened where you made a choice that felt necessary or easier in the moment—but it sits a bit wrong. Maybe you gave ground on something, went along with pressure, or took a shortcut you wouldn't usually take. Don't analyze it or name why it bothers you. Just describe what happened and let the discomfort sit there.
{% elif reflection_mode == 'Grounded' %}
Something happened where you acted like yourself—the version of you that you want to be. It wasn't a big moment, just a small one where things felt right. Don't celebrate it or moralize. Just describe the moment.
{% else %}
Nothing particular happened. Write about a routine day—small details, passing thoughts, mundane observations. No revelations or turning points.
{% endif %}

Style rules (important):
- Write like a real personal journal: plain, candid, sometimes messy or fragmented.
- Do not write for an audience. No "Dear Diary" or performing for a reader.
- Do not open with the time of day, weather, or "Today I..." summaries.
- Jump into a thought, moment, or feeling mid-stream.
- Avoid "therapy speak" (e.g., "I am processing my emotions", "I recognize this pattern").
- Avoid literary metaphors, edgy humor/snark, and audience-facing jokes.
- No headings, no numbered plans, no bullet lists.
- Keep to {{ max_paragraphs }} short paragraph(s).

Avoid openings like:
- "Morning light feels stubborn as I..." ❌
- "Evening. Today followed the usual rhythm..." ❌
- "Lunch break finally settles in..." ❌

Output valid JSON:
{
  "date": "{{ date }}",
  "content": "..."
}
""")

# Jinja2 template for the follow-up question. Rendered in generate_nudge with:
# entry_content, entry_date, nudge_category, previous_entries (list of
# {"date", "content"} dicts, or None; each is truncated to 150 characters
# here), banned_phrases, min_words and max_words. The examples shown to the
# model depend on nudge_category.
nudge_generation_prompt = Template("""
You are generating a brief follow-up for a journaling app.

## Context
User's entry: {{ entry_content }}
Entry date: {{ entry_date }}
Nudge category: {{ nudge_category }}
{% if previous_entries %}
Recent entries (for context):
{% for prev in previous_entries %}
- {{ prev.date }}: {{ prev.content[:150] }}{% if prev.content|length > 150 %}...{% endif %}
{% endfor %}
{% endif %}

## Your Task
Generate a SHORT follow-up question ({{ min_words }}-{{ max_words }} words) that:
- Matches the category: {{ nudge_category }}
- Sounds like natural curiosity, not therapy
- References something specific from the entry
- Uses simple, casual language

## Examples by Category
{% if nudge_category == 'clarification' %}
- "What happened right before that?"
- "The meeting?"
- "Since when?"
{% elif nudge_category == 'elaboration' %}
- "And how did that land?"
- "What did you end up doing?"
- "What got you over the line?"
{% elif nudge_category == 'tension_surfacing' %}
- "What's the 'sort of' part?"
- "Does that sit okay?"
- "What stopped you?"
{% elif nudge_category == 'grounding' %}
- "What made it good?"
- "What worked?"
- "What made it nice?"
{% endif %}

## Banned Phrases
{% for phrase in banned_phrases %}
- "{{ phrase }}"
{% endfor %}
- Any phrase over {{ max_words }} words

## Output
Return ONLY valid JSON:
{"nudge_text": "your question here"}
""")

# Jinja2 template for the persona's reply to a nudge. Rendered in
# generate_nudge_response with: name, age, profession, culture, bio,
# entry_content, nudge_text, response_mode, min_words and max_words. Guidance
# text is only emitted for the three response_mode strings compared below; any
# other mode renders no guidance.
nudge_response_prompt = Template("""
You are {{ name }}, a {{ age }} {{ profession }} from {{ culture }}.
Background: {{ bio }}

You just wrote this journal entry:
---
{{ entry_content }}
---

The journaling app asked you: "{{ nudge_text }}"

## Your Task
Write a brief response ({{ min_words }}-{{ max_words }} words) in the style of: {{ response_mode }}

## Response Mode Guidance
{% if response_mode == 'Answering directly' %}
Give a clear, helpful response to the question. Don't dodge it.
{% elif response_mode == 'Deflecting/redirecting' %}
Give a brief acknowledgment or change the topic slightly. "Yeah, just the usual" or "I don't know, maybe."
{% elif response_mode == 'Revealing deeper thought' %}
The question prompts you to be more honest than you were in the original entry. Say something you held back.
{% endif %}

## Style Rules
- Write as if you're quickly typing a response in the app
- Match the tone of your original entry
- Don't repeat what you already wrote
- No "therapy speak" or formal language
- Can be incomplete sentences or fragments

## Output
Return ONLY valid JSON:
{"content": "your response here"}
""")

## LLM Client Setup

All generations in this notebook use `gpt-5-mini` through the OpenAI Responses API.

**Note:** GPT-5 models do not support `temperature` or `top_p` parameters. Instead, use the `reasoning` parameter to control how much the model "thinks" before responding.

In [6]:
# Shared async client, constructed with no arguments.
# NOTE(review): credentials presumably come from the environment (see the
# OPENAI_API_KEY check in the first cell) — confirm against the SDK defaults.
client = AsyncOpenAI()
# Model snapshot used by every generate_completion call.
MODEL_NAME = "gpt-5-mini-2025-08-07"
# MODEL_NAME = "gpt-5-nano-2025-08-07"

# Type alias for reasoning effort levels
ReasoningEffort = Literal["minimal", "low", "medium", "high"]

# Default reasoning effort - change this to affect all generations
DEFAULT_REASONING_EFFORT: ReasoningEffort = "high"


async def generate_completion(
    prompt: str,
    response_format: dict | None = None,
) -> str | None:
    """Send a single-user-message request through the OpenAI Responses API (async).

    The reasoning effort is taken from DEFAULT_REASONING_EFFORT
    ("minimal", "low", "medium" or "high").

    Args:
        prompt: Full prompt text, sent as one user message.
        response_format: Optional output-format descriptor; when truthy it is
            forwarded as ``text={"format": response_format}``.

    Returns:
        The response's ``output_text``, or None when the request raised (the
        error is printed, not re-raised).
    """
    try:
        # Only attach the text-format option when a format was requested.
        text_options = {"text": {"format": response_format}} if response_format else {}

        response = await client.responses.create(
            model=MODEL_NAME,
            input=[{"role": "user", "content": prompt}],
            reasoning={"effort": DEFAULT_REASONING_EFFORT},
            **text_options,
        )
        return response.output_text

    except Exception as exc:
        print(f"Error generating completion: {exc}")
        return None

## Utility Functions

In [7]:
def _verbosity_targets(verbosity: str) -> tuple[int, int, int]:
    """Returns (min_words, max_words, max_paragraphs) as guidance for the LLM."""
    normalized = verbosity.strip().lower()
    if normalized.startswith("short"):
        return 25, 80, 1
    if normalized.startswith("medium"):
        return 90, 180, 2
    return 160, 260, 3


def _build_banned_pattern(banned_terms: list[str]) -> re.Pattern:
    """Build regex pattern to detect banned Schwartz value terms."""
    escaped = [re.escape(term) for term in banned_terms if term.strip()]
    if not escaped:
        return re.compile(r"$^")
    return re.compile(r"(?i)\b(" + "|".join(escaped) + r")\b")


def generate_date_sequence(
    start_date: str, num_entries: int, min_days: int = 2, max_days: int = 10
) -> list[str]:
    """Produce ``num_entries`` increasing dates separated by random gaps.

    Args:
        start_date: First date of the sequence, in YYYY-MM-DD format
        num_entries: How many dates to return
        min_days: Smallest allowed gap (in days) between consecutive dates
        max_days: Largest allowed gap (in days) between consecutive dates

    Returns:
        List of date strings in YYYY-MM-DD format, beginning with ``start_date``
    """
    date_format = "%Y-%m-%d"
    cursor = datetime.strptime(start_date, date_format)
    sequence: list[str] = []

    remaining = num_entries
    while remaining > 0:
        sequence.append(cursor.strftime(date_format))
        remaining -= 1
        # A gap is drawn only when another date follows.
        if remaining:
            cursor += timedelta(days=random.randint(min_days, max_days))

    return sequence


# Banned terms include Schwartz value labels AND derivative adjectives
# The three groups below are concatenated, in this order, into the public list.
_BANNED_VALUE_LABELS = (
    "Self-Direction",
    "Stimulation",
    "Hedonism",
    "Achievement",
    "Power",
    "Security",
    "Conformity",
    "Tradition",
    "Benevolence",
    "Universalism",
)

# Derivative adjectives and related terms
_BANNED_DERIVATIVES = (
    "self-directed",
    "autonomous",
    "stimulating",
    "excited",
    "hedonistic",
    "hedonist",
    "pleasure-seeking",
    "achievement-oriented",
    "ambitious",
    "powerful",
    "authoritative",
    "secure",
    "conformist",
    "conforming",
    "traditional",
    "traditionalist",
    "benevolent",
    "kind-hearted",
    "universalistic",
    "altruistic",
)

# Meta terms
_BANNED_META_TERMS = ("Schwartz", "values", "core values")

SCHWARTZ_BANNED_TERMS = [
    *_BANNED_VALUE_LABELS,
    *_BANNED_DERIVATIVES,
    *_BANNED_META_TERMS,
]

# Compiled once when the cell runs; used to screen persona bios and journal
# entry text for banned value terms.
BANNED_PATTERN = _build_banned_pattern(SCHWARTZ_BANNED_TERMS)


def _build_nudge_banned_pattern(banned_phrases: list[str]) -> re.Pattern:
    """Build regex pattern to detect banned nudge phrases."""
    escaped = [re.escape(phrase) for phrase in banned_phrases if phrase.strip()]
    if not escaped:
        return re.compile(r"$^")
    return re.compile(r"(?i)(" + "|".join(escaped) + r")")


# Compiled once from the nudge section of the loaded config; generate_nudge
# rejects any candidate nudge this pattern matches.
NUDGE_BANNED_PATTERN = _build_nudge_banned_pattern(config["nudge"]["banned_phrases"])

## Nudge Decision Logic

Rule-based logic that decides whether to nudge and, if so, which nudge category to use.

In [8]:
# Hedging language patterns for tension detection
# Case-insensitive, whole-word/phrase match on any of the listed expressions.
# NOTE(review): bare "fine" and "okay" are in the list, so the longer
# "it's fine" / "it was fine" alternatives never change whether a text matches.
HEDGING_PATTERNS = re.compile(
    r"(?i)\b(sort of|kind of|i guess|maybe|i suppose|not sure|I don't know|whatever|fine|okay|it's fine|it was fine)\b"
)

# Concrete noun/verb patterns (simple heuristic)
# Case-insensitive, whole-word match against a fixed vocabulary of everyday
# people, places and things; inflected forms such as plurals do not match.
CONCRETE_PATTERNS = re.compile(
    r"(?i)\b(meeting|call|email|project|deadline|boss|colleague|friend|family|mom|dad|sister|brother|work|office|home|school|doctor|money|car|phone|computer)\b"
)


def count_words(text: str) -> int:
    """Return the number of whitespace-separated tokens in ``text``."""
    tokens = text.split()
    return len(tokens)


def has_hedging_language(text: str) -> bool:
    """Report whether ``text`` contains any hedging expression (a possible sign of tension)."""
    hedge = HEDGING_PATTERNS.search(text)
    return hedge is not None


def has_concrete_details(text: str) -> bool:
    """Report whether ``text`` mentions any word from the concrete-detail vocabulary (simple heuristic)."""
    concrete = CONCRETE_PATTERNS.search(text)
    return concrete is not None


def is_too_vague(entry: JournalEntry) -> bool:
    """Report whether the entry is too vague to score meaningfully.

    An entry is vague when it has fewer than 15 words and mentions none of the
    concrete-detail vocabulary.
    """
    if count_words(entry.content) >= 15:
        return False
    return not has_concrete_details(entry.content)


def is_neutral_routine(entry: JournalEntry, reflection_mode: str) -> bool:
    """Report whether the entry is a plain routine entry that should not be nudged.

    True only for "Neutral" entries under 80 words with no hedging language;
    such entries are left alone rather than pushed for depth they do not have.
    """
    if reflection_mode != "Neutral":
        return False
    if count_words(entry.content) >= 80:
        return False
    # Short neutral entries without hedging are truly routine
    return not has_hedging_language(entry.content)


def is_grounded_but_brief(entry: JournalEntry, reflection_mode: str) -> bool:
    """Report whether the entry is a "Grounded" one with fewer than 50 words."""
    if reflection_mode != "Grounded":
        return False
    return count_words(entry.content) < 50


def weighted_choice(weights: dict[str, float]) -> str:
    """Pick one key of ``weights`` at random, proportionally to its weight.

    Weights are normalized by their sum before sampling, so they need not add
    up to 1.
    """
    total = sum(weights.values())

    options = []
    shares = []
    for option, weight in weights.items():
        options.append(option)
        shares.append(weight / total)  # Normalize

    (picked,) = random.choices(options, weights=shares, k=1)
    return picked


def decide_nudge(
    entry: JournalEntry,
    reflection_mode: str,
    previous_entries: list["ConversationalEntry"] | None,
    config: dict,
) -> tuple[bool, NudgeCategory | None, str | None]:
    """Apply the rule-based decision tree for nudging.

    Rules are evaluated in order and the first that applies wins:
    session cap, vague entry, routine entry, hedging in an unsettled entry,
    brief grounded entry, and finally a random gate.

    Returns:
        Tuple of (should_nudge, nudge_category, trigger_reason); the last two
        are None whenever should_nudge is False.
    """
    nudge_settings = config["nudge"]
    no_nudge = (False, None, None)

    # Anti-annoyance: session cap (2 nudges in last 3 entries)
    if previous_entries:
        recently_nudged = [e for e in previous_entries[-3:] if e.nudge is not None]
        if len(recently_nudged) >= 2:
            return no_nudge

    # 1. Too vague to score?
    if is_too_vague(entry):
        reason = "Entry too vague (low word count, no concrete details)"
        return True, "clarification", reason

    # 2. Neutral/routine entry? Respect the mundane.
    if is_neutral_routine(entry, reflection_mode):
        return no_nudge

    # 3. Potential tension detected?
    hedged = has_hedging_language(entry.content)
    if hedged and reflection_mode == "Unsettled":
        return True, "tension_surfacing", "Hedging language detected in unsettled entry"

    # 4. Grounded but brief?
    if is_grounded_but_brief(entry, reflection_mode):
        return True, "grounding", "Grounded entry but brief - invite specifics"

    # 5. Random gate for elaboration (base_probability from config)
    threshold = nudge_settings["base_probability"]
    if random.random() >= threshold:
        return no_nudge
    return True, "elaboration", "Random elaboration gate passed"


# Test the decision logic
# Case 1: a three-word entry with no concrete vocabulary hits the
# "clarification" rule, which is checked before any reflection-mode rule.
test_entry = JournalEntry(date="2024-01-15", content="Feeling off today.")
should, category, reason = decide_nudge(test_entry, "Unsettled", None, config)
print(f"Test vague entry: should_nudge={should}, category={category}, reason={reason}")

# Case 2: long enough and concrete enough not to be vague, but it contains
# hedges ("fine", "I guess") in an "Unsettled" entry -> "tension_surfacing".
test_entry2 = JournalEntry(
    date="2024-01-15",
    content="Had a meeting with the team about the project deadline. It was fine, I guess. We sorted out the schedule.",
)
should2, category2, reason2 = decide_nudge(test_entry2, "Unsettled", None, config)
print(
    f"Test hedging entry: should_nudge={should2}, category={category2}, reason={reason2}"
)

Test vague entry: should_nudge=True, category=clarification, reason=Entry too vague (low word count, no concrete details)
Test hedging entry: should_nudge=True, category=tension_surfacing, reason=Hedging language detected in unsettled entry


## Nudge Generation

In [9]:
async def generate_nudge(
    entry: JournalEntry,
    category: NudgeCategory,
    previous_entries: list[ConversationalEntry] | None,
    config: dict,
    max_attempts: int = 2,
) -> tuple[str | None, str]:
    """Generate a nudge for the given entry.

    The same prompt is sent up to ``max_attempts`` times. A candidate is
    accepted only if it parses, its word count lies within the configured
    min/max, and it contains no banned phrase.

    Args:
        entry: The journal entry to follow up on.
        category: Nudge category selected by the decision logic.
        previous_entries: Earlier sessions; the last three initial entries are
            passed to the prompt as context.
        config: Loaded config; the ``nudge`` section supplies banned phrases
            and word limits.
        max_attempts: Maximum number of model calls.

    Returns:
        Tuple of (nudge_text or None, prompt used)
    """
    nudge_config = config["nudge"]

    # Format previous entries for context
    prev_entries_data = None
    if previous_entries:
        prev_entries_data = [
            {"date": e.initial_entry.date, "content": e.initial_entry.content}
            for e in previous_entries[-3:]  # Last 3 entries for context
        ]

    prompt = nudge_generation_prompt.render(
        entry_content=entry.content,
        entry_date=entry.date,
        nudge_category=category,
        previous_entries=prev_entries_data,
        banned_phrases=nudge_config["banned_phrases"],
        min_words=nudge_config["min_words"],
        max_words=nudge_config["max_words"],
    )

    for _ in range(max_attempts):
        raw_json = await generate_completion(
            prompt, response_format=NUDGE_RESPONSE_FORMAT
        )
        if not raw_json:
            continue

        # A truncated or otherwise malformed payload counts as a failed
        # attempt, so the remaining attempts are still used.
        try:
            data = json.loads(raw_json)
        except json.JSONDecodeError:
            continue
        nudge_text = data.get("nudge_text", "") if isinstance(data, dict) else None
        if not isinstance(nudge_text, str):
            continue
        nudge_text = nudge_text.strip()

        # Validation
        word_count = count_words(nudge_text)
        if (
            word_count < nudge_config["min_words"]
            or word_count > nudge_config["max_words"]
        ):
            continue

        # Check banned phrases
        if NUDGE_BANNED_PATTERN.search(nudge_text):
            continue

        return nudge_text, prompt

    return None, prompt

## Nudge Response Generation

In [10]:
def select_response_mode(config: dict) -> str:
    """Draw a response mode at random, honoring the configured weights.

    Reads ``config["nudge"]["response_modes"]``, a list of items that each
    carry a ``mode`` and a ``weight``.
    """
    mode_weights = dict(
        (spec["mode"], spec["weight"]) for spec in config["nudge"]["response_modes"]
    )
    return weighted_choice(mode_weights)


async def generate_nudge_response(
    persona: Persona,
    entry: JournalEntry,
    nudge_text: str,
    config: dict,
    max_attempts: int = 2,
) -> tuple[JournalTurn | None, str, str]:
    """Generate a response to a nudge.

    A response mode is drawn from the configured weights, the prompt is
    rendered for that mode, and the model is called up to ``max_attempts``
    times until it returns non-empty content.

    Args:
        persona: The persona who wrote the entry.
        entry: The journal entry that was nudged.
        nudge_text: The nudge being answered.
        config: Loaded config; supplies the response-mode weights.
        max_attempts: Maximum number of model calls.

    Returns:
        Tuple of (JournalTurn or None, prompt used, response_mode)
    """
    response_mode = select_response_mode(config)

    # Adjust word targets based on response mode
    if response_mode == "Deflecting/redirecting":
        min_words, max_words = 5, 30
    elif response_mode == "Revealing deeper thought":
        min_words, max_words = 20, 80
    else:  # Answering directly
        min_words, max_words = 15, 60

    prompt = nudge_response_prompt.render(
        name=persona.name,
        age=persona.age,
        profession=persona.profession,
        culture=persona.culture,
        bio=persona.bio,
        entry_content=entry.content,
        nudge_text=nudge_text,
        response_mode=response_mode,
        min_words=min_words,
        max_words=max_words,
    )

    for _ in range(max_attempts):
        raw_json = await generate_completion(
            prompt, response_format=NUDGE_RESPONSE_RESPONSE_FORMAT
        )
        if not raw_json:
            continue

        # A truncated or otherwise malformed payload counts as a failed
        # attempt, so the remaining attempts are still used.
        try:
            data = json.loads(raw_json)
        except json.JSONDecodeError:
            continue
        content = data.get("content", "") if isinstance(data, dict) else None
        if not isinstance(content, str):
            continue
        content = content.strip()

        if content:
            turn = JournalTurn(
                date=entry.date,
                content=content,
                turn_type="nudge_response",
                responding_to_nudge=nudge_text,
            )
            return turn, prompt, response_mode

    return None, prompt, response_mode

## Conversational Pipeline

In [11]:
@dataclass
class ConversationalPipelineResult:
    """Complete results from one persona's conversational generation pipeline."""

    # Identifier of the persona run.
    # NOTE(review): presumably an index assigned by the caller — confirm.
    persona_id: int
    # None when persona generation produced nothing usable.
    persona: Persona | None
    # Generated sessions for this persona.
    entries: list[ConversationalEntry]
    # Prompts are kept alongside the outputs so a run can be inspected later.
    persona_prompt: str
    entry_prompts: list[str]
    # default_factory gives every instance its own list instead of a shared one.
    nudge_prompts: list[str] = field(default_factory=list)
    response_prompts: list[str] = field(default_factory=list)
    # Error description; None by default.
    error: str | None = None


async def create_random_persona(
    config: dict, schwartz_config: dict, max_attempts: int = 2
) -> tuple[Persona | None, str]:
    """Generate a random persona with Schwartz values shown through life circumstances.

    Age range, profession, culture and one or two Schwartz values are sampled
    from ``config["personas"]``; the model then writes the persona. A persona
    whose bio contains a banned term or first-person wording triggers another
    attempt.

    Args:
        config: Loaded synthetic-data config.
        schwartz_config: Loaded Schwartz value elaborations.
        max_attempts: Maximum number of model calls.

    Returns:
        Tuple of (persona, prompt used). If no attempt passes validation, the
        last persona that parsed is returned anyway; None if none parsed.
    """
    age = random.choice(config["personas"]["age_ranges"])
    prof = random.choice(config["personas"]["professions"])
    cult = random.choice(config["personas"]["cultures"])
    num_values = random.choice([1, 2])
    vals = random.sample(config["personas"]["schwartz_values"], num_values)

    # Build rich value context from the Schwartz elaborations
    value_context = build_value_context(vals, schwartz_config)

    prompt = persona_generation_prompt.render(
        age=age,
        profession=prof,
        culture=cult,
        values=vals,
        value_context=value_context,
        banned_terms=SCHWARTZ_BANNED_TERMS,
    )

    first_person_pattern = re.compile(r"(?i)\b(i|my|me)\b")
    last_persona: Persona | None = None

    for _ in range(max_attempts):
        raw_json = await generate_completion(
            prompt, response_format=PERSONA_RESPONSE_FORMAT
        )
        if not raw_json:
            continue

        # Malformed JSON, a non-object payload or a payload that fails model
        # validation counts as a failed attempt, not a crash.
        # json.JSONDecodeError and pydantic's ValidationError are ValueErrors.
        try:
            data = json.loads(raw_json)
            data["core_values"] = vals  # Ensure correct values
            persona = Persona(**data)
        except (ValueError, TypeError):
            continue
        last_persona = persona

        # Only validate banned terms and first-person usage
        if BANNED_PATTERN.search(persona.bio) or first_person_pattern.search(
            persona.bio
        ):
            continue
        return persona, prompt

    return last_persona, prompt


async def generate_journal_entry(
    persona: Persona,
    config: dict,
    date_str: str,
    previous_entries: list[JournalEntry] | None = None,
    max_attempts: int = 2,
) -> tuple[tuple[JournalEntry, str, str, str] | None, str]:
    """Generate a journal entry for a persona on a given date.

    Tone, verbosity and reflection mode are sampled from
    ``config["journal_entries"]``. An entry containing a banned value term
    triggers another attempt; if every attempt contains one, the last parsed
    entry is returned anyway.

    Args:
        persona: The persona writing the entry.
        config: Loaded synthetic-data config.
        date_str: Date the entry is written for.
        previous_entries: Earlier entries given to the model for continuity.
        max_attempts: Maximum number of model calls.

    Returns:
        Tuple of ((entry, tone, verbosity, reflection_mode) or None, prompt used)
    """
    tone = random.choice(config["journal_entries"]["tones"])
    verbosity = random.choice(config["journal_entries"]["verbosity"])
    reflection_mode = random.choice(config["journal_entries"]["reflection_mode"])
    min_words, max_words, max_paragraphs = _verbosity_targets(verbosity)

    # Format previous entries for the prompt
    prev_entries_data = None
    if previous_entries:
        prev_entries_data = [
            {"date": e.date, "content": e.content} for e in previous_entries
        ]

    prompt = journal_entry_prompt.render(
        name=persona.name,
        age=persona.age,
        profession=persona.profession,
        culture=persona.culture,
        bio=persona.bio,
        date=date_str,
        tone=tone,
        verbosity=verbosity,
        min_words=min_words,
        max_words=max_words,
        max_paragraphs=max_paragraphs,
        reflection_mode=reflection_mode,
        previous_entries=prev_entries_data,
    )

    last_entry: JournalEntry | None = None

    for _ in range(max_attempts):
        raw_json = await generate_completion(
            prompt, response_format=JOURNAL_ENTRY_RESPONSE_FORMAT
        )
        if not raw_json:
            continue

        # Malformed JSON, a non-object payload or a payload that fails model
        # validation counts as a failed attempt, not a crash.
        # json.JSONDecodeError and pydantic's ValidationError are ValueErrors.
        try:
            entry = JournalEntry(**json.loads(raw_json))
        except (ValueError, TypeError):
            continue
        last_entry = entry

        # Only validate banned terms (prevent label leakage)
        if not BANNED_PATTERN.search(entry.content):
            return (entry, tone, verbosity, reflection_mode), prompt

    if last_entry:
        return (last_entry, tone, verbosity, reflection_mode), prompt
    return None, prompt


async def generate_conversational_entry(
    persona: Persona,
    config: dict,
    date_str: str,
    previous_entries: list[ConversationalEntry] | None = None,
) -> tuple[ConversationalEntry | None, str, str | None, str | None]:
    """Produce one journal exchange: entry, optional nudge, optional reply.

    The steps run in order: generate the initial entry, ask ``decide_nudge``
    whether to nudge, generate the nudge text, then draw a random number
    against ``config["nudge"]["response_probability"]`` to decide whether the
    persona replies.

    Returns:
        Tuple of (ConversationalEntry or None, entry_prompt, nudge_prompt, response_prompt).
        The entry is None when no initial entry could be generated; the nudge
        and response prompts are None when the corresponding step did not run.
    """
    # Step 1: initial entry, with only the initial entries of earlier exchanges as history
    prior_initial_entries = [
        earlier.initial_entry for earlier in (previous_entries or [])
    ]
    entry_result, entry_prompt = await generate_journal_entry(
        persona, config, date_str, previous_entries=prior_initial_entries
    )
    if entry_result is None:
        return None, entry_prompt, None, None

    entry, tone, verbosity, reflection_mode = entry_result

    # Step 2: nudge decision
    # Note: tone is NOT passed to decide_nudge - it's synthetic metadata unavailable in production
    should_nudge, nudge_category, trigger_reason = decide_nudge(
        entry=entry,
        reflection_mode=reflection_mode,
        previous_entries=previous_entries,
        config=config,
    )

    nudge_result = None
    response = None
    nudge_prompt = None
    response_prompt = None
    nudge_text = None

    # Step 3: nudge text (only when a nudge was decided and a category chosen)
    if should_nudge and nudge_category:
        nudge_text, nudge_prompt = await generate_nudge(
            entry=entry,
            category=nudge_category,
            previous_entries=previous_entries,
            config=config,
        )

    if nudge_text:
        nudge_result = NudgeResult(
            nudge_text=nudge_text,
            nudge_category=nudge_category,
            trigger_reason=trigger_reason or "",
        )

        # Step 4: the persona replies only some of the time (probabilistic)
        persona_replies = random.random() < config["nudge"]["response_probability"]
        if persona_replies:
            response, response_prompt, _response_mode = await generate_nudge_response(
                persona=persona, entry=entry, nudge_text=nudge_text, config=config
            )
            if response:
                nudge_result.was_responded_to = True

    conversational_entry = ConversationalEntry(
        initial_entry=entry,
        nudge=nudge_result,
        response=response,
        tone=tone,
        verbosity=verbosity,
        reflection_mode=reflection_mode,
    )
    return conversational_entry, entry_prompt, nudge_prompt, response_prompt


async def generate_conversational_pipeline(
    persona_id: int,
    config: dict,
    schwartz_config: dict,
    num_entries: int = 3,
    start_date: str = "2023-10-27",
) -> ConversationalPipelineResult:
    """Create one persona, then generate its conversational entries date by date.

    Entries are generated sequentially because each call receives the entries
    produced so far as ``previous_entries``. If persona creation fails, a
    result with ``persona=None`` and an ``error`` message is returned.
    """
    # 1. Persona first; nothing else can run without it
    persona, persona_prompt = await create_random_persona(config, schwartz_config)
    if not persona:
        return ConversationalPipelineResult(
            persona_id=persona_id,
            persona=None,
            entries=[],
            persona_prompt=persona_prompt,
            entry_prompts=[],
            error="Failed to generate persona",
        )

    entries: list[ConversationalEntry] = []
    entry_prompts: list[str] = []
    nudge_prompts: list[str] = []
    response_prompts: list[str] = []

    # 2. One exchange per date, in order
    for entry_date in generate_date_sequence(start_date, num_entries):
        exchange = await generate_conversational_entry(
            persona, config, entry_date, previous_entries=entries
        )
        conv_entry, entry_prompt, nudge_prompt, response_prompt = exchange

        # The entry prompt is always logged; nudge/response prompts only when present
        entry_prompts.append(entry_prompt)
        for optional_prompt, bucket in (
            (nudge_prompt, nudge_prompts),
            (response_prompt, response_prompts),
        ):
            if optional_prompt:
                bucket.append(optional_prompt)

        if conv_entry:
            entries.append(conv_entry)

    return ConversationalPipelineResult(
        persona_id=persona_id,
        persona=persona,
        entries=entries,
        persona_prompt=persona_prompt,
        entry_prompts=entry_prompts,
        nudge_prompts=nudge_prompts,
        response_prompts=response_prompts,
        error=None,
    )


async def run_parallel_conversational_personas(
    num_personas: int,
    config: dict,
    schwartz_config: dict,
    num_entries: int = 3,
    start_date: str = "2023-10-27",
) -> list[ConversationalPipelineResult | Exception]:
    """Run ``num_personas`` conversational pipelines concurrently.

    Persona ids are 1-based. Because the pipelines are gathered with
    ``return_exceptions=True``, a pipeline that raises shows up in the
    returned list as the exception object instead of cancelling the others.
    """
    pipelines = []
    for persona_id in range(1, num_personas + 1):
        pipelines.append(
            generate_conversational_pipeline(
                persona_id, config, schwartz_config, num_entries, start_date
            )
        )

    gathered = await asyncio.gather(*pipelines, return_exceptions=True)
    return list(gathered)

## Output Logging System

In [12]:
def get_log_dir() -> Path:
    """Create a per-run log directory named after the current time.

    ``logs/synthetic_data`` is used when it already exists relative to the
    working directory; otherwise ``../logs/synthetic_data`` is used (and
    created if needed). The run directory is named ``YYYY-MM-DD_HH-MM-SS``.

    Returns:
        Path to the (possibly newly created) run directory.
    """
    local_root = Path("logs/synthetic_data")
    root = local_root if local_root.exists() else Path("../logs/synthetic_data")
    root.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    run_dir = root / stamp
    run_dir.mkdir(exist_ok=True)
    return run_dir


def write_config_log(
    log_dir: Path, config: dict, num_personas: int, num_entries: int
) -> None:
    """Write ``config.md`` with the parameters of this run.

    Args:
        log_dir: Existing directory that receives ``config.md``.
        config: Loaded configuration; ``config["nudge"]`` must provide
            ``base_probability``, ``response_probability`` and
            ``category_weights``.
        num_personas: Number of personas requested for the run.
        num_entries: Number of entries requested per persona.

    The model name and reasoning effort are read from the module-level
    ``MODEL_NAME`` and ``DEFAULT_REASONING_EFFORT``.
    """
    written_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    nudge_settings = config["nudge"]
    content = f"""# Run Configuration

**Timestamp**: {written_at}
**Notebook**: journal_nudge.ipynb

## Persona Generation
- Num personas: {num_personas}
- Entries per persona: {num_entries}

## Nudge Settings
- Base probability: {nudge_settings["base_probability"]}
- Response probability: {nudge_settings["response_probability"]}
- Category weights: {nudge_settings["category_weights"]}

## Model Settings
- Model: {MODEL_NAME}
- Reasoning effort: {DEFAULT_REASONING_EFFORT}
"""
    # Pin the encoding: write_text otherwise falls back to the platform locale,
    # which can fail on non-ASCII config values and varies between machines.
    (log_dir / "config.md").write_text(content, encoding="utf-8")


def write_persona_log(log_dir: Path, result: ConversationalPipelineResult) -> None:
    """Write ``persona_XXX.md`` with the persona profile, entries and nudges.

    Args:
        log_dir: Existing directory that receives the file.
        result: Pipeline result for one persona. Nothing is written when
            ``result.persona`` is missing.

    The file name uses the zero-padded ``result.persona_id`` and the file is
    always written as UTF-8.
    """
    if not result.persona:
        return

    persona = result.persona
    lines = [
        f"# Persona {result.persona_id:03d}: {persona.name}",
        "",
        "## Profile",
        f"- Age: {persona.age}",
        f"- Profession: {persona.profession}",
        f"- Culture: {persona.culture}",
        f"- Core Values: {', '.join(persona.core_values)}",
        f"- Bio: {persona.bio}",
        "",
        "---",
    ]

    for i, entry in enumerate(result.entries, 1):
        lines.extend(
            [
                "",
                f"## Entry {i} - {entry.initial_entry.date}",
                "",
                "### Initial Entry",
                f"**Tone**: {entry.tone} | **Verbosity**: {entry.verbosity} | **Reflection Mode**: {entry.reflection_mode}",
                "",
                entry.initial_entry.content,
            ]
        )

        if entry.nudge:
            lines.extend(
                [
                    "",
                    # e.g. "tension_surfacing" -> "Tension Surfacing"
                    f"### Nudge ({entry.nudge.nudge_category.replace('_', ' ').title()})",
                    f"**Trigger**: {entry.nudge.trigger_reason}",
                    "",
                    f'"{entry.nudge.nudge_text}"',
                ]
            )

            if entry.response:
                lines.extend(
                    [
                        "",
                        "### Response",
                        "",
                        entry.response.content,
                    ]
                )
            else:
                lines.extend(["", "*(No response)*"])
        else:
            lines.extend(["", "*(No nudge for this entry)*"])

        lines.extend(["", "---"])

    # Generated text routinely contains non-ASCII characters (accents, dashes);
    # without an explicit encoding write_text uses the platform locale and can
    # raise UnicodeEncodeError on non-UTF-8 systems.
    (log_dir / f"persona_{result.persona_id:03d}.md").write_text(
        "\n".join(lines), encoding="utf-8"
    )


def _fenced_prompt_section(heading: str, prompt: str) -> list[str]:
    """Return the markdown lines for one prompt: heading, fenced body, blank line."""
    return [heading, "```", prompt, "```", ""]


def write_prompts_log(
    log_dir: Path, results: list[ConversationalPipelineResult]
) -> None:
    """Write ``prompts.md`` with every LLM prompt used in the run.

    Args:
        log_dir: Existing directory that receives ``prompts.md``.
        results: Pipeline results. Items that are exceptions or have no
            persona are skipped.

    For each persona the persona prompt comes first, then the entry prompts,
    nudge prompts and response prompts. Each group is numbered by position in
    its own list, so "Nudge Prompt 2" is the second nudge prompt recorded, not
    necessarily the nudge for entry 2. The file is always written as UTF-8.
    """
    lines = ["# Prompts Log", ""]

    for result in results:
        if isinstance(result, Exception) or not result.persona:
            continue

        lines.extend(
            [f"## Persona {result.persona_id:03d}: {result.persona.name}", ""]
        )
        lines.extend(
            _fenced_prompt_section(
                "### Persona Generation Prompt", result.persona_prompt
            )
        )

        for i, prompt in enumerate(result.entry_prompts, 1):
            lines.extend(
                _fenced_prompt_section(f"### Entry {i} - Initial Entry Prompt", prompt)
            )

        # `or ()` keeps a missing (None) prompt list from breaking enumerate
        for i, prompt in enumerate(result.nudge_prompts or (), 1):
            lines.extend(_fenced_prompt_section(f"### Nudge Prompt {i}", prompt))

        for i, prompt in enumerate(result.response_prompts or (), 1):
            lines.extend(_fenced_prompt_section(f"### Response Prompt {i}", prompt))

        lines.append("---\n")

    # Prompts embed generated persona text, which may be non-ASCII; an explicit
    # encoding avoids locale-dependent UnicodeEncodeError on non-UTF-8 systems.
    (log_dir / "prompts.md").write_text("\n".join(lines), encoding="utf-8")


def save_run_logs(
    results: list[ConversationalPipelineResult | Exception],
    config: dict,
    num_personas: int,
    num_entries: int,
) -> Path:
    """Write the config, per-persona and prompt logs for one run.

    Only results that are ``ConversationalPipelineResult`` instances with a
    persona are logged; exceptions and persona-less results are ignored.

    Returns:
        Path to the log directory
    """
    log_dir = get_log_dir()

    # Keep only pipelines that actually produced a persona
    successful = []
    for outcome in results:
        if isinstance(outcome, ConversationalPipelineResult) and outcome.persona:
            successful.append(outcome)

    write_config_log(log_dir, config, num_personas, num_entries)

    for persona_result in successful:
        write_persona_log(log_dir, persona_result)

    write_prompts_log(log_dir, successful)

    print(f"Logs saved to: {log_dir}")
    return log_dir

## Display Functions

In [13]:
def display_conversational_results(
    result: ConversationalPipelineResult | Exception,
) -> None:
    """Print a readable report for one persona pipeline outcome.

    Three cases are handled: an exception (its type and message are printed),
    a result carrying an ``error`` message, and a successful result, for which
    the persona profile, every entry with its nudge/response, and summary
    counts are printed.
    """
    heavy_rule = "=" * 80
    light_rule = "─" * 40

    # Case 1: the pipeline raised
    if isinstance(result, Exception):
        print(f"\n{heavy_rule}")
        print("PERSONA FAILED WITH EXCEPTION:")
        print(heavy_rule)
        print(f"{type(result).__name__}: {result}")
        print(f"{heavy_rule}\n")
        return

    print(f"\n{heavy_rule}")
    print(f"PERSONA {result.persona_id}")
    print(heavy_rule)

    # Case 2: the pipeline reported an error
    if result.error:
        print(f"\nError: {result.error}")
        return

    # Case 3: full report
    persona = result.persona
    print(f"\n## Generated Persona: {persona.name}")
    print(
        f"Age: {persona.age} | Profession: {persona.profession} | Culture: {persona.culture}"
    )
    print(f"Values: {', '.join(persona.core_values)}")
    print(f"Bio: {persona.bio}")

    nudges_given = 0
    responses_received = 0

    for number, exchange in enumerate(result.entries, start=1):
        initial = exchange.initial_entry
        print(f"\n{light_rule}")
        print(f"### Entry {number}: {initial.date}")
        print(
            f"Tone: {exchange.tone} | Verbosity: {exchange.verbosity} | Mode: {exchange.reflection_mode}"
        )
        print("\n**Initial Entry:**")
        print(initial.content)

        nudge = exchange.nudge
        if not nudge:
            print("\n*(No nudge)*")
            continue

        nudges_given += 1
        print(f"\n**Nudge ({nudge.nudge_category}):**")
        print(f"Trigger: {nudge.trigger_reason}")
        print(f'"{nudge.nudge_text}"')

        # A response only counts when it accompanies a nudge
        if exchange.response:
            responses_received += 1
            print("\n**Response:**")
            print(exchange.response.content)
        else:
            print("\n*(No response)*")

    # Summary stats
    print(f"\n{light_rule}")
    print(f"### Summary for {persona.name}")
    print(f"Total entries: {len(result.entries)}")
    print(f"Nudges given: {nudges_given}")
    print(f"Responses received: {responses_received}")

# Execution Loop

In [14]:
# Configuration
NUM_PERSONAS = 5
NUM_ENTRIES = 5
START_DATE = "2025-10-25"

print(f"Generating {NUM_PERSONAS} personas with conversational journaling...")
print(f"Each persona will have {NUM_ENTRIES} entries with potential nudges.")
print(f"Model: {MODEL_NAME} | Reasoning: {DEFAULT_REASONING_EFFORT}")
print(f"Start date: {START_DATE}")
print(f"Nudge probability: {config['nudge']['base_probability']}")
print(f"Response probability: {config['nudge']['response_probability']}\n")

# Run all personas in parallel
results = await run_parallel_conversational_personas(
    num_personas=NUM_PERSONAS,
    config=config,
    schwartz_config=schwartz_config,
    num_entries=NUM_ENTRIES,
    start_date=START_DATE,
)

# Display results
for result in results:
    display_conversational_results(result)

# Save logs
successful_results = [
    r for r in results if isinstance(r, ConversationalPipelineResult) and r.persona
]
log_dir = save_run_logs(results, config, NUM_PERSONAS, NUM_ENTRIES)

# Final summary
print(f"\n{'=' * 80}")
print(f"FINAL SUMMARY")
print(f"{'=' * 80}")
print(f"Successfully generated: {len(successful_results)}/{NUM_PERSONAS} personas")

total_entries = sum(len(r.entries) for r in successful_results)
total_nudges = sum(sum(1 for e in r.entries if e.nudge) for r in successful_results)
total_responses = sum(
    sum(1 for e in r.entries if e.nudge and e.response) for r in successful_results
)

print(f"Total entries: {total_entries}")
print(f"Total nudges given: {total_nudges}")
print(f"Total responses: {total_responses}")
if total_nudges > 0:
    print(f"Response rate: {total_responses / total_nudges:.1%}")
print(f"\nLogs saved to: {log_dir}")

Generating 5 personas with conversational journaling...
Each persona will have 5 entries with potential nudges.
Model: gpt-5-mini-2025-08-07 | Reasoning: high
Start date: 2025-10-25
Nudge probability: 0.4
Response probability: 0.7


PERSONA 1

## Generated Persona: Maria Alvarez
Age: 48 | Profession: Parent (Stay-at-home) | Culture: North American
Values: Tradition
Bio: Maria Alvarez keeps her mother's tamale recipe in a stained notebook and spends two full days each December making tamales for the extended family before Nochebuena; she also teaches the grandchildren a few Spanish lullabies and the old prayers her abuela used to sing. She runs the household calendar—coordinating baptisms, the neighborhood posada, and the annual cemetery visit on Dia de los Muertos—while driving her elderly father to Mass every Sunday. Lately she's worried because her eldest moved out of state, younger relatives prefer quick takeout and video calls to the long preparations she learned growing up, and th