Setup & Imports

In [1]:
from dotenv import load_dotenv
import os
import yaml
import re
from pydantic import BaseModel, Field
from pydantic_ai import Agent
from typing import List, Optional

# Pull variables from a .env file into the process environment.
load_dotenv()

# Report whether the OpenAI key is visible, without ever echoing its value.
print("Environment ready" if os.getenv('OPENAI_API_KEY') else "API key not found")

Environment ready


Load Configurations

In [3]:
# Read the research guidelines (frameworks + privacy rules) from YAML.
# The encoding is pinned to UTF-8 so the file parses identically on every OS;
# without it, open() falls back to the platform's locale encoding.
with open('config/research_guidelines.yaml', 'r', encoding='utf-8') as f:
    guidelines = yaml.safe_load(f)

print("research guidelines loaded")
print(f"   Frameworks: {list(guidelines['frameworks'].keys())}")

research guidelines loaded
   Frameworks: ['jobs_to_be_done', 'continuous_discovery', 'mom_test', 'mental_models']


Privacy & PII Removal Functions

In [5]:
def enforce_pii_removal(text: str, privacy_rules: dict) -> str:
    """Replace every configured PII pattern in ``text`` with its placeholder token.

    Args:
        text: Text to scrub.
        privacy_rules: Mapping with a ``pii_removal`` section that provides
            ``enabled``, ``patterns`` and ``replacement_tokens``.

    Returns:
        ``text`` unchanged when removal is disabled; otherwise the text after
        all substitutions have been applied.
    """
    pii_config = privacy_rules['pii_removal']
    if not pii_config['enabled']:
        return text

    regexes = pii_config['patterns']
    tokens = pii_config['replacement_tokens']

    # (category, regex flags), listed in the order the substitutions run.
    # Only emails and employee IDs are matched case-insensitively.
    substitution_plan = (
        ('api_token', 0),
        ('iban', 0),
        ('email', re.IGNORECASE),
        ('phone', 0),
        ('employee_id', re.IGNORECASE),
        ('names', 0),
    )

    scrubbed = text
    for category, flags in substitution_plan:
        scrubbed = re.sub(regexes[category], tokens[category], scrubbed, flags=flags)
    return scrubbed

def validate_no_pii(insights, privacy_rules):
    """Fail loudly if the serialized insights still contain detectable PII.

    The email and employee-ID patterns are searched with ``re.IGNORECASE``,
    the same flags ``enforce_pii_removal`` uses, so anything removal would
    have replaced is also caught here regardless of letter case.

    Only emails, phone numbers and employee IDs are checked; the ``names``,
    ``iban`` and ``api_token`` patterns are not consulted by this function.

    Args:
        insights: Object exposing ``model_dump_json()``; its JSON text is scanned.
        privacy_rules: Mapping with ``pii_removal.patterns`` holding the regexes.

    Returns:
        ``True`` when none of the checked patterns match.

    Raises:
        ValueError: If any checked pattern matches. Each finding is printed
            before the exception is raised.
    """
    all_text = insights.model_dump_json()
    patterns = privacy_rules['pii_removal']['patterns']

    # (pattern key, message, flags) - flags mirror those used during removal.
    checks = (
        ('email', "EMAIL detected", re.IGNORECASE),
        ('phone', "PHONE detected", 0),
        ('employee_id', "ID detected", re.IGNORECASE),
    )

    issues = [
        message
        for key, message, flags in checks
        if re.search(patterns[key], all_text, flags=flags)
    ]

    if issues:
        for issue in issues:
            print(issue)
        raise ValueError("PII VALIDATION FAILED")
    return True

def audit_pii_in_transcript(text: str, privacy_rules: dict):
    """Print a table of how many matches each PII pattern finds in ``text``.

    Emails and employee IDs are counted with ``re.IGNORECASE``, the same flags
    ``enforce_pii_removal`` applies, so the counts reflect what removal would
    actually replace instead of under-reporting mixed-case matches.

    Args:
        text: Text to scan (typically the raw transcript).
        privacy_rules: Mapping with ``pii_removal.patterns`` holding the regexes.

    Returns:
        None. The summary is written to stdout.
    """
    patterns = privacy_rules['pii_removal']['patterns']

    # (row label, pattern key, flags) in display order.
    categories = (
        ('Emails', 'email', re.IGNORECASE),
        ('Phone Numbers', 'phone', 0),
        ('Employee IDs', 'employee_id', re.IGNORECASE),
        ('Names', 'names', 0),
        ('IBANs', 'iban', 0),
        ('API Tokens', 'api_token', 0),
    )

    findings = {
        label: len(re.findall(patterns[key], text, flags=flags))
        for label, key, flags in categories
    }

    print("\nPII Detection Summary:")
    print("-" * 30)
    for pii_type, count in findings.items():
        print(f"{pii_type:20} {count:>5}")
    print("-" * 30)
    print(f"{'TOTAL':20} {sum(findings.values()):>5}")

# Cell-completion marker: printed once the privacy helpers above have been defined.
print("Privacy functions ready")

Privacy functions ready


Pydantic Models (Data structure)

In [15]:
# One pain point extracted from an interview: the problem, its impact on the
# user, and the supporting quote. Field descriptions are supplied via Field().
class PainPoint(BaseModel):
    description: str = Field(description="What's not working for the user")
    impact: str = Field(description="Time lost, extra work, costs, uncertainty")
    quote: str = Field(description="User's exact words")

# One job-to-be-done: the functional task, the emotional goal, and the
# situation in which it arises, backed by a quote.
class JobToBeDone(BaseModel):
    functional_job: str = Field(description="What task are they trying to complete?")
    emotional_job: str = Field(description="How do they want to feel?")
    context: str = Field(description="When/where does this happen?")
    # Supporting quote; required, and declared without a Field description.
    quote: str

# One workaround the user has built: what it is, the problem it covers for,
# and what it costs them, backed by a quote.
class Workaround(BaseModel):
    what_they_do: str = Field(description="The workaround they've created")
    why_needed: str = Field(description="What problem does this solve?")
    cost: str = Field(description="Time/effort this workaround takes")
    # Supporting quote; required, and declared without a Field description.
    quote: str

# One desired outcome: what the user wants and why they cannot get it today,
# backed by a quote.
class DesiredOutcome(BaseModel):
    outcome: str = Field(description="What do they really want?")
    current_gap: str = Field(description="Why can't they achieve this now?")
    # Supporting quote; required, and declared without a Field description.
    quote: str

# One behavioral signal: a revealing statement or action and the underlying
# need or belief it points to, backed by a quote.
class BehavioralSignal(BaseModel):
    observation: str = Field(description="What did they say/do that was revealing?")
    what_it_reveals: str = Field(description="The underlying need or belief")
    # Supporting quote; required, and declared without a Field description.
    quote: str

# One mental model: how the user thinks about or categorizes something.
# The metaphor and mismatch fields are optional and default to None.
class MentalModel(BaseModel):
    description: str = Field(description="How they think about or categorize something")
    metaphor_or_analogy: Optional[str] = Field(default=None)
    mismatch_with_reality: Optional[str] = Field(default=None)
    # Supporting quote; required, and declared without a Field description.
    quote: str

# Top-level container for everything extracted from one interview; this is
# the type passed to the agent as `output_type`.
#
# Every category defaults to an empty list, so a category with no findings
# may be omitted without failing validation.
# NOTE(review): a literal `[]` default is safe on a Pydantic model (Pydantic
# copies mutable defaults per instance) - confirm against the pinned version.
# NOTE(review): documentation is kept in `#` comments rather than a class
# docstring because Pydantic exposes model docstrings as the JSON-schema
# description, which would alter the schema derived from this model.
class InterviewInsights(BaseModel):
    pain_points: List[PainPoint] = []
    jobs_to_be_done: List[JobToBeDone] = []
    workarounds: List[Workaround] = []
    desired_outcomes: List[DesiredOutcome] = []
    behavioral_signals: List[BehavioralSignal] = []
    mental_models: List[MentalModel] = []

# Cell-completion marker: printed once the Pydantic models above have been defined.
print("Data models defined")

Data models defined


Build system prompt and create agent

In [16]:
def build_system_prompt(guidelines):
    """Assemble the agent's system prompt from the configured frameworks.

    Args:
        guidelines: Mapping with a ``frameworks`` entry whose items are
            ``name -> description``. Each name is upper-cased with
            underscores turned into spaces and used as a section heading.

    Returns:
        The prompt text: a fixed preamble, one section per framework in
        mapping order, then the fixed extraction instructions.
    """
    framework_sections = [
        f"{name.upper().replace('_', ' ')}:\n"
        f"{details}\n\n"
        for name, details in guidelines['frameworks'].items()
    ]

    extraction_brief = """
Extract ALL insights from the interview:
- Pain points: Problems causing time waste, costs, uncertainty, frustration
- Jobs-to-be-done: What they're trying to accomplish (functional + emotional goals)
- Workarounds: Current hacks/solutions they've created
- Desired outcomes: What success looks like to them
- Behavioral signals: Implicit patterns revealing underlying needs
- Mental models: How they conceptualize their work

Always include exact quotes as evidence.
"""

    return (
        "You are an expert product researcher.\n\n"
        "APPLY THESE FRAMEWORKS:\n\n"
        + "".join(framework_sections)
        + extraction_brief
    )

# Render the prompt once up front so the exact text handed to the agent can be inspected.
system_prompt = build_system_prompt(guidelines)

# Agent configured with InterviewInsights as its output type.
multi_insight_agent = Agent(
    system_prompt=system_prompt,
    output_type=InterviewInsights,
    model='openai:gpt-4o-mini',
)

print("Agent created with frameworks from research guidelines")

Agent created with frameworks from research guidelines


File loading helper

In [18]:
def load_transcript(filepath: str) -> str:
    """Read an interview transcript and return its full text.

    The file is decoded as UTF-8 explicitly. Without an ``encoding`` argument
    ``open()`` uses the platform's locale encoding, which can mis-decode
    non-ASCII punctuation (e.g. curly apostrophes) on some systems.

    Args:
        filepath: Path to the transcript text file.

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()

# Cell-completion marker: printed once the file-loading helper above has been defined.
print("File loading ready")

File loading ready


Load test files and run analysis with pre-defined privacy enforcement

In [19]:
# Read the raw interview transcript from disk.
transcript = load_transcript('data/interviews/mock_interview.txt')

# Print how many matches each configured PII pattern finds in the raw text.
audit_pii_in_transcript(transcript, guidelines['privacy_enforcement'])

# Scrub the transcript; only the cleaned text is passed to the agent below.
clean_transcript = enforce_pii_removal(transcript, guidelines['privacy_enforcement'])

print("\nPII removed from transcript")

# Run the agent on the cleaned transcript. This is a top-level `await`, so the
# cell must run in an environment that supports it (e.g. an IPython/Jupyter kernel).
result = await multi_insight_agent.run(clean_transcript)
insights = result.output

# Raises ValueError if an email, phone, or employee-ID pattern matches the
# serialized insights; other PII categories are not checked by this call.
validate_no_pii(insights, guidelines['privacy_enforcement'])
print("Output validated - no PII detected")


PII Detection Summary:
------------------------------
Emails                   0
Phone Numbers            0
Employee IDs             0
Names                  131
IBANs                    0
API Tokens               0
------------------------------
TOTAL                  131

PII removed from transcript
Output validated - no PII detected


Display results

In [20]:
# Each item is emitted with a single print call; sep="\n" puts every field on
# its own line, and the leading "\n" in the first field leaves a blank line
# between items.

print("\nPAIN POINTS:")
for idx, pain in enumerate(insights.pain_points, start=1):
    print(
        f"\n{idx}. {pain.description}",
        f"   Impact: {pain.impact}",
        f"   Quote: {pain.quote}",
        sep="\n",
    )

print("\n\nJOBS-TO-BE-DONE:")
for idx, jtbd in enumerate(insights.jobs_to_be_done, start=1):
    print(
        f"\n{idx}. Functional: {jtbd.functional_job}",
        f"   Emotional: {jtbd.emotional_job}",
        f"   Context: {jtbd.context}",
        sep="\n",
    )

print("\n\nWORKAROUNDS:")
for idx, hack in enumerate(insights.workarounds, start=1):
    print(
        f"\n{idx}. {hack.what_they_do}",
        f"   Why: {hack.why_needed}",
        f"   Cost: {hack.cost}",
        sep="\n",
    )

print("\n\nDESIRED OUTCOMES:")
for idx, wish in enumerate(insights.desired_outcomes, start=1):
    print(
        f"\n{idx}. {wish.outcome}",
        f"   Gap: {wish.current_gap}",
        sep="\n",
    )

print("\n\nBEHAVIORAL SIGNALS:")
for idx, cue in enumerate(insights.behavioral_signals, start=1):
    print(
        f"\n{idx}. {cue.observation}",
        f"   Reveals: {cue.what_it_reveals}",
        sep="\n",
    )

print("\n\nMENTAL MODELS:")
for idx, mental in enumerate(insights.mental_models, start=1):
    # Optional fields are only shown when they hold a non-empty value.
    lines = [f"\n{idx}. {mental.description}"]
    if mental.metaphor_or_analogy:
        lines.append(f"   Metaphor: {mental.metaphor_or_analogy}")
    if mental.mismatch_with_reality:
        lines.append(f"   Mismatch: {mental.mismatch_with_reality}")
    print(*lines, sep="\n")


PAIN POINTS:

1. Inconsistency in API responses, with some endpoints returning arrays and others returning objects with metadata.
   Impact: Adds friction and complexity, hindering effective onboarding of new developers.
   Quote: [NAME] now, some endpoints return arrays, others wrap everything in an object with metadata. [NAME]’s small, but it adds friction.

2. Lack of a real sandbox for testing, leading to potential issues when using production for tests.
   Impact: Risk of polluting production data; increases uncertainty in testing.
   Quote: [NAME] isn’t a real sandbox. [NAME] we either mock responses, which doesn’t feel realistic, or we test against production and hope for the best.

3. Insufficient error messages, especially with pagination errors.
   Impact: Wastes time and leads to frustration when troubleshooting API issues.
   Quote: [NAME] kept getting 'invalid cursor.' I couldn’t tell if I formatted it wrong or if it expired or what. [NAME] error message didn’t explain.

