# Therapeutic Conversation Simulator: 100+ Depression & Anxiety Sessions

Generate **100+ realistic therapist-client conversations** focused on depression and anxiety, with rich variation and meta-analysis.

**Features:**
- 🧠 100+ unique therapy sessions
- 😔 Depression & anxiety focused scenarios
- 🎲 Rich randomness: triggers, emotional states, symptoms, contexts
- 💬 Natural conversation flow: Client → Therapist → Client → Conclusion
- 📊 Meta-analysis: patterns, cognitive actions, therapeutic techniques
- 🔬 Cognitive action detection via probes (optional)
- 💾 All data exported for further analysis

**Requirements:**
- Google Colab with GPU (T4 or better)
- ~15 GB VRAM
- Runtime: ~2-4 hours for 100 sessions

## 1️⃣ Check GPU and Setup

In [None]:
# Check GPU availability
!nvidia-smi

import torch
print("\n" + "="*60)
print("GPU INFORMATION")
print("="*60)
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("⚠️  WARNING: No GPU detected! This will be very slow on CPU.")
print("="*60)

## 2️⃣ Clone Repository and Install Dependencies

In [None]:
import os
import sys

# Clone the repository
repo_url = "https://github.com/ChuloIva/brije.git"
repo_name = "brije"

if not os.path.exists(repo_name):
    print("📥 Cloning Brije repository...")
    !git clone {repo_url}
    print("✅ Repository cloned successfully!")
else:
    print("✅ Repository already exists")
    print("🔄 Pulling latest changes...")
    !cd {repo_name} && git pull

# Change to repo directory
os.chdir(repo_name)
print(f"\n📁 Current directory: {os.getcwd()}")

In [None]:
# Install dependencies
print("📦 Installing dependencies...\n")

# Core dependencies
print("Installing core packages...")
!pip install -q torch transformers h5py scikit-learn tqdm matplotlib seaborn pandas

# Dependencies for liminal_backrooms
print("\nInstalling liminal_backrooms dependencies...")
!pip install -q python-dotenv requests Pillow

# Optional API clients (prevents import errors)
print("\nInstalling optional API clients...")
!pip install -q anthropic openai replicate together

# Clone and install nnsight
nnsight_dir = "third_party/nnsight"
nnsight_repo = "https://github.com/ndif-team/nnsight"

print("\n📦 Setting up nnsight...")
if not os.path.exists(nnsight_dir) or not os.listdir(nnsight_dir):
    print("   Cloning nnsight repository...")
    os.makedirs("third_party", exist_ok=True)
    !git clone {nnsight_repo} {nnsight_dir}
    print("   ✅ nnsight repository cloned")
else:
    print("   ✅ nnsight repository already exists")

# Install nnsight
print("   Installing nnsight...")
!pip install -q -e {nnsight_dir}

print("\n✅ All dependencies installed!")

## 3️⃣ Mount Google Drive (for outputs)

In [None]:
from google.colab import drive
from datetime import datetime

# Mount Google Drive at /content/drive; all outputs are written beneath it.
drive.mount('/content/drive')

# Every run gets its own folder, named after the current date and time.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_dir = f'/content/drive/MyDrive/therapeutic_simulations_{timestamp}'

# Create the run folder first, then its two subfolders.
for _folder in (output_dir, f"{output_dir}/conversations", f"{output_dir}/analysis"):
    os.makedirs(_folder, exist_ok=True)

print(f"✅ Outputs will be saved to: {output_dir}")

## 4️⃣ Setup Environment (No API Keys Required)

In [None]:
# Create a minimal .env file, but never overwrite one that is already there:
# opening it with mode 'w' would silently wipe any settings it contains.
# Mode 'x' creates the file only if it does not exist yet.
try:
    with open('.env', 'x', encoding='utf-8') as f:
        f.write("# Gemma 3 4B runs locally - no API keys needed\n")
except FileExistsError:
    # An existing .env is left untouched.
    pass

print("✅ Environment setup complete!")
print("\n💡 Note: Gemma 3 4B with probes runs entirely on your GPU.")
print("   • No API calls, No API keys, No internet required")
print("   • 100% local inference")

In [None]:
# Check if probes exist (they should be in the cloned repo)
import glob

# Look for per-layer probe folders under data/probes_binary/.
probe_dirs = glob.glob('data/probes_binary/layer_*')

if not probe_dirs:
    # Nothing found: explain the consequences and carry on without probes.
    print("❌ ERROR: No pre-trained probes found!")
    print("\nThe probes should be in: data/probes_binary/layer_XX/")
    print("\n⚠️  WITHOUT PROBES:")
    print("   - Conversations will still run")
    print("   - But NO cognitive action analysis will be available")
    print("   - Meta-analysis will be limited to metadata only")
    print("\n💡 To get probes:")
    print("   1. Make sure the brije repo was cloned completely")
    print("   2. Or train probes using Brije_Full_Pipeline_Colab.ipynb")
    print("\nProceeding without probes...")
else:
    print("✅ Found pre-trained probes!")
    print(f"\nAvailable layers: {len(probe_dirs)}")

    # Preview at most five layer folders, in sorted path order.
    _preview_limit = 5
    for _layer_path in sorted(probe_dirs)[:_preview_limit]:
        _layer_label = os.path.basename(_layer_path).replace('layer_', '')
        _probe_count = len(glob.glob(f"{_layer_path}/probe_*.pth"))
        print(f"   Layer {_layer_label}: {_probe_count} probes")

    _not_shown = len(probe_dirs) - _preview_limit
    if _not_shown > 0:
        print(f"   ... and {_not_shown} more layers")

    print("\n🎯 Cognitive action detection will be enabled for all conversations!")
    print("   Each conversation turn will analyze 45 cognitive actions across multiple layers.")

In [None]:
import random
from typing import Dict, List, Any

# ---------------------------------------------------------------------------
# Variation pools consumed by generate_varied_session_opening().
# Every entry is a lowercase phrase that gets interpolated into a sentence
# template and/or stored in the session metadata.
# ---------------------------------------------------------------------------

# Depression symptom phrases. Sampled as the primary symptoms when the
# presenting issue is "depression" or "both", and as secondary symptoms when
# it is "anxiety".
DEPRESSION_SYMPTOMS = [
    "persistent sadness and emptiness",
    "loss of interest in activities once enjoyed",
    "difficulty sleeping or sleeping too much",
    "significant changes in appetite and weight",
    "extreme fatigue and lack of energy",
    "difficulty concentrating and making decisions",
    "feelings of worthlessness and guilt",
    "physical aches and pains with no clear cause",
    "social withdrawal and isolation",
    "irritability and restlessness",
    "thoughts of death or self-harm",
    "feeling numb or emotionally detached"
]

# Anxiety symptom phrases. Sampled as the primary symptoms when the presenting
# issue is "anxiety", and as secondary symptoms for "depression" and "both".
ANXIETY_SYMPTOMS = [
    "constant worry and racing thoughts",
    "physical tension and muscle tightness",
    "rapid heartbeat and chest discomfort",
    "difficulty breathing or feeling smothered",
    "overwhelming dread about the future",
    "panic attacks with intense fear",
    "avoidance of triggering situations",
    "restlessness and feeling on edge",
    "difficulty falling or staying asleep",
    "stomach problems and nausea",
    "fear of losing control or going crazy",
    "hypervigilance and startling easily"
]

# Triggering contexts (what brings the client to therapy). One is chosen per
# session and used in every opening template.
TRIGGER_CONTEXTS = [
    "recent breakup or relationship ending",
    "job loss or career setback",
    "death of a loved one",
    "chronic work stress and burnout",
    "academic pressure and performance anxiety",
    "major life transition (moving, graduating, etc.)",
    "family conflict or estrangement",
    "health diagnosis or chronic illness",
    "financial difficulties and instability",
    "social isolation and loneliness",
    "trauma or past abuse resurfacing",
    "identity crisis or questioning life purpose",
    "parenting challenges and feeling overwhelmed",
    "caring for aging parents",
    "failure or major disappointment"
]

# Emotional states (how the client presents). Stored in the session metadata
# only; none of the opening templates uses it.
EMOTIONAL_PRESENTATIONS = [
    "tearful and openly emotional",
    "emotionally flat and detached",
    "agitated and restless",
    "withdrawn and quiet",
    "defensive and guarded",
    "hopeful but uncertain",
    "frustrated and angry",
    "exhausted and defeated",
    "anxious and hypervigilant",
    "confused and disoriented"
]

# Duration of symptoms. Phrased so that each entry reads naturally after
# "for ..." or "It's been ..." in the opening templates.
SYMPTOM_DURATIONS = [
    "a few weeks",
    "about a month",
    "two to three months",
    "several months",
    "six months or more",
    "almost a year",
    "years, on and off",
    "as long as I can remember"
]

# Cognitive distortions (negative thought patterns). Stored in the session
# metadata only; none of the opening templates uses it.
COGNITIVE_DISTORTIONS = [
    "all-or-nothing thinking",
    "catastrophizing worst-case scenarios",
    "overgeneralizing from single events",
    "mind reading (assuming others' thoughts)",
    "fortune telling (predicting negative outcomes)",
    "emotional reasoning (feelings as facts)",
    "should statements and rigid rules",
    "labeling self as defective or broken",
    "minimizing positives and magnifying negatives",
    "personalizing blame for external events"
]

# Support systems. Stored in the session metadata only.
SUPPORT_LEVELS = [
    "strong support from family and friends",
    "some supportive friends but distant family",
    "limited support system",
    "isolated with few close connections",
    "strained relationships with usual supports",
    "supportive partner but other relationships strained"
]

# Coping strategies (what the client has tried). Stored in the metadata and
# used by the "I've tried ..., but nothing helps." template.
COPING_ATTEMPTS = [
    "trying to stay busy and distracted",
    "withdrawing and sleeping more",
    "talking to friends but it doesn't help",
    "exercise and healthy eating",
    "journaling and self-reflection",
    "meditation and mindfulness apps",
    "self-help books and online resources",
    "using alcohol or substances to cope",
    "throwing themselves into work",
    "nothing seems to work"
]

# Therapy goals (what the client hopes for). Stored in the metadata and used
# by the "I really want to ..." template, hence the verb-first phrasing.
THERAPY_GOALS = [
    "feel like myself again",
    "manage anxiety and worry better",
    "improve sleep and energy levels",
    "understand why this is happening",
    "learn better coping strategies",
    "stop negative thought patterns",
    "rebuild confidence and self-worth",
    "repair damaged relationships",
    "make important life decisions",
    "just function day-to-day better"
]

# Sentence starters for natural variation. Prefixed to two of the opening
# templates; the chosen starter is not recorded in the metadata.
CLIENT_SENTENCE_STARTERS = [
    "I've been feeling",
    "Lately I've noticed",
    "I keep thinking about",
    "What's been hardest is",
    "I don't understand why",
    "Every day I wake up and",
    "People tell me",
    "I used to be able to",
    "The thing that scares me most is",
    "I can't stop worrying about"
]

# Report the size of each pool, one bullet per pool, in a fixed order.
_pool_report = (
    (DEPRESSION_SYMPTOMS, "depression symptoms"),
    (ANXIETY_SYMPTOMS, "anxiety symptoms"),
    (TRIGGER_CONTEXTS, "trigger contexts"),
    (EMOTIONAL_PRESENTATIONS, "emotional presentations"),
    (COGNITIVE_DISTORTIONS, "cognitive distortions"),
    (COPING_ATTEMPTS, "coping strategies"),
    (THERAPY_GOALS, "therapy goals"),
)

print("✅ Variation pools loaded:")
for _pool, _label in _pool_report:
    print(f"   • {len(_pool)} {_label}")

# The headline figure multiplies only the two symptom pools and the trigger pool.
_combo_floor = len(DEPRESSION_SYMPTOMS) * len(ANXIETY_SYMPTOMS) * len(TRIGGER_CONTEXTS)
print(f"\n🎲 Total possible unique combinations: {_combo_floor:,}+")

## 5️⃣ Define Variation Pools (Inspired by datagen)

Rich variation pools to create unique, realistic conversations

In [None]:
import sys
from pathlib import Path

# Put third_party/liminal_backrooms at the front of the import path.
# This has to happen before the `config` import below.
liminal_path = str(Path.cwd() / "third_party" / "liminal_backrooms")
if liminal_path not in sys.path:
    sys.path.insert(0, liminal_path)

# Resolved through the path entry added above.
from config import SYSTEM_PROMPT_PAIRS

# Name under which the therapy prompts are registered.
_THERAPY_PAIR_NAME = "Depression/Anxiety Therapy Session"

# System prompt for the therapist role (stored under "AI_1").
_THERAPIST_SYSTEM_PROMPT = """You are an experienced, compassionate therapist specializing in depression and anxiety. Your approach integrates:
- Active listening and empathetic reflection
- Cognitive-behavioral techniques (identifying and reframing distorted thoughts)
- Exploration of feelings, triggers, and patterns
- Validation of emotions while gently challenging unhelpful beliefs
- Collaborative goal-setting and coping strategy development
- Normalizing experiences and reducing shame

In each response:
1. Reflect back what you're hearing
2. Ask one thoughtful question to deepen exploration
3. Offer an observation, reframe, or insight when appropriate

Be warm, non-judgmental, and focused on helping the client gain insight and develop practical coping strategies. Keep responses concise (3-5 sentences)."""

# System prompt for the client role (stored under "AI_2").
_CLIENT_SYSTEM_PROMPT = """You are a client in therapy seeking help for depression and/or anxiety. You're experiencing real symptoms and struggling with daily life. 

In your responses:
- Express your thoughts, feelings, and experiences authentically
- Sometimes you're uncertain or confused about what you're feeling
- You may exhibit cognitive distortions (catastrophizing, all-or-nothing thinking, etc.)
- You're open to the therapist's questions but may need time to process
- Gradually show small shifts in perspective as the conversation progresses
- By the end, reflect on what you've realized or learned

Be genuine, vulnerable, and show realistic emotional responses. Keep responses natural and conversational (2-4 sentences)."""

# Register the pair alongside the ones imported from config.
SYSTEM_PROMPT_PAIRS[_THERAPY_PAIR_NAME] = {
    "AI_1": _THERAPIST_SYSTEM_PROMPT,
    "AI_2": _CLIENT_SYSTEM_PROMPT,
}

print("✅ Therapeutic system prompts configured!")
print(f"\nPrompt pair: '{_THERAPY_PAIR_NAME}'")
print("   • Therapist: CBT-focused, empathetic, structured")
print("   • Client: Authentic, vulnerable, shows cognitive distortions")

## 6️⃣ Setup AI Models and System Prompts

In [None]:
def generate_varied_session_opening(session_id: int) -> Dict[str, Any]:
    """
    Build the randomized profile and opening line for one therapy session.

    The global ``random`` module is seeded with ``session_id`` before any
    draw, so the same id always yields the same profile and opening.

    Args:
        session_id: Session identifier; doubles as the random seed.

    Returns:
        Dict with two keys: ``"metadata"`` (the sampled session attributes)
        and ``"opening_statement"`` (the client's first utterance).
    """
    random.seed(session_id)

    # Sampling plan per presenting issue:
    # (primary pool, primary count range, secondary pool, secondary count range)
    sampling_plan = {
        "depression": (DEPRESSION_SYMPTOMS, (2, 4), ANXIETY_SYMPTOMS, (0, 2)),
        "anxiety": (ANXIETY_SYMPTOMS, (2, 4), DEPRESSION_SYMPTOMS, (0, 2)),
        "both": (DEPRESSION_SYMPTOMS, (2, 3), ANXIETY_SYMPTOMS, (2, 3)),
    }
    primary_issue = random.choice(["depression", "anxiety", "both"])
    main_pool, main_range, side_pool, side_range = sampling_plan[primary_issue]

    # Draw order is part of the seeded sequence: the count is drawn before the
    # sample, and the primary symptoms before the secondary ones.
    primary_symptoms = random.sample(main_pool, random.randint(*main_range))
    secondary_symptoms = random.sample(side_pool, random.randint(*side_range))

    # The single-choice factors are drawn in the order the keys are listed.
    metadata = {
        "session_id": session_id,
        "primary_issue": primary_issue,
        "primary_symptoms": primary_symptoms,
        "secondary_symptoms": secondary_symptoms,
        "trigger_context": random.choice(TRIGGER_CONTEXTS),
        "symptom_duration": random.choice(SYMPTOM_DURATIONS),
        "emotional_presentation": random.choice(EMOTIONAL_PRESENTATIONS),
        "cognitive_distortion": random.choice(COGNITIVE_DISTORTIONS),
        "support_level": random.choice(SUPPORT_LEVELS),
        "coping_attempts": random.choice(COPING_ATTEMPTS),
        "therapy_goal": random.choice(THERAPY_GOALS),
    }
    starter = random.choice(CLIENT_SENTENCE_STARTERS)

    trigger = metadata["trigger_context"]
    duration = metadata["symptom_duration"]
    coping = metadata["coping_attempts"]
    goal = metadata["therapy_goal"]

    # The opening mentions the first two entries of the combined symptom list.
    symptoms = primary_symptoms + secondary_symptoms
    lead = symptoms[0]
    follow_up = symptoms[1] if len(symptoms) > 1 else ""

    # Fallback wording for the case where there is no second symptom.
    closing_line = follow_up.capitalize() if follow_up else "I just don't know what to do anymore"
    also_feeling = follow_up or 'feeling lost'

    # All four candidates are built; one is picked as the final random draw.
    candidates = [
        f"{starter} {lead} for {duration}. It started after {trigger}. {closing_line}.",
        f"I'm here because of {lead}. This has been going on for {duration}, ever since {trigger}. I've tried {coping}, but nothing helps.",
        f"{starter} really struggling. {lead.capitalize()} and {also_feeling}. It's been {duration} since {trigger}.",
        f"Things haven't been good for {duration}. I'm dealing with {lead}, especially after {trigger}. I really want to {goal}.",
    ]

    return {
        "metadata": metadata,
        "opening_statement": random.choice(candidates),
    }

# Smoke-test the generator on session ids 0, 1 and 2.
print("Testing session generator...\n")
for i in range(3):
    test_session = generate_varied_session_opening(i)
    _issue = test_session['metadata']['primary_issue']
    _opening = test_session['opening_statement']
    # One print per session; the trailing "\n" produces the blank separator line.
    print(f"Session {i}:\n  Issue: {_issue}\n  Opening: {_opening}\n")

## 7️⃣ Session Generator with Rich Variation

In [None]:
from main import ai_turn
import json
import time

def run_therapeutic_session(session_id: int, num_turns: int = 6, verbose: bool = True) -> Dict[str, Any]:
    """
    Simulate one therapy session from the client's opening to a closing reflection.

    The conversation starts with the generated opening statement, then
    ``ai_turn`` is called ``num_turns`` times, alternating therapist (first)
    and client, and once more for the client's closing reflection.

    Args:
        session_id: Identifier passed to the opening generator.
        num_turns: Number of alternating turns before the reflection
            (default 6 = 3 therapist, 3 client).
        verbose: When True, print the session as it unfolds.

    Returns:
        The dict from ``generate_varied_session_opening`` extended with
        ``"conversation"``, ``"num_turns"`` (``num_turns + 1``, counting the
        reflection) and ``"timestamp"`` (ISO-formatted current time).
    """
    session_data = generate_varied_session_opening(session_id)
    opening = session_data['opening_statement']
    banner = '=' * 80

    if verbose:
        print(f"\n{banner}")
        print(f"SESSION {session_id}: {session_data['metadata']['primary_issue'].upper()}")
        print(f"{banner}")
        print(f"Client opens: {opening}")
        print("-" * 80)

    # The opening statement is the first message of the conversation.
    conversation = [{"role": "user", "content": opening}]

    prompts = SYSTEM_PROMPT_PAIRS["Depression/Anxiety Therapy Session"]
    therapist_prompt = prompts["AI_1"]
    client_prompt = prompts["AI_2"]

    # Both roles are played by the same model.
    shared_model = "Gemma 3 4B (with Probes)"

    # Indexed by turn parity: even turns -> therapist, odd turns -> client.
    speakers = (("Therapist", therapist_prompt), ("Client", client_prompt))

    for turn in range(num_turns):
        speaker, speaker_prompt = speakers[turn % 2]

        if verbose:
            print(f"\n{speaker} (Turn {turn + 1}):")

        # ai_turn hands back the conversation; its last entry is read as the new message.
        conversation = ai_turn(
            ai_name=speaker,
            conversation=conversation,
            model=shared_model,
            system_prompt=speaker_prompt,
            gui=None
        )

        newest = conversation[-1]
        if verbose:
            print(newest.get('content', ''))

    if verbose:
        print(f"\n{banner}")
        print("CLIENT REFLECTION (Final):")
        print(f"{banner}")

    # The closing turn reuses the client prompt plus an extra reflection instruction.
    reflection_request = "\n\nNow provide a brief reflection on what you realized or learned in this session. What insight or takeaway are you leaving with? (2-3 sentences)"
    conversation = ai_turn(
        ai_name="Client (Conclusion)",
        conversation=conversation,
        model=shared_model,
        system_prompt=client_prompt + reflection_request,
        gui=None
    )

    newest = conversation[-1]
    if verbose:
        print(newest.get('content', ''))
        print(f"\n{banner}")

    # Attach the transcript and bookkeeping fields to the session record.
    session_data['conversation'] = conversation
    session_data['num_turns'] = num_turns + 1  # +1 for the reflection
    session_data['timestamp'] = datetime.now().isoformat()

    return session_data

print("✅ Therapeutic session runner ready!")

## 8️⃣ Run Single Therapeutic Conversation

In [None]:
from third_party.liminal_backrooms.main import ai_turn
import json
import time

def run_therapeutic_session(session_id: int, num_turns: int = 6, verbose: bool = True) -> Dict[str, Any]:
    """
    Run one simulated therapist-client session and return its full record.

    Args:
        session_id: Unique session identifier, forwarded to the opening generator.
        num_turns: How many alternating turns to generate after the opening
            (default 6 = 3 therapist, 3 client); the therapist speaks first.
        verbose: Print the conversation while it is generated.

    Returns:
        The generated session dict (metadata and opening statement) with
        three fields added: ``"conversation"``, ``"num_turns"`` (the
        requested turns plus the closing reflection) and ``"timestamp"``.
    """
    divider = f"{'='*80}"

    session_data = generate_varied_session_opening(session_id)

    if verbose:
        print(f"\n{divider}")
        print(f"SESSION {session_id}: {session_data['metadata']['primary_issue'].upper()}")
        print(divider)
        print(f"Client opens: {session_data['opening_statement']}")
        print("-" * 80)

    # Seed the conversation with the client's opening line.
    conversation = [
        {"role": "user", "content": session_data['opening_statement']},
    ]

    # System prompts for the two roles.
    pair = SYSTEM_PROMPT_PAIRS["Depression/Anxiety Therapy Session"]
    therapist_prompt = pair["AI_1"]
    client_prompt = pair["AI_2"]

    # Model labels handed to ai_turn (the same for both roles).
    therapist_model = "Gemma 3 4B (with Probes)"
    client_model = "Gemma 3 4B (with Probes)"

    for turn_number in range(1, num_turns + 1):
        # Odd turn numbers (1, 3, 5, ...) belong to the therapist.
        if turn_number % 2:
            ai_name, model, system_prompt = "Therapist", therapist_model, therapist_prompt
        else:
            ai_name, model, system_prompt = "Client", client_model, client_prompt

        if verbose:
            print(f"\n{ai_name} (Turn {turn_number}):")

        conversation = ai_turn(
            ai_name=ai_name,
            conversation=conversation,
            model=model,
            system_prompt=system_prompt,
            gui=None
        )

        # The most recent entry is taken to be the response just generated.
        latest = conversation[-1]
        if verbose:
            print(latest.get('content', ''))

    if verbose:
        print(f"\n{divider}")
        print("CLIENT REFLECTION (Final):")
        print(divider)

    # One extra client turn, with an instruction appended to the client prompt.
    conclusion_prompt = "".join([
        client_prompt,
        "\n\nNow provide a brief reflection on what you realized or learned in this session. ",
        "What insight or takeaway are you leaving with? (2-3 sentences)",
    ])

    conversation = ai_turn(
        ai_name="Client (Conclusion)",
        conversation=conversation,
        model=client_model,
        system_prompt=conclusion_prompt,
        gui=None
    )

    latest = conversation[-1]
    if verbose:
        print(latest.get('content', ''))
        print(f"\n{divider}")

    # Record the transcript, the turn count (including the reflection) and the finish time.
    session_data.update({
        'conversation': conversation,
        'num_turns': num_turns + 1,
        'timestamp': datetime.now().isoformat(),
    })

    return session_data

print("✅ Therapeutic session runner ready!")

## 9️⃣ Test Single Session

In [None]:
# Dry run: one fully printed session (id 999) before the bulk simulation.
print("Running test session...\n")
test_session = run_therapeutic_session(session_id=999, num_turns=6, verbose=True)

# Three-line summary of the finished session.
_summary_lines = [
    "\n✅ Test session complete!",
    f"   Turns: {test_session['num_turns']}",
    f"   Issue: {test_session['metadata']['primary_issue']}",
]
print("\n".join(_summary_lines))

## 🔟 Run 100+ Simulation Sessions

Generate 100 unique therapeutic conversations with rich variation.

**⏰ Estimated time:** 2-4 hours for 100 sessions
- ~1-2 minutes per session
- Automatic checkpointing every 10 sessions
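- Each finished session is written to Drive immediately, so an interruption does not lose completed sessions (re-running the cell starts again from session 0)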

In [None]:
import time
from tqdm.notebook import tqdm

# Configuration
NUM_SESSIONS = 100
TURNS_PER_SESSION = 6  # alternating therapist/client turns; the conclusion is added on top
CHECKPOINT_INTERVAL = 10

print(f"{'='*80}")
print(f"RUNNING {NUM_SESSIONS} THERAPEUTIC SIMULATION SESSIONS")
print(f"{'='*80}")
print(f"Turns per session: {TURNS_PER_SESSION} + 1 conclusion = {TURNS_PER_SESSION + 1} total")
print(f"Checkpoint interval: Every {CHECKPOINT_INTERVAL} sessions")
print(f"Output directory: {output_dir}")
# Rough estimate that budgets 1.5 minutes per session.
print(f"\nEstimated time: {NUM_SESSIONS * 1.5 / 60:.1f} hours\n")

overall_start = time.time()
all_sessions = []  # successfully completed session records
errors = []        # one entry per failed session

# Progress bar
pbar = tqdm(total=NUM_SESSIONS, desc="Simulating sessions", unit="session")

for session_id in range(NUM_SESSIONS):
    try:
        # Run session (non-verbose for bulk processing)
        session_data = run_therapeutic_session(
            session_id=session_id,
            num_turns=TURNS_PER_SESSION,
            verbose=False
        )

        all_sessions.append(session_data)

        # Save individual session; default=str stringifies any value that
        # json cannot serialize natively.
        session_file = f"{output_dir}/conversations/session_{session_id:03d}.json"
        with open(session_file, 'w') as f:
            json.dump(session_data, f, indent=2, default=str)

        pbar.set_postfix({
            'issue': session_data['metadata']['primary_issue'],
            'errors': len(errors)
        })

        # Checkpoint every N sessions
        if (session_id + 1) % CHECKPOINT_INTERVAL == 0:
            checkpoint_file = f"{output_dir}/checkpoint_{session_id + 1}.json"
            with open(checkpoint_file, 'w') as f:
                json.dump({
                    'sessions_completed': session_id + 1,
                    'sessions': all_sessions,
                    'errors': errors
                }, f, indent=2, default=str)

            elapsed = time.time() - overall_start
            rate = (session_id + 1) / elapsed  # sessions per second
            remaining = (NUM_SESSIONS - session_id - 1) / rate if rate > 0 else 0  # seconds

            # rate is per second, so the hourly figure needs a factor of 3600.
            pbar.write(f"✅ Checkpoint: {session_id + 1}/{NUM_SESSIONS} sessions | "
                      f"Rate: {rate*3600:.1f}/hr | ETA: {remaining/60:.1f} min")

    except Exception as e:
        # Batch boundary: a single failing session is recorded and reported,
        # and the run continues with the next session.
        error_info = {
            'session_id': session_id,
            'error': str(e),
            'timestamp': datetime.now().isoformat()
        }
        errors.append(error_info)
        pbar.write(f"❌ Error in session {session_id}: {str(e)}")

    finally:
        # Advance for failed sessions too, so the bar always reaches NUM_SESSIONS.
        pbar.update(1)

pbar.close()

total_elapsed = time.time() - overall_start

print(f"\n{'='*80}")
print(f"SIMULATION COMPLETE!")
print(f"{'='*80}")
print(f"Total sessions: {len(all_sessions)}/{NUM_SESSIONS}")
print(f"Successful: {len(all_sessions)}")
print(f"Errors: {len(errors)}")
print(f"Total time: {total_elapsed/3600:.2f} hours ({total_elapsed/60:.1f} minutes)")
# Avoid dividing by zero when every session failed.
if all_sessions:
    print(f"Average: {total_elapsed/len(all_sessions):.1f} seconds per session")
else:
    print("Average: n/a (no session completed)")
print(f"\nAll sessions saved to: {output_dir}/conversations/")

# Save final dataset
final_file = f"{output_dir}/all_sessions_{NUM_SESSIONS}.json"
with open(final_file, 'w') as f:
    json.dump({
        'total_sessions': len(all_sessions),
        'configuration': {
            'num_sessions': NUM_SESSIONS,
            'turns_per_session': TURNS_PER_SESSION,
            'total_time_hours': total_elapsed / 3600
        },
        'sessions': all_sessions,
        'errors': errors
    }, f, indent=2, default=str)

print(f"\n📦 Final dataset: {final_file}")

## 1️⃣1️⃣ Meta-Analysis: Aggregate Statistics

In [None]:
import pandas as pd
from collections import Counter, defaultdict
import numpy as np

# Aggregate statistics over the metadata of every completed session.
# Fail early with a clear message instead of a KeyError on an empty DataFrame.
if not all_sessions:
    raise RuntimeError("No completed sessions to analyze - run the simulation cell first.")

print(f"{'='*80}")
# Report the number of sessions actually available, not a fixed figure.
print(f"META-ANALYSIS: {len(all_sessions)} THERAPEUTIC SESSIONS")
print(f"{'='*80}\n")

# Extract metadata from all sessions (one row per session)
meta_df = pd.DataFrame([s['metadata'] for s in all_sessions])

# 1. Distribution of presenting issues
print("1️⃣ PRESENTING ISSUES DISTRIBUTION")
print("-" * 80)
issue_dist = meta_df['primary_issue'].value_counts()
for issue, count in issue_dist.items():
    pct = (count / len(all_sessions)) * 100
    bar = "█" * int(pct / 2)  # one block per two percentage points
    print(f"  {issue:15s}: {count:3d} ({pct:5.1f}%) {bar}")

# 2. Most common trigger contexts
print(f"\n2️⃣ TOP TRIGGER CONTEXTS")
print("-" * 80)
trigger_counts = Counter(meta_df['trigger_context'])
for trigger, count in trigger_counts.most_common(10):
    print(f"  {count:2d}x  {trigger}")

# 3. Symptom duration distribution
print(f"\n3️⃣ SYMPTOM DURATION DISTRIBUTION")
print("-" * 80)
duration_counts = Counter(meta_df['symptom_duration'])
for duration, count in duration_counts.most_common():
    print(f"  {count:2d}x  {duration}")

# 4. Most common symptoms (each session contributes a list, so flatten first)
print(f"\n4️⃣ MOST COMMON SYMPTOMS (PRIMARY)")
print("-" * 80)
all_primary_symptoms = [
    symptom
    for symptoms in meta_df['primary_symptoms']
    for symptom in symptoms
]
symptom_counts = Counter(all_primary_symptoms)
for symptom, count in symptom_counts.most_common(15):
    print(f"  {count:2d}x  {symptom}")

# 5. Cognitive distortions
print(f"\n5️⃣ COGNITIVE DISTORTIONS REPRESENTED")
print("-" * 80)
distortion_counts = Counter(meta_df['cognitive_distortion'])
for distortion, count in distortion_counts.most_common(10):
    print(f"  {count:2d}x  {distortion}")

# 6. Support levels
print(f"\n6️⃣ SUPPORT SYSTEM LEVELS")
print("-" * 80)
support_counts = Counter(meta_df['support_level'])
for support, count in support_counts.most_common():
    print(f"  {count:2d}x  {support}")

# 7. Therapy goals
print(f"\n7️⃣ THERAPY GOALS")
print("-" * 80)
goal_counts = Counter(meta_df['therapy_goal'])
for goal, count in goal_counts.most_common(10):
    print(f"  {count:2d}x  {goal}")

# Save meta-analysis
meta_analysis = {
    'total_sessions': len(all_sessions),
    # value_counts() yields NumPy integers, which json cannot serialize;
    # convert them to plain ints (the Counter-based entries already are).
    'presenting_issues': {issue: int(count) for issue, count in issue_dist.items()},
    'top_triggers': dict(trigger_counts.most_common(15)),
    'symptom_durations': dict(duration_counts),
    'top_symptoms': dict(symptom_counts.most_common(20)),
    'cognitive_distortions': dict(distortion_counts),
    'support_levels': dict(support_counts),
    'therapy_goals': dict(goal_counts)
}

with open(f"{output_dir}/analysis/meta_analysis_statistics.json", 'w') as f:
    json.dump(meta_analysis, f, indent=2)

print(f"\n✅ Meta-analysis saved to: {output_dir}/analysis/meta_analysis_statistics.json")

## 1️⃣2️⃣ Meta-Analysis: Cognitive Actions (If Probes Available)

In [None]:
# Analyze cognitive actions detected across all sessions, split by speaker.
print(f"{'='*80}")
print("META-ANALYSIS: COGNITIVE ACTIONS")
print(f"{'='*80}\n")

therapist_actions = Counter()
client_actions = Counter()
sessions_with_probes = 0  # sessions (not turns) containing at least one 'predictions' entry

for session in all_sessions:
    conversation = session['conversation']

    # The session runner generates the client's closing reflection last, so
    # the final message is always the client's, whatever its index parity.
    # NOTE(review): like the parity rule below, this assumes each ai_turn call
    # appends exactly one message to the conversation - confirm against ai_turn.
    conclusion_index = len(conversation) - 1
    session_has_probes = False

    for i, turn in enumerate(conversation):
        if 'predictions' not in turn:
            continue
        session_has_probes = True

        # Index 0 is the client's opening; after that odd indices are the
        # therapist and even indices the client, except for the conclusion.
        is_therapist = (i % 2 == 1) and i != conclusion_index
        tally = therapist_actions if is_therapist else client_actions

        for pred in turn['predictions']:
            if pred.get('is_active', False):
                # Weight by the prediction's 'count' field, defaulting to 1.
                tally[pred['action']] += pred.get('count', 1)

    # Count the session once, however many of its turns carry predictions.
    if session_has_probes:
        sessions_with_probes += 1

if therapist_actions or client_actions:
    print(f"✅ Found cognitive action data in {sessions_with_probes} sessions\n")

    print("1️⃣ TOP THERAPIST COGNITIVE ACTIONS")
    print("-" * 80)
    for action, count in therapist_actions.most_common(20):
        bar = "█" * (count // 10)  # one block per 10 weighted detections
        print(f"  {action:35s} {count:4d} {bar}")

    print(f"\n2️⃣ TOP CLIENT COGNITIVE ACTIONS")
    print("-" * 80)
    for action, count in client_actions.most_common(20):
        bar = "█" * (count // 10)
        print(f"  {action:35s} {count:4d} {bar}")

    # Actions observed for one role only
    therapist_only = set(therapist_actions) - set(client_actions)
    client_only = set(client_actions) - set(therapist_actions)

    if therapist_only:
        print(f"\n3️⃣ COGNITIVE ACTIONS UNIQUE TO THERAPIST")
        print("-" * 80)
        for action in sorted(therapist_only):
            print(f"  • {action}")

    if client_only:
        print(f"\n4️⃣ COGNITIVE ACTIONS UNIQUE TO CLIENT")
        print("-" * 80)
        for action in sorted(client_only):
            print(f"  • {action}")

    # Save cognitive action analysis
    cognitive_analysis = {
        'sessions_analyzed': sessions_with_probes,
        'therapist_top_actions': dict(therapist_actions.most_common(30)),
        'client_top_actions': dict(client_actions.most_common(30)),
        'therapist_unique': list(therapist_only),
        'client_unique': list(client_only)
    }

    with open(f"{output_dir}/analysis/cognitive_action_analysis.json", 'w') as f:
        json.dump(cognitive_analysis, f, indent=2)

    print(f"\n✅ Cognitive action analysis saved to: {output_dir}/analysis/cognitive_action_analysis.json")
else:
    print("⚠️  No cognitive action data found. Probes may not be loaded.")

## 1️⃣3️⃣ Visualize Meta-Analysis Results

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (16, 12)


def _truncate_label(label, limit):
    """Return `label` cut to `limit` characters plus '...' when it is longer."""
    return label[:limit] + '...' if len(label) > limit else label


def _frequency_panel(ax, ranked, title, color, horizontal, label_limit=None):
    """Draw one frequency bar chart on `ax`.

    Args:
        ax: Matplotlib axes to draw on.
        ranked: Sequence of (label, count) pairs, already in display order.
        title: Panel title.
        color: Bar color.
        horizontal: True for horizontal bars with the first entry on top,
            False for vertical bars with rotated tick labels.
        label_limit: When given, tick labels longer than this many characters
            are truncated and suffixed with '...'.
    """
    ax.set_title(title, fontsize=14, fontweight='bold')

    if not ranked:
        # zip(*ranked) cannot be unpacked into two names when there is nothing
        # to rank, so show a placeholder instead of raising ValueError.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                transform=ax.transAxes)
        return

    labels, counts = zip(*ranked)
    if label_limit is not None:
        labels = [_truncate_label(label, label_limit) for label in labels]
    positions = range(len(labels))

    if horizontal:
        ax.barh(positions, counts, color=color, alpha=0.7)
        ax.set_yticks(positions)
        ax.set_yticklabels(labels, fontsize=9)
        ax.set_xlabel('Frequency', fontsize=10)
        ax.invert_yaxis()  # most frequent entry at the top
        ax.grid(True, alpha=0.3, axis='x')
    else:
        ax.bar(positions, counts, color=color, alpha=0.7)
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, ha='right', fontsize=9)
        ax.set_ylabel('Frequency', fontsize=10)
        ax.grid(True, alpha=0.3, axis='y')


fig, axes = plt.subplots(3, 2, figsize=(16, 14))

# 1. Presenting issues
ax1 = axes[0, 0]
issue_data = meta_df['primary_issue'].value_counts()
colors = ['#FF6B6B', '#4ECDC4', '#95E1D3']
ax1.pie(issue_data.values, labels=issue_data.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax1.set_title('Distribution of Presenting Issues', fontsize=14, fontweight='bold')

# 2. Top triggers
_frequency_panel(
    axes[0, 1],
    Counter(meta_df['trigger_context']).most_common(8),
    'Top 8 Trigger Contexts',
    'steelblue',
    horizontal=True,
    label_limit=40,
)

# 3. Symptom durations (labels are shown in full, as before)
_frequency_panel(
    axes[1, 0],
    Counter(meta_df['symptom_duration']).most_common(),
    'Symptom Duration Distribution',
    'coral',
    horizontal=False,
)

# 4. Top symptoms: each row holds a list of symptoms, so flatten before counting
symptom_counts = Counter(
    symptom
    for symp_list in meta_df['primary_symptoms']
    for symptom in symp_list
)
_frequency_panel(
    axes[1, 1],
    symptom_counts.most_common(10),
    'Top 10 Primary Symptoms',
    'lightgreen',
    horizontal=True,
    label_limit=40,
)

# 5. Cognitive distortions
_frequency_panel(
    axes[2, 0],
    Counter(meta_df['cognitive_distortion']).most_common(8),
    'Cognitive Distortions Represented',
    'orange',
    horizontal=False,
    label_limit=20,
)

# 6. Support levels
_frequency_panel(
    axes[2, 1],
    Counter(meta_df['support_level']).most_common(),
    'Support System Levels',
    'purple',
    horizontal=True,
    label_limit=40,
)

plt.tight_layout()
visualization_path = f"{output_dir}/analysis/meta_analysis_visualization.png"
plt.savefig(visualization_path, dpi=150, bbox_inches='tight')
plt.show()

print(f"✅ Visualization saved to: {visualization_path}")

## 1️⃣4️⃣ Visualize Cognitive Actions (If Available)

In [None]:
# Plot the 15 most frequent cognitive actions per role, side by side.
# The figure is rendered when at least one role has data, matching the `or`
# check used when the cognitive action report is printed and saved; a role
# without data gets a placeholder panel instead of suppressing the whole chart.
if therapist_actions or client_actions:
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    role_panels = (
        (axes[0], therapist_actions, 'Top 15 Therapist Cognitive Actions', 'steelblue'),
        (axes[1], client_actions, 'Top 15 Client Cognitive Actions', 'coral'),
    )
    for ax, tally, title, color in role_panels:
        ax.set_title(title, fontsize=14, fontweight='bold')

        top_actions = tally.most_common(15)
        if not top_actions:
            # Only the other role produced probe data.
            ax.text(0.5, 0.5, 'No data', ha='center', va='center',
                    transform=ax.transAxes)
            continue

        action_names = [name for name, _ in top_actions]
        action_counts = [freq for _, freq in top_actions]
        positions = range(len(action_names))

        ax.barh(positions, action_counts, color=color, alpha=0.8)
        ax.set_yticks(positions)
        ax.set_yticklabels(action_names, fontsize=9)
        ax.set_xlabel('Frequency Across All Sessions', fontsize=11)
        ax.invert_yaxis()  # most frequent action at the top
        ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    comparison_path = f"{output_dir}/analysis/cognitive_actions_comparison.png"
    plt.savefig(comparison_path, dpi=150, bbox_inches='tight')
    plt.show()

    print(f"✅ Cognitive actions visualization saved to: {comparison_path}")
else:
    print("⚠️  No cognitive action data available for visualization")

## 1️⃣5️⃣ Export Dataset for Further Analysis

In [None]:
# Flatten every session into one row and write the table out as CSV.
def _session_row(session):
    """Build the flat CSV row (a dict) for a single session record."""
    meta = session['metadata']
    conv = session['conversation']

    # Turns are attributed by position: odd indices count as therapist turns,
    # even indices (including the opening turn) as client turns.
    # NOTE(review): this assumes turns strictly alternate starting with the
    # client; no per-turn role field is consulted here.
    therapist_turns = len(conv[1::2])
    client_turns = len(conv[0::2])

    # Whitespace-delimited word count over all turns; missing content counts as 0.
    word_total = sum(len(turn.get('content', '').split()) for turn in conv)

    return {
        'session_id': meta['session_id'],
        'primary_issue': meta['primary_issue'],
        'num_primary_symptoms': len(meta['primary_symptoms']),
        'num_secondary_symptoms': len(meta['secondary_symptoms']),
        'trigger_context': meta['trigger_context'],
        'symptom_duration': meta['symptom_duration'],
        'emotional_presentation': meta['emotional_presentation'],
        'cognitive_distortion': meta['cognitive_distortion'],
        'support_level': meta['support_level'],
        'coping_attempts': meta['coping_attempts'],
        'therapy_goal': meta['therapy_goal'],
        'total_turns': len(conv),
        'therapist_turns': therapist_turns,
        'client_turns': client_turns,
        'total_words': word_total
    }


export_data = [_session_row(session) for session in all_sessions]

export_df = pd.DataFrame(export_data)
csv_file = f"{output_dir}/analysis/sessions_dataset.csv"
export_df.to_csv(csv_file, index=False)

print(f"✅ Dataset exported to CSV: {csv_file}")
print(f"\nDataset shape: {export_df.shape}")
print("\nSample rows:")
print(export_df.head())

## 1️⃣6️⃣ Summary and Next Steps

In [None]:
# Final recap: what was generated, where it was written, and suggested follow-ups.
summary_rule = "=" * 80

print("\n" + summary_rule)
print("🎉 THERAPEUTIC CONVERSATION SIMULATION COMPLETE!")
print(summary_rule)

# Session totals, broken down by presenting issue.
summary_issues = [s['metadata']['primary_issue'] for s in all_sessions]
summary_turns = sum(len(s['conversation']) for s in all_sessions)
print(
    "\n📊 Generated Data:",
    f"   • {len(all_sessions)} complete therapy sessions",
    f"   • {summary_turns} total conversation turns",
    f"   • Depression sessions: {summary_issues.count('depression')}",
    f"   • Anxiety sessions: {summary_issues.count('anxiety')}",
    f"   • Combined sessions: {summary_issues.count('both')}",
    sep="\n",
)

# Locations of the artifacts written by the earlier cells.
print(
    "\n📁 Output Files:",
    f"   • Individual sessions: {output_dir}/conversations/",
    f"   • Complete dataset: {output_dir}/all_sessions_{NUM_SESSIONS}.json",
    f"   • Meta-analysis stats: {output_dir}/analysis/meta_analysis_statistics.json",
    f"   • Cognitive actions: {output_dir}/analysis/cognitive_action_analysis.json",
    f"   • CSV export: {output_dir}/analysis/sessions_dataset.csv",
    f"   • Visualizations: {output_dir}/analysis/*.png",
    sep="\n",
)

# Number of distinct values seen in each randomized scenario dimension.
variation_columns = (
    ('trigger_context', 'unique trigger contexts'),
    ('symptom_duration', 'different symptom durations'),
    ('cognitive_distortion', 'cognitive distortions'),
    ('emotional_presentation', 'emotional presentations'),
)
print("\n🎲 Variation Achieved:")
for column, description in variation_columns:
    print(f"   • {len(meta_df[column].unique())} {description}")

print(
    "\n🔍 Next Steps:",
    f"   1. Download all files from Google Drive: {output_dir}",
    "   2. Use CSV for statistical analysis in R, Python, or Excel",
    "   3. Analyze conversation patterns and therapeutic techniques",
    "   4. Train models on the conversation data",
    "   5. Compare cognitive actions across depression vs anxiety",
    "   6. Identify effective therapeutic interventions",
    sep="\n",
)

print(
    "\n💡 Research Applications:",
    "   • Training conversational AI for mental health support",
    "   • Studying therapeutic dialogue patterns",
    "   • Identifying cognitive action signatures of depression/anxiety",
    "   • Testing intervention effectiveness",
    "   • Developing assessment tools",
    sep="\n",
)

print("\n" + summary_rule)
print("All data saved to Google Drive!")
print(summary_rule)