In [6]:
# Initialize DSPy
# Standard-library and third-party imports used by the cells in this notebook.
import sys
import dspy
import json
import os
from datetime import datetime
# The helper module lives one directory above the notebook, so the parent directory
# must be on sys.path *before* the `dspy_utils` import below can succeed.
sys.path.append('..')  # Add parent directory to path
from dspy_utils import load_dspy_config
# Configure DSPy from the JSON config file. As the last expression in the cell its return
# value is displayed; the recorded output shows a `dspy.clients.lm.LM` instance.
load_dspy_config("../dspy_config.json")

<dspy.clients.lm.LM at 0x7ee8a64b98b0>

In [7]:
# Read the Markdown template that is later passed to the analysis-plan generator
template_path = '../templates/analysis_plan_template.md'
with open(template_path, 'r') as template_file:
    template_content = template_file.read()

# Root directory for every file this notebook writes (created if it does not exist)
output_dir = "."
os.makedirs(output_dir, exist_ok=True)

In [9]:
# Lightweight signature and example generator with archive functionality
# DSPy signature for generating synthetic examples
class GenerateExampleData(dspy.Signature):
    """Generate synthetic drug study examples with LLM-filled missing fields"""
    # NOTE: DSPy uses a Signature's docstring and each field's `desc` as prompt text for the
    # LM, so these strings are runtime behaviour, not just documentation.

    # Optional input fields (any combination can be provided)
    drug_name: str = dspy.InputField(desc="Name of the drug (optional - will be generated if not provided)", default="")
    indication: str = dspy.InputField(desc="Disease indication (optional - will be generated if not provided)", default="")
    mechanism: str = dspy.InputField(desc="Mechanism of action (optional - will be generated if not provided)", default="")
    competitor: str = dspy.InputField(desc="Competitor drug name (optional - will be generated if not provided)", default="")
    trial_type: str = dspy.InputField(desc="Type of clinical trial (optional - will be generated if not provided)", default="")
    # Output: complete example dictionary.
    # The key list must name every field that later cells read; `mechanism` was missing from
    # it even though the analysis-plan cell indexes example["mechanism"] unconditionally.
    example_data: str = dspy.OutputField(desc="Complete example data as JSON string with all fields filled: drug_name, indication, mechanism, competitor, trial_type")

# Predictor built from the signature above. It is called with the signature's input fields
# as keyword arguments, and the JSON text is read from the result's `.example_data`.
generate_example = dspy.ChainOfThought(GenerateExampleData)

# Archive management functions
def load_archive(archive_file="examples_archive.json"):
    """Load the examples archive, always returning a usable archive structure.

    Args:
        archive_file: Path of the JSON archive to read.

    Returns:
        The decoded archive. When the file is missing or empty, a fresh archive
        ``{"examples": [], "metadata": {"created": <now>, "last_updated": None}}`` is
        returned. When the file holds a JSON object that lacks ``examples`` or
        ``metadata`` (or has them set to null), those keys are filled in so callers can
        index them directly. Any other top-level JSON value is returned unchanged.

    Raises:
        json.JSONDecodeError: If the file has content that is not valid JSON. This is
            deliberately not swallowed, so a damaged archive is never silently replaced.
    """
    def _fresh_metadata():
        return {"created": datetime.now().isoformat(), "last_updated": None}

    try:
        with open(archive_file, 'r') as f:
            raw_text = f.read()
    except FileNotFoundError:
        return {"examples": [], "metadata": _fresh_metadata()}

    # An empty file carries no data, so treat it like a missing archive rather than
    # failing with a JSONDecodeError.
    if not raw_text.strip():
        return {"examples": [], "metadata": _fresh_metadata()}

    archive = json.loads(raw_text)

    # save_archive() and generate_examples() index these keys without checking them.
    if isinstance(archive, dict):
        if archive.get("examples") is None:
            archive["examples"] = []
        if archive.get("metadata") is None:
            archive["metadata"] = _fresh_metadata()
    return archive

def save_archive(archive_data, archive_file="examples_archive.json"):
    """Write the archive to disk as indented JSON and stamp its ``last_updated`` time.

    Args:
        archive_data: Archive dictionary. It is mutated in place:
            ``archive_data["metadata"]["last_updated"]`` is set to the current time, and a
            ``metadata`` dict is created if it is missing.
        archive_file: Destination path of the JSON archive.

    Raises:
        TypeError: If ``archive_data`` contains a value that is not JSON serialisable. The
            existing file on disk is left untouched in that case.
    """
    if not isinstance(archive_data.get("metadata"), dict):
        archive_data["metadata"] = {}
    archive_data["metadata"]["last_updated"] = datetime.now().isoformat()

    # Serialise before opening the file: opening with 'w' truncates immediately, so a
    # serialisation error raised while the file is open would wipe the existing archive.
    serialized = json.dumps(archive_data, indent=2)
    with open(archive_file, 'w') as f:
        f.write(serialized)
    print(f"Archive saved to {archive_file}")

def get_archived_examples(archive_file="examples_archive.json", filter_by=None):
    """Return archived examples, optionally narrowed to those matching ``filter_by``.

    Args:
        archive_file: Path of the JSON archive to read.
        filter_by: Optional mapping of field name to required value. An example is kept
            only when every listed field equals the given value; a field absent from an
            example compares as ``None``. A falsy ``filter_by`` disables filtering.

    Returns:
        A list of example dictionaries.
    """
    stored = load_archive(archive_file)["examples"]

    # No criteria supplied: hand back the archive's own list untouched.
    if not filter_by:
        return stored

    return [
        record
        for record in stored
        if all(record.get(field) == wanted for field, wanted in filter_by.items())
    ]

# Main example generator with archive integration
# Placeholder values used when neither the caller nor the LM supplies a field
_EXAMPLE_FIELD_DEFAULTS = {
    'drug_name': 'GeneratedDrug',
    'indication': 'Generated Indication',
    'mechanism': 'Generated Mechanism',
    'competitor': 'Generated Competitor',
    'trial_type': 'Phase 2',
}


def _parse_example_json(raw_text):
    """Decode the LM reply into a dict; return {} when no JSON object can be recovered."""
    if isinstance(raw_text, dict):
        return dict(raw_text)
    if not isinstance(raw_text, str):
        return {}

    text = raw_text.strip()
    candidates = [text]
    # Replies are often wrapped in Markdown fences or prose; retry on the outermost {...} span.
    start, end = text.find('{'), text.rfind('}')
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}


def _complete_example(parsed, partial):
    """Fill every required field that is missing or empty from `partial`, then from placeholders."""
    completed = dict(parsed)
    for field, placeholder in _EXAMPLE_FIELD_DEFAULTS.items():
        if not completed.get(field):
            completed[field] = partial.get(field) or placeholder
    return completed


def generate_examples(partial_examples=None, num_examples=3, save_to_archive=True, archive_file="examples_archive.json"):
    """
    Generate synthetic drug study examples with LLM filling missing fields

    Every returned example carries all five fields (drug_name, indication, mechanism,
    competitor, trial_type). Values decoded from the LM's JSON reply are kept; a field the
    reply omits or leaves empty is taken from the partial input, and otherwise from a
    placeholder. A reply that is not a JSON object is treated as supplying nothing.

    Args:
        partial_examples: List of dictionaries with any combination of fields
        num_examples: Number of examples to generate if partial_examples is None
        save_to_archive: Whether to save generated examples to archive
        archive_file: Name of the archive file

    Returns:
        List of complete example dictionaries with metadata
    """
    # Load existing archive if saving is enabled
    archive = load_archive(archive_file) if save_to_archive else None

    # If no partial examples provided, generate from scratch
    # NOTE(review): every from-scratch call sends identical empty inputs; if the LM client
    # caches identical requests the generated examples may be duplicates - confirm.
    if partial_examples is None:
        partial_examples = [{} for _ in range(num_examples)]

    complete_examples = []

    for partial in partial_examples:
        # Fill in missing fields with LLM
        result = generate_example(
            drug_name=partial.get('drug_name', ''),
            indication=partial.get('indication', ''),
            mechanism=partial.get('mechanism', ''),
            competitor=partial.get('competitor', ''),
            trial_type=partial.get('trial_type', '')
        )

        # Decode the reply without a bare `except`, so KeyboardInterrupt and genuine
        # programming errors are no longer swallowed, then backfill required fields.
        parsed = _parse_example_json(getattr(result, 'example_data', None))
        example_dict = _complete_example(parsed, partial)

        # Add metadata
        example_with_meta = {
            **example_dict,
            "_metadata": {
                "generated_at": datetime.now().isoformat(),
                "partial_input": partial,
                "generation_method": "dspy_llm"
            }
        }

        complete_examples.append(example_with_meta)

        # Add to archive if saving is enabled
        if save_to_archive:
            archive["examples"].append(example_with_meta)

    # Save updated archive
    if save_to_archive:
        save_archive(archive, archive_file)

    return complete_examples

# Smoke-test the archive helpers: count everything stored, then one filtered subset
print("=== ARCHIVE SYSTEM TEST ===")
existing_examples = get_archived_examples()
print(f"Found {len(existing_examples)} existing examples in archive")
diabetes_filter = {"indication": "Type 2 Diabetes"}
diabetes_examples = get_archived_examples(filter_by=diabetes_filter)
print(f"\nType 2 Diabetes examples in archive: {len(diabetes_examples)}")
print("=== ARCHIVE SYSTEM READY ===\n")

# Seed inputs: each dict pins some fields and leaves the rest for the LM to fill in
partial_examples = [
    {"trial_type": "Phase 2 dose-finding", "indication": "Atopic Dermatitis"},
]

# `examples` is consumed by the analysis-plan and simulation cells further down
examples = generate_examples(partial_examples)
print(f"\nGenerated {len(examples)} new examples:")
for position, ex in enumerate(examples, start=1):
    print(f"{position}: {ex['drug_name']} - {ex['indication']} - {ex['trial_type']}")

=== ARCHIVE SYSTEM TEST ===
Found 0 existing examples in archive

Type 2 Diabetes examples in archive: 0
=== ARCHIVE SYSTEM READY ===

Archive saved to examples_archive.json

Generated 1 new examples:
1: Dermakinib - Atopic Dermatitis - Phase 2 dose-finding


In [10]:
# Section 1: Generate Analysis Plans
# DSPy signature for analysis plan generation
class GenerateAnalysisPlan(dspy.Signature):
    """Generate a population PK/PD analysis plan based on study parameters"""
    # NOTE: DSPy uses a Signature's docstring and each field's `desc` as prompt text for the
    # LM, so these strings are runtime behaviour; edit them as carefully as code.

    # Study parameters; the calling cell reads them from a generated example dictionary.
    drug_name: str = dspy.InputField(desc="Name of the drug")
    indication: str = dspy.InputField(desc="Disease indication") 
    mechanism: str = dspy.InputField(desc="Mechanism of action")
    competitor: str = dspy.InputField(desc="Competitor drug name")
    trial_type: str = dspy.InputField(desc="Type of clinical trial")
    # Full text of the analysis-plan template (the caller passes `template_content`).
    template: str = dspy.InputField(desc="Analysis plan template")
    # Output: generated analysis plan
    # The caller writes this text to `analysis_plan.md` as-is.
    analysis_plan: str = dspy.OutputField(desc="Generated analysis plan")

# Predictor for the analysis-plan signature defined above
generate_plan = dspy.ChainOfThought(GenerateAnalysisPlan)

# Keys each example must provide; they are the study-parameter inputs of GenerateAnalysisPlan
PLAN_INPUT_FIELDS = ("drug_name", "indication", "mechanism", "competitor", "trial_type")

print("=== GENERATING ANALYSIS PLANS ===")
for example in examples:
    # Examples come from decoded LM output, so a key can be absent. Warn and skip (the same
    # policy the simulation cell uses) instead of aborting the whole batch with a KeyError.
    missing_fields = [field for field in PLAN_INPUT_FIELDS if field not in example]
    if missing_fields:
        print(f"⚠️  Example {example.get('drug_name', '<unnamed>')} is missing {missing_fields}, skipping analysis plan")
        continue

    # Create drug-specific subdirectory
    # NOTE(review): drug_name is LM-generated and used verbatim as a directory name; a value
    # containing path separators would escape output_dir. The simulation cell rebuilds this
    # same path, so any sanitising must be applied in both places.
    drug_dir = f"{output_dir}/{example['drug_name']}"
    os.makedirs(drug_dir, exist_ok=True)

    # Generate analysis plan using the complete example data
    plan_result = generate_plan(
        drug_name=example["drug_name"],
        indication=example["indication"],
        mechanism=example["mechanism"],
        competitor=example["competitor"],
        trial_type=example["trial_type"],
        template=template_content
    )

    # Save analysis plan in drug subdirectory
    plan_filename = f"{drug_dir}/analysis_plan.md"
    with open(plan_filename, 'w') as f:
        f.write(plan_result.analysis_plan)

    print(f"✓ Analysis plan saved: {plan_filename}")

print("\n=== ANALYSIS PLANS COMPLETE ===\n")

=== GENERATING ANALYSIS PLANS ===
✓ Analysis plan saved: ./Dermakinib/analysis_plan.md

=== ANALYSIS PLANS COMPLETE ===



In [11]:
# Section 2: Generate Trial Simulations
# DSPy signature for R script generation
class GenerateTrialSimulation(dspy.Signature):
    """Generate R script for clinical trial simulation using mrgsolve"""
    # NOTE: DSPy uses a Signature's docstring and each field's `desc` as prompt text for the
    # LM, so these strings are runtime behaviour, not just documentation.

    # Text of a previously generated analysis plan (the caller reads it back from disk).
    analysis_plan: str = dspy.InputField(desc="Analysis plan text")
    # Output: complete R script
    # Typo fixed in the prompt text ("datsets" -> "datasets").
    r_script: str = dspy.OutputField(desc="Complete and error-free R script using mrgsolve for trial simulation. Output of script should be standard SAS SDTM datasets for PK PD analysis saved in a /data subfolder.")

# Predictor for the trial-simulation signature defined above
generate_simulation = dspy.ChainOfThought(GenerateTrialSimulation)


def _extract_r_code(llm_text):
    """Return the R source contained in an LM reply.

    When the reply has Markdown code fences, only the contents of fenced blocks that are
    untagged or tagged as R (```r, ```R, ```{r}) are kept, so prose around the code does not
    end up in the saved script. Several such blocks are joined with a blank line. When no
    usable fenced block is found, all fence markers are removed and the text is stripped.
    """
    fence = "```"
    code_parts = []
    # Splitting on the fence marker puts fenced content at the odd indices.
    for chunk in llm_text.split(fence)[1::2]:
        info_line, newline, body = chunk.partition("\n")
        language = info_line.strip().strip("{}").split(",")[0].strip().lower()
        # `newline` is empty when the fence opens and closes on one line; skip that chunk.
        if newline and language in ("", "r") and body.strip():
            code_parts.append(body.strip())
    if code_parts:
        return "\n\n".join(code_parts)
    return llm_text.replace('```r', '').replace('```R', '').replace('```', '').strip()


print("=== GENERATING TRIAL SIMULATIONS ===")
for example in examples:
    drug_dir = f"{output_dir}/{example['drug_name']}"

    # Read the previously generated analysis plan
    plan_filename = f"{drug_dir}/analysis_plan.md"
    try:
        with open(plan_filename, 'r') as f:
            analysis_plan_content = f.read()
    except FileNotFoundError:
        print(f"⚠️  Analysis plan not found for {example['drug_name']}, skipping simulation")
        continue

    # Generate R script using the analysis plan
    script_result = generate_simulation(
        analysis_plan=analysis_plan_content
    )

    # Clean and save R script in drug subdirectory (keep only the code, not the Markdown)
    script_filename = f"{drug_dir}/simulation.R"
    clean_script = _extract_r_code(script_result.r_script)
    with open(script_filename, 'w') as f:
        f.write(clean_script)

    print(f"✓ Simulation script saved: {script_filename}")

print("\n=== SIMULATIONS COMPLETE ===")
print(f"All files organized in subdirectories under: {output_dir}/")

=== GENERATING TRIAL SIMULATIONS ===
✓ Simulation script saved: ./Dermakinib/simulation.R

=== SIMULATIONS COMPLETE ===
All files organized in subdirectories under: ./
