In [1]:
import os
import json
from benchmark.evaluation.compliance_check import validate_icf_sections, get_section_name_to_rules
from dotenv import load_dotenv

# Load environment variables before anything else runs. load_dotenv() reads a
# .env file (if one is found) into the process environment.
# NOTE(review): presumably this supplies the API credentials the evaluation
# model needs - confirm against benchmark.evaluation.compliance_check.
load_dotenv()

# Configuration
# Model identifier forwarded unchanged to validate_icf_sections() in main().
EVAL_MODEL = "gpt-4o-mini"  # Model for evaluation

# Get sections from the compliance rules.
# The keys of SECTION_NAME_TO_RULES are the section names main() iterates over;
# the values are presumably the rules for each section (not used in this cell).
SECTION_NAME_TO_RULES = get_section_name_to_rules()
TARGET_SECTIONS = list(SECTION_NAME_TO_RULES.keys())

# Mock ICF section contents, keyed by section name.
# main() only evaluates a section when its key also appears in TARGET_SECTIONS,
# so the keys must match the rule-set section names exactly.
# The triple-quoted values keep their leading newline and indentation; the text
# is passed to validate_icf_sections() verbatim.
MOCK_ICF_SECTIONS = {
    "Purpose of Research": """
    The purpose of this research study is to evaluate the safety and effectiveness of Drug XYZ in patients with condition ABC. 
    This study aims to determine whether Drug XYZ can help reduce symptoms of condition ABC compared to the current standard treatment.
    Researchers hope to learn if this new approach provides better outcomes with fewer side effects.
    """,
    
    "Duration of Study Involvement": """
    Your participation in this study will last approximately 12 weeks. This includes:
    - A screening period of 2 weeks
    - A treatment period of 8 weeks with regular visits
    - A follow-up period of 2 weeks after treatment ends
    The total time commitment includes 6 study visits, each lasting between 1-3 hours.
    """,
    
    "Procedures": """
    During this study, you will undergo the following procedures:
    
    Screening Visit:
    - Complete medical history and physical examination
    - Blood tests to check your overall health
    - ECG to monitor your heart
    
    Treatment Visits (Weeks 1, 4, and 8):
    - Receive the study medication
    - Blood samples will be collected to measure drug levels
    - Complete questionnaires about your symptoms
    - Physical examination
    
    In Week 8, you will also have an MRI scan to evaluate your response to treatment.
    
    Follow-up Visit (Week 12):
    - Final physical examination
    - Blood tests
    - Discussion about your experience in the study
    """,
    
    "Possible Risks, Discomforts, and Inconveniences": """
    Participation in this study involves the following risks and discomforts:
    
    Study Drug Risks:
    - Common side effects include headache, nausea, and fatigue
    - Less common but more serious side effects include allergic reactions or liver problems
    - There may be unknown risks as this drug is still being studied
    
    Procedure Risks:
    - Blood draws may cause pain, bruising, or fainting
    - MRI scans may cause discomfort from lying still or claustrophobia from the enclosed space
    
    Other inconveniences include:
    - Multiple visits to the study site
    - Time commitment for study procedures
    - Possible discomfort from fasting before certain tests
    
    If you are or may become pregnant, there may be unknown risks to the fetus.
    """
}

def _compliance_percentage(followed, total):
    """Return followed/total as a float percentage; 100.0 when there are no rules."""
    # With no rules nothing can be violated, so the result is fully compliant.
    # The fallback is a float literal so the score type never depends on the data.
    return followed / total * 100 if total > 0 else 100.0


def calculate_compliance_score(evaluation_results):
    """Aggregate per-rule predictions into section-level and overall scores.

    Args:
        evaluation_results: Mapping of section name to a sized collection of
            rule-result dicts. Each dict must contain a "prediction" key; a
            rule counts as followed only when that value is exactly "Y".

    Returns:
        A dict with:
            "overall_score": float percentage over every rule in every section,
            "followed_rules": number of rules predicted "Y",
            "total_rules": number of rules evaluated,
            "section_scores": {section: {"score": float, "followed": int,
                                         "total": int}}.
        Scores are always floats. A section with no rules (and an evaluation
        with no rules at all) scores 100.0.

    Raises:
        KeyError: If a rule-result dict has no "prediction" key.
    """
    section_scores = {}
    total_rules = 0
    followed_rules = 0

    for section, rules in evaluation_results.items():
        section_total = len(rules)
        section_followed = sum(1 for rule in rules if rule["prediction"] == "Y")

        section_scores[section] = {
            "score": _compliance_percentage(section_followed, section_total),
            "followed": section_followed,
            "total": section_total,
        }

        total_rules += section_total
        followed_rules += section_followed

    return {
        # Weighted by rule count, not an average of the section percentages.
        "overall_score": _compliance_percentage(followed_rules, total_rules),
        "followed_rules": followed_rules,
        "total_rules": total_rules,
        "section_scores": section_scores,
    }

def print_compliance_results(compliance_results):
    """Write a human-readable compliance summary to stdout.

    Args:
        compliance_results: Dict in the shape returned by
            calculate_compliance_score(): "overall_score", "followed_rules",
            "total_rules", and "section_scores" (section name mapped to a dict
            with "score", "followed" and "total").
    """
    # Shared tail of every score line: percentage to two decimals, then counts.
    tally = "{score:.2f}% ({followed}/{total} rules followed)"

    print("\n--- COMPLIANCE RESULTS ---\n")

    # Overall figure first.
    overall_line = "Overall Compliance Score: " + tally.format(
        score=compliance_results["overall_score"],
        followed=compliance_results["followed_rules"],
        total=compliance_results["total_rules"],
    )
    print(overall_line)

    # Then one bullet per section, in the mapping's own order.
    print("\nSection Scores:")
    for section, score_data in compliance_results["section_scores"].items():
        bullet = f"- {section}: "
        print(bullet + tally.format(
            score=score_data["score"],
            followed=score_data["followed"],
            total=score_data["total"],
        ))

def main():
    """Run the mock ICF compliance check end to end.

    For every name in TARGET_SECTIONS that has mock text in MOCK_ICF_SECTIONS,
    calls validate_icf_sections() with EVAL_MODEL and prints each rule's
    prediction (plus the rationale for rules predicted "N"). Sections without
    mock text are reported and skipped. Afterwards the scores are computed and
    printed, and a JSON report is written to
    ./compliance_results/mock_compliance_results.json.
    """
    print("Starting compliance check with mock ICF sections...")
    print("\n--- EVALUATING COMPLIANCE ---\n")

    evaluation_results = {}
    for section_name in TARGET_SECTIONS:
        # Guard clause: nothing to evaluate when no mock text exists.
        if section_name not in MOCK_ICF_SECTIONS:
            print(f"Section '{section_name}' not found in mock content")
            continue

        print(f"Evaluating compliance for section: {section_name}")
        rule_results = validate_icf_sections(
            section_name, MOCK_ICF_SECTIONS[section_name], EVAL_MODEL
        )
        evaluation_results[section_name] = rule_results

        # Per-rule detail; the rationale is shown only for failed rules.
        print(f"  - Rules evaluated: {len(rule_results)}")
        for rule_result in rule_results:
            print(f"    - {rule_result['rule_name']}: {rule_result['prediction']}")
            if rule_result["prediction"] == "N":
                if rule_result["rationale"]:
                    print(f"      Rationale: {rule_result['rationale']}")

    # Summarise and display the scores.
    compliance_results = calculate_compliance_score(evaluation_results)
    print_compliance_results(compliance_results)

    # Persist everything needed to inspect this run later.
    output_dir = "./compliance_results"
    output_path = f"{output_dir}/mock_compliance_results.json"
    os.makedirs(output_dir, exist_ok=True)

    report = {
        "evaluation_model": EVAL_MODEL,
        "sections": TARGET_SECTIONS,
        "compliance_score": compliance_results,
        "evaluation_details": evaluation_results,
        "mock_sections": MOCK_ICF_SECTIONS,
    }
    with open(output_path, "w") as f:
        json.dump(report, f, indent=2)

    print(f"\nResults saved to {output_path}")

In [2]:
# Run the mock compliance check: evaluates each section, prints the scores and
# writes the JSON report under ./compliance_results.
main()

Starting compliance check with mock ICF sections...

--- EVALUATING COMPLIANCE ---

Evaluating compliance for section: Purpose of Research
  - Rules evaluated: 1
    - 1. Description of Clinical Investigation: 1a: Y
Evaluating compliance for section: Duration of Study Involvement
  - Rules evaluated: 1
    - 2. Duration of Study Involvement: 2a: Y
Evaluating compliance for section: Procedures
  - Rules evaluated: 4
    - 1. Description of Clinical Investigation: 1c: Y
    - Procedures: a: Y
    - Procedures: b: N
    - Procedures: c: Y
Evaluating compliance for section: Possible Risks, Discomforts, and Inconveniences
  - Rules evaluated: 5
    - 2. Risks and Discomforts: 2a: Y
    - 2. Risks and Discomforts: 2d: Y
    - 2. Risks and Discomforts: 2e: Y
    - 2. Risks and Discomforts: 2g: Y
    - 2. Risks and Discomforts: 2h: N

--- COMPLIANCE RESULTS ---

Overall Compliance Score: 81.82% (9/11 rules followed)

Section Scores:
- Purpose of Research: 100.00% (1/1 rules followed)
- Duratio