In [1]:
import os
import json
import pandas as pd
from benchmark.evaluation.fact_check import extract_facts, check_facts, section_to_fact_definition
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment
load_dotenv()

# Configuration
EVAL_MODEL = "gpt-4o-mini"  # model name passed to extract_facts / check_facts

# Section titles to process: one per entry in the fact-check definitions
TARGET_SECTIONS = list(section_to_fact_definition)

# Mock ICF section contents, keyed by section title.
# (ICF presumably stands for "informed consent form" -- confirm with the benchmark docs.)
# main() looks each TARGET_SECTIONS title up in this dict and passes the raw text,
# including its leading/trailing whitespace, to extract_facts / check_facts.
# Titles missing from this dict are reported and skipped by main().
MOCK_ICF_SECTIONS = {
    "Purpose of Research": """
    The purpose of this research study is to evaluate the safety and effectiveness of Drug XYZ in patients with condition ABC. 
    This study aims to determine whether Drug XYZ can help reduce symptoms of condition ABC compared to the current standard treatment.
    Researchers hope to learn if this new approach provides better outcomes with fewer side effects.
    We plan to enroll approximately 200 participants across 20 clinical sites in the United States.
    """,
    
    "Duration of Study Involvement": """
    Your participation in this study will last approximately 12 weeks. This includes:
    - A screening period of 2 weeks
    - A treatment period of 8 weeks with regular visits
    - A follow-up period of 2 weeks after treatment ends
    There is no washout period required for this study.
    The total time commitment includes 6 study visits, each lasting between 1-3 hours.
    """,
    
    "Procedures": """
    During this study, you will undergo the following procedures:
    
    Screening Visit:
    - Complete medical history and physical examination
    - Blood tests to check your overall health (approximately 20 mL or 4 teaspoons)
    - ECG to monitor your heart
    
    Treatment Visits (Weeks 1, 4, and 8):
    - Receive the study medication
    - Blood samples will be collected to measure drug levels (approximately 10 mL or 2 teaspoons per visit)
    - Complete questionnaires about your symptoms
    - Physical examination
    
    In Week 8, you will also have an MRI scan to evaluate your response to treatment.
    
    Follow-up Visit (Week 12):
    - Final physical examination
    - Blood tests (approximately 20 mL or 4 teaspoons)
    - Discussion about your experience in the study
    
    Participants who can become pregnant must use effective contraception during the study and for 30 days after the last dose.
    """,
    
    "Possible Risks, Discomforts, and Inconveniences": """
    Participation in this study involves the following risks and discomforts:
    
    Study Drug Risks:
    - Common side effects include headache (30% of patients), nausea (25%), and fatigue (20%)
    - Less common but more serious side effects include allergic reactions (5%) or liver problems (2%)
    - There may be unknown risks as this drug is still being studied
    
    Procedure Risks:
    - Blood draws may cause pain, bruising, or fainting
    - MRI scans may cause discomfort from lying still or claustrophobia from the enclosed space
    
    Other inconveniences include:
    - Multiple visits to the study site
    - Time commitment for study procedures
    - Possible discomfort from fasting before certain tests
    
    If you are or may become pregnant, there may be unknown risks to the fetus.
    
    Discontinuing your current medications may lead to a worsening of your condition ABC symptoms.
    """
}

# Hand-written statements to check against the mock sections, keyed by section title.
# Each list mixes statements supported by the matching MOCK_ICF_SECTIONS text with
# statements tagged "# False", which the mock text contradicts or never mentions
# (e.g. the mock text says there is no washout period and lists 6 visits).
# "Purpose of Research" has no entry here, so main() only extracts facts for it.
TEST_FACTS = {
    "Duration of Study Involvement": [
        "Study duration is 12 weeks",
        "Treatment period is 8 weeks",
        "Follow-up period is 2 weeks",
        "There is a 4-week washout period", # False: mock text states no washout period
        "Study involves 10 visits" # False: mock text states 6 study visits
    ],
    "Procedures": [
        "Blood tests are performed during screening",
        "MRI scan in Week 8",
        "Contraception is required for participants who can become pregnant",
        "Genetic testing will be performed", # False: not mentioned in the mock text
        "Participants will receive a CT scan" # False: mock text mentions an MRI, not a CT scan
    ],
    "Possible Risks, Discomforts, and Inconveniences": [
        "Headache is a common side effect",
        "MRI scans may cause claustrophobia",
        "Discontinuing medications may worsen condition ABC symptoms",
        "The study drug may cause severe bleeding", # False: not among the listed risks
        "Risk of permanent hearing loss" # False: not among the listed risks
    ]
}

def print_extracted_facts(section_title, facts):
    """Write a numbered list of extracted facts to stdout.

    Args:
        section_title: Section name shown in the banner line.
        facts: Iterable of facts; each is printed on its own line,
            numbered from 1.
    """
    banner = f"\n--- EXTRACTED FACTS FOR {section_title} ---\n"
    print(banner)

    position = 0
    for fact in facts:
        position += 1
        print(f"{position}. {fact}")

def print_fact_check_results(results_df):
    """Print fact check results in a readable format.

    Prints every checked fact grouped by section, followed by summary counts
    and percentages.

    Args:
        results_df: DataFrame with the columns "section_title", "fact" and
            "result". A truthy "result" is reported as PRESENT, a falsy one
            as NOT PRESENT.

    An empty DataFrame is handled explicitly: the summary shows zero counts
    and 0.00% instead of dividing by zero.
    """
    print("\n--- FACT CHECK RESULTS ---\n")

    # Group by section title (groupby orders the sections by title)
    for section_title, group in results_df.groupby("section_title"):
        print(f"\n{section_title}:")
        for fact, outcome in zip(group["fact"], group["result"]):
            result = "PRESENT" if outcome else "NOT PRESENT"
            print(f"  - {fact}: {result}")

    # Summary statistics. Count with the same truthiness test used for the
    # per-fact labels above so the totals always agree with what was printed.
    total_facts = len(results_df)
    present_facts = sum(1 for outcome in results_df["result"] if outcome)
    missing_facts = total_facts - present_facts

    def _percent(count):
        # No facts checked: report 0% rather than dividing by zero.
        return count / total_facts * 100 if total_facts else 0.0

    print("\n--- SUMMARY ---")
    print(f"Total facts checked: {total_facts}")
    print(f"Facts present: {present_facts} ({_percent(present_facts):.2f}%)")
    print(f"Facts not present: {missing_facts} ({_percent(missing_facts):.2f}%)")

def main():
    """Run fact extraction and fact checking over the mock ICF sections.

    For every title in TARGET_SECTIONS that has mock content, extracts facts
    with EVAL_MODEL and prints them; when TEST_FACTS has statements for that
    title, checks them against the same content. Any fact-check results are
    concatenated, printed, and saved as CSV; the extracted facts, section
    list, model name and mock text are saved as JSON. Both files are written
    to ./factcheck_results/.
    """
    print("Starting fact extraction and checking with mock ICF sections...")

    facts_by_section = {}
    check_frames = []

    for title in TARGET_SECTIONS:
        # Sections without mock text are reported and skipped.
        if title not in MOCK_ICF_SECTIONS:
            print(f"Section '{title}' not found in mock content")
            continue

        content = MOCK_ICF_SECTIONS[title]
        print(f"\nProcessing section: {title}")

        # Step 1: extract facts from the section text
        print(f"Extracting facts from {title}...")
        facts = extract_facts(content, title, EVAL_MODEL)
        print_extracted_facts(title, facts)
        facts_by_section[title] = facts

        # Step 2: check the hand-written statements, if this section has any
        if title not in TEST_FACTS:
            continue
        print(f"\nChecking test facts against {title}...")
        check_frames.append(
            check_facts(content, TEST_FACTS[title], title, EVAL_MODEL)
        )

    # Merge the per-section fact-check frames into one and report on it
    has_checks = bool(check_frames)
    combined = None
    if has_checks:
        combined = pd.concat(check_frames, ignore_index=True)
        print_fact_check_results(combined)

    # Persist everything under the output directory
    output_dir = "./factcheck_results"
    os.makedirs(output_dir, exist_ok=True)

    payload = {
        "evaluation_model": EVAL_MODEL,
        "sections": TARGET_SECTIONS,
        "extracted_facts": facts_by_section,
        "mock_sections": MOCK_ICF_SECTIONS,
    }
    with open(f"{output_dir}/mock_factcheck_results.json", "w") as handle:
        json.dump(payload, handle, indent=2)

    # The CSV summary exists only when at least one section was fact-checked
    if has_checks:
        combined.to_csv(f"{output_dir}/fact_check_summary.csv", index=False)

    print(f"\nResults saved to {output_dir}/")

In [2]:
# Run the mock pipeline: prints progress to this cell's output and writes
# the JSON (and CSV, when facts were checked) under ./factcheck_results/.
main()

Starting fact extraction and checking with mock ICF sections...

Processing section: Duration of Study Involvement
Extracting facts from Duration of Study Involvement...

--- EXTRACTED FACTS FOR Duration of Study Involvement ---

1. Total study duration is approximately 12 weeks.
2. Active participation duration includes a screening period of 2 weeks and a treatment period of 8 weeks.
3. Follow-up duration is 2 weeks after treatment ends.
4. There is no washout period required for this study.

Checking test facts against Duration of Study Involvement...

Processing section: Procedures
Extracting facts from Procedures...

--- EXTRACTED FACTS FOR Procedures ---

1. Screening Visit: Complete medical history, physical examination, blood tests, and ECG.
2. Treatment Visits: Receive study medication, blood samples, questionnaires, and physical examination.
3. MRI Scan: Conducted in Week 8 to evaluate response to treatment.
4. Follow-up Visit: Final physical examination and blood tests.
5. Co