In [None]:
from datasets import load_dataset

ds = load_dataset("adesouza1/soap_notes")

In [3]:
ds

DatasetDict({
    train: Dataset({
        features: ['age', 'patient_name', 'doctor_data', 'gender', 'dob', 'phone', 'person_data', 'health_problem', 'patient_convo', 'soap_notes', 'doctor_name', 'address', 'full_patient_data'],
        num_rows: 558
    })
    test: Dataset({
        features: ['age', 'patient_name', 'doctor_data', 'gender', 'dob', 'phone', 'person_data', 'health_problem', 'patient_convo', 'soap_notes', 'doctor_name', 'address', 'full_patient_data'],
        num_rows: 62
    })
})

In [15]:
ds['train'][0].keys()

dict_keys(['age', 'patient_name', 'doctor_data', 'gender', 'dob', 'phone', 'person_data', 'health_problem', 'patient_convo', 'soap_notes', 'doctor_name', 'address', 'full_patient_data'])

In [48]:
ds['train'][0]['health_problem']

'Joint Pain'

In [18]:
ds['train'][0]['full_patient_data']

'\n\n\nDEMOGRAPHICS:\nName: Zack Fields\nAge: 28\nAddress: 6104 Parker Rd, Minneapolis, Georgia 60074\nPhone: (839) 255-2098\n\nPROBLEM LIST:\n1. Chronic: Joint Pain (Right Knee and Left Ankle)\n2. Other: Seasonal Allergies\n\nHEALTH MAINTENANCE:\nVaccines and Screenings:\n- Tdap: 08/14/2022 (overdue)\n- Influenza: 10/02/2022 (due soon)\n- Colonoscopy: 09/15/2023 (not due yet)\n\nREMINDERS AND RESULTS:\n- Reminder: Tdap booster due\n- Reminder: Schedule annual physical\n- Result: Lipid Panel (09/15/2022): Normal\n\nCARE TEAM AND COMMUNICATION:\nCare Team:\n- Dr. Kristen Kelly, Primary Care Physician\n- Dr. Rebecca Nguyen, Orthopedic Surgeon\n- Sarah Johnson, Nurse Practitioner\n- John Smith, Physical Therapist\n\nCommunication:\n- Preferred Method: Email\n- Emergency Contact: Jane Fields, Mother, (555) 123-4567\n\nALLERGIES:\n- Penicillin\n\nMEDICATIONS:\n- Ibuprofen 600mg, PRN, every 4-6 hours as needed for pain\n- Loratadine 10mg, daily for allergies\n\nIMMUNIZATIONS:\n- Tdap: 08/14/

In [16]:
ds['train'][0]['patient_convo']

"\nPhysician: Good morning, Zack. How are you feeling today?\n\nPatient: Hi, Doc. I've been feeling pretty lousy lately. I've got this constant pain in my right knee and left ankle that just won't go away. It's like a dull ache that's always there, no matter what I'm doing.\n\nPhysician: I see. Can you tell me more about the pain? Is it sharp, dull, throbbing? Does it feel like it's affecting your ability to move around?\n\nPatient: It's definitely dull and achy. It's not excruciating, but it's definitely noticeable. It's like there's pressure on my joints all the time. Yeah, it's definitely affecting my mobility. I've been avoiding stairs and stuff because it just hurts too much.\n\nPhysician: Okay, that helps. Have you noticed any swelling or redness in the affected areas?\n\nPatient: Not really. I mean, sometimes my knee will get a little puffy if I've been walking or standing for a while, but it's not like it's massively swollen or anything.\n\nPhysician: Got it. And how long have 

In [17]:
ds['train'][0]['soap_notes']

"Subjective:\nZack Fields reports experiencing constant pain in his right knee and left ankle for the past six months. He describes the pain as a dull ache that is always present and affects his mobility, particularly when walking or standing for extended periods. He notes that the pain improves when he is sitting or lying down. Zack has been taking ibuprofen for pain relief, but it provides only minimal relief and occasionally causes stomach discomfort.\n\nObjective:\nDuring the examination, Zack appeared uncomfortable when moving his right knee and left ankle. There were no signs of swelling or redness in the affected joints. Zack's range of motion in both joints was limited due to pain. He is currently taking ibuprofen 600mg as needed for pain relief. Zack's mother expressed concerns about his mental health due to the impact of the pain on his daily activities.\n\nAssessment:\nBased on Zack's symptoms and history, the likely diagnosis is osteoarthritis in the right knee and left ank

In [1]:
# src/data_loader.py

from datasets import load_dataset
from typing import Dict, List

class SOAPDataLoader:
    def __init__(self, dataset_name: str = "adesouza1/soap_notes"):
        self.dataset = load_dataset(dataset_name)
    
    def get_examples(self, split: str = "train", n: int = None):
        """Get n examples from dataset"""
        data = self.dataset[split]
        if n:
            data = data.select(range(min(n, len(data))))
        
        return [
            {
                'patient_convo': ex['patient_convo'],
                'soap_notes': ex['soap_notes'],
                'full_patient_data': ex['full_patient_data'],
                'health_problem': ex['health_problem']
            }
            for ex in data
        ]

In [2]:
# src/evals/structure_eval.py

import re

class StructureEvaluator:
    """Check if SOAP note has all required sections"""
    
    REQUIRED_SECTIONS = ['Subjective', 'Objective', 'Assessment', 'Plan']
    
    def evaluate(self, generated_note: str) -> dict:
        results = {
            'has_all_sections': True,
            'missing_sections': [],
            'section_lengths': {}
        }
        
        for section in self.REQUIRED_SECTIONS:
            # Check if section exists
            pattern = rf'{section}:?\s*\n'
            if not re.search(pattern, generated_note, re.IGNORECASE):
                results['has_all_sections'] = False
                results['missing_sections'].append(section)
            else:
                # Measure section length
                section_text = self._extract_section(generated_note, section)
                results['section_lengths'][section] = len(section_text.split())
        
        return results
    
    def _extract_section(self, note: str, section: str) -> str:
        """Extract text from a specific section"""
        # Simple regex to get section content
        pattern = rf'{section}:?\s*\n(.*?)(?=\n[A-Z][a-z]+:|\Z)'
        match = re.search(pattern, note, re.IGNORECASE | re.DOTALL)
        return match.group(1).strip() if match else ""

In [9]:
# Load data
loader = SOAPDataLoader()
examples = loader.get_examples(n=10)  # Start with 10

# Run eval
# evaluator = StructureEvaluator()

# for i, example in enumerate(examples):
#     result = evaluator.evaluate(example['soap_notes'])
#     print(f"\n--- Example {i} ---")
#     print(f"All sections present: {result['has_all_sections']}")
#     if result['missing_sections']:
#         print(f"Missing: {result['missing_sections']}")
#     print(f"Section lengths: {result['section_lengths']}")

In [11]:
example = examples[1]
# logging.getLogger("PyRuSH").setLevel(logging.WARNING)
# transcript = nlp(example['patient_convo']) 

In [18]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [20]:
doc = nlp(example['soap_notes'])

In [21]:
print(f"Entities found: {len(doc.ents)}")
for ent in doc.ents[:10]:  # First 10
    print(f"{ent.text} | {ent.label_}")

Entities found: 6
Diaz | PERSON
Diaz | PERSON
Spouse | ORG
Diaz | PERSON
GERD | ORG
Order | ORG


In [5]:
import spacy

# Load medical model
nlp = spacy.load("en_core_sci_sm")

doc = nlp("Patient has GERD and pain in right knee")
print(f"Entities found: {len(doc.ents)}")
for ent in doc.ents:
    print(f"{ent.text} | {ent.label_}")

Entities found: 4
Patient | ENTITY
GERD | ENTITY
pain | ENTITY
right knee | ENTITY


  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


In [6]:
from transformers import pipeline
import torch
# This just works
ner = pipeline("ner", model="d4data/biomedical-ner-all", aggregation_strategy="simple")

text = "Patient has GERD and pain in right knee for six months. Taking ibuprofen 600mg."
entities = ner(text)

print("Medical entities found:")
for ent in entities:
    print(f"{ent['word']:30} | {ent['entity_group']}")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/266M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/373 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Device set to use cpu


Medical entities found:
ge                             | Sign_symptom
##rd                           | Sign_symptom
pain                           | Sign_symptom
right knee                     | Biological_structure
six                            | Duration
ib                             | Medication
##uprofen                      | Medication
600                            | Dosage


In [22]:
from transformers import pipeline

class MedicalEntityExtractor:
    def __init__(self):
        self.ner = pipeline(
            "ner", 
            model="d4data/biomedical-ner-all",
            aggregation_strategy="simple"  # This should merge wordpieces
        )
    
    def extract_entities(self, text):
        """Extract medical entities from text"""
        results = self.ner(text)
        
        # Merge consecutive tokens of SAME type
        merged = []
        i = 0
        while i < len(results):
            current = results[i]['word'].replace('##', '').strip()
            entity_type = results[i]['entity_group']
            
            # Look ahead - merge ONLY if same type AND adjacent in text
            j = i + 1
            while j < len(results) and results[j]['entity_group'] == entity_type:
                next_word = results[j]['word'].replace('##', '').strip()
                # Add space between separate words
                if not results[j]['word'].startswith('##'):
                    current = current + ' ' + next_word
                else:
                    current = current + next_word
                j += 1
            
            if len(current) > 1:
                merged.append({'text': current.lower(), 'type': entity_type})
            
            i = j if j > i + 1 else i + 1
        
        return merged
    
    def get_entity_texts(self, text):
        """Get clean entity texts for comparison"""
        entities = self.extract_entities(text)
        
        # Filter out junk
        clean_entities = set()
        for ent in entities:
            text = ent['text'].strip()
            # Skip if too short, has weird chars, or looks like noise
            if len(text) > 2 and not any(x in text for x in ['copy', 'pati', 'sy']):
                clean_entities.add(text)
        
        return clean_entities

# Test it
extractor = MedicalEntityExtractor()
text = "Patient has GERD and pain in right knee for six months. Taking ibuprofen 600mg."

entities = extractor.extract_entities(text)
print("Cleaned entities:")
for ent in entities:
    print(f"{ent['text']:20} | {ent['type']}")

# For comparison
transcript_entities = extractor.get_entity_texts(example['patient_convo'])
note_entities = extractor.get_entity_texts(example['soap_notes'])
missing = transcript_entities - note_entities

print(f"\nMissing from note: {missing}")

Device set to use cpu


Cleaned entities:
gerd pain            | Sign_symptom
right knee           | Biological_structure
six                  | Duration
ibuprofen            | Medication
600                  | Dosage

Missing from note: {'fullness', 'to throw', 'stomach', 'burn stress', 'stabbing', 'sometimes', 'discomfort'}


In [23]:
extractor = MedicalEntityExtractor()

# Get entities from both
transcript_ents = extractor.get_entity_texts(example['patient_convo'])
note_ents = extractor.get_entity_texts(example['soap_notes'])

# Compare
missing = transcript_ents - note_ents

print(f"Transcript has {len(transcript_ents)} entities")
print(f"Note has {len(note_ents)} entities")
print(f"Missing {len(missing)} entities")
print(f"\nSome missing: {list(missing)[:10]}")

# Calculate recall
recall = len(transcript_ents & note_ents) / len(transcript_ents) if transcript_ents else 0
print(f"\nEntity recall: {recall:.1%}")

Device set to use cpu


Transcript has 13 entities
Note has 12 entities
Missing 7 entities

Some missing: ['fullness', 'to throw', 'stomach', 'burn stress', 'stabbing', 'sometimes', 'discomfort']

Entity recall: 46.2%


In [39]:
examples[7]['soap_notes']

"Subjective:\nNorma Woods reports feeling unwell with a constant cough and tightness in her chest. She describes her symptoms as worsening at night and experiencing shortness of breath and wheezing, especially with exertion. She also notes nasal congestion and pressure in her head. Norma has been using an albuterol inhaler and fluticasone propionate nasal spray with limited relief.\n\nObjective:\nNorma appears fatigued during the appointment but is alert and oriented. She demonstrates mild wheezing upon auscultation of her lungs. Vital signs are within normal limits. Norma's chest X-ray and CT scan are ordered, along with blood tests to investigate underlying conditions. Albuterol dosage is increased, and a congestion medication is added to her treatment plan.\n\nAssessment:\nNorma is presenting with persistent respiratory symptoms impacting her quality of life. Further diagnostic tests are needed to determine the cause of her symptoms. Adjustments to her medication regimen have been m

In [40]:
examples[7]['patient_convo']

"\nPhysician: Good morning, Mrs. Woods. Thank you for coming in today. How are you feeling?\n\nPatient: Hi, Dr. Gordon. I've been feeling pretty unwell lately. I've had this constant cough and my chest feels tight all the time. It's like there's a band around my chest that I can't get rid of.\n\nPhysician: I see. Have you noticed any patterns or triggers that make your symptoms worse? For example, does it get worse when you're outside or around certain substances?\n\nPatient: Well, it seems to get worse at night, especially when I'm lying down. And sometimes, it feels like my chest is heavy, like there's a weight on it.\n\nPhysician: I understand. Have you experienced any shortness of breath or wheezing?\n\nPatient: Yes, especially when I exert myself or climb stairs. It's like I can't catch my breath. And sometimes, I feel like I'm choking or gasping for air.\n\nPhysician: I see. Have you noticed any other symptoms? Perhaps a runny nose or congestion?\n\nPatient: Yes, my nose has been

In [44]:
import json

# Load your test set
with open('../data/synthetic_data.json', 'r') as f:
    test_data = json.load(f)

In [49]:
test_data[0]

{'id': 0,
 'patient_convo': "\nPhysician: Good morning, Zack. How are you feeling today?\n\nPatient: Hi, Doc. I've been feeling pretty lousy lately. I've got this constant pain in my right knee and left ankle that just won't go away. It's like a dull ache that's always there, no matter what I'm doing.\n\nPhysician: I see. Can you tell me more about the pain? Is it sharp, dull, throbbing? Does it feel like it's affecting your ability to move around?\n\nPatient: It's definitely dull and achy. It's not excruciating, but it's definitely noticeable. It's like there's pressure on my joints all the time. Yeah, it's definitely affecting my mobility. I've been avoiding stairs and stuff because it just hurts too much.\n\nPhysician: Okay, that helps. Have you noticed any swelling or redness in the affected areas?\n\nPatient: Not really. I mean, sometimes my knee will get a little puffy if I've been walking or standing for a while, but it's not like it's massively swollen or anything.\n\nPhysician

In [51]:
def evaluate_note(transcript, generated_note, ground_truth_note):
    
    # Extract entities from all three
    transcript_ents = extract_entities(transcript)
    generated_ents = extract_entities(generated_note)
    ground_truth_ents = extract_entities(ground_truth_note)
    
    # Eval 1: Transcript-based
    missing_from_transcript = transcript_ents - generated_ents
    hallucinated = generated_ents - transcript_ents
    
    # Eval 2: Ground truth-based
    recall = len(generated_ents & ground_truth_ents) / len(ground_truth_ents) if ground_truth_ents else 0
    precision = len(generated_ents & ground_truth_ents) / len(generated_ents) if generated_ents else 0
    
    return {
        'missing_findings_count': len(missing_from_transcript),
        'hallucination_count': len(hallucinated),
        'clinical_recall': recall,
        'clinical_precision': precision
    }

In [52]:
for case in test_data:
    result = evaluate_note(
        case.get('patient_convo', case.get('transcript')),  # handle both field names
        case['generated_soap'],
        case['gt_soap']
    )
    
    print(f"\n{case['label']}:")
    print(f"  Missing from transcript: {result['missing_findings_count']}")
    print(f"  Hallucinations: {result['hallucination_count']}")
    print(f"  Clinical recall: {result['clinical_recall']:.1%}")

TypeError: extract_entities() missing 1 required positional argument: 'text'

In [54]:
example['soap_notes']

'Subjective:\nMr. Diaz reports feeling lousy lately with constant heartburn that persists despite medication. He also experiences sharp stomach pain, bloating, abdominal discomfort, intermittent diarrhea, nausea, vomiting, weight loss, loss of appetite, and fatigue. He notes that spicy foods and stress may exacerbate his symptoms.\n\nObjective:\nMr. Diaz appears fatigued during the appointment. He denies any recent travel or exposures to illness. Gastroenterologist recommends further evaluation with an endoscopy due to intermittent abdominal pain and bloating. Spouse notes fatigue and loss of appetite as additional symptoms to consider.\n\nAssessment:\nMr. Diaz presents with chronic GERD, hypertension, and gastrointestinal issues. Further evaluation with an endoscopy and blood tests is warranted to investigate his symptoms. A food diary will be initiated to track potential triggers.\n\nPlan:\nOrder endoscopy and blood tests to investigate symptoms. Initiate food diary to track triggers

In [None]:
class StructureEvaluator: 
    """Check SOAP note structure content for completeness and flag outliers"""

    SECTIONS = ['Subjective', 'Objective', 'Assessment', 'Plan'] 

    def evaluate(self, note: str) -> dict: 
         """Evaluate SOAP note structure"""
        results = {
            'has_all_sections': True,
            'sections': {},
            'total_words': len(note.split()),
            'total_chars': len(note),
            'flags': []
        }

In [63]:
pattern = r'Subjective:?\s*\n(.*?)(?=\n[A-Z][a-z]+:|\Z)'
match = re.search(pattern, example['soap_notes'], re.IGNORECASE | re.DOTALL)
match

<re.Match object; span=(0, 331), match='Subjective:\nMr. Diaz reports feeling lousy latel>

In [60]:
import re

class StructureEvaluator:
    """Check SOAP note structure and content completeness"""
    
    SECTIONS = ['Subjective', 'Objective', 'Assessment', 'Plan']
    
    def evaluate(self, note):
        """Evaluate SOAP note structure"""
        results = {
            'has_all_sections': True,
            'sections': {},
            'total_words': len(note.split()),
            'total_chars': len(note),
            'flags': []
        }
        
        for section in self.SECTIONS:
            # Check if section exists
            pattern = rf'{section}:?\s*\n(.*?)(?=\n[A-Z][a-z]+:|\Z)'
            match = re.search(pattern, note, re.IGNORECASE | re.DOTALL)
            
            if not match:
                results['has_all_sections'] = False
                results['sections'][section] = {
                    'present': False,
                    'word_count': 0,
                    'char_count': 0
                }
                results['flags'].append(f"Missing section: {section}")
            else:
                content = match.group(1).strip()
                word_count = len(content.split())
                char_count = len(content)
                
                results['sections'][section] = {
                    'present': True,
                    'word_count': word_count,
                    'char_count': char_count
                }
                
                # Flag if empty or very short
                if word_count == 0:
                    results['flags'].append(f"Empty section: {section}")
                elif word_count < 10:
                    results['flags'].append(f"Very short section: {section} ({word_count} words)")
        
        return results

# Test it
evaluator = StructureEvaluator()

# Test on your synthetic data
with open('../data/synthetic_data.json', 'r') as f:
    test_data = json.load(f)

for case in test_data:  # Test first 3
    result = evaluator.evaluate(case['generated_soap'])
    print(f"\n{case['label']}:")
    print(f"  All sections: {result['has_all_sections']}")
    print(f"  Total words: {result['total_words']}")
    print(f"  Flags: {result['flags']}")
    for section, info in result['sections'].items():
        print(f"  {section}: {info['word_count']} words")


identity:
  All sections: True
  Total words: 274
  Flags: []
  Subjective: 74 words
  Objective: 68 words
  Assessment: 57 words
  Plan: 71 words

paraphrase:
  All sections: True
  Total words: 206
  Flags: []
  Subjective: 61 words
  Objective: 51 words
  Assessment: 45 words
  Plan: 45 words

paraphrase:
  All sections: True
  Total words: 164
  Flags: []
  Subjective: 39 words
  Objective: 35 words
  Assessment: 29 words
  Plan: 57 words

missing information: injury discussion, follow-up appointment:
  All sections: True
  Total words: 164
  Flags: []
  Subjective: 74 words
  Objective: 24 words
  Assessment: 32 words
  Plan: 30 words

hallucination: no heart palpitations in original transcript:
  All sections: True
  Total words: 196
  Flags: []
  Subjective: 96 words
  Objective: 25 words
  Assessment: 36 words
  Plan: 35 words

clinical accuracy issue: benzos not appropriate, caffeine not a good rec, encouraging avoidance for coping is unwarrented:
  All sections: True
  Total

In [66]:
import anthropic

In [85]:
class LLMHallucinationDetector:
    """Use LLM to detect unsupported facts in generated notes"""
    
    def __init__(self, api_key):
        self.client = anthropic.Anthropic(api_key=api_key)
    
    def evaluate(self, transcript, generated_note):
        """Check if note contains facts not in transcript"""
        
        prompt = f"""You are evaluating a medical SOAP note for clinical accuracy based on the provided transcript of the patient conversation.

TRANSCRIPT (what was actually said):
{transcript}

GENERATED NOTE:
{generated_note}

Task: 
1. Identify HALLUCINATIONS - facts in the note NOT supported by the transcript
2. Identify CLINICAL INACCURACIES - medically incorrect statements (wrong dosages, contraindications, inappropriate treatments)
3. Identify MISSING FINDINGS - important facts from the transcript that are absent from the note

Be specific and cite examples.

Format your response as:
HALLUCINATIONS: [list each hallucinated fact, or "None detected"]
CLINICAL INACURACIES: [list each innacuracy, or "None detected"]
MISSING FINDINGS: [list each missing critical finding, or "None detected"]
CONFIDENCE: [High/Medium/Low]
"""

        response = self.client.messages.create(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
        
        result_text = response.content[0].text
        
        # Simple parsing
        has_hallucinations = "None detected" not in result_text
        
        return {
            'has_hallucinations': has_hallucinations,
            'llm_response': result_text,
            'model_used': 'claude-4-5-sonnet'
        }

In [86]:
# Test it
detector = LLMHallucinationDetector(api_key=os.getenv('ANTHROPIC_API_KEY'))

# Test on your hallucination case (id=4)
result = detector.evaluate(
    test_data[5]['transcript'],  
    test_data[5]['generated_soap']
)

print(f"Hallucinations detected: {result['has_hallucinations']}")
print(f"\nLLM says:\n{result['llm_response']}")

Hallucinations detected: True

LLM says:
HALLUCINATIONS:
1. "Patient presents with visible signs of anxiety during visit" - No observation of visible signs of anxiety is documented in the transcript
2. "Recent weight loss noted" in Objective section - While patient reported weight loss in the Subjective section, there is no documentation that weight loss was objectively measured or noted during the visit

CLINICAL INACCURACIES:
1. "Initiate benzodiazepine therapy for long-term anxiety management" - Benzodiazepines are NOT recommended for long-term anxiety management due to dependence risk and are contraindicated for chronic use. The physician only stated "medication" without specifying benzodiazepines.
2. "Recommend increasing caffeine intake to improve energy levels and mood" - This is medically inappropriate and NOT mentioned in the transcript. Caffeine can worsen anxiety and panic attacks and is contraindicated in this patient.
3. "Advise patient to continue avoiding anxiety trigger

In [83]:
test_data[5]['transcript']

"\nPhysician: Good morning, Lance. How are you feeling today?\n\nPatient: Hi, Doc. I've been feeling pretty anxious lately. I've had trouble sleeping, and I've been getting these panic attacks out of the blue. It's like my heart is racing, and I can't catch my breath.\n\nPhysician: I see. Can you tell me more about these panic attacks? How often are they happening, and how long have you been experiencing them?\n\nPatient: They started about a month ago, and they've been happening pretty regularly since then. Maybe once or twice a week. It's like I'll be doing something normal, and then suddenly, I'll feel like I'm going to pass out or something. My heart races, and I get this feeling of dread in my stomach. It's really scary.\n\nPhysician: That sounds very distressing. Have you noticed any triggers that might set off these panic attacks?\n\nPatient: Not really. Like I said, it seems to happen out of the blue. Sometimes I'll be at work, and other times I'll be at home watching TV or som

In [84]:
test_data[5]['label']

'clinical accuracy issue: benzos not appropriate, caffeine not a good rec, encouraging avoidance for coping is unwarrented'

In [77]:
from dotenv import load_dotenv

In [78]:
load_dotenv()

True

In [80]:
import os

In [88]:
from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")

In [113]:
def calculate_similarity(generated_note, ground_truth_note):
    embeddings = model.encode([generated_note, ground_truth_note])
    similarity = model.similarity(embeddings[0], embeddings[1]).item()
    quality = "null"
    if similarity >= 0.80:
        quality = "EXCELLENT"  # Minor differences only
    elif similarity >= 0.70:
        quality = "GOOD"  # Acceptable, may have minor omissions
    elif similarity >= 0.60:
        quality = "NEEDS REVIEW"  # Significant differences
    else:
        quality = "POOR"
    return round(similarity, 3), quality
    

In [114]:
calculate_similarity(test_data[7]['gt_soap'], test_data[7]['generated_soap'])

(0.649, 'NEEDS REVIEW')

In [106]:
test_data[7]['gt_soap']

"Subjective:\nNorma Woods reports feeling unwell with a constant cough and tightness in her chest. She describes her symptoms as worsening at night and experiencing shortness of breath and wheezing, especially with exertion. She also notes nasal congestion and pressure in her head. Norma has been using an albuterol inhaler and fluticasone propionate nasal spray with limited relief.\n\nObjective:\nNorma appears fatigued during the appointment but is alert and oriented. She demonstrates mild wheezing upon auscultation of her lungs. Vital signs are within normal limits. Norma's chest X-ray and CT scan are ordered, along with blood tests to investigate underlying conditions. Albuterol dosage is increased, and a congestion medication is added to her treatment plan.\n\nAssessment:\nNorma is presenting with persistent respiratory symptoms impacting her quality of life. Further diagnostic tests are needed to determine the cause of her symptoms. Adjustments to her medication regimen have been m

In [107]:
test_data[7]['generated_soap']

'Subjective:\nPatient reports not feeling well recently. Notes some respiratory complaints.\n\nObjective:\nPatient presents for evaluation. Currently using inhaler and nasal spray.\n\nAssessment:\nRespiratory symptoms present. History of previous bronchitis.\n\nPlan:'

In [116]:
len(test_data)

8

In [118]:
[case['id'] for case in test_data]

[0, 1, 2, 3, 4, 5, 6, 7]

In [120]:
!python --version

Python 3.10.19
