In [1]:
# Sanity-check cell: prints a fixed string (presumably to confirm the kernel is live).
print("hello")

hello


In [2]:
# Question/answer transcript under evaluation: every "qN" key holds a question
# and the matching "aN" key holds the answer that is scored further down.
# Long answers are wrapped with implicit string concatenation; the resulting
# values are unchanged.
qa_pairs = {
    "q1": "tell me the active set of problems that Myra Jones has",
    "a1": (
        "Myra Jones currently has only one active health problem: Asthma. "
        "This condition began when she was 65 years old, with an official start date of January 3, 2007. "
        "There is no end date on record, so it is considered ongoing."
    ),

    "q2": "tell me the active medication",
    "a2": (
        "There are currently no active medications on record for Myra Jones. "
        "If you would like information about past or discontinued medications, please let me know!"
    ),

    "q3": "what are her allergies?",
    "a3": (
        "Myra Jones has the following allergies:\n"
        "Shellfish (severe—causes anaphylaxis; active since April 12, 1995)\n"
        "Codeine (moderate—causes shortness of breath; active since May 1, 2006)\n"
        "Aspirin (mild to moderate—causes hives; active since May 1, 2008)\n"
        "She also has a past allergy to Penicillin G benzathine (caused hives, moderate to severe), "
        "but this allergy is no longer considered active as of August 6, 2012."
    ),

    "q4": "When was the last time lab test was done?",
    "a4": (
        "The last laboratory test for Myra Jones was a Complete Blood Count Without Differential "
        "(CBC WO DIFFERENTIAL), which was performed on August 10, 2012."
    ),

    "q5": "What kind of lab test was done?",
    "a5": (
        "The last lab test Myra Jones had was a Complete Blood Count Without Differential "
        "(CBC WO DIFFERENTIAL). "
        "This test measures the main types of blood cells to help check for conditions like anemia or infection, "
        "but it does not provide a breakdown of the different types of white blood cells."
    ),
}

In [3]:
# Natural-language reference answers, keyed by question id ("q1".."q5").
# Long strings are wrapped with implicit concatenation; values are unchanged.
ground_truth_nl = {
    "q1": (
        "Myra Jones has one active health problem: Asthma. "
        "It began on January 3, 2007, and is still ongoing."
    ),

    "q2": "Myra Jones currently has no active medications on record.",

    "q3": (
        "Myra Jones has active allergies to Shellfish (since April 12, 1995), "
        "Codeine (since May 1, 2006), and Aspirin (since May 1, 2008). "
        "She previously had an allergy to Penicillin G benzathine, "
        "which is now inactive as of August 6, 2012."
    ),

    "q4": (
        "Myra Jones's last lab test was done on August 10, 2012. "
        "It was a Complete Blood Count Without Differential."
    ),

    "q5": (
        "The last lab test done for Myra Jones was a Complete Blood Count Without Differential "
        "(CBC WO DIFFERENTIAL), which checks overall blood health "
        "but doesn’t break down white blood cell types."
    ),
}


In [4]:
# Structured reference answers, keyed by question id ("q1".."q5").
# Every entry carries "patient_name" plus the fields specific to that question.
ground_truth_structure = {
    # q1: active problem list
    "q1": {
        "patient_name": "Myra Jones",
        "active_problems": [
            {"problem": "Asthma", "start_date": "2007-01-03", "status": "ongoing"},
        ],
    },

    # q2: active medications (none on record)
    "q2": {"patient_name": "Myra Jones", "active_medications": []},

    # q3: allergies, split into active and inactive records
    "q3": {
        "patient_name": "Myra Jones",
        "active_allergies": [
            {
                "substance": "Shellfish", "severity": "severe", "reaction": "anaphylaxis",
                "active_since": "1995-04-12", "status": "active",
            },
            {
                "substance": "Codeine", "severity": "moderate", "reaction": "shortness of breath",
                "active_since": "2006-05-01", "status": "active",
            },
            {
                "substance": "Aspirin", "severity": "mild to moderate", "reaction": "hives",
                "active_since": "2008-05-01", "status": "active",
            },
        ],
        "inactive_allergies": [
            {
                "substance": "Penicillin G benzathine", "severity": "moderate to severe", "reaction": "hives",
                "inactivated_on": "2012-08-06", "status": "inactive",
            },
        ],
    },

    # q4: most recent lab test and its date
    "q4": {
        "patient_name": "Myra Jones",
        "last_lab_test": {
            "test_name": "Complete Blood Count Without Differential",
            "date": "2012-08-10",
        },
    },

    # q5: most recent lab test with a description and purpose
    "q5": {
        "patient_name": "Myra Jones",
        "last_lab_test": {
            "test_name": "Complete Blood Count Without Differential",
            "description": "This test measures major types of blood cells but does not provide a breakdown of white blood cell types.",
            "purpose": "check for conditions like anemia or infection",
        },
    },
}


In [None]:
def _describe_medication(medication):
    """Render one medication record as text.

    The medication schema is not shown in this notebook, so the rendering is
    schema-agnostic: dicts become "key: value" pairs, anything else uses str().
    """
    if isinstance(medication, dict):
        return ", ".join(f"{key}: {value}" for key, value in medication.items())
    return str(medication)


def structured_to_text(data):
    """Render one structured ground-truth entry as a short plain-text sentence.

    Parameters
    ----------
    data : dict
        Mapping holding a ``{question_id: entry}`` pair; only the first key
        is used. ``entry`` must provide ``patient_name`` plus the fields read
        by the branch for that question id.

    Returns
    -------
    str
        The rendered text, or ``""`` when ``data`` is empty or the question
        id is not one of ``"q1"``..``"q5"``.

    Raises
    ------
    KeyError
        If a field required by the selected branch is missing.

    Notes
    -----
    For q3 only ``active_allergies`` is rendered; ``inactive_allergies`` is
    not read.
    """
    if not data:
        # An empty mapping used to raise IndexError; treat it like an unknown id.
        return ""

    qid = next(iter(data))
    entry = data[qid]

    if qid == "q1":
        problems = entry['active_problems']
        if not problems:
            # Previously indexing [0] on an empty list raised IndexError.
            return f"{entry['patient_name']} no active problems."
        # Render every problem (the old code dropped all but the first).
        # With exactly one problem the text is identical to the old output.
        listed = "; ".join(
            f"problem: {p['problem']}, started on {p['start_date']}" for p in problems
        )
        return f"{entry['patient_name']} , {listed}."

    elif qid == "q2":
        # The old code reported "no medications" without looking at the data.
        # .get keeps entries without the key working as before.
        medications = entry.get('active_medications') or []
        if not medications:
            return f"{entry['patient_name']} no medications."
        listed = "; ".join(_describe_medication(m) for m in medications)
        return f"{entry['patient_name']} active medications {listed}."

    elif qid == "q3":
        allergies = entry["active_allergies"]
        if not allergies:
            # Avoid the malformed "<name> active allergies ." for an empty list.
            return f"{entry['patient_name']} no active allergies."
        allergy_strs = [
            f"{a['substance']} ({a['severity']}, causes {a['reaction']}, since {a['active_since']})"
            for a in allergies
        ]
        return f"{entry['patient_name']} active allergies " + "; ".join(allergy_strs) + "."

    elif qid == "q4":
        test = entry["last_lab_test"]
        # Leading space kept on purpose so the rendered text is unchanged.
        return f" {entry['patient_name']} : {test['test_name']} on {test['date']}."

    elif qid == "q5":
        test = entry["last_lab_test"]
        # NOTE(review): `description` is spliced mid-sentence, so a full-sentence
        # description reads awkwardly ("..., which This test ... types. to ...").
        # Template left unchanged to keep the rendered text stable.
        return f"{entry['patient_name']}'s last lab test : {test['test_name']}, which {test['description']} to {test['purpose']}."

    return ""


In [6]:
from sentence_transformers import SentenceTransformer, util

# Instantiate the "all-MiniLM-L6-v2" sentence-embedding model once at module level;
# compute_similarity() below reads this global `model` to encode both texts.
model = SentenceTransformer("all-MiniLM-L6-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def compute_similarity(ans1, ans2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded separately with the module-level ``model`` and
    compared with ``util.pytorch_cos_sim``; the single entry of the resulting
    score matrix is returned as a plain Python float.
    """
    # Encode in the same order as the arguments: ans1 first, then ans2.
    embeddings = [model.encode(text, convert_to_tensor=True) for text in (ans1, ans2)]
    similarity_matrix = util.pytorch_cos_sim(*embeddings)
    return float(similarity_matrix[0][0])

In [13]:
# Score each answer in qa_pairs against the text rendered from the structured
# ground truth for the same question; similarities are rounded to 4 decimals.
scores = {}

for n in range(1, 6):
    question_key, answer_key = f"q{n}", f"a{n}"
    candidate = qa_pairs[answer_key]
    reference = structured_to_text({question_key: ground_truth_structure[question_key]})
    scores[question_key] = round(compute_similarity(candidate, reference), 4)

print(scores)

{'q1': 0.8794, 'q2': 0.7605, 'q3': 0.9524, 'q4': 0.9352, 'q5': 0.9392}


In [12]:
# Score each answer in qa_pairs against the natural-language ground truth for
# the same question; similarities are rounded to 4 decimals.
scores = {}

for n in range(1, 6):
    question_key, answer_key = f"q{n}", f"a{n}"
    candidate = qa_pairs[answer_key]
    reference = ground_truth_nl[question_key]
    scores[question_key] = round(compute_similarity(candidate, reference), 4)

print(scores)

{'q1': 0.9316, 'q2': 0.7667, 'q3': 0.9255, 'q4': 0.9246, 'q5': 0.9635}
