In [2]:
import pandas as pd
import numpy as np
import os
import openai
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
import torch

In [3]:
# Read the OpenAI API key from the environment rather than embedding it in the
# notebook, so the secret is never saved or shared with the file.
# Set OPENAI_API_KEY before starting the kernel; a missing variable raises KeyError.
# NOTE(review): a literal key was previously stored in this cell - it should be
# treated as leaked and revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# Directory containing the dataset CSV files (a relative path)
data_path = "nbme-score-clinical-patient-notes/"
# List the directory contents to see which files are available
os.listdir(data_path)

['features.csv',
 'patient_notes.csv',
 'sample_submission.csv',
 'test.csv',
 'train.csv']

In [5]:
 # Load each CSV file from the data directory into its own DataFrame
 features = pd.read_csv(data_path + 'features.csv')
 patient_notes = pd.read_csv(data_path + 'patient_notes.csv')
 sample_submission = pd.read_csv(data_path + 'sample_submission.csv')
 training_data = pd.read_csv(data_path + 'train.csv')
 # The test set lives in 'test.csv'; it was previously read from 'train.csv',
 # which made test_data a copy of the training data.
 test_data = pd.read_csv(data_path + 'test.csv')

In [6]:
# Display the patient notes table (columns: pn_num, case_num, pn_history)
patient_notes

Unnamed: 0,pn_num,case_num,pn_history
0,0,0,"17-year-old male, has come to the student heal..."
1,1,0,17 yo male with recurrent palpitations for the...
2,2,0,Dillon Cleveland is a 17 y.o. male patient wit...
3,3,0,a 17 yo m c/o palpitation started 3 mos ago; \...
4,4,0,17yo male with no pmh here for evaluation of p...
...,...,...,...
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...
42142,95331,9,A 20 YO F CAME COMPLAIN A DULL 8/10 HEADACHE T...
42143,95332,9,Ms. Madden is a 20yo female who presents with ...
42144,95333,9,Stephanie madden is a 20 year old woman compla...


In [7]:
# Display the features table (columns: feature_num, case_num, feature_text)
features

Unnamed: 0,feature_num,case_num,feature_text
0,0,0,Family-history-of-MI-OR-Family-history-of-myoc...
1,1,0,Family-history-of-thyroid-disorder
2,2,0,Chest-pressure
3,3,0,Intermittent-symptoms
4,4,0,Lightheaded
...,...,...,...
138,912,9,Family-history-of-migraines
139,913,9,Female
140,914,9,Photophobia
141,915,9,No-known-illness-contacts


In [8]:
# Build the list of expected concepts for every case.
#
# expected_concepts[k] holds the feature texts of case number k, lower-cased
# and with hyphens replaced by spaces. Case numbers 0 through 9 are covered.
expected_concepts = [
    [
        text.lower().replace('-', ' ')
        for text in features.loc[features['case_num'] == case, 'feature_text']
    ]
    for case in range(10)
]

In [9]:
# Display the training annotations table (one row per note/feature pair)
training_data

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location
0,00016_000,0,16,0,['dad with recent heart attcak'],['696 724']
1,00016_001,0,16,1,"['mom with ""thyroid disease']",['668 693']
2,00016_002,0,16,2,['chest pressure'],['203 217']
3,00016_003,0,16,3,"['intermittent episodes', 'episode']","['70 91', '176 183']"
4,00016_004,0,16,4,['felt as if he were going to pass out'],['222 258']
...,...,...,...,...,...,...
14295,95333_912,9,95333,912,[],[]
14296,95333_913,9,95333,913,[],[]
14297,95333_914,9,95333,914,['photobia'],['274 282']
14298,95333_915,9,95333,915,['no sick contacts'],['421 437']


In [10]:
# Score every annotated note against the feature list of its case.

# Distinct note ids that appear in the training annotations
training_ids = training_data['pn_num'].unique()

# Per-note results, appended in the same order as training_ids
training_scores = []  # annotated features / features expected for the case
training_counts = []  # rows for the note whose annotation is not the string '[]'
expected_counts = []  # number of expected concepts for the note's case

# Matching slices of patient_notes, one per training id
temp = []

# Rows whose annotation is anything other than the literal string '[]'
has_annotation = training_data['annotation'] != '[]'

for note_id in training_ids:
    rows_for_note = training_data['pn_num'] == note_id

    # Pull corresponding patient notes
    temp.append(patient_notes[patient_notes['pn_num'] == note_id])

    # The case number is taken from the note's first annotation row
    c_num = training_data.loc[rows_for_note, 'case_num'].iloc[0]
    e_count = len(expected_concepts[c_num])

    # Count annotated features and convert to a fraction of the expected ones
    t_count = len(training_data.loc[rows_for_note & has_annotation])
    t_score = t_count / e_count

    expected_counts.append(e_count)
    training_scores.append(t_score)
    training_counts.append(t_count)

In [11]:
# Restrict the notes table to the notes that have training annotations and
# attach the annotation-based counts and scores computed for each of them.
# The lists are aligned with the rows by position, so this relies on each
# training id having contributed exactly one row to `temp`.
patient_notes_short = pd.concat(temp).assign(**{
    'Max Features': expected_counts,
    'Expected Count': training_counts,
    'Expected Score': training_scores,
})

patient_notes_short

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231
...,...,...,...,...,...,...
41963,95145,9,Pt is 20 yo F w headache since yesterday morni...,17,11,0.647059
42042,95228,9,"20 F no PMH, lives w/ roommate in apartment ha...",17,7,0.411765
42056,95243,9,20 y/o F with no PMH is presenting with 1 day ...,17,9,0.529412
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...,17,12,0.705882


***Use GPT-3 to find matching features in patient notes from the list of expected concepts.***

In [12]:
# Initialize list for feature scores and count of matching features
f_scores = []
f_counts = []

# Use GPT-3 to count the number of features found in the student's notes.
# One request is sent per note; results are appended in row order so they can
# be attached to patient_notes_short as columns afterwards.
for n in range(len(patient_notes_short)):
    # Strip Windows line breaks from the note before placing it in the prompt
    note = patient_notes_short['pn_history'].iloc[n].replace('\r\n', '')
    c_num = patient_notes_short['case_num'].iloc[n]

    # Create a prompt to match the phrases in the text and count them
    prompt = f"Count the number of concepts in the list, {expected_concepts[c_num]}, that can be found in the following text:\n\nText: {note}\n\n And list the matching phrases from the text.\n\nCount:"

    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        # A single token is requested: only the count is read from the reply
        max_tokens=1,
        # Only choices[0] is used, so ask for one completion instead of two
        n=1,
        stop=None,
        temperature=0.5
    )

    # The model is not guaranteed to answer with a number; record NaN for
    # that note instead of aborting the whole (paid) loop with a ValueError.
    raw_count = response.choices[0].text.strip()
    try:
        f_count = int(raw_count)
    except ValueError:
        f_count = np.nan

    # NOTE(review): the reported count is not bounded by the number of expected
    # concepts, so the resulting score can exceed 1.0.
    f_score = f_count/len(expected_concepts[c_num])
    f_scores.append(f_score)
    f_counts.append(f_count)

In [13]:
# Attach the GPT-3 counts and scores to the notes table
patient_notes_short['Feature Count'] = f_counts
patient_notes_short['Feature Score'] = f_scores

# Relative error of the GPT-3 score with respect to the annotation-based score
patient_notes_short['Feature Error'] = (
    patient_notes_short['Expected Score']
    .sub(patient_notes_short['Feature Score'])
    .abs()
    .div(patient_notes_short['Expected Score'])
)
patient_notes_short

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score,Feature Count,Feature Score,Feature Error
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231,7,0.538462,0.300000
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385,8,0.615385,0.000000
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154,8,0.615385,0.272727
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231,9,0.692308,0.100000
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231,5,0.384615,0.500000
...,...,...,...,...,...,...,...,...,...
41963,95145,9,Pt is 20 yo F w headache since yesterday morni...,17,11,0.647059,20,1.176471,0.818182
42042,95228,9,"20 F no PMH, lives w/ roommate in apartment ha...",17,7,0.411765,14,0.823529,1.000000
42056,95243,9,20 y/o F with no PMH is presenting with 1 day ...,17,9,0.529412,14,0.823529,0.555556
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...,17,12,0.705882,20,1.176471,0.666667


***Use GPT-3 to generate "ideal" patient notes with all expected concepts.***

In [19]:
# Initialize list for generated notes
generated_note = []

# Ask GPT-3 for one note per case, built from that case's expected concepts.
# generated_note[k] therefore corresponds to expected_concepts[k].
for c in expected_concepts:
    print(c)
    prompt = (f'Given concepts in this list {c}, generate a patient note')
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        temperature=0,
        max_tokens=1500,
        # Only choices[0] is used, so ask for one completion instead of two
        n=1,
        stop=None,
        frequency_penalty=0,
        presence_penalty=0
        )
    # Flatten the completion onto a single line
    note = response.choices[0].text.replace('.\n', '').replace('\n', '')
    print('\n' + note + '\n')
    generated_note.append(note)


['family history of mi or family history of myocardial infarction', 'family history of thyroid disorder', 'chest pressure', 'intermittent symptoms', 'lightheaded', 'no hair changes or no nail changes or no temperature intolerance', 'adderall use', 'shortness of breath', 'caffeine use', 'heart pounding or heart racing', 'few months duration', '17 year', 'male']

Patient is a 17 year old male with a family history of myocardial infarction who presents with intermittent chest pressure, lightheadedness, and shortness of breath for the last few months. He also reports that he has been using Adderall and caffeine regularly and has noticed that his heart has been pounding or racing more frequently. There are no changes in his hair or nails, and he has not experienced any temperature intolerance.

['no vaginal discharge', 'weight loss', 'not sexually active', 'prior episodes of diarrhea', '20 year', 'no bloody bowel movements', 'recurrent bouts over past 6 months', 'right sided lq abdominal pa

In [20]:
# Generate cosine similarity scores between the generated notes and the student notes

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Embed each generated note once, up front: the embedding depends only on the
# case, so recomputing it for every student note was redundant work.
# truncation=True caps inputs at the tokenizer's maximum length so that an
# over-long text cannot make the model call fail.
reference_embeddings = []
with torch.no_grad():
    for reference in generated_note:
        reference_tokens = tokenizer(reference, return_tensors='pt', truncation=True)
        # First output is the last hidden state; keep the first token's vector
        reference_embeddings.append(model(**reference_tokens)[0][:, 0, :])

# Initialize list for similarity scores
sim_scores = []

# Loop through notes and generate scores
for n in range(len(patient_notes_short)):
    note = patient_notes_short['pn_history'].iloc[n].replace('\r\n', '')
    c_num = patient_notes_short['case_num'].iloc[n]

    # Tokenize the student note
    tokens2 = tokenizer(note, return_tensors='pt', truncation=True)

    # Pass the tokenized input through the model to get the embedding
    with torch.no_grad():
        outputs2 = model(**tokens2)
    embeddings1 = reference_embeddings[c_num]
    embeddings2 = outputs2[0][:, 0, :]

    # Compute cosine similarity between the embeddings
    cos_sim = torch.nn.functional.cosine_similarity(embeddings1, embeddings2)

    sim_scores.append(cos_sim.item())

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [21]:
# Attach the BERT similarity scores to the notes table
patient_notes_short['Similarity Score'] = sim_scores

# Relative error of the similarity score with respect to the annotation-based score
patient_notes_short['Similarity Error'] = (
    patient_notes_short['Expected Score']
    .sub(patient_notes_short['Similarity Score'])
    .abs()
    .div(patient_notes_short['Expected Score'])
)
patient_notes_short

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score,Feature Count,Feature Score,Feature Error,Similarity Score,Similarity Error
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231,7,0.538462,0.300000,0.735527,0.043815
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385,8,0.615385,0.000000,0.649276,0.055074
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154,8,0.615385,0.272727,0.808809,0.044135
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231,9,0.692308,0.100000,0.751793,0.022670
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231,5,0.384615,0.500000,0.784450,0.019786
...,...,...,...,...,...,...,...,...,...,...,...
41963,95145,9,Pt is 20 yo F w headache since yesterday morni...,17,11,0.647059,20,1.176471,0.818182,0.773857,0.195961
42042,95228,9,"20 F no PMH, lives w/ roommate in apartment ha...",17,7,0.411765,14,0.823529,1.000000,0.824466,1.002274
42056,95243,9,20 y/o F with no PMH is presenting with 1 day ...,17,9,0.529412,14,0.823529,0.555556,0.861683,0.627623
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...,17,12,0.705882,20,1.176471,0.666667,0.864480,0.224680


In [22]:
# Report the mean of each relative-error column, one line per approach
for error_column in ("Feature Error", "Similarity Error"):
    print(f'The average {error_column} was: {patient_notes_short[error_column].mean()}')

The average Feature Error was: 0.36211485736485804
The average Similarity Error was: 0.2281997214408971


In [23]:
# Save the notes table with all score and error columns to a CSV file in the
# working directory; index=False leaves the DataFrame index out of the file.
patient_notes_short.to_csv("patient_notes_scores.csv", index=False)

In [18]:
# Generate notes with temperature=0.7
generated_note2 = []

# Ask GPT-3 for one note per case, built from that case's expected concepts.
# generated_note2[k] therefore corresponds to expected_concepts[k].
for c in expected_concepts:
    print(c)
    prompt = (f'Given concepts in this list {c}, generate a patient note')
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        temperature=0.7,
        max_tokens=1500,
        # Only choices[0] is used, so ask for one completion instead of two
        n=1,
        stop=None,
        frequency_penalty=0,
        presence_penalty=0
        )
    # Flatten the completion onto a single line
    note = response.choices[0].text.replace('.\n', '').replace('\n', '')
    print('\n' + note + '\n')
    generated_note2.append(note)


['family history of mi or family history of myocardial infarction', 'family history of thyroid disorder', 'chest pressure', 'intermittent symptoms', 'lightheaded', 'no hair changes or no nail changes or no temperature intolerance', 'adderall use', 'shortness of breath', 'caffeine use', 'heart pounding or heart racing', 'few months duration', '17 year', 'male']

Patient presents with a chief complaint of intermittent chest pressure and shortness of breath for the last few months. He endorses lightheadedness and heart pounding/racing with these episodes. He has a family history of myocardial infarction and thyroid disorder. He takes Adderall and drinks caffeine regularly. He denies any changes in his hair or nails and has no temperature intolerance. He is a 17-year-old male.

['no vaginal discharge', 'weight loss', 'not sexually active', 'prior episodes of diarrhea', '20 year', 'no bloody bowel movements', 'recurrent bouts over past 6 months', 'right sided lq abdominal pain or right lowe

In [19]:
# Generate cosine similarity scores between the generated notes and the student notes

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Embed each generated note once, up front: the embedding depends only on the
# case, so recomputing it for every student note was redundant work.
# truncation=True caps inputs at the tokenizer's maximum length so that an
# over-long text cannot make the model call fail.
reference_embeddings = []
with torch.no_grad():
    for reference in generated_note2:
        reference_tokens = tokenizer(reference, return_tensors='pt', truncation=True)
        # First output is the last hidden state; keep the first token's vector
        reference_embeddings.append(model(**reference_tokens)[0][:, 0, :])

# Initialize list for similarity scores
sim_scores = []

# Loop through notes and generate scores
for n in range(len(patient_notes_short)):
    note = patient_notes_short['pn_history'].iloc[n].replace('\r\n', '')
    c_num = patient_notes_short['case_num'].iloc[n]

    # Tokenize the student note
    tokens2 = tokenizer(note, return_tensors='pt', truncation=True)

    # Pass the tokenized input through the model to get the embedding
    with torch.no_grad():
        outputs2 = model(**tokens2)
    embeddings1 = reference_embeddings[c_num]
    embeddings2 = outputs2[0][:, 0, :]

    # Compute cosine similarity between the embeddings
    cos_sim = torch.nn.functional.cosine_similarity(embeddings1, embeddings2)

    sim_scores.append(cos_sim.item())

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [20]:
# Attach the BERT similarity scores to the notes table.
# Assigning to these column names replaces any values already stored in them.
patient_notes_short['Similarity Score'] = sim_scores

# Relative error of the similarity score with respect to the annotation-based score
patient_notes_short['Similarity Error'] = (
    patient_notes_short['Expected Score']
    .sub(patient_notes_short['Similarity Score'])
    .abs()
    .div(patient_notes_short['Expected Score'])
)
patient_notes_short

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score,Similarity Score,Similarity Error
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231,0.792075,0.029697
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385,0.752111,0.222180
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154,0.831027,0.017878
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231,0.830136,0.079177
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231,0.818508,0.064061
...,...,...,...,...,...,...,...,...
41963,95145,9,Pt is 20 yo F w headache since yesterday morni...,17,11,0.647059,0.777010,0.200834
42042,95228,9,"20 F no PMH, lives w/ roommate in apartment ha...",17,7,0.411765,0.834667,1.027049
42056,95243,9,20 y/o F with no PMH is presenting with 1 day ...,17,9,0.529412,0.867474,0.638562
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...,17,12,0.705882,0.855366,0.211768


In [21]:
# Average Errors with temperature=0.7
# Prints the mean of the 'Similarity Error' column currently stored in the table.
print(f'The average Similarity Error was: {patient_notes_short["Similarity Error"].mean()}')

The average Similarity Error was: 0.24812209410284494
