In [1]:
import pandas as pd
import numpy as np
import os
import openai
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
import torch

In [2]:
openai.api_key = "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"

In [3]:
data_path = "nbme-score-clinical-patient-notes/"
os.listdir(data_path)

['features.csv',
 'patient_notes.csv',
 'sample_submission.csv',
 'test.csv',
 'train.csv']

In [4]:
 features = pd.read_csv(data_path + 'features.csv')
 patient_notes = pd.read_csv(data_path + 'patient_notes.csv')
 sample_submission = pd.read_csv(data_path + 'sample_submission.csv')
 training_data = pd.read_csv(data_path + 'train.csv')
 test_data = pd.read_csv(data_path + 'train.csv')

In [5]:
patient_notes

Unnamed: 0,pn_num,case_num,pn_history
0,0,0,"17-year-old male, has come to the student heal..."
1,1,0,17 yo male with recurrent palpitations for the...
2,2,0,Dillon Cleveland is a 17 y.o. male patient wit...
3,3,0,a 17 yo m c/o palpitation started 3 mos ago; \...
4,4,0,17yo male with no pmh here for evaluation of p...
...,...,...,...
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...
42142,95331,9,A 20 YO F CAME COMPLAIN A DULL 8/10 HEADACHE T...
42143,95332,9,Ms. Madden is a 20yo female who presents with ...
42144,95333,9,Stephanie madden is a 20 year old woman compla...


In [6]:
features

Unnamed: 0,feature_num,case_num,feature_text
0,0,0,Family-history-of-MI-OR-Family-history-of-myoc...
1,1,0,Family-history-of-thyroid-disorder
2,2,0,Chest-pressure
3,3,0,Intermittent-symptoms
4,4,0,Lightheaded
...,...,...,...
138,912,9,Family-history-of-migraines
139,913,9,Female
140,914,9,Photophobia
141,915,9,No-known-illness-contacts


In [7]:
# Load list of expected concepts

# Initialize features list for cases
expected_concepts = []

for i in range(10):
    expected_concepts.append([])
    feat = features.loc[features['case_num'] == i, 'feature_text']
    for f in feat:
        f = f.lower().replace('-', ' ')
        expected_concepts[i].append(f)


In [8]:
training_data

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location
0,00016_000,0,16,0,['dad with recent heart attcak'],['696 724']
1,00016_001,0,16,1,"['mom with ""thyroid disease']",['668 693']
2,00016_002,0,16,2,['chest pressure'],['203 217']
3,00016_003,0,16,3,"['intermittent episodes', 'episode']","['70 91', '176 183']"
4,00016_004,0,16,4,['felt as if he were going to pass out'],['222 258']
...,...,...,...,...,...,...
14295,95333_912,9,95333,912,[],[]
14296,95333_913,9,95333,913,[],[]
14297,95333_914,9,95333,914,['photobia'],['274 282']
14298,95333_915,9,95333,915,['no sick contacts'],['421 437']


In [9]:
# Get list of training ids
training_ids = training_data['pn_num'].unique()

# Get training scores
# Initialize list of training scores
training_scores = []
training_counts = []
expected_counts = []


temp = []

for i in training_ids:
    # Pull corresponding patient notes
    temp.append(patient_notes[patient_notes['pn_num'] == i])
    
    # Get case number
    c_num = training_data.loc[training_data['pn_num'] == i, 'case_num'].iloc[0]
    e_count = len(expected_concepts[c_num])
    
    # Calculate score
    t_count = len(training_data.loc[(training_data['pn_num'] == i) & (training_data['annotation'] != '[]')])
    t_score = t_count / len(expected_concepts[c_num])
    expected_counts.append(e_count)
    training_scores.append(t_score)
    training_counts.append(t_count)

In [13]:
# Create truncated list of notes based on training set

patient_notes_short = pd.concat(temp)

# Add training scores
patient_notes_short['Max Features'] = expected_counts
patient_notes_short['Expected Count'] = training_counts
patient_notes_short['Expected Score'] = training_scores

patient_notes_short

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231
...,...,...,...,...,...,...
41963,95145,9,Pt is 20 yo F w headache since yesterday morni...,17,11,0.647059
42042,95228,9,"20 F no PMH, lives w/ roommate in apartment ha...",17,7,0.411765
42056,95243,9,20 y/o F with no PMH is presenting with 1 day ...,17,9,0.529412
42141,95330,9,Ms. Madden is a 20 yo female presenting w/ the...,17,12,0.705882


In [19]:
patient_notes_test = patient_notes_short[:10].copy()
patient_notes_test

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231
155,161,0,"17 y/o M , Dillon Cleveland comes with c/o of ...",13,9,0.692308
181,188,0,17 yo M college student comes to the clinic du...,13,9,0.692308
204,211,0,HPI: Patient is a 17 yo m with a c/o of palpit...,13,12,0.923077
217,224,0,"Pt is a 17 y/o M w/ episodic feeling of ""heart...",13,9,0.692308
218,225,0,17 y/o previously healthy male here with heart...,13,11,0.846154


***Use GPT-3 to find matching features in patient notes from the list of expected concepts.***

In [15]:
# Initialize list for feature scores and count of matching features
f_scores = []
f_counts = []

# Use GPT-3 to count the number of features found in the student's notes
for n in range(len(patient_notes_test)):
    note = patient_notes_test['pn_history'].iloc[n].replace('\r\n', '')
    c_num = patient_notes_test['case_num'].iloc[n]

    # Create a prompt to match the phrases in the text and count them
    prompt = f"Count the number of concepts in the list, {expected_concepts[c_num]}, that can be found in the following text:\n\nText: {note}\n\n And list the matching phrases from the text.\n\nCount:"

    response = openai.Completion.create(
        engine="text-davinci-002", 
        prompt=prompt,
        max_tokens=1,
        n=2,
        stop=None,
        temperature=0.5
    )

    f_count = int(response.choices[0].text.strip())
    f_score = f_count/len(expected_concepts[c_num])
    f_scores.append(f_score)
    f_counts.append(f_count)

In [20]:
# Append feature counts and scores to notes table
patient_notes_test['Feature Count'] = f_counts
patient_notes_test['Feature Score'] = f_scores

patient_notes_test['Feature Error'] = abs(patient_notes_test['Expected Score'] - patient_notes_test['Feature Score']) / patient_notes_test['Expected Score']
patient_notes_test

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score,Feature Count,Feature Score,Feature Error
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231,8,0.615385,0.2
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385,8,0.615385,0.0
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154,14,1.076923,0.272727
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231,8,0.615385,0.2
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231,8,0.615385,0.2
155,161,0,"17 y/o M , Dillon Cleveland comes with c/o of ...",13,9,0.692308,11,0.846154,0.222222
181,188,0,17 yo M college student comes to the clinic du...,13,9,0.692308,8,0.615385,0.111111
204,211,0,HPI: Patient is a 17 yo m with a c/o of palpit...,13,12,0.923077,4,0.307692,0.666667
217,224,0,"Pt is a 17 y/o M w/ episodic feeling of ""heart...",13,9,0.692308,8,0.615385,0.111111
218,225,0,17 y/o previously healthy male here with heart...,13,11,0.846154,8,0.615385,0.272727


***Use GPT-3 to generate "ideal" patient notes with all expected concepts.***

In [21]:
# Initialize list for generated notes
generated_note = []

for c in expected_concepts:
    print(c)
    prompt = (f'Given concepts in this list {c}, generate a patient note')
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        temperature=0,
        max_tokens=3000,
        n=2,
        stop=None,
        frequency_penalty=0,
        presence_penalty=0
        )
    note = response.choices[0].text.replace('.\n', '').replace('\n', '')
    print('\n' + note + '\n')
    generated_note.append(note)


['family history of mi or family history of myocardial infarction', 'family history of thyroid disorder', 'chest pressure', 'intermittent symptoms', 'lightheaded', 'no hair changes or no nail changes or no temperature intolerance', 'adderall use', 'shortness of breath', 'caffeine use', 'heart pounding or heart racing', 'few months duration', '17 year', 'male']

Patient is a 17 year old male with a family history of myocardial infarction who presents with intermittent chest pressure, lightheadedness, and shortness of breath for the last few months. He also reports that he has been using Adderall and caffeine regularly and has noticed that his heart has been pounding or racing more frequently. There are no changes in his hair or nails, and he has not experienced any temperature intolerance.

['no vaginal discharge', 'weight loss', 'not sexually active', 'prior episodes of diarrhea', '20 year', 'no bloody bowel movements', 'recurrent bouts over past 6 months', 'right sided lq abdominal pa

In [26]:
# Generate cosine similarity scores between the generated notes and the student notes

# Load pre-trained BERT model and tokenizer
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Initialize list for similarity scores
sim_scores = []

# Loop through notes and generate scores
for n in range(len(patient_notes_test)):
    note = patient_notes_test['pn_history'].iloc[n].replace('\r\n', '')
    c_num = patient_notes_test['case_num'].iloc[n]
    
    # Tokenize the texts
    tokens1 = tokenizer(generated_note[c_num], return_tensors='pt')
    tokens2 = tokenizer(note, return_tensors='pt')

    # Pass the tokenized inputs through the model to get the embeddings
    with torch.no_grad():
        outputs1 = model(**tokens1)
        outputs2 = model(**tokens2)
    embeddings1 = outputs1[0][:, 0, :]
    embeddings2 = outputs2[0][:, 0, :]

    # Compute cosine similarity between the embeddings
    cos_sim = torch.nn.functional.cosine_similarity(embeddings1, embeddings2)
    
    sim_scores.append(cos_sim.item())

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [27]:
# Append feature scores to notes table
patient_notes_test['Similarity Score'] = sim_scores

patient_notes_test['Similarity Error'] = abs(patient_notes_test['Expected Score'] - patient_notes_test['Similarity Score']) / patient_notes_test['Expected Score']
patient_notes_test

Unnamed: 0,pn_num,case_num,pn_history,Max Features,Expected Count,Expected Score,Feature Count,Feature Score,Feature Error,Similarity Score,Similarity Error
16,16,0,HPI: 17yo M presents with palpitations. Patien...,13,10,0.769231,8,0.615385,0.2,0.735527,0.043815
40,41,0,17 Y/O M CAME TO THE CLINIC C/O HEART POUNDING...,13,8,0.615385,8,0.615385,0.0,0.649276,0.055074
45,46,0,Mr. Cleveland is a 17yo M who was consented by...,13,11,0.846154,14,1.076923,0.272727,0.808809,0.044135
79,82,0,17 yo M w/ no cardiac or arrhythmia PMH presen...,13,10,0.769231,8,0.615385,0.2,0.751793,0.02267
96,100,0,HPI: Dillon Cleveland is an otherwise healthy ...,13,10,0.769231,8,0.615385,0.2,0.78445,0.019786
155,161,0,"17 y/o M , Dillon Cleveland comes with c/o of ...",13,9,0.692308,11,0.846154,0.222222,0.766991,0.107876
181,188,0,17 yo M college student comes to the clinic du...,13,9,0.692308,8,0.615385,0.111111,0.775954,0.120823
204,211,0,HPI: Patient is a 17 yo m with a c/o of palpit...,13,12,0.923077,4,0.307692,0.666667,0.796328,0.137311
217,224,0,"Pt is a 17 y/o M w/ episodic feeling of ""heart...",13,9,0.692308,8,0.615385,0.111111,0.706721,0.020819
218,225,0,17 y/o previously healthy male here with heart...,13,11,0.846154,8,0.615385,0.272727,0.762204,0.099213


In [29]:
# Average Errors
print(f'The average Feature Error was: {patient_notes_test["Feature Error"].mean()}')
      
print(f'The average Similarity Error was: {patient_notes_test["Similarity Error"].mean()}')

The average Feature Error was: 0.2256565656565656
The average Similarity Error was: 0.06715216060328966


In [None]:
# Save results to file
patient_notes_short.to_csv("patient_notes_scores.csv", index=False)

***Unused/Experimental***

In [None]:
# import spacy
# from fuzzywuzzy import fuzz
# from transformers import OpenAIGPTConfig, OpenAIGPTModel
# from transformers import GPT2Tokenizer, GPT2Model, pipeline

In [None]:
# # Calculate similarity between the generated notes and the student notes using fuzzy matching

# similarity_scores = []
# for concept in expected_concepts:
#     score = fuzz.token_sort_ratio(concept, extracted_concepts)
#     similarity_scores.append(score)
    
# # Assign grade to patient note based on similarity scores
# average_similarity = sum(similarity_scores) / len(similarity_scores)
# if average_similarity > 80:
#     grade = "A"
# elif average_similarity > 60:
#     grade = "B"
# elif average_similarity > 40:
#     grade = "C"
# else:
#     grade = "F"
    
# print(f"Patient note: {patient_note}")
# print(f"Extracted concepts: {extracted_concepts}")
# print(f"Grade: {grade}")


In [None]:
# # Load pre-trained GPT model
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2Model.from_pretrained('gpt2')

In [None]:
# # Preprocess and tokenize text data
# text1 = "This is the first document."
# text2 = "This document is the second document."
# tokens1 = tokenizer.encode(text1, return_tensors='pt')
# tokens2 = tokenizer.encode(text2, return_tensors='pt')

# # Generate embeddings for text data
# embeddings1 = model(tokens1)[0][:, 0, :].detach().numpy()  # Extract embeddings from model output
# embeddings2 = model(tokens2)[0][:, 0, :].detach().numpy()

# # Compare similarity using cosine similarity
# similarity = cosine_similarity(embeddings1, embeddings2)[0][0]

# print(f"Cosine similarity: {similarity}")

In [None]:
#  # Load pre-trained spaCy model for English language and NER
# nlp = spacy.load('en_core_web_sm')

# # Define function to extract concepts using NER
# def extract_concepts(patient_note):
#     doc = nlp(patient_note)
#     concepts = set([ent.text.lower() for ent in doc.ents])
# #     concepts = set([ent.text.lower() for ent in doc.ents if ent.label_ in ['ORG', 'PERSON', 'GPE']])
#     return concepts

# # # Generate patient note using GPT model
# # generator = pipeline('text-generation', model='gpt2')
# # patient_note = generator("Patient presented with", max_length=100)[0]['generated_text']

# # Extract concepts from patient note using NER
