******************************************Reading XML files into dataframe*************************************************

In [1]:
#from google.colab import drive
import xml.etree.ElementTree as ET
import pandas as pd
import os

# Reading xml files from a folder and storing its content in a data frame
def extract_data_from_xml(folder_path):
    
    # Initializing a list to store patient records and annotations
    data = []

    # Storing files into files list
    files = os.listdir(folder_path)

    # Iterating through each file and extracting content in it
    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        # Parse the XML file
        tree = ET.parse(file_path)
        root = tree.getroot()

        # Extracting text content from the 'TEXT' element
        text_content = root.find('.//TEXT').text.strip()

        # Create a dictionary to store the patient record
        record = {"text": text_content}

        # Loop through each TAG element
        for tag in root.findall('.//TAGS/*'):
            attribute = tag.tag
            value = tag.attrib.get('met', None)  # Check if 'met' attribute exists
            record[attribute] = value

        # Append the record to the list of data
        data.append(record)

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(data)

    return df

# Mount Google Drive
#drive.mount('/content/drive')

# Path to the folder containing the XML files
folder_path = r'C:/Users/almas/OneDrive/Desktop/Spring 2024/NLP/project/train-20240430T170114Z-001/train'

# Extract data from XML files and create a DataFrame
data_df = extract_data_from_xml(folder_path)

In [2]:
data_df.head()

Unnamed: 0,text,ABDOMINAL,ADVANCED-CAD,ALCOHOL-ABUSE,ASP-FOR-MI,CREATININE,DIETSUPP-2MOS,DRUG-ABUSE,ENGLISH,HBA1C,KETO-1YR,MAJOR-DIABETES,MAKES-DECISIONS,MI-6MOS
0,Record date: 2106-02-12\n\nCampbell Orthopedic...,not met,met,not met,met,not met,met,not met,met,not met,not met,met,met,met
1,Record date: 2079-05-12\n\n\n\n\n\nMERCY CARE ...,not met,met,not met,met,not met,not met,not met,met,not met,not met,not met,met,not met
2,Record date: 2120-09-19\n\nPersonal Data and O...,met,met,not met,met,not met,not met,not met,met,not met,not met,met,met,not met
3,Record date: 2067-11-24\n\n ...,not met,met,not met,met,not met,not met,not met,met,not met,not met,not met,met,not met
4,Record date: 2094-02-16\n\nJENNIFER BOOKER\n\n...,not met,met,not met,not met,not met,not met,not met,met,met,not met,met,met,met


In [3]:
from sklearn.preprocessing import LabelEncoder

# Label encoding categorical features
encoder = LabelEncoder()

# Met:0 , Not met :1
for column in data_df.columns:
    # Excluding the column named "Text"
    if column != 'text':
        # Encoding the values of the current column
        data_df[column] = encoder.fit_transform(data_df[column])

# Find counts of each label for each column
for column in data_df.columns:
    if column != 'text':
        class_counts = data_df[column].value_counts()
        print(f"Counts for {column}:")
        print(class_counts)
        print()



Counts for ABDOMINAL:
1    125
0     77
Name: ABDOMINAL, dtype: int64

Counts for ADVANCED-CAD:
0    125
1     77
Name: ADVANCED-CAD, dtype: int64

Counts for ALCOHOL-ABUSE:
1    195
0      7
Name: ALCOHOL-ABUSE, dtype: int64

Counts for ASP-FOR-MI:
0    162
1     40
Name: ASP-FOR-MI, dtype: int64

Counts for CREATININE:
1    120
0     82
Name: CREATININE, dtype: int64

Counts for DIETSUPP-2MOS:
0    105
1     97
Name: DIETSUPP-2MOS, dtype: int64

Counts for DRUG-ABUSE:
1    190
0     12
Name: DRUG-ABUSE, dtype: int64

Counts for ENGLISH:
0    192
1     10
Name: ENGLISH, dtype: int64

Counts for HBA1C:
1    135
0     67
Name: HBA1C, dtype: int64

Counts for KETO-1YR:
1    201
0      1
Name: KETO-1YR, dtype: int64

Counts for MAJOR-DIABETES:
0    113
1     89
Name: MAJOR-DIABETES, dtype: int64

Counts for MAKES-DECISIONS:
0    194
1      8
Name: MAKES-DECISIONS, dtype: int64

Counts for MI-6MOS:
1    184
0     18
Name: MI-6MOS, dtype: int64



In [4]:
# Initializing a list with column names which we want
columns_to_keep = ['text','ABDOMINAL', 'ADVANCED-CAD', 'DIETSUPP-2MOS', 'MAJOR-DIABETES', 'CREATININE']

# Dropping columns which are not present in the above mentioned list
data_df = data_df[columns_to_keep]

In [5]:
data_df.head(5)

Unnamed: 0,text,ABDOMINAL,ADVANCED-CAD,DIETSUPP-2MOS,MAJOR-DIABETES,CREATININE
0,Record date: 2106-02-12\n\nCampbell Orthopedic...,1,0,0,0,1
1,Record date: 2079-05-12\n\n\n\n\n\nMERCY CARE ...,1,0,1,1,1
2,Record date: 2120-09-19\n\nPersonal Data and O...,0,0,1,0,1
3,Record date: 2067-11-24\n\n ...,1,0,1,1,1
4,Record date: 2094-02-16\n\nJENNIFER BOOKER\n\n...,1,0,1,0,1


In [6]:
data_df['text'][0]

'Record date: 2106-02-12\n\nCampbell Orthopedic Associates\n4 Madera Circle\nOmak, GA 28172\n \nHabib Valenzuela, M.D.\n \n \n                                             Valdez, Harlan Jr.  \n                                           845-41-54-4\n                                             February 12, 2106 \nHar is a 43 year old 6\' 214 pound gentleman who is referred for\nconsultation by Dr. Harlan Oneil.  About a week ago he slipped on\nthe driveway at home and sustained an injury to his left ankle. \nHe was seen at Tri-City Hospital and was told he had a\nfracture.  He was placed in an air splint and advised to be\npartial weight bearing, and he is using a cane.  He is here for\nroutine follow-up. \nPast medical history is notable for no ankle injuries previously. \nHe has a history of diabetes and sleep apnea.  He takes Prozac,\nCardizem, Glucophage and Amaryl.  He is also followed by Dr. Harold\nNutter for an arrhythmia.  He does not smoke.  He drinks\nminimally.  He is a set 

In [7]:
import re

# Removing long numbers 
def remove_long_numbers(text):
    text = re.sub(r'\b\d{3,}\b', '', text)
    return text

In [8]:
data_df['filtered_text'] = data_df['text'].apply(remove_long_numbers)

In [9]:
data_df['filtered_text'][0]

'Record date: -02-12\n\nCampbell Orthopedic Associates\n4 Madera Circle\nOmak, GA \n \nHabib Valenzuela, M.D.\n \n \n                                             Valdez, Harlan Jr.  \n                                           -41-54-4\n                                             February 12,  \nHar is a 43 year old 6\'  pound gentleman who is referred for\nconsultation by Dr. Harlan Oneil.  About a week ago he slipped on\nthe driveway at home and sustained an injury to his left ankle. \nHe was seen at Tri-City Hospital and was told he had a\nfracture.  He was placed in an air splint and advised to be\npartial weight bearing, and he is using a cane.  He is here for\nroutine follow-up. \nPast medical history is notable for no ankle injuries previously. \nHe has a history of diabetes and sleep apnea.  He takes Prozac,\nCardizem, Glucophage and Amaryl.  He is also followed by Dr. Harold\nNutter for an arrhythmia.  He does not smoke.  He drinks\nminimally.  He is a set designer at Columbi

In [10]:
data_df.head(5)

Unnamed: 0,text,ABDOMINAL,ADVANCED-CAD,DIETSUPP-2MOS,MAJOR-DIABETES,CREATININE,filtered_text
0,Record date: 2106-02-12\n\nCampbell Orthopedic...,1,0,0,0,1,Record date: -02-12\n\nCampbell Orthopedic Ass...
1,Record date: 2079-05-12\n\n\n\n\n\nMERCY CARE ...,1,0,1,1,1,Record date: -05-12\n\n\n\n\n\nMERCY CARE CENT...
2,Record date: 2120-09-19\n\nPersonal Data and O...,0,0,1,0,1,Record date: -09-19\n\nPersonal Data and Overa...
3,Record date: 2067-11-24\n\n ...,1,0,1,1,1,Record date: -11-24\n\n HU...
4,Record date: 2094-02-16\n\nJENNIFER BOOKER\n\n...,1,0,1,0,1,Record date: -02-16\n\nJENNIFER BOOKER\n\nLC U...


In [11]:
data_df['filtered_text'][0]

'Record date: -02-12\n\nCampbell Orthopedic Associates\n4 Madera Circle\nOmak, GA \n \nHabib Valenzuela, M.D.\n \n \n                                             Valdez, Harlan Jr.  \n                                           -41-54-4\n                                             February 12,  \nHar is a 43 year old 6\'  pound gentleman who is referred for\nconsultation by Dr. Harlan Oneil.  About a week ago he slipped on\nthe driveway at home and sustained an injury to his left ankle. \nHe was seen at Tri-City Hospital and was told he had a\nfracture.  He was placed in an air splint and advised to be\npartial weight bearing, and he is using a cane.  He is here for\nroutine follow-up. \nPast medical history is notable for no ankle injuries previously. \nHe has a history of diabetes and sleep apnea.  He takes Prozac,\nCardizem, Glucophage and Amaryl.  He is also followed by Dr. Harold\nNutter for an arrhythmia.  He does not smoke.  He drinks\nminimally.  He is a set designer at Columbi

In [12]:
import re

#removing white space characters
def remove_escape_sequence(text):
    text = re.sub(r'[\n\t]', '', text)
    text = re.sub(r'\t\d+', '', text)
    text = text.replace("w/", "")
    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_escape_sequence)


In [13]:
data_df['filtered_text'][0]

'Record date: -02-12Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                             -41-54-4                                             February 12,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at Tri-City Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine follow-up. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight

In [14]:
import re

#removing dates from text like '12/13/11' and '12/13/'
def remove_dates(text):
    # Remove dates in the format "12/13/11"
    text = re.sub(r'-?\d{1,2}/\d{1,2}/\d{2}', '', text)
    # Remove dates in the format "12/13/"
    text = re.sub(r'-?\d{1,2}/\d{1,2}/', '', text)
    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_dates)


In [15]:
data_df['filtered_text'][0]

'Record date: -02-12Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                             -41-54-4                                             February 12,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at Tri-City Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine follow-up. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight

In [16]:
import re

# Regular expression pattern to match both "*******" and "---------"

# Removing unwanted patterns
def remove_patterns(text):
    text = re.sub(r'\*+|-+', '', text)
    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_patterns)


In [17]:
data_df['filtered_text'][0]

'Record date: 0212Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                             41544                                             February 12,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at TriCity Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine followup. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight tender

In [18]:
import re


# Removing time data from the text is we are not analyzing it in that context
def remove_time(text):
    text = re.sub(r'\b\d{1,2}:\d{2}(?:\s?(?:a\.m\.|p\.m\.|am|pm))?\b', '', text)
    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_time)


In [19]:
data_df['filtered_text'][0]

'Record date: 0212Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                             41544                                             February 12,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at TriCity Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine followup. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight tender

In [20]:
data_df['filtered_text'] = data_df['filtered_text'].apply(remove_long_numbers)

In [21]:
data_df['filtered_text'][0]

'Record date: 0212Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                                                                          February 12,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at TriCity Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine followup. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight tenderness 

In [22]:
import re

def remove_dates_and_combine_numbers(text):

    # Removing months data
    text = re.sub(r"\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d+\w*\b", "", text)

    # Combine number with following word
#     text = re.sub(r"(\d+)\s+(\w+)", r"\1\2", text)

    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_dates_and_combine_numbers)


In [23]:
data_df['filtered_text'][0]

'Record date: 0212Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                                                                          ,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at TriCity Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine followup. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight tenderness of the left

In [24]:
import re

#Removing unwanted text 
def remove_long_words(text, max_length):
    text = re.sub(r'\b\w{%d,}\b' % max_length, '', text)
    return text

# Define the maximum word length
max_word_length = 20

data_df['filtered_text'] = data_df['filtered_text'].apply(lambda x: remove_long_words(x, max_word_length))


In [25]:
data_df['filtered_text'][0]

'Record date: 0212Campbell Orthopedic Associates4 Madera CircleOmak, GA  Habib Valenzuela, M.D.                                               Valdez, Harlan Jr.                                                                                          ,  Har is a 43 year old 6\'  pound gentleman who is referred forconsultation by Dr. Harlan Oneil.  About a week ago he slipped onthe driveway at home and sustained an injury to his left ankle. He was seen at TriCity Hospital and was told he had afracture.  He was placed in an air splint and advised to bepartial weight bearing, and he is using a cane.  He is here forroutine followup. Past medical history is notable for no ankle injuries previously. He has a history of diabetes and sleep apnea.  He takes Prozac,Cardizem, Glucophage and Amaryl.  He is also followed by Dr. HaroldNutter for an arrhythmia.  He does not smoke.  He drinksminimally.  He is a set designer at Columbia Pictures. On examination today he has slight tenderness of the left

In [26]:
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize


# Download NLTK stopwords if not already downloaded
import nltk
nltk.download('stopwords')
nltk.download('punkt')

# Get English stopwords
stop_words = set(stopwords.words('english'))

#Removing stop words
def remove_stopwords(text):
    words = word_tokenize(text)
    filtered_words = [word for word in words if word.lower() not in stop_words]
    filtered_text = ' '.join(filtered_words)
    return filtered_text

data_df['filtered_text'] = data_df['filtered_text'].apply(remove_stopwords)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\almas\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\almas\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [27]:
data_df['filtered_text'][0]

"Record date : 0212Campbell Orthopedic Associates4 Madera CircleOmak , GA Habib Valenzuela , M.D . Valdez , Harlan Jr. , Har 43 year old 6 ' pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing , using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac , Cardizem , Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot , midfoot forefoot tenderness noted

In [28]:
import re

# Removing unwanted characters.

def preprocess_text(text):
    text = re.sub(r'[(),:;\'\"`?+#]', '', text)
    return text

# Apply preprocessing to the preprocessed_text column
data_df['filtered_text'] = data_df['filtered_text'].apply(preprocess_text)


In [29]:
data_df['filtered_text'][0]

'Record date  0212Campbell Orthopedic Associates4 Madera CircleOmak  GA Habib Valenzuela  M.D . Valdez  Harlan Jr.  Har 43 year old 6  pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing  using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac  Cardizem  Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot  midfoot forefoot tenderness noted . would l

In [30]:
import re

# Function to remove sequences of two or more consecutive dots to reduce noise
def remove_dots(text): 
    text = re.sub(r'\.{2,}', '', text)
    return text


In [31]:
data_df['filtered_text'] = data_df['filtered_text'].apply(remove_dots)


In [32]:
data_df['filtered_text'][0]

'Record date  0212Campbell Orthopedic Associates4 Madera CircleOmak  GA Habib Valenzuela  M.D . Valdez  Harlan Jr.  Har 43 year old 6  pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing  using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac  Cardizem  Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot  midfoot forefoot tenderness noted . would l

In [33]:
def clean_text(text):
    # Remove whitespaces and escape sequences
    text = re.sub(r'\s+', ' ', text)
    return text

data_df['filtered_text'] = data_df['filtered_text'].apply(clean_text)


In [34]:
data_df['filtered_text'][0]

'Record date 0212Campbell Orthopedic Associates4 Madera CircleOmak GA Habib Valenzuela M.D . Valdez Harlan Jr. Har 43 year old 6 pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac Cardizem Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot midfoot forefoot tenderness noted . would like use tu

In [35]:
data_df['filtered_text'][1]

'Record date 0512MERCY CARE CENTERMercy Internal Medicine Associates8 Newburgh StreetTerrell AR Russell DonnaMs . Russell comes evaluation . Rhode Island six months . complaint heaviness chest . palpitations . note associated exertion . diaphoresis associated . shortness breath . heaviness chest last six weeks . quite concerned.She history hypothyroidism hypertension . also hyperlipidemia treated Lipitor . health maintenance order . hysterectomy . needs followup mammogram done . However today concerned chest heaviness.PHYSICAL EXAMINATION blood pressure /80 respiratory rate 20 pulse 68 . Neck supple . lymphadenopathy . Chest clear percussion auscultation . Cardiac exam regularly regular murmurs gallops rubs . Abdomen soft nontender . hepatosplenomegaly . Extremities clear cyanosis clubbing edema.LABORATORY DATA sodium electrolytes otherwise within normal limits . TSH 2.0 . LFTs pending time . sent electrocardiogram showed normal sinus rhythm nonspecific STT wave changes . also sent exe

In [36]:
data_df.head()

Unnamed: 0,text,ABDOMINAL,ADVANCED-CAD,DIETSUPP-2MOS,MAJOR-DIABETES,CREATININE,filtered_text
0,Record date: 2106-02-12\n\nCampbell Orthopedic...,1,0,0,0,1,Record date 0212Campbell Orthopedic Associates...
1,Record date: 2079-05-12\n\n\n\n\n\nMERCY CARE ...,1,0,1,1,1,Record date 0512MERCY CARE CENTERMercy Interna...
2,Record date: 2120-09-19\n\nPersonal Data and O...,0,0,1,0,1,Record date 0919Personal Data Overall HealthPa...
3,Record date: 2067-11-24\n\n ...,1,0,1,1,1,Record date HUNTINGTON EMERGENCY DEPT VISIT TH...
4,Record date: 2094-02-16\n\nJENNIFER BOOKER\n\n...,1,0,1,0,1,Record date 0216JENNIFER BOOKERLC Unit 5714NAS...


In [37]:
data_df['filtered_text'][0]

'Record date 0212Campbell Orthopedic Associates4 Madera CircleOmak GA Habib Valenzuela M.D . Valdez Harlan Jr. Har 43 year old 6 pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac Cardizem Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot midfoot forefoot tenderness noted . would like use tu

*****************************************************SUMMARIZATION*************************************************************

1.Luhn Summarization 

counting importance of each word based on its occurence checking span to find sentences which has these words.
Using these sentences to generate a summary.

In [38]:
import numpy as np
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import pandas as pd

# Download NLTK resources
import nltk
nltk.download('punkt')
nltk.download('stopwords')

# Function to calculate Luhn score for a sentence
def calculate_luhn_score(sentence):
    words = word_tokenize(sentence)
    word_count = 0
    begin = -1
    end = -1
    for i, word in enumerate(words):
        if word not in stopwords.words('english'):
            word_count += 1
            if begin == -1:
                begin = i
            end = i
    span = end - begin + 1
    if span == 0:
        return 0
    return (word_count ** 2) / span

def generate_summary(text, summary_length=5):
    sentences = sent_tokenize(text)
    luhn_scores = [calculate_luhn_score(sent) for sent in sentences]
    summary_indices = np.argsort(luhn_scores)[-summary_length:]
    summary_indices = summary_indices[::-1]
    summary = [sentences[idx] for idx in summary_indices]
    return ' '.join(summary)

data_df['luhn_summary'] = data_df['filtered_text'].apply(lambda x: generate_summary(x, summary_length=5))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\almas\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\almas\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [39]:
data_df['luhn_summary'][0]

'Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.1

In [40]:
!pip install -qq rouge

In [41]:
from rouge import Rouge

hypothesis = "Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.10 L 2040MRI brain Acute/subacute infarcts DWI bright/ADC dark/FLAIR bright L cerebellum punctate R precentral gyrus small elliptical area . complaint mild chest pain anginal pain day thought defibrillation.Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheets Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR . problems speech.PMH DM insulin x years CAD s/p NSTEMI aboveBipolar disorder lithium depakote required inpt hospitalization ECT pastAfib aboveHypercholMEDS Insulin 70/30 22 units bidAsa 81Plavix 75Lithium dose bid prior note Norvasc dose 5 prior note Mirtazapine dose 45 prior note Naltrexone Nexium 20 prior note Lipitor 40 prior note Cozaar doseDepakote dose bid prior note Lopressor 75 po q8 per prior notes Folate Lisinopril coughSH Tob occas cigarEtOH 0 used EtOH problems IVDA 0Lives 17yo son . Sleep Clinic Doctors Hospital North cc Sleep Clinic DHN DD DT TX Record date CCU JAR Transfer NoteAdmission Date Transfer Patient Name Valdez Harlan MRN Cardiologist Dr. NutterPCP Vicente BarkerCC Chest Pain Cath VF arrest RCA stentingHistory Present Illness obtained admission Pt 48 yo male h/o DMII Bipolar d/o depression began substernal day prior admission car presyncope profound weakness . Cath c/b VF arrest dye load resultant afib RVR.Plan outlined CCU course.Victor Shepard MD Record date NEUROLOGY CMF ADMISSION NOTEName VALDEZ HarlanDOB119MR Date 930pmFOR DETAILS PLEASE SEE AALIYAH IRAHETA S NOTEID/CC 49yoRHM PMH signif CAD Afib DM Bipolar presents RLE weakness/decreased sensation x 24hrsHPI 49yoRHM PMH signif CAD Afib DM Bipolar USOH 10d ago weakness L arm "
reference = "Record date 0212Campbell Orthopedic Associates4 Madera CircleOmak GA Habib Valenzuela M.D . Valdez Harlan Jr. Har 43 year old 6 pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac Cardizem Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot midfoot forefoot tenderness noted . would like use tube sock air cast . isusing cane ambulation . xrays show notablefracture pattern today await Radiology opinion . would like stay air splint sock . willsee back six weeks review Boxborough office . Diagnosis Left ankle fracture . Valenzuela M.D . HV/kuntzMmedical cc Harlan Oneil M.D . Harold Nutter M.D . Doctors Hospital North 64 Bruce St Omak GA Habib Valenzuela M.D . DD DT DV reviewed Attending Physician Record date CAMPBELL EMERGENCY DEPT VISIT VALDEZ HARLAN JR . VISIT DATE patient seen examined emergency department . patient seen Emergency Medicine resident . discussed management resident . also seen patient primarily reviewed medical record . brief addendum medical record.HISTORY PRESENTING COMPLAINT Briefly 45yearold male complains several days nausea vomiting left lower quadrant discomfort . also describes intermittent chest pain number months without significant change . sent primary care doctor today pain also noted EKG changes . patient chest pain time evaluation emergency department shortness breath.REVIEW SYSTEMS indicated otherwise negative.PAST MEDICAL HISTORY indicated chart.SOCIAL HISTORY FAMILY HISTORY indicated chart.PHYSICAL EXAMINATION physical examination patient wellappearing smiling pleasant gentleman acute distress . blood pressure /90 pulse 82 temperature 97.9 degrees . Normocephalic atraumatic . chest clear auscultation . heart regular rate rhythm . abdomen soft . left lower quadrant tenderness . also note cardiovascular examination soft murmur says since childhood . extremities normal . neurologic examination nonfocal . THERAPY RENDERED/COURSE ED gentleman abdominal pain receive CAT scan rule diverticulitis . also nonspecific ST changes EKG . painfree time . describe classic exertional pattern chest pain given diabetic EKG changes also admitted rule MI . CT pending time dictation.DISPOSITION including condition upon discharge . patient s condition currently stable . CK498/ JAY CARROLL M.D . JC72 Dictated JAY CARROLL M.D . JC72 reviewed Attending Physician Record date Vicente Blair M.D . Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Harlan B. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakenings well mixed systemic medical conditions . HISTORY PRESENT ILLNESS already know Mr. Valdez demonstrates history difficulties sleep reinitiation maintenance well increased early morning awakenings noted exacerbation sleep difficulties occurring temporal association loss wife pancreatic cancer last year . placed unfortunate situation single parent 15yearold son 10yearold daughter describes modification current employment duties set designer . particular Mr. Valdez describes undergoing frequent international travelling bee markedly curtailed tending family situation closer home . described history intermittent snoring symptomatology unaware specific nocturnal respiratory pauses . unaware restless lower limb sensory complaints may impact ability initiate reinitiate sleep . denies history night owl personality circadian rhythm dysfunction may played role respect nocturnal sleep disruptions sleep difficulties . denies history paroxysmal abnormal disturbances associated narcoleptic symptoms . Mr. Valdez underwent initial formal polysomnographic evaluation center sleep diagnostics Holy Cross time noted demonstrate respiratory disturbance index 81/hour particularly exacerbated supine position characterized predominantly hypopneas equal distribution nonREM stage REM sleep associated O2 desaturation Nadir 88 % respiratory disturbances predominantly obstructive mixed hypopneas . addition loud snoring noted . evidence sleep efficiency 88 % short sleep onset latency 4 minutes . predominance light nonREM stages III sleep concomitant inability achieve significant slowwave stage REM sleep . also alpha intrusions alpha delta sleep evident initial sleep study . addition premature ventricular contractions noted . patient underwent CPAP titration also Tenacre Foundation Nursing Home Boxborough time marked reduction frequency hypopneas respiratory disturbance index equals 2/hour CPAP titrations 46 cm . Sleep efficiency improved 91 % short sleep onset latency also noted 3 minutes . increased predominance light nonREM stage III sleep concomitant inability achieve sustained slow wave sleep . Since initial trial nocturnal CPAP titration 6 cm water pressure various CPAP mask modifications including CPAP nasal face mask Mallinckrodt Breeze supportive head gear nasal pillows . patient describes associated claustrophobic symptomatology relative difficulties sustained nocturnal home CPAP use difficulties regards CPAP complications bulkiness CPAP machine general . result utilized nocturnal CPAP therapy period time although still maintains CPAP equipment house . particular note exacerbation past year patient demonstrates increased early morning awakenings averaging 24 number typical awakenings occurring approximately two hours sleep initiation p.m. patient describes one awakening p.m. second awakening a.m. unclear causative etiology . patient might awaken 3 a.m. ready day . able reinitiate sleep thereafter patient may demonstrate additional two early morning awakenings final awakening 6 a.m . patient noted history mixed systemic conditions including diabetes coronary artery disease depressive disorder well relatively stable gastrointestinal condition upper GI evidence gastroparesis . MEDICATIONS 1 . Provigil mg p.o . q. a.m. PRN . 2 . Lithium . 3 . Valproate . 4 . Glucophage mg t.i.d . 5 . Humulin 15 units night . 6 . Folate . 7 . Metoprolol . 8 . Cardia . 9 . Vitamin E. 10 . Coated aspirin . ALLERGIES/ADVERSE REACTIONS patient describes enhancement suicidal tendencies association prior Prozac usage . SOCIAL HISTORY patient denies active tobacco alcoholic beverage usage . lost pounds past several years . current weight pounds . desirous losing additional weight regards regular exercise hectic social situation makes somewhat difficult present time . examination patient demonstrates blood pressure /88 seated left arm respiratory rate 16 . HEENT EXAMINATION Borderline small posterior oropharyngeal aperture slightly increased redundant tissue evident posteriorly slightly elongated uvula noted . patient appears awake alert speech clear fluent receptive language function essentially intact . presently wearing dental braces . obvious cranial nerve deficits appreciated . focal sensory motor neurologic deficits noted . significant appendicular dystaxias dysmetrias currently evidence . routine gait appears normal based without evidence significant gait dystaxias . current clinical ictal manifestations present . acute evidence microsleeps noted . IMPRESSION 1 . Sleep stage/arrousal dysfunction .56 Manifested subjective complaints nonrestorative sleep increased daytime fatigue alternating hypersomnia recurrent polysomnographic evidence lightened sleep pattern increased predominance nonREM stages 12 sleep presence alpha intrusions alpha delta component deeper sleep . latter EEG findings described association subjective complaints nonrestorative sleep well clinical setting chronic pain related complaints depressive anxiety disorder intercurrent psychotropics agents used usually associated benzodiazepine barbituate usage . 2 . Sleep disordered breathing evidenced prior polysomnographic evaluations mostly obstructive mixed hypopnea . patient appears largely refractory trial CPAP therapy particularly far demonstrates associated claustrophobic symptoms association s usage despite relatively modest CPAP water pressures 6 cm . addition tried various nasal CPAP face mask including Mallinckrodt Breeze supportive head gear nasal pillows limited success . One might consider repeating polysomnographic evaluation future utilizing potential trial BIPAP titration may help improve claustrophobic symptoms patient still left issues referable tangled tubing night issues referable nasal face mask usage noted . 3 . Relative difficulties sleep reinitiation maintenance patient describes least 24 early morning awakenings difficulty sleep reinitiation maintenance thereby compounding current sleep problem . would logically relationship current sleep exacerbations recent death wife pancreatic cancer last year may also evidence nocturnal sleep disturbances repeat polysomnographic evaluation i.e . particular looking presence increased spontaneous arousals limb associated arousals periodic limb movements sleep may special clinical benefit . PLAN 1 . short course far patient describes exceedingly tired unable perform routine daily tasks work managing family absence deceased wife suggested initiation PRN Zolpidem tartrate therapy 5 mg tablets utilizing one two tablets p.o . q. h.s . PRN difficulties sleep reinitiation maintenance . 2 . patient advised take Zolpidem tartrate therapy 23 times per week effort avoid issues physiologic dependency . 3 . patient advised potential adverse behavioral systemic side effects Zolpidem tartrate therapy including hypersomnolence gastric upset loose stools diarrhea cardiac palpitations . Pending clinical response Zolpidem tartrate therapy might seek direct treatment sleep disordered breathing issues may include repeat sleep study potential trial BIPAP therapy effort modify attenuate claustrophobic symptoms . proves poorly responsive trial BIPAP therapy however might consider supplemental O2 therapy night mind follow sleep study associated endtidal CO2 monitoring well . 4 . meantime patient advised contact sleep disorders clinic acute sleep related concerns interim . 5 . patient may also benefit nonpharmacologic approaches regards sleep reinitiation hypnotherapy hold strategies pending follow sleep disorders clinic evaluation approximately four months time . thank allowing share medical care Mr. Harlan Valdez . hope letter finds well . Sincerely Yovani Vergara M.D . Sleep Clinic Doctors Hospital North cc Sleep Clinic DHN DD DT TX Record date CCU JAR Transfer NoteAdmission Date Transfer Patient Name Valdez Harlan MRN Cardiologist Dr. NutterPCP Vicente BarkerCC Chest Pain Cath VF arrest RCA stentingHistory Present Illness obtained admission Pt 48 yo male h/o DMII Bipolar d/o depression began substernal day prior admission car presyncope profound weakness . CP minimal weakness made pull . repeat symptoms day admission . EKG c/w 2/ showed flattened Twave V2 TWI V3 flattened Twaves aVL . trop negative MB index elevated . Due Twave flattening history elevated index decided start heparin ASA take cath lab . Cath showed right dominant system prox Cx 40 % LAD clear RCA prox % lesion ostial PDA 90 % . final dye injection pt VF arrest 2 shocks . Pt regained puls AF new RVR . Pt started amio . Pt began experience discomfort RVR decided intervene . POBA done ostial PDA . first noeluting stent placed prox RCA pt dissection thus 2cd stent placed . admission CCU pt still AF RVR s . amio drip BB loaded plavix ASA lipitor integrilin placed Avandia study . complaint mild chest pain anginal pain day thought defibrillation.Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheets Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR . Esmolol drip well amio stopped BB escalated patient remained NSR.i.Ramp lopressor tolerated BPb.Pump patient remained euvolemic Echo EF 84 % aortic stenosisc.Ischemia stented x 2 prox RCA lesion integrilin x 24hrs prior . started plavix.i.Cont plavix lopressor lisinopril lipitor ASA2 Psych patient long history bipolar disorder depression . depakote lithium remeron outpt . seen psychiatry here.a.Continue depakote lithium subthereapeutic level psych thought likely due noncompliance.b.Continue remeron qhsc.F/u TSH3 DM Blood sugars originally elevated amio drip originally AF contained dextrose . remained NPH RISS.a.NPH RISS4 Prophy Fragmin nexiumLABS PE see today s progress noteEKG AFIB RVR diffuse twave 48 yo male h/o DMII Bipolar d/o depression CAD p/w CP presycope found RCA prox % lesion ostial PDA 90 % stents RCA POBA PDA . Cath c/b VF arrest dye load resultant afib RVR.Plan outlined CCU course.Victor Shepard MD Record date NEUROLOGY CMF ADMISSION NOTEName VALDEZ HarlanDOB119MR Date 930pmFOR DETAILS PLEASE SEE AALIYAH IRAHETA S NOTEID/CC 49yoRHM PMH signif CAD Afib DM Bipolar presents RLE weakness/decreased sensation x 24hrsHPI 49yoRHM PMH signif CAD Afib DM Bipolar USOH 10d ago weakness L arm . Thought 2/2 sleeping . Involved whole arm shoulder . still ADL s felt like arm slightly weaker took 45 days get back normal . eating dinner friend got go BR found RLE weakness/stiffness . Noticed hard climb stairs . Went sleep woke 2am nearly fell 2/2 weakness leg . addition sensory loss RLE esp foot felt cold dead . sensory loss worst distally extended thigh . Never trouble speaking understanding others facial droop problems RUE . Stayed bed called PCP told come ED . note OctNov cardiac cath x 2 . NSTEMI Oct 8 taken cardiac cath cath showed significant RCA disease 40 % prox LCx c/b Vfib needing defib stent placed stenotic RCA lesion c/b dissection needing 2nd stent placed . Post cath Afib requiring amiodarone . subsequent cath Nov 1 episode CP stents patent vessels stable . inpt cardiac hospitalization admitted psych 1 wk suicidality/depression . Previously spring L eye visual disturbance nearly blind went GHIC dxed retinal venous occlusion treated cortisone laser surgery mild improvement vision blurred eye .No HA tinnitus vertigo . blurry vision except L eye venous occlusion diplopia . problems speech.PMH DM insulin x years CAD s/p NSTEMI aboveBipolar disorder lithium depakote required inpt hospitalization ECT pastAfib aboveHypercholMEDS Insulin 70/30 22 units bidAsa 81Plavix 75Lithium dose bid prior note Norvasc dose 5 prior note Mirtazapine dose 45 prior note Naltrexone Nexium 20 prior note Lipitor 40 prior note Cozaar doseDepakote dose bid prior note Lopressor 75 po q8 per prior notes Folate Lisinopril coughSH Tob occas cigarEtOH 0 used EtOH problems IVDA 0Lives 17yo son . 13yo daughter lives sister . Widower x 3yrs . FH Mom PM age 50 died MI 71 . Father EtOH HTN . Sister 4 miscarriages . VS / % RAGeneral WNWD NADHEENT NC/AT . scleral icterus . MMM . OP benign . Neck Supple carotid bruits . CV RRR S1 S2 . IIIII/VI sys murmur best RUSB rad clav neck. second murmur axilla systolic well vs. Galiverdin s sign.Resp CTAB . r/rAbd BS . Soft/NT/ND.Ext C/C/E DP 2 bilat . Skin rashes intact . Neuro MS Conversationally intact.CN II III Pupils 5mm round reactive light 3mm visual field full confrontation optic discs sharpIII IV VIextraocular movements full nystagmus L eyelid slightly weaker longstanding since L eye problemVsensation LT pinprick intact expression muscles symmetric without weaknessIX Xpalate elevates symmetricallyXISCMs 5/5XIItongue protrudes midlineMotor normal bulk tone tremor . pronator drift . 55Sensory endorses difference sensation LT temp RLE vs. LLE . Zone difference feels colder pronounced R lateral calf area particular dermatomal distribution also toe position sense mildly decreased R great toe . Mild vibratory loss ankles bilat symmetrical . Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.10 L 2040MRI brain Acute/subacute infarcts DWI bright/ADC dark/FLAIR bright L cerebellum punctate R precentral gyrus small elliptical area . CTA head/neck aberrant origin R vert CCA ACA s come L carotid also bilateral fetal PCA s likely congenitally small vertebrobasilar vessels . significant focal stenoses atheromatous calcifications . EKG pendingMRI L/S spine negativeIMPRESSION49yoRHM PMH signif CAD DM Bipolar disorder afib s/p recent cardiac cath presents LUE weakness 10d prior admission resolving 45 days RLE weakness /sensory deficit Imaging reveals R precentral gyrus small infarct L cerebellar infarct significant vessel stenoses . Neuro exam brisker reflexes R equivocal RLE weakness R sensory sx R upgoing toe . Clinical picture imaging consistent . Unclear pt poor historian current sx nonobjective sensory findings meaningful event LUE weakness 45d ago.NEURO stroke u including TTE/Holter lipids/lipoprotein/homocysteine . also send hypercoag u including hypercoag panel PT20210 Factor V Leiden APLA lupus anticoagulant given hx retinal venous thrombus young age.Will also send BCx2 given recent cardiac cath although ESR wnl reassuring endocarditis.Will check A1c adequate DM control.Given psych hx check tox screen LFTs.Unclear afib context postcath post Vfib . look LAE holter abnl . Could make case anticoagulate regardless documented afib . CV hold antihtn meds . Continue lipitor outpt dose . Allow SBP .PSYCH continue depakote lithium . Mood ok need monitored.FEN IVF ada low chol/low fat diet . ENDO NPH 20 bid titrate needed RISS . Checking A1c.PPX put sc fragmin nexium . Pneumoboots . Anna V. WendyBird MDHPC Neuro Resident Case discussed Vern Snow senior resident ."

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
scores

[{'rouge-1': {'r': 0.22742474916387959, 'p': 1.0, 'f': 0.3705722040653654},
  'rouge-2': {'r': 0.19543973941368079,
   'p': 0.9917355371900827,
   'f': 0.3265306094944144},
  'rouge-l': {'r': 0.22742474916387959, 'p': 1.0, 'f': 0.3705722040653654}}]

For more relevant information we are trying to summarization using keywords.

2. Extract Keywords using tf-idf and using this give sentence scores according to the sentence scores based on which a  summary is generated.

In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize

#Extracting keywords using TF-IDF
def extract_keywords(text):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([text])
    feature_names = vectorizer.get_feature_names_out()
    return feature_names

#Scoring sentences based on the keywords generated from TF-IDF
def score_sentences(sentences, keywords):
    scores = []
    for sentence in sentences:
        sentence_score = sum([1 for word in keywords if word in sentence])
        scores.append(sentence_score)
    return scores

# Generating summary based on the sentence scores
def generate_summary(text, num_sentences=5):
    sentences = sent_tokenize(text)
    keywords = extract_keywords(text)
    sentence_scores = score_sentences(sentences, keywords)
    top_sentences_indices = sorted(range(len(sentence_scores)), key=lambda i: sentence_scores[i], reverse=True)[:num_sentences]
    summary = '. '.join([sentences[i] for i in top_sentences_indices])
    return summary



In [43]:
summaries = []

for idx, row in data_df.iterrows():
    summary = generate_summary(row['filtered_text'])
    
    # Appending the summary to the list of summaries
    summaries.append(summary)

data_df['summary'] = summaries

print(data_df)


                                                  text  ABDOMINAL  \
0    Record date: 2106-02-12\n\nCampbell Orthopedic...          1   
1    Record date: 2079-05-12\n\n\n\n\n\nMERCY CARE ...          1   
2    Record date: 2120-09-19\n\nPersonal Data and O...          0   
3    Record date: 2067-11-24\n\n                   ...          1   
4    Record date: 2094-02-16\n\nJENNIFER BOOKER\n\n...          1   
..                                                 ...        ...   
197  Record date: 2111-09-26\n\nCC:  Ear pain, coug...          0   
198  Record date: 2081-02-10\n\nGI Fellow Consult N...          0   
199  Record date: 2104-11-10\n\n                   ...          0   
200  Record date: 2076-06-19\n\nPhysical exam - Mal...          1   
201  Record date: 2105-12-15\n\nMr. Bryan comes in ...          1   

     ADVANCED-CAD  DIETSUPP-2MOS  MAJOR-DIABETES  CREATININE  \
0               0              0               0           1   
1               0              1           

In [44]:
data_df['summary'][0]

'Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.1

In [45]:
from rouge import Rouge

hypothesis = "Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.10 L 2040MRI brain Acute/subacute infarcts DWI bright/ADC dark/FLAIR bright L cerebellum punctate R precentral gyrus small elliptical area .. complaint mild chest pain anginal pain day thought defibrillation.Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheets Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR .. able reinitiate sleep thereafter patient may demonstrate additional two early morning awakenings final awakening 6 a.m . patient noted history mixed systemic conditions including diabetes coronary artery disease depressive disorder well relatively stable gastrointestinal condition upper GI evidence gastroparesis .. Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Harlan B. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakenings well mixed systemic medical conditions .. Neuro MS Conversationally intact.CN II III Pupils 5mm round reactive light 3mm visual field full confrontation optic discs sharpIII IV VIextraocular movements full nystagmus L eyelid slightly weaker longstanding since L eye problemVsensation LT pinprick intact expression muscles symmetric without weaknessIX Xpalate elevates symmetricallyXISCMs 5/5XIItongue protrudes midlineMotor normal bulk tone tremor ."
reference = "Record date 0212Campbell Orthopedic Associates4 Madera CircleOmak GA Habib Valenzuela M.D . Valdez Harlan Jr. Har 43 year old 6 pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing using cane . forroutine followup . Past medical history notable ankle injury previously . history diabetes sleep apnea . take Prozac Cardizem Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadth malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasion proximally . fibulartenderness proximally . anterior pain noted . hindfoot midfoot forefoot tenderness noted . would like use tube sock air cast . isusing cane ambulation . xrays show notablefracture pattern today await Radiology opinion . would like stay air splint sock . willsee back six week review Boxborough office . Diagnosis Left ankle fracture . Valenzuela M.D . HV/kuntzMmedical cc Harlan Oneil M.D . Harold Nutter M.D . Doctors Hospital North 64 Bruce St Omak GA Habib Valenzuela M.D . DD DT DV reviewed Attending Physician Record date CAMPBELL EMERGENCY DEPT VISIT VALDEZ HARLAN JR . VISIT DATE patient seen examined emergency department . patient seen Emergency Medicine resident . discussed management resident . also seen patient primarily reviewed medical record . brief addendum medical record.HISTORY PRESENTING COMPLAINT Briefly 45yearold male complains several day nausea vomiting left lower quadrant discomfort . also describes intermittent chest pain number month without significant change . sent primary care doctor today pain also noted EKG change . patient chest pain time evaluation emergency department shortness breath.REVIEW SYSTEMS indicated otherwise negative.PAST MEDICAL HISTORY indicated chart.SOCIAL HISTORY FAMILY HISTORY indicated chart.PHYSICAL EXAMINATION physical examination patient wellappearing smiling pleasant gentleman acute distress . blood pressure /90 pulse 82 temperature 97.9 degree . Normocephalic atraumatic . chest clear auscultation . heart regular rate rhythm . abdomen soft . left lower quadrant tenderness . also note cardiovascular examination soft murmur say since childhood . extremity normal . neurologic examination nonfocal . THERAPY RENDERED/COURSE ED gentleman abdominal pain receive CAT scan rule diverticulitis . also nonspecific ST change EKG . painfree time . describe classic exertional pattern chest pain given diabetic EKG change also admitted rule MI . CT pending time dictation.DISPOSITION including condition upon discharge . patient s condition currently stable . CK498/ JAY CARROLL M.D . JC72 Dictated JAY CARROLL M.D . JC72 reviewed Attending Physician Record date Vicente Blair M.D . Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Harlan B. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakening well mixed systemic medical condition . HISTORY PRESENT ILLNESS already know Mr. Valdez demonstrates history difficulty sleep reinitiation maintenance well increased early morning awakening noted exacerbation sleep difficulty occurring temporal association loss wife pancreatic cancer last year . placed unfortunate situation single parent 15yearold son 10yearold daughter describes modification current employment duty set designer . particular Mr. Valdez describes undergoing frequent international travelling bee markedly curtailed tending family situation closer home . described history intermittent snoring symptomatology unaware specific nocturnal respiratory pause . unaware restless lower limb sensory complaint may impact ability initiate reinitiate sleep . denies history night owl personality circadian rhythm dysfunction may played role respect nocturnal sleep disruption sleep difficulty . denies history paroxysmal abnormal disturbance associated narcoleptic symptom . Mr. Valdez underwent initial formal polysomnographic evaluation center sleep diagnostics Holy Cross time noted demonstrate respiratory disturbance index 81/hour particularly exacerbated supine position characterized predominantly hypopnea equal distribution nonREM stage REM sleep associated O2 desaturation Nadir 88 % respiratory disturbance predominantly obstructive mixed hypopnea . addition loud snoring noted . evidence sleep efficiency 88 % short sleep onset latency 4 minute . predominance light nonREM stage III sleep concomitant inability achieve significant slowwave stage REM sleep . also alpha intrusion alpha delta sleep evident initial sleep study . addition premature ventricular contraction noted . patient underwent CPAP titration also Tenacre Foundation Nursing Home Boxborough time marked reduction frequency hypopnea respiratory disturbance index equal 2/hour CPAP titration 46 cm . Sleep efficiency improved 91 % short sleep onset latency also noted 3 minute . increased predominance light nonREM stage III sleep concomitant inability achieve sustained slow wave sleep . Since initial trial nocturnal CPAP titration 6 cm water pressure various CPAP mask modification including CPAP nasal face mask Mallinckrodt Breeze supportive head gear nasal pillow . patient describes associated claustrophobic symptomatology relative difficulty sustained nocturnal home CPAP use difficulty regard CPAP complication bulkiness CPAP machine general . result utilized nocturnal CPAP therapy period time although still maintains CPAP equipment house . particular note exacerbation past year patient demonstrates increased early morning awakening averaging 24 number typical awakening occurring approximately two hour sleep initiation p.m. patient describes one awakening p.m. second awakening a.m. unclear causative etiology . patient might awaken 3 a.m. ready day . able reinitiate sleep thereafter patient may demonstrate additional two early morning awakening final awakening 6 a.m . patient noted history mixed systemic condition including diabetes coronary artery disease depressive disorder well relatively stable gastrointestinal condition upper GI evidence gastroparesis . MEDICATIONS 1 . Provigil mg p.o . q. a.m. PRN . 2 . Lithium . 3 . Valproate . 4 . Glucophage mg t.i.d . 5 . Humulin 15 unit night . 6 . Folate . 7 . Metoprolol . 8 . Cardia . 9 . Vitamin E. 10 . Coated aspirin . ALLERGIES/ADVERSE REACTIONS patient describes enhancement suicidal tendency association prior Prozac usage . SOCIAL HISTORY patient denies active tobacco alcoholic beverage usage . lost pound past several year . current weight pound . desirous losing additional weight regard regular exercise hectic social situation make somewhat difficult present time . examination patient demonstrates blood pressure /88 seated left arm respiratory rate 16 . HEENT EXAMINATION Borderline small posterior oropharyngeal aperture slightly increased redundant tissue evident posteriorly slightly elongated uvula noted . patient appears awake alert speech clear fluent receptive language function essentially intact . presently wearing dental brace . obvious cranial nerve deficit appreciated . focal sensory motor neurologic deficit noted . significant appendicular dystaxias dysmetrias currently evidence . routine gait appears normal based without evidence significant gait dystaxias . current clinical ictal manifestation present . acute evidence microsleeps noted . IMPRESSION 1 . Sleep stage/arrousal dysfunction .56 Manifested subjective complaint nonrestorative sleep increased daytime fatigue alternating hypersomnia recurrent polysomnographic evidence lightened sleep pattern increased predominance nonREM stage 12 sleep presence alpha intrusion alpha delta component deeper sleep . latter EEG finding described association subjective complaint nonrestorative sleep well clinical setting chronic pain related complaint depressive anxiety disorder intercurrent psychotropics agent used usually associated benzodiazepine barbituate usage . 2 . Sleep disordered breathing evidenced prior polysomnographic evaluation mostly obstructive mixed hypopnea . patient appears largely refractory trial CPAP therapy particularly far demonstrates associated claustrophobic symptom association s usage despite relatively modest CPAP water pressure 6 cm . addition tried various nasal CPAP face mask including Mallinckrodt Breeze supportive head gear nasal pillow limited success . One might consider repeating polysomnographic evaluation future utilizing potential trial BIPAP titration may help improve claustrophobic symptom patient still left issue referable tangled tubing night issue referable nasal face mask usage noted . 3 . Relative difficulty sleep reinitiation maintenance patient describes least 24 early morning awakening difficulty sleep reinitiation maintenance thereby compounding current sleep problem . would logically relationship current sleep exacerbation recent death wife pancreatic cancer last year may also evidence nocturnal sleep disturbance repeat polysomnographic evaluation i.e . particular looking presence increased spontaneous arousal limb associated arousal periodic limb movement sleep may special clinical benefit . PLAN 1 . short course far patient describes exceedingly tired unable perform routine daily task work managing family absence deceased wife suggested initiation PRN Zolpidem tartrate therapy 5 mg tablet utilizing one two tablet p.o . q. h.s . PRN difficulty sleep reinitiation maintenance . 2 . patient advised take Zolpidem tartrate therapy 23 time per week effort avoid issue physiologic dependency . 3 . patient advised potential adverse behavioral systemic side effect Zolpidem tartrate therapy including hypersomnolence gastric upset loose stool diarrhea cardiac palpitation . Pending clinical response Zolpidem tartrate therapy might seek direct treatment sleep disordered breathing issue may include repeat sleep study potential trial BIPAP therapy effort modify attenuate claustrophobic symptom . prof poorly responsive trial BIPAP therapy however might consider supplemental O2 therapy night mind follow sleep study associated endtidal CO2 monitoring well . 4 . meantime patient advised contact sleep disorder clinic acute sleep related concern interim . 5 . patient may also benefit nonpharmacologic approach regard sleep reinitiation hypnotherapy hold strategy pending follow sleep disorder clinic evaluation approximately four month time . thank allowing share medical care Mr. Harlan Valdez . hope letter find well . Sincerely Yovani Vergara M.D . Sleep Clinic Doctors Hospital North cc Sleep Clinic DHN DD DT TX Record date CCU JAR Transfer NoteAdmission Date Transfer Patient Name Valdez Harlan MRN Cardiologist Dr. NutterPCP Vicente BarkerCC Chest Pain Cath VF arrest RCA stentingHistory Present Illness obtained admission Pt 48 yo male h/o DMII Bipolar d/o depression began substernal day prior admission car presyncope profound weakness . CP minimal weakness made pull . repeat symptom day admission . EKG c/w 2/ showed flattened Twave V2 TWI V3 flattened Twaves aVL . trop negative MB index elevated . Due Twave flattening history elevated index decided start heparin ASA take cath lab . Cath showed right dominant system prox Cx 40 % LAD clear RCA prox % lesion ostial PDA 90 % . final dye injection pt VF arrest 2 shock . Pt regained pul AF new RVR . Pt started amio . Pt began experience discomfort RVR decided intervene . POBA done ostial PDA . first noeluting stent placed prox RCA pt dissection thus 2cd stent placed . admission CCU pt still AF RVR s . amio drip BB loaded plavix ASA lipitor integrilin placed Avandia study . complaint mild chest pain anginal pain day thought defibrillation.Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheet Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR . Esmolol drip well amio stopped BB escalated patient remained NSR.i.Ramp lopressor tolerated BPb.Pump patient remained euvolemic Echo EF 84 % aortic stenosisc.Ischemia stented x 2 prox RCA lesion integrilin x 24hrs prior . started plavix.i.Cont plavix lopressor lisinopril lipitor ASA2 Psych patient long history bipolar disorder depression . depakote lithium remeron outpt . seen psychiatry here.a.Continue depakote lithium subthereapeutic level psych thought likely due noncompliance.b.Continue remeron qhsc.F/u TSH3 DM Blood sugar originally elevated amio drip originally AF contained dextrose . remained NPH RISS.a.NPH RISS4 Prophy Fragmin nexiumLABS PE see today s progress noteEKG AFIB RVR diffuse twave 48 yo male h/o DMII Bipolar d/o depression CAD p/w CP presycope found RCA prox % lesion ostial PDA 90 % stent RCA POBA PDA . Cath c/b VF arrest dye load resultant afib RVR.Plan outlined CCU course.Victor Shepard MD Record date NEUROLOGY CMF ADMISSION NOTEName VALDEZ HarlanDOB119MR Date 930pmFOR DETAILS PLEASE SEE AALIYAH IRAHETA S NOTEID/CC 49yoRHM PMH signif CAD Afib DM Bipolar present RLE weakness/decreased sensation x 24hrsHPI 49yoRHM PMH signif CAD Afib DM Bipolar USOH 10d ago weakness L arm . Thought 2/2 sleeping . Involved whole arm shoulder . still ADL s felt like arm slightly weaker took 45 day get back normal . eating dinner friend got go BR found RLE weakness/stiffness . Noticed hard climb stair . Went sleep woke 2am nearly fell 2/2 weakness leg . addition sensory loss RLE esp foot felt cold dead . sensory loss worst distally extended thigh . Never trouble speaking understanding others facial droop problem RUE . Stayed bed called PCP told come ED . note OctNov cardiac cath x 2 . NSTEMI Oct 8 taken cardiac cath cath showed significant RCA disease 40 % prox LCx c/b Vfib needing defib stent placed stenotic RCA lesion c/b dissection needing 2nd stent placed . Post cath Afib requiring amiodarone . subsequent cath Nov 1 episode CP stent patent vessel stable . inpt cardiac hospitalization admitted psych 1 wk suicidality/depression . Previously spring L eye visual disturbance nearly blind went GHIC dxed retinal venous occlusion treated cortisone laser surgery mild improvement vision blurred eye .No HA tinnitus vertigo . blurry vision except L eye venous occlusion diplopia . problem speech.PMH DM insulin x year CAD s/p NSTEMI aboveBipolar disorder lithium depakote required inpt hospitalization ECT pastAfib aboveHypercholMEDS Insulin 70/30 22 unit bidAsa 81Plavix 75Lithium dose bid prior note Norvasc dose 5 prior note Mirtazapine dose 45 prior note Naltrexone Nexium 20 prior note Lipitor 40 prior note Cozaar doseDepakote dose bid prior note Lopressor 75 po q8 per prior note Folate Lisinopril coughSH Tob occas cigarEtOH 0 used EtOH problem IVDA 0Lives 17yo son . 13yo daughter life sister . Widower x 3yrs . FH Mom PM age 50 died MI 71 . Father EtOH HTN . Sister 4 miscarriage . VS / % RAGeneral WNWD NADHEENT NC/AT . scleral icterus . MMM . OP benign . Neck Supple carotid bruits . CV RRR S1 S2 . IIIII/VI sys murmur best RUSB rad clav neck . second murmur axilla systolic well vs. Galiverdin s sign.Resp CTAB . r/rAbd BS . Soft/NT/ND.Ext C/C/E DP 2 bilat . Skin rash intact . Neuro MS Conversationally intact.CN II III Pupils 5mm round reactive light 3mm visual field full confrontation optic disc sharpIII IV VIextraocular movement full nystagmus L eyelid slightly weaker longstanding since L eye problemVsensation LT pinprick intact expression muscle symmetric without weaknessIX Xpalate elevates symmetricallyXISCMs 5/5XIItongue protrudes midlineMotor normal bulk tone tremor . pronator drift . 55Sensory endorses difference sensation LT temp RLE vs. LLE . Zone difference feel colder pronounced R lateral calf area particular dermatomal distribution also toe position sense mildly decreased R great toe . Mild vibratory loss ankle bilat symmetrical . Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favor L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.10 L 2040MRI brain Acute/subacute infarct DWI bright/ADC dark/FLAIR bright L cerebellum punctate R precentral gyrus small elliptical area . CTA head/neck aberrant origin R vert CCA ACA s come L carotid also bilateral fetal PCA s likely congenitally small vertebrobasilar vessel . significant focal stenosis atheromatous calcification . EKG pendingMRI L/S spine negativeIMPRESSION49yoRHM PMH signif CAD DM Bipolar disorder afib s/p recent cardiac cath present LUE weakness 10d prior admission resolving 45 day RLE weakness /sensory deficit Imaging reveals R precentral gyrus small infarct L cerebellar infarct significant vessel stenosis . Neuro exam brisker reflex R equivocal RLE weakness R sensory sx R upgoing toe . Clinical picture imaging consistent . Unclear pt poor historian current sx nonobjective sensory finding meaningful event LUE weakness 45d ago.NEURO stroke u including TTE/Holter lipids/lipoprotein/homocysteine . also send hypercoag u including hypercoag panel PT20210 Factor V Leiden APLA lupus anticoagulant given hx retinal venous thrombus young age.Will also send BCx2 given recent cardiac cath although ESR wnl reassuring endocarditis.Will check A1c adequate DM control.Given psych hx check tox screen LFTs.Unclear afib context postcath post Vfib . look LAE holter abnl . Could make case anticoagulate regardless documented afib . CV hold antihtn med . Continue lipitor outpt dose . Allow SBP .PSYCH continue depakote lithium . Mood ok need monitored.FEN IVF ada low chol/low fat diet . ENDO NPH 20 bid titrate needed RISS . Checking A1c.PPX put sc fragmin nexium . Pneumoboots . Anna V. WendyBird MDHPC Neuro Resident Case discussed Vern Snow senior resident ."

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
scores

[{'rouge-1': {'r': 0.21584699453551912,
   'p': 0.9753086419753086,
   'f': 0.3534675585538189},
  'rouge-2': {'r': 0.17293540474243663,
   'p': 0.950561797752809,
   'f': 0.2926323045554968},
  'rouge-l': {'r': 0.21584699453551912,
   'p': 0.9753086419753086,
   'f': 0.3534675585538189}}]

3. Summarization using keywords generated from count vectorizer
Count Vectorizer using tf-idf trnasformers extract keywords and search for the keyword in the sentence if it there generate summary based on this.

In [46]:
text_data = data_df['filtered_text']


In [47]:
text_data

0      Record date 0212Campbell Orthopedic Associates...
1      Record date 0512MERCY CARE CENTERMercy Interna...
2      Record date 0919Personal Data Overall HealthPa...
3      Record date HUNTINGTON EMERGENCY DEPT VISIT TH...
4      Record date 0216JENNIFER BOOKERLC Unit 5714NAS...
                             ...                        
197    Record date 0926CC Ear pain coughHPI 49 y.o . ...
198    Record date 0210GI Fellow Consult Note Pt Fran...
199    Record date SLHC EMERGENCY DEPT VISIT TANYA V ...
200    Record date 0619Physical exam MalePatient 45 y...
201    Record date 1215Mr . Bryan comes tonight compl...
Name: filtered_text, Length: 202, dtype: object

In [48]:
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(max_features=10000, ngram_range=(1, 8))

word_count_vectors = cv.fit_transform(text_data)


In [49]:
cv

In [50]:
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer=TfidfTransformer(smooth_idf=True,use_idf=True)
tfidf_transformer.fit(word_count_vectors)

In [51]:
text_data.index

RangeIndex(start=0, stop=202, step=1)

In [52]:
def cordinate_sorting(matrix):
    tuples = zip(matrix.col, matrix.data)
    return sorted(tuples, key=lambda x: (x[1], x[0]), reverse=True)

def extract_top10_from_vector(feature_names, sorted_items, top=10):
    sorted_items = sorted_items[:top]
    
    score_vals = []
    feature_vals = []
    for idx, score in sorted_items:
        fname = feature_names[idx]
        score_vals.append(round(score, 3))
        feature_vals.append(feature_names[idx])
    
    # Creating a dictionary of features and their scores
    results = {}
    for idx in range(len(feature_vals)):
        results[feature_vals[idx]] = score_vals[idx]
    return results

# Extracting feature names
feature_names = cv.get_feature_names_out()

In [53]:
def get_keywords(idx, docs):

    tf_idf_vector = tfidf_transformer.transform(cv.transform([docs[idx]]))
    
    # Sorting the TF-IDF vectors by descending order of scores
    sorted_items = cordinate_sorting(tf_idf_vector.tocoo())
    
    # Extracting max 10 keywords
    keywords = extract_top10_from_vector(feature_names, sorted_items, 10)
    
    # Keep track of printed keywords for the current record so that same keywords are used
    printed_keywords = set()
    
    keywords_set = {}
    
    for k in keywords:
        # Checking for repeated words
        words = k.split()
        same_word = None
        
        # Check if any word in the keyword has already been been taken for current record
        for word in words:
            if word in printed_keywords:
                same_word = word
                break
        
        if same_word is None:
            keywords_set[k] = keywords[k]
            for word in words:
                printed_keywords.add(word)
                
    return keywords_set


In [73]:
# Iterate over each row in the DataFrame
for idx, row in data_df.iterrows():
    
    patient = 1
    
    # Computing keywords for the current row
    keywords = get_keywords(idx, text_data)
    
    print(f"\nPatient{patient}:")

    # Printing information for each row
    print("\nABDOMINAL:")
    if row['ABDOMINAL'] == 0:
        print("criteria met")
    else:
        print("criteria not met")
    print("ADVANCED-CAD:")
    if row['ADVANCED-CAD'] == 0:
        print("criteria met")
    else:
        print("criteria not met")  
    print("DIETSUPP-2MOS:")  
    if row['DIETSUPP-2MOS'] == 0:
        print("criteria met")
    else:
        print("criteria not met")   
    print("MAJOR-DIABETES:")
    if row['MAJOR-DIABETES'] == 0:
        print("criteria met")
    else:
        print("criteria not met")    
    print("CREATININE:")
    if row['CREATININE'] == 0:
        print("criteria met")
    else:
        print("criteria not met")
        
    print("\nImportant Keywords")
    
    # Keep track of printed keywords
    printed_keywords = set()
    
    for k in keywords:
        # Check if keyword has already been printed
        if k not in printed_keywords:
            print(k, keywords[k])
            # Add the printed keyword to the set
            printed_keywords.add(k)
            
    patient += 1



Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria not met

Important Keywords
sleep 0.441
valdez 0.23
cpap 0.174
lithium 0.171
dl 0.134
patient 0.13
bipolar 0.121
depakote 0.121
afib 0.112

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
russell 0.405
lose weight 0.161
rutledge 0.141
regular murmurs 0.121
hepatosplenomegaly extremities 0.121
chest clear percussion auscultation 0.118

Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria not met

Important Keywords
brian 0.283
bruce 0.206
mg 0.176
history 0.163
pulses bilaterally 0.094
absent 0.094
dyspnea 0.094
without 0.089
dr 0.086

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria n


Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria met

Important Keywords
gerald 0.303
bumex 0.225
avapro 0.179
mg 0.171
olive 0.141
tablet po 0.134
pravachol 0.127
zetia 0.115
verapamil 0.115

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
retardation 0.196
tablet po 0.195
peptic ulcer disease 0.187
upper endoscopy 0.18
anemia 0.145

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
smh 0.33
plaque smooth 0.305
mmhg 0.172
mg 0.139
ica 0.115
normal 0.109
stat lab 0.102
tunnel 0.102

Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important


Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
tablet 0.166
take po 0.144
abdo 0.114
mg 0.103
history 0.1
patient 0.099

Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria not met

Important Keywords
tablet po 0.235
hyperthyroidism 0.172
imdur 0.162
patient 0.155
qd 0.139
pregnancy 0.128

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
common 0.263
mullen 0.244
mg daily 0.188
nunes 0.165
femoral 0.156
iliac 0.138

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
mg 0.275
bryan 0.178
pain 0.154
sl tng 0.092
codeine 0.087
drop


Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
nick 0.316
squamous cell 0.165
rca 0.162
poba 0.156
vf arrest 0.149
stents 0.134
hodgkin 0.121

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria not met

Important Keywords
bpg 0.275
atwood 0.216
graft 0.2
fempop 0.168
angioplasty 0.159
angio 0.158
thrombectomy 0.154
lplasma 0.132

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
vicente 0.279
patient 0.273
hytrin 0.236
xayachack 0.158
prostatic hypertrophy 0.128
bundle branch 0.113

Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria not met

Important Keyw


Patient1:

ABDOMINAL:
criteria met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
hocm 0.19
sarcoidosis 0.175
hypercalcemia 0.167
po 0.166
mg 0.165
small bowel 0.146
ivc filter 0.133
colostomy 0.107

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria not met
CREATININE:
criteria not met

Important Keywords
patient 0.168
ha 0.16
pain 0.125
history 0.125
neurology 0.12
chest wall 0.118
ed 0.103
syncope 0.101
gait 0.098
normal 0.097

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
patient 0.323
mg 0.145
atrial fibrillation 0.127
knees 0.116
ventricular response 0.107
hydronephrosis 0.101

Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria not met
MAJOR-DIABETES


Patient1:

ABDOMINAL:
criteria not met
ADVANCED-CAD:
criteria not met
DIETSUPP-2MOS:
criteria met
MAJOR-DIABETES:
criteria met
CREATININE:
criteria met

Important Keywords
tablet 0.266
bryan 0.158
take directed 0.149


In [55]:
def generate_summary(idx, tfidf_transformer, cv, text_data):
    # Generating TF-IDF for the each document
    tf_idf_vector = tfidf_transformer.transform(cv.transform([text_data[idx]]))
    
    # Sorting the TF-IDF vectors by descending order of scores
    sorted_items = cordinate_sorting(tf_idf_vector.tocoo())
    
    # Extracting top 5 keywords
    keywords = extract_top10_from_vector(cv.get_feature_names_out(), sorted_items, 5)
    
    summary = []
    current_sentences = {keyword: 0 for keyword in keywords}  
    for sentence in text_data[idx].split('.'):
        for keyword in keywords:
            if current_sentences[keyword] < 5 and keyword.lower() in sentence.lower():
                summary.append(sentence.strip())
                current_sentences[keyword] += 1
                break  
        total_sentences = sum(current_sentences.values())
        if total_sentences >= 5 * len(keywords): 
            break
    return '. '.join(summary)
    
# Creating an empty list to store the summaries
summaries = []

# Iterating over each row in the DataFrame
for idx in range(len(data_df)):
    summary = generate_summary(idx, tfidf_transformer, cv, text_data)
    summaries.append(summary)

# Adding list of summaries as a new column to the DataFrame
data_df['count_vec_Summary'] = summaries



In [56]:
data_df['count_vec_Summary'][0]

'Valdez Harlan Jr. history diabetes sleep apnea. DD DT DV reviewed Attending Physician Record date CAMPBELL EMERGENCY DEPT VISIT VALDEZ HARLAN JR. Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakenings well mixed systemic medical conditions. Valdez demonstrates history difficulties sleep reinitiation maintenance well increased early morning awakenings noted exacerbation sleep difficulties occurring temporal association loss wife pancreatic cancer last year. Valdez describes undergoing frequent international travelling bee markedly curtailed tending family situation closer home. unaware restless lower limb sensory complaints may impact ability initiate reinitiate sleep. denies history night owl person

In [57]:
from rouge import Rouge

hypothesis = "Valdez Harlan Jr. history diabetes sleep apnea. DD DT DV reviewed Attending Physician Record date CAMPBELL EMERGENCY DEPT VISIT VALDEZ HARLAN JR. Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakenings well mixed systemic medical conditions. Valdez demonstrates history difficulties sleep reinitiation maintenance well increased early morning awakenings noted exacerbation sleep difficulties occurring temporal association loss wife pancreatic cancer last year. Valdez describes undergoing frequent international travelling bee markedly curtailed tending family situation closer home. unaware restless lower limb sensory complaints may impact ability initiate reinitiate sleep. denies history night owl personality circadian rhythm dysfunction may played role respect nocturnal sleep disruptions sleep difficulties. Valdez underwent initial formal polysomnographic evaluation center sleep diagnostics Holy Cross time noted demonstrate respiratory disturbance index 81/hour particularly exacerbated supine position characterized predominantly hypopneas equal distribution nonREM stage REM sleep associated O2 desaturation Nadir 88 % respiratory disturbances predominantly obstructive mixed hypopneas. patient underwent CPAP titration also Tenacre Foundation Nursing Home Boxborough time marked reduction frequency hypopneas respiratory disturbance index equals 2/hour CPAP titrations 46 cm. Since initial trial nocturnal CPAP titration 6 cm water pressure various CPAP mask modifications including CPAP nasal face mask Mallinckrodt Breeze supportive head gear nasal pillows. patient describes associated claustrophobic symptomatology relative difficulties sustained nocturnal home CPAP use difficulties regards CPAP complications bulkiness CPAP machine general. result utilized nocturnal CPAP therapy period time although still maintains CPAP equipment house. Lithium. patient appears largely refractory trial CPAP therapy particularly far demonstrates associated claustrophobic symptoms association s usage despite relatively modest CPAP water pressures 6 cm. Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheets Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa. depakote lithium remeron outpt. Continue depakote lithium subthereapeutic level psych thought likely due noncompliance. still ADL s felt like arm slightly weaker took 45 days get back normal. PMH DM insulin x years CAD s/p NSTEMI aboveBipolar disorder lithium depakote required inpt hospitalization ECT pastAfib aboveHypercholMEDS Insulin 70/30 22 units bidAsa 81Plavix 75Lithium dose bid prior note Norvasc dose 5 prior note Mirtazapine dose 45 prior note Naltrexone Nexium 20 prior note Lipitor 40 prior note Cozaar doseDepakote dose bid prior note Lopressor 75 po q8 per prior notes Folate Lisinopril coughSH Tob occas cigarEtOH 0 used EtOH problems IVDA 0Lives 17yo son. CN II III Pupils 5mm round reactive light 3mm visual field full confrontation optic discs sharpIII IV VIextraocular movements full nystagmus L eyelid slightly weaker longstanding since L eye problemVsensation LT pinprick intact expression muscles symmetric without weaknessIX Xpalate elevates symmetricallyXISCMs 5/5XIItongue protrudes midlineMotor normal bulk tone tremor. Zone difference feels colder pronounced R lateral calf area particular dermatomal distribution also toe position sense mildly decreased R great toe. 9 L BUN 11 mg/dl Creatinine 1. 5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9"
reference = "Record date 0212Campbell Orthopedic Associates4 Madera CircleOmak GA Habib Valenzuela M.D . Valdez Harlan Jr. Har 43 year old 6 pound gentleman referred forconsultation Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac Cardizem Glucophage Amaryl . also followed Dr. HaroldNutter arrhythmia . smoke . drinksminimally . set designer Columbia Pictures . examination today slight tenderness left ankleabout four fingerbreadths malleolus . malleolus isnontender medially laterally ligamentous tendernesseither . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . fibulartenderness proximally . anterior pain noted . hindfoot midfoot forefoot tenderness noted . would like use tube sock air cast . isusing cane ambulation . xrays show notablefracture pattern today await Radiology opinion . would like stay air splint sock . willsee back six weeks review Boxborough office . Diagnosis Left ankle fracture . Valenzuela M.D . HV/kuntzMmedical cc Harlan Oneil M.D . Harold Nutter M.D . Doctors Hospital North 64 Bruce St Omak GA Habib Valenzuela M.D . DD DT DV reviewed Attending Physician Record date CAMPBELL EMERGENCY DEPT VISIT VALDEZ HARLAN JR . VISIT DATE patient seen examined emergency department . patient seen Emergency Medicine resident . discussed management resident . also seen patient primarily reviewed medical record . brief addendum medical record.HISTORY PRESENTING COMPLAINT Briefly 45yearold male complains several days nausea vomiting left lower quadrant discomfort . also describes intermittent chest pain number months without significant change . sent primary care doctor today pain also noted EKG changes . patient chest pain time evaluation emergency department shortness breath.REVIEW SYSTEMS indicated otherwise negative.PAST MEDICAL HISTORY indicated chart.SOCIAL HISTORY FAMILY HISTORY indicated chart.PHYSICAL EXAMINATION physical examination patient wellappearing smiling pleasant gentleman acute distress . blood pressure /90 pulse 82 temperature 97.9 degrees . Normocephalic atraumatic . chest clear auscultation . heart regular rate rhythm . abdomen soft . left lower quadrant tenderness . also note cardiovascular examination soft murmur says since childhood . extremities normal . neurologic examination nonfocal . THERAPY RENDERED/COURSE ED gentleman abdominal pain receive CAT scan rule diverticulitis . also nonspecific ST changes EKG . painfree time . describe classic exertional pattern chest pain given diabetic EKG changes also admitted rule MI . CT pending time dictation.DISPOSITION including condition upon discharge . patient s condition currently stable . CK498/ JAY CARROLL M.D . JC72 Dictated JAY CARROLL M.D . JC72 reviewed Attending Physician Record date Vicente Blair M.D . Internal Medical Doctors Hospital North Omak Georgia VALDEZ Harlan DHN DATE BIRTH CURRENT CLINIC VISIT DATE Dear Vicente Thank advance allowing share medical care Mr. Harlan B. Valdez 46yearold male patient prior polysomnographic evidence sleep disordered breathing well history difficulty sleep reinitiation maintenance increased early morning awakenings well mixed systemic medical conditions . HISTORY PRESENT ILLNESS already know Mr. Valdez demonstrates history difficulties sleep reinitiation maintenance well increased early morning awakenings noted exacerbation sleep difficulties occurring temporal association loss wife pancreatic cancer last year . placed unfortunate situation single parent 15yearold son 10yearold daughter describes modification current employment duties set designer . particular Mr. Valdez describes undergoing frequent international travelling bee markedly curtailed tending family situation closer home . described history intermittent snoring symptomatology unaware specific nocturnal respiratory pauses . unaware restless lower limb sensory complaints may impact ability initiate reinitiate sleep . denies history night owl personality circadian rhythm dysfunction may played role respect nocturnal sleep disruptions sleep difficulties . denies history paroxysmal abnormal disturbances associated narcoleptic symptoms . Mr. Valdez underwent initial formal polysomnographic evaluation center sleep diagnostics Holy Cross time noted demonstrate respiratory disturbance index 81/hour particularly exacerbated supine position characterized predominantly hypopneas equal distribution nonREM stage REM sleep associated O2 desaturation Nadir 88 % respiratory disturbances predominantly obstructive mixed hypopneas . addition loud snoring noted . evidence sleep efficiency 88 % short sleep onset latency 4 minutes . predominance light nonREM stages III sleep concomitant inability achieve significant slowwave stage REM sleep . also alpha intrusions alpha delta sleep evident initial sleep study . addition premature ventricular contractions noted . patient underwent CPAP titration also Tenacre Foundation Nursing Home Boxborough time marked reduction frequency hypopneas respiratory disturbance index equals 2/hour CPAP titrations 46 cm . Sleep efficiency improved 91 % short sleep onset latency also noted 3 minutes . increased predominance light nonREM stage III sleep concomitant inability achieve sustained slow wave sleep . Since initial trial nocturnal CPAP titration 6 cm water pressure various CPAP mask modifications including CPAP nasal face mask Mallinckrodt Breeze supportive head gear nasal pillows . patient describes associated claustrophobic symptomatology relative difficulties sustained nocturnal home CPAP use difficulties regards CPAP complications bulkiness CPAP machine general . result utilized nocturnal CPAP therapy period time although still maintains CPAP equipment house . particular note exacerbation past year patient demonstrates increased early morning awakenings averaging 24 number typical awakenings occurring approximately two hours sleep initiation p.m. patient describes one awakening p.m. second awakening a.m. unclear causative etiology . patient might awaken 3 a.m. ready day . able reinitiate sleep thereafter patient may demonstrate additional two early morning awakenings final awakening 6 a.m . patient noted history mixed systemic conditions including diabetes coronary artery disease depressive disorder well relatively stable gastrointestinal condition upper GI evidence gastroparesis . MEDICATIONS 1 . Provigil mg p.o . q. a.m. PRN . 2 . Lithium . 3 . Valproate . 4 . Glucophage mg t.i.d . 5 . Humulin 15 units night . 6 . Folate . 7 . Metoprolol . 8 . Cardia . 9 . Vitamin E. 10 . Coated aspirin . ALLERGIES/ADVERSE REACTIONS patient describes enhancement suicidal tendencies association prior Prozac usage . SOCIAL HISTORY patient denies active tobacco alcoholic beverage usage . lost pounds past several years . current weight pounds . desirous losing additional weight regards regular exercise hectic social situation makes somewhat difficult present time . examination patient demonstrates blood pressure /88 seated left arm respiratory rate 16 . HEENT EXAMINATION Borderline small posterior oropharyngeal aperture slightly increased redundant tissue evident posteriorly slightly elongated uvula noted . patient appears awake alert speech clear fluent receptive language function essentially intact . presently wearing dental braces . obvious cranial nerve deficits appreciated . focal sensory motor neurologic deficits noted . significant appendicular dystaxias dysmetrias currently evidence . routine gait appears normal based without evidence significant gait dystaxias . current clinical ictal manifestations present . acute evidence microsleeps noted . IMPRESSION 1 . Sleep stage/arrousal dysfunction .56 Manifested subjective complaints nonrestorative sleep increased daytime fatigue alternating hypersomnia recurrent polysomnographic evidence lightened sleep pattern increased predominance nonREM stages 12 sleep presence alpha intrusions alpha delta component deeper sleep . latter EEG findings described association subjective complaints nonrestorative sleep well clinical setting chronic pain related complaints depressive anxiety disorder intercurrent psychotropics agents used usually associated benzodiazepine barbituate usage . 2 . Sleep disordered breathing evidenced prior polysomnographic evaluations mostly obstructive mixed hypopnea . patient appears largely refractory trial CPAP therapy particularly far demonstrates associated claustrophobic symptoms association s usage despite relatively modest CPAP water pressures 6 cm . addition tried various nasal CPAP face mask including Mallinckrodt Breeze supportive head gear nasal pillows limited success . One might consider repeating polysomnographic evaluation future utilizing potential trial BIPAP titration may help improve claustrophobic symptoms patient still left issues referable tangled tubing night issues referable nasal face mask usage noted . 3 . Relative difficulties sleep reinitiation maintenance patient describes least 24 early morning awakenings difficulty sleep reinitiation maintenance thereby compounding current sleep problem . would logically relationship current sleep exacerbations recent death wife pancreatic cancer last year may also evidence nocturnal sleep disturbances repeat polysomnographic evaluation i.e . particular looking presence increased spontaneous arousals limb associated arousals periodic limb movements sleep may special clinical benefit . PLAN 1 . short course far patient describes exceedingly tired unable perform routine daily tasks work managing family absence deceased wife suggested initiation PRN Zolpidem tartrate therapy 5 mg tablets utilizing one two tablets p.o . q. h.s . PRN difficulties sleep reinitiation maintenance . 2 . patient advised take Zolpidem tartrate therapy 23 times per week effort avoid issues physiologic dependency . 3 . patient advised potential adverse behavioral systemic side effects Zolpidem tartrate therapy including hypersomnolence gastric upset loose stools diarrhea cardiac palpitations . Pending clinical response Zolpidem tartrate therapy might seek direct treatment sleep disordered breathing issues may include repeat sleep study potential trial BIPAP therapy effort modify attenuate claustrophobic symptoms . proves poorly responsive trial BIPAP therapy however might consider supplemental O2 therapy night mind follow sleep study associated endtidal CO2 monitoring well . 4 . meantime patient advised contact sleep disorders clinic acute sleep related concerns interim . 5 . patient may also benefit nonpharmacologic approaches regards sleep reinitiation hypnotherapy hold strategies pending follow sleep disorders clinic evaluation approximately four months time . thank allowing share medical care Mr. Harlan Valdez . hope letter finds well . Sincerely Yovani Vergara M.D . Sleep Clinic Doctors Hospital North cc Sleep Clinic DHN DD DT TX Record date CCU JAR Transfer NoteAdmission Date Transfer Patient Name Valdez Harlan MRN Cardiologist Dr. NutterPCP Vicente BarkerCC Chest Pain Cath VF arrest RCA stentingHistory Present Illness obtained admission Pt 48 yo male h/o DMII Bipolar d/o depression began substernal day prior admission car presyncope profound weakness . CP minimal weakness made pull . repeat symptoms day admission . EKG c/w 2/ showed flattened Twave V2 TWI V3 flattened Twaves aVL . trop negative MB index elevated . Due Twave flattening history elevated index decided start heparin ASA take cath lab . Cath showed right dominant system prox Cx 40 % LAD clear RCA prox % lesion ostial PDA 90 % . final dye injection pt VF arrest 2 shocks . Pt regained puls AF new RVR . Pt started amio . Pt began experience discomfort RVR decided intervene . POBA done ostial PDA . first noeluting stent placed prox RCA pt dissection thus 2cd stent placed . admission CCU pt still AF RVR s . amio drip BB loaded plavix ASA lipitor integrilin placed Avandia study . complaint mild chest pain anginal pain day thought defibrillation.Past Medical History DMII hyperchol bipolar HTN depression s/p ECT Medications admission ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Meds Transfer please see green sheets Medications ASA Lipitor 20 lopressor 50 bid folate norvasc 5 qd lithium bid depakote bid sonata 10 mg qhs doxylamine 25 qhs mirtazapine 45 qd Allergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPIAllergies NKDAFamily History family h/o CADSocial History EtOH tob illicitsReview Systems per HPICCU course plan1 Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR . Esmolol drip well amio stopped BB escalated patient remained NSR.i.Ramp lopressor tolerated BPb.Pump patient remained euvolemic Echo EF 84 % aortic stenosisc.Ischemia stented x 2 prox RCA lesion integrilin x 24hrs prior . started plavix.i.Cont plavix lopressor lisinopril lipitor ASA2 Psych patient long history bipolar disorder depression . depakote lithium remeron outpt . seen psychiatry here.a.Continue depakote lithium subthereapeutic level psych thought likely due noncompliance.b.Continue remeron qhsc.F/u TSH3 DM Blood sugars originally elevated amio drip originally AF contained dextrose . remained NPH RISS.a.NPH RISS4 Prophy Fragmin nexiumLABS PE see today s progress noteEKG AFIB RVR diffuse twave 48 yo male h/o DMII Bipolar d/o depression CAD p/w CP presycope found RCA prox % lesion ostial PDA 90 % stents RCA POBA PDA . Cath c/b VF arrest dye load resultant afib RVR.Plan outlined CCU course.Victor Shepard MD Record date NEUROLOGY CMF ADMISSION NOTEName VALDEZ HarlanDOB119MR Date 930pmFOR DETAILS PLEASE SEE AALIYAH IRAHETA S NOTEID/CC 49yoRHM PMH signif CAD Afib DM Bipolar presents RLE weakness/decreased sensation x 24hrsHPI 49yoRHM PMH signif CAD Afib DM Bipolar USOH 10d ago weakness L arm . Thought 2/2 sleeping . Involved whole arm shoulder . still ADL s felt like arm slightly weaker took 45 days get back normal . eating dinner friend got go BR found RLE weakness/stiffness . Noticed hard climb stairs . Went sleep woke 2am nearly fell 2/2 weakness leg . addition sensory loss RLE esp foot felt cold dead . sensory loss worst distally extended thigh . Never trouble speaking understanding others facial droop problems RUE . Stayed bed called PCP told come ED . note OctNov cardiac cath x 2 . NSTEMI Oct 8 taken cardiac cath cath showed significant RCA disease 40 % prox LCx c/b Vfib needing defib stent placed stenotic RCA lesion c/b dissection needing 2nd stent placed . Post cath Afib requiring amiodarone . subsequent cath Nov 1 episode CP stents patent vessels stable . inpt cardiac hospitalization admitted psych 1 wk suicidality/depression . Previously spring L eye visual disturbance nearly blind went GHIC dxed retinal venous occlusion treated cortisone laser surgery mild improvement vision blurred eye .No HA tinnitus vertigo . blurry vision except L eye venous occlusion diplopia . problems speech.PMH DM insulin x years CAD s/p NSTEMI aboveBipolar disorder lithium depakote required inpt hospitalization ECT pastAfib aboveHypercholMEDS Insulin 70/30 22 units bidAsa 81Plavix 75Lithium dose bid prior note Norvasc dose 5 prior note Mirtazapine dose 45 prior note Naltrexone Nexium 20 prior note Lipitor 40 prior note Cozaar doseDepakote dose bid prior note Lopressor 75 po q8 per prior notes Folate Lisinopril coughSH Tob occas cigarEtOH 0 used EtOH problems IVDA 0Lives 17yo son . 13yo daughter lives sister . Widower x 3yrs . FH Mom PM age 50 died MI 71 . Father EtOH HTN . Sister 4 miscarriages . VS / % RAGeneral WNWD NADHEENT NC/AT . scleral icterus . MMM . OP benign . Neck Supple carotid bruits . CV RRR S1 S2 . IIIII/VI sys murmur best RUSB rad clav neck. second murmur axilla systolic well vs. Galiverdin s sign.Resp CTAB . r/rAbd BS . Soft/NT/ND.Ext C/C/E DP 2 bilat . Skin rashes intact . Neuro MS Conversationally intact.CN II III Pupils 5mm round reactive light 3mm visual field full confrontation optic discs sharpIII IV VIextraocular movements full nystagmus L eyelid slightly weaker longstanding since L eye problemVsensation LT pinprick intact expression muscles symmetric without weaknessIX Xpalate elevates symmetricallyXISCMs 5/5XIItongue protrudes midlineMotor normal bulk tone tremor . pronator drift . 55Sensory endorses difference sensation LT temp RLE vs. LLE . Zone difference feels colder pronounced R lateral calf area particular dermatomal distribution also toe position sense mildly decreased R great toe . Mild vibratory loss ankles bilat symmetrical . Reflex 22Neg32Up Coord dysmetria fingernosefinger heelkneeshinGait favors L leg slightly walkingRomberg normal LABS/STUDIES Chemistry Lytes/Renal/Glucose Sodium mmol/L L Potassium 3.6 3.44.8 mmol/L 4.9 H Chloride 109H mmol/L H Carbon Dioxide 27.0 23 9 mmol/L 22.9 L BUN 11 mg/dl Creatinine 1.0 0.61.5 mg/dl Glucose 216H 70 mg/dl H General Chemistries Calcium 9.4 8 5 mg/dl 8.3 L Phosphorus 3.2 2.64.5 mg/dl 2.4 L Magnesium 1.6 1.42.0 meq/L Lipid Tests Cholesterol mg/dl H Triglycerides 547H 40 mg/dl H HDL Cholesterol 25L 35 mg/dl 25 L LDL Cholesterol mg/dl Chol/HDL Ratio 6.9 Chemistry Miscellaneous Calc Mean Bld mg % Chemistry Com see detail Hemoglobin A1C 10.20H 3 40 % 10.20 H 1034Hematology Complete Blood Count WBC 6.1 4 0 th/cmm 11.1 H RBC 4.51 4 90 mil/cm 4.37 L Hgb 13.7 13 5 gm/dl 13.1 L HCT 39.4L 41 0 % 39.4 L MCV 87 80 fl MCH 30.4 26 0 pg/rbc MCHC 34.9 31 0 g/dl PLT th/cumm RDW 13.5 11 5 % Hematology ESR 14 mm/hr 19 Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0 50 mmol/L < 0.10 L 2040MRI brain Acute/subacute infarcts DWI bright/ADC dark/FLAIR bright L cerebellum punctate R precentral gyrus small elliptical area . CTA head/neck aberrant origin R vert CCA ACA s come L carotid also bilateral fetal PCA s likely congenitally small vertebrobasilar vessels . significant focal stenoses atheromatous calcifications . EKG pendingMRI L/S spine negativeIMPRESSION49yoRHM PMH signif CAD DM Bipolar disorder afib s/p recent cardiac cath presents LUE weakness 10d prior admission resolving 45 days RLE weakness /sensory deficit Imaging reveals R precentral gyrus small infarct L cerebellar infarct significant vessel stenoses . Neuro exam brisker reflexes R equivocal RLE weakness R sensory sx R upgoing toe . Clinical picture imaging consistent . Unclear pt poor historian current sx nonobjective sensory findings meaningful event LUE weakness 45d ago.NEURO stroke u including TTE/Holter lipids/lipoprotein/homocysteine . also send hypercoag u including hypercoag panel PT20210 Factor V Leiden APLA lupus anticoagulant given hx retinal venous thrombus young age.Will also send BCx2 given recent cardiac cath although ESR wnl reassuring endocarditis.Will check A1c adequate DM control.Given psych hx check tox screen LFTs.Unclear afib context postcath post Vfib . look LAE holter abnl . Could make case anticoagulate regardless documented afib . CV hold antihtn meds . Continue lipitor outpt dose . Allow SBP .PSYCH continue depakote lithium . Mood ok need monitored.FEN IVF ada low chol/low fat diet . ENDO NPH 20 bid titrate needed RISS . Checking A1c.PPX put sc fragmin nexium . Pneumoboots . Anna V. WendyBird MDHPC Neuro Resident Case discussed Vern Snow senior resident ."

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
scores

[{'rouge-1': {'r': 0.2682274247491639, 'p': 1.0, 'f': 0.42299577725538673},
  'rouge-2': {'r': 0.20358306188925082,
   'p': 0.9615384615384616,
   'f': 0.33602150249234025},
  'rouge-l': {'r': 0.2682274247491639, 'p': 1.0, 'f': 0.42299577725538673}}]

In [58]:
data_df['filtered_text'][1]

'Record date 0512MERCY CARE CENTERMercy Internal Medicine Associates8 Newburgh StreetTerrell AR Russell DonnaMs . Russell comes evaluation . Rhode Island six months . complaint heaviness chest . palpitations . note associated exertion . diaphoresis associated . shortness breath . heaviness chest last six weeks . quite concerned.She history hypothyroidism hypertension . also hyperlipidemia treated Lipitor . health maintenance order . hysterectomy . needs followup mammogram done . However today concerned chest heaviness.PHYSICAL EXAMINATION blood pressure /80 respiratory rate 20 pulse 68 . Neck supple . lymphadenopathy . Chest clear percussion auscultation . Cardiac exam regularly regular murmurs gallops rubs . Abdomen soft nontender . hepatosplenomegaly . Extremities clear cyanosis clubbing edema.LABORATORY DATA sodium electrolytes otherwise within normal limits . TSH 2.0 . LFTs pending time . sent electrocardiogram showed normal sinus rhythm nonspecific STT wave changes . also sent exe

In [59]:
data_df['count_vec_Summary'][1]

'Record date 0512MERCY CARE CENTERMercy Internal Medicine Associates8 Newburgh StreetTerrell AR Russell DonnaMs. Russell comes evaluation. RE Russell DonnaMRN Page 2We also need schedule followup mammogram. Rutledge M. docDD DT DV Record date 0623MERCY CARE CENTERMercy Internal Medicine Associates18 Newburgh StreetTerrell AR Russell DonnaHISTORY PRESENT ILLNESS Ms. Russell returning follow. frequency however decreased feeling better better begins lose weight. think weight loss going critical. also less fatigued begins lose weight. Weight gone pounds. Weight. began lose weight feeling much better result it. light try control blood pressure get lose weight. Rutledge M. become apparent since lost much weight. PHYSICAL EXAMINATION weight pounds blood pressure /64 respiratory rate 20 pulse 64. suspect going lose weight. Rutledge M'

In [60]:
from rouge import Rouge

hypothesis = "Record date 0512MERCY CARE CENTERMercy Internal Medicine Associates8 Newburgh StreetTerrell AR Russell DonnaMs. Russell comes evaluation. RE Russell DonnaMRN Page 2We also need schedule followup mammogram. Rutledge M. docDD DT DV Record date 0623MERCY CARE CENTERMercy Internal Medicine Associates18 Newburgh StreetTerrell AR Russell DonnaHISTORY PRESENT ILLNESS Ms. Russell returning follow. frequency however decreased feeling better better begins lose weight. think weight loss going critical. also less fatigued begins lose weight. Weight gone pounds. Weight. began lose weight feeling much better result it. light try control blood pressure get lose weight. Rutledge M. become apparent since lost much weight. PHYSICAL EXAMINATION weight pounds blood pressure /64 respiratory rate 20 pulse 64. suspect going lose weight. Rutledge M"
reference = "Record date 0512MERCY CARE CENTERMercy Internal Medicine Associates8 Newburgh StreetTerrell AR Russell DonnaMs . Russell comes evaluation . Rhode Island six months . complaint heaviness chest . palpitations . note associated exertion . diaphoresis associated . shortness breath . heaviness chest last six weeks . quite concerned.She history hypothyroidism hypertension . also hyperlipidemia treated Lipitor . health maintenance order . hysterectomy . needs followup mammogram done . However today concerned chest heaviness.PHYSICAL EXAMINATION blood pressure /80 respiratory rate 20 pulse 68 . Neck supple . lymphadenopathy . Chest clear percussion auscultation . Cardiac exam regularly regular murmurs gallops rubs . Abdomen soft nontender . hepatosplenomegaly . Extremities clear cyanosis clubbing edema.LABORATORY DATA sodium electrolytes otherwise within normal limits . TSH 2.0 . LFTs pending time . sent electrocardiogram showed normal sinus rhythm nonspecific STT wave changes . also sent exercise tolerance test preliminary reports showing went maximum heart rate protocol 6 minutes 50 seconds . evidence ischemia.ASSESSMENT PLAN1.Chest heaviness . sure causing . may primary pulmonary problem . following us one week . heartening see abnormal exercise tolerance test.2.Hypothyroidism well controlled present time . Continue dose Synthroid.3.Hypertension well controlled presently.4.Hyperlipidemia . recheck next visit.RE Russell DonnaMRN Page 2We also need schedule followup mammogram . Rutledge M.D.QR/QR5.docDD DT DV Record date 0623MERCY CARE CENTERMercy Internal Medicine Associates18 Newburgh StreetTerrell AR Russell DonnaHISTORY PRESENT ILLNESS Ms. Russell returning follow . couple episodes peculiar sensation arm feels like goes dead gets pressure chest . gets slight shortness breath . lasts 30 seconds . gets many three four day . frequency however decreased feeling better better begins lose weight . think weight loss going critical . also less fatigued begins lose weight.Her hypertension better control . Hydrochlorothiazide seems helping quite bit addition atenolol.PHYSICAL EXAMINATION Blood pressure /86 . Weight gone pounds . Respiratory rate 20 . Pulse 64 . Neck supple . lymphadenopathy . Chest clear percussion auscultation . Cardiac exam regularly regular murmurs gallops rubs . Abdomen soft nontender . hepatosplenomegaly . Extremities clear cyanosis clubbing edema.LABORATORY DATA Electrolytes within normal limits.ASSESSMENT PLAN1.Hypertension . Relatively well controlled hydrochlorothiazide plus atenolol.2.Weight . began lose weight feeling much better result it.3.Peculiar chest pressure . think coronary artery disease given exercise test . light try control blood pressure get lose weight . Hopefully help resolution symptoms . Rutledge M.D.QR/RM5.docDD DT DV Record date 0811MERCY CARE CENTERMERCY Internal Medicine Associates07 Newburgh StreetTerrell AR Russell DonnaHISTORY PRESENT ILLNESS Ms. Russell returns evaluation . well . exercising lost pounds . feeling well.Notably Dr. Lu sent followup ETT . find first ETT acceptable . second ETT grossly positive . result think reasonable us addition atenolol stop hydrochlorothiazide put ramipril nitrate . every two weeks feeling slight twinge pain went steps.She hyperlipidemia . put Lipitor provided control . However HCL still 36 LDL excellent ratio . Nonetheless CK within normal limits.She also right shoulder lipoma . become apparent since lost much weight . wanted see someone follow recommended Dr. Dawson.PHYSICAL EXAMINATION weight pounds blood pressure /64 respiratory rate 20 pulse 64 . Neck supple . lymphadenopathy . Chest clear percussion auscultation . Cardiac exam regularly regular murmurs gallops rubs . Abdomen soft nontender . hepatosplenomegaly . Extremities clear cyanosis clubbing edema.LABORATORY DATA None today.Russell DonnaPage 2ASSESSMENT PLAN1.CAD . start ramipril long acting nitrate . also gave nitroglycerine . stop hydrochlorothiazide continue atenolol . also one aspirin day along Premarin Synthroid.2.Weight loss . continue exercise . suspect going lose weight . excellent.3.Coronary artery disease . Dr. Lu deciding cardiac catheterization . opinion must concur appropriate next step Ms. Russell . going delay usual trip Rhode Island get final assessment done variety medical issues . Rutledge M.D.QR/RM5.docDD DT DV"

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
scores

[{'rouge-1': {'r': 0.24213836477987422, 'p': 1.0, 'f': 0.38987341458279123},
  'rouge-2': {'r': 0.1973392461197339,
   'p': 0.898989898989899,
   'f': 0.3236363606843637},
  'rouge-l': {'r': 0.24213836477987422, 'p': 1.0, 'f': 0.38987341458279123}}]

In [61]:
from gensim.models import Word2Vec
# Generating word embeddings from word2vec model
def generate_word_embedding(text):
    # Tokenizing the text into words
    tokenized_text = [word for word in text.split()]
    # Initializing and training Word2Vec model
    model = Word2Vec([tokenized_text], min_count=1, vector_size= 100)  
    # Generating word embeddings for the current text using the trained model
    word_embeddings = model.wv[tokenized_text] 
    return word_embeddings

In [62]:
word_embeddings_dict = data_df['filtered_text'].apply(generate_word_embedding).to_dict()


In [63]:
print(word_embeddings_dict)


{0: array([[-0.00256914,  0.00227673,  0.00795104, ..., -0.00645951,
         0.00628415, -0.00438161],
       [ 0.00388616,  0.0028525 , -0.00032404, ..., -0.00534174,
         0.00628848, -0.00712628],
       [-0.0077683 ,  0.00380003,  0.00227723, ...,  0.00592293,
        -0.00688393,  0.0004738 ],
       ...,
       [-0.0087788 , -0.00665238, -0.00712504, ...,  0.00440093,
         0.00697353, -0.00369924],
       [-0.00861589,  0.00342116, -0.00367859, ...,  0.00131482,
        -0.00454354,  0.00841003],
       [-0.00224361,  0.00299971,  0.00558911, ..., -0.01026712,
         0.0020143 ,  0.00648749]], dtype=float32), 1: array([[-0.00341896, -0.00683969,  0.00658085, ...,  0.00360064,
         0.0081674 , -0.00257248],
       [ 0.00191944,  0.00829272, -0.00818763, ..., -0.00096788,
        -0.00979226,  0.00584511],
       [-0.00296075,  0.00201387, -0.00718575, ..., -0.00835781,
         0.00882185,  0.00671415],
       ...,
       [ 0.00594861, -0.00557804,  0.00792287, ..., 

In [77]:
embedding_size = len(next(iter(word_embeddings_dict.values())))
print("Size of word embeddings:", embedding_size)


Size of word embeddings: 2872


In [78]:
import numpy as np

def calculate_sentence_vectors(sentence, word_embeddings_dict):
    # Tokenizing the sentence into words
    tokenized_sentence = sentence.split()
    
    # Filtering out words that don't have word embeddings
    valid_words = [word for word in tokenized_sentence if word in word_embeddings_dict]
    
    # If there are no valid words, return a vector of zeros
    if len(valid_words) == 0:
        return np.zeros(2872) 
    
    # Calculate the sentence vector as the mean of word embeddings
    sentence_vector = np.mean([word_embeddings_dict[word] for word in valid_words], axis=0)
    
    return sentence_vector


In [79]:
import networkx as nx

# Generating summary for each row
def generate_summary(row, word_embeddings_dict):
    sentences = sent_tokenize(row['filtered_text'])
    sentence_vectors = [calculate_sentence_vectors(sentence, word_embeddings_dict) for sentence in sentences]

#   Similarity matrix
    sim_mat = np.zeros([len(sentences), len(sentences)])

    from sklearn.metrics.pairwise import cosine_similarity

    # Calculating similarity scores
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1, -1), sentence_vectors[j].reshape(1, -1))[0, 0]

    # Creating a graph from the similarity matrix
    nx_graph = nx.from_numpy_array(sim_mat)

    # Calculate scores using PageRank
    scores = nx.pagerank(nx_graph)

    # Rank sentences based on scores
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # Generate summary
    summary_length = 5  # Number of sentences in summary
    summary_sentences = []
    for score, sentence in ranked_sentences:
        # Check if sentence length is greater than 3 words
        if len(sentence.split()) > 3:
            summary_sentences.append(sentence)
        # Break the loop if desired summary length is reached
        if len(summary_sentences) == summary_length:
            break

    return ' '.join(summary_sentences)

# Generate summaries for each row
data_df['word_embd_summary'] = data_df.apply(lambda row: generate_summary(row, word_embeddings_dict), axis=1)

# Display DataFrame with summaries
print(data_df[['filtered_text', 'word_embd_summary']])


                                         filtered_text  \
0    Record date 0212Campbell Orthopedic Associates...   
1    Record date 0512MERCY CARE CENTERMercy Interna...   
2    Record date 0919Personal Data Overall HealthPa...   
3    Record date HUNTINGTON EMERGENCY DEPT VISIT TH...   
4    Record date 0216JENNIFER BOOKERLC Unit 5714NAS...   
..                                                 ...   
197  Record date 0926CC Ear pain coughHPI 49 y.o . ...   
198  Record date 0210GI Fellow Consult Note Pt Fran...   
199  Record date SLHC EMERGENCY DEPT VISIT TANYA V ...   
200  Record date 0619Physical exam MalePatient 45 y...   
201  Record date 1215Mr . Bryan comes tonight compl...   

                                     word_embd_summary  
0    xrays show notablefracture pattern today await...  
1    wanted see someone follow recommended Dr. Daws...  
2    unusual dyspnea exertion though stable dyspnea...  
3    treat occluded stent . took many Tums relief s...  
4    treat Cipro m

In [80]:
data_df['word_embd_summary'][0]

'xrays show notablefracture pattern today await Radiology opinion . would logically relationship current sleep exacerbations recent death wife pancreatic cancer last year may also evidence nocturnal sleep disturbances repeat polysomnographic evaluation i.e . would like use tube sock air cast . would like stay air splint sock . willsee back six weeks review Boxborough office .'

In [81]:
from rouge import Rouge

hypothesis = "xrays show notablefracture pattern today await Radiology opinion . would logically relationship current sleep exacerbations recent death wife pancreatic cancer last year may also evidence nocturnal sleep disturbances repeat polysomnographic evaluation i.e . would like use tube sock air cast . would like stay air splint sock . willsee back six weeks review Boxborough office ."
reference = "Record date : Orthopedic CircleOmak , GA Habib Valenzuela , M.D . Valdez , Harlan Jr. , Har 43year old 6 ' 214pound gentleman referred Dr. Harlan Oneil . week ago slipped onthe driveway home sustained injury left ankle . seen TriCity Hospital told afracture . placed air splint advised bepartial weight bearing , using cane . forroutine followup . Past medical history notable ankle injuries previously . history diabetes sleep apnea . takes Prozac , Cardizem , Glucophage Amaryl . also followed Dr. arrhythmia . smoke . . set designer Columbia Pictures . examination today slight tenderness left ankleabout four malleolus . malleolus isnontender medially laterally ligamentous . Dorsal flexion plantar flexion without pain . significant swelling . skin changeswith small abrasions proximally . proximally . anterior pain noted . hindfoot , midfoot forefoot tenderness noted . would like use tube sock air cast . isusing cane ambulation . xrays show pattern today , await Radiology opinion . would like stay air splint sock . willsee back six weeks review Boxborough office . Diagnosis : Left ankle fracture . Valenzuela , M.D . HV/ cc : Harlan Oneil , M.D . Harold Nutter , M.D . Doctors Hospital North 64Bruce St Omak , GA Habib Valenzuela , M.D . DD : DT : DV : reviewed Attending Physician Record date : EMERGENCY DEPT VISIT VALDEZ , HARLAN , JR . VISIT DATE : patient seen examined emergency department . patient seen Emergency Medicine resident . discussed management resident . also seen patient primarily reviewed medical record . brief addendum medical record.HISTORY PRESENTING COMPLAINT : Briefly , 45yearold male complains several days nausea , vomiting , left lower quadrant discomfort . also describes chest pain , number months without significant change . sent primary care doctor today pain also noted EKG changes . patient chest pain time evaluation emergency department shortness breath.REVIEW SYSTEMS : indicated otherwise negative.PAST MEDICAL HISTORY : indicated chart.SOCIAL HISTORY FAMILY HISTORY : indicated chart.PHYSICAL EXAMINATION : physical examination , patient , smiling , pleasant gentleman acute distress . blood pressure 119/90 , pulse 82 , temperature 97.9degrees . atraumatic . chest clear . heart regular rate rhythm . abdomen soft . left lower quadrant tenderness . also , note examination , soft murmur says since childhood . extremities normal . neurologic examination nonfocal . THERAPY RENDERED/COURSE ED : gentleman abdominal pain receive CAT scan rule . also nonspecific ST changes EKG . painfree time . describe classic exertional pattern chest pain , given diabetic EKG changes , also admitted rule MI . CT pending time dictation.DISPOSITION ( including condition upon discharge ) : . patient 's condition currently stable . CK498/ JAY CARROLL , M.D . JC72D : : Dictated : JAY CARROLL , M.D . JC72Not reviewed Attending Physician Record date : , Vicente Blair , M.D . Internal Medical Doctors Hospital North Omak , Georgia : VALDEZ , Harlan DHN # : DATE BIRTH : 11/09/ CURRENT CLINIC VISIT DATE : 09/14/ Dear Vicente , Thank advance allowing share medical care Mr. Harlan B. Valdez , 46yearold male patient prior evidence sleep disordered breathing , well history difficulty sleep , maintenance increased early morning awakenings , well mixed systemic medical conditions . HISTORY PRESENT ILLNESS : already know , Mr. Valdez history sleep maintenance , well increased early morning awakenings , noted sleep , occurring temporal association loss wife pancreatic cancer last year . placed unfortunate situation single parent 15yearold son 10yearold daughter describes current employment duties set designer . particular , Mr. Valdez describes undergoing frequent travelling bee markedly curtailed tending family situation closer home . described history snoring unaware specific nocturnal respiratory pauses . unaware `` restless '' lower limb sensory complaints may impact ability initiate reinitiate sleep . denies history `` night owl '' personality circadian rhythm dysfunction may played role respect nocturnal sleep disruptions sleep . denies history paroxysmal abnormal associated narcoleptic symptoms . Mr. Valdez underwent initial formal evaluation center sleep diagnostics Holy Cross , time noted demonstrate respiratory disturbance index 81/hour , exacerbated supine position hypopneas , equal nonREM stage REM sleep associated Nadir 88 % respiratory obstructive mixed hypopneas . addition , loud snoring noted . evidence sleep efficiency 88 % short sleep onset latency 4minutes . `` light '' nonREM stages III sleep , concomitant inability achieve significant `` slowwave '' stage REM sleep . also `` alpha intrusions alpha delta sleep '' evident initial sleep study . addition premature ventricular noted . patient underwent CPAP titration , also Tenacre Foundation Nursing Home Boxborough , time marked reduction frequency hypopneas ( respiratory disturbance index equals 2/hour ) CPAP titrations 46cm . Sleep efficiency improved 91 % , short sleep onset latency also noted ( 3minutes ) . increased `` light '' nonREM stage III sleep , concomitant inability achieve sustained `` slow wave sleep '' . Since initial trial nocturnal CPAP titration ( 6cm water pressure ) various CPAP mask ( including CPAP nasal face mask `` Breeze '' supportive head gear `` nasal pillows '' . patient describes associated , relative sustained nocturnal home CPAP use , regards CPAP bulkiness CPAP machine general . result , utilized nocturnal CPAP therapy period time , although still maintains CPAP equipment house . particular note , past year , patient increased early morning awakenings ( averaging 24in number ) typical awakenings occurring two hours sleep initiation p.m. ( patient describes one awakening p.m. second awakening a.m. , unclear causative etiology ) . patient might awaken 3a.m . `` ready day '' . able reinitiate sleep thereafter , patient may demonstrate additional two early morning awakenings final awakening 6a.m . patient noted history mixed systemic conditions including diabetes , coronary artery disease , depressive disorder , well relatively stable condition , upper GI evidence . MEDICATIONS : 1 . Provigil 200mg p.o . q. a.m. PRN . 2 . Lithium . 3 . Valproate . 4 . Glucophage 850mg t.i.d . 5 . Humulin 15units night . 6 . Folate . 7 . Metoprolol . 8 . Cardia . 9 . Vitamin E. 10 . Coated aspirin . ALLERGIES/ADVERSE REACTIONS : patient describes enhancement suicidal tendencies association prior Prozac usage . SOCIAL HISTORY : patient denies active tobacco alcoholic beverage usage . lost 1520pounds past several years . current weight 195pounds . desirous losing additional weight regards regular exercise , hectic social situation makes somewhat difficult present time . examination , patient blood pressure 146/88 , ( seated , left arm ) , respiratory rate 16 . HEENT EXAMINATION : Borderline small posterior aperture , slightly increased redundant tissue evident posteriorly slightly elongated uvula noted . patient appears awake , alert , speech clear fluent receptive language function essentially intact . presently wearing dental braces . obvious cranial nerve deficits appreciated . focal , sensory , motor neurologic deficits noted . significant dystaxias dysmetrias currently evidence . routine gait appears normal based , without evidence significant gait dystaxias . current clinical ictal present . acute evidence `` microsleeps '' noted . IMPRESSION : 1 . Sleep stage/arrousal dysfunction ( 780.56 ) : Manifested subjective complaints sleep , increased daytime fatigue alternating hypersomnia , recurrent evidence `` lightened '' sleep pattern , increased nonREM stages 12sleep , presence `` alpha '' intrusions `` alpha delta '' component deeper sleep . latter EEG findings described association subjective complaints sleep , well clinical setting chronic pain related complaints , depressive anxiety disorder agents used ( usually associated barbituate usage ) . 2 . Sleep disordered breathing : evidenced prior evaluations , mostly obstructive mixed hypopnea . patient appears largely refractory trial CPAP therapy , far associated symptoms association 's usage , despite relatively modest CPAP water pressures ( 6cm ) . addition , tried various nasal CPAP face mask , including `` Breeze '' supportive head gear `` nasal pillows '' limited success . One might consider repeating evaluation future , , utilizing potential trial BIPAP titration , may help improve symptoms , patient still left issues referable `` tangled tubing night '' issues referable nasal face mask usage , noted . 3 . Relative sleep maintenance : patient describes least 24early morning awakenings difficulty sleep maintenance , thereby compounding current sleep problem . would logically current sleep recent death wife pancreatic cancer last year , may also evidence nocturnal sleep repeat evaluation ; i.e . particular looking presence increased spontaneous arousals limb associated arousals periodic limb movements sleep may special clinical benefit . PLAN : 1 . short course , far patient describes exceedingly tired , unable perform routine daily tasks work managing family absence deceased wife , suggested initiation PRN Zolpidem tartrate therapy , 5mg tablets , utilizing one two tablets p.o . q. h.s . PRN sleep maintenance . 2 . patient advised take Zolpidem tartrate therapy 23times per week , effort avoid issues physiologic dependency . 3 . patient advised potential adverse behavioral systemic side effects Zolpidem tartrate therapy including , gastric upset , loose stools , diarrhea , cardiac . Pending clinical response Zolpidem tartrate therapy , might seek direct treatment sleep disordered breathing issues may include repeat sleep study potential trial BIPAP therapy ( effort modify attenuate symptoms ) . proves poorly responsive trial BIPAP therapy however , might consider O2therapy night , mind follow sleep study associated endtidal well . 4 . meantime , patient advised contact sleep disorders clinic acute sleep related concerns interim . 5 . patient may also benefit approaches regards sleep , hold strategies pending follow sleep disorders clinic evaluation ( four months time ) . , thank allowing share medical care Mr. Harlan Valdez . hope letter finds well . Sincerely , Yovani Vergara , M.D . Sleep Clinic Doctors Hospital North cc : Sleep Clinic DHN DD:09/14/ DT:09/15/ TX : : Record date : 1010CCU JAR Transfer Date : 10/8/11Transfer : Patient Name : Valdez , Harlan MRN # : : Dr. NutterPCP : Vicente BarkerCC : Chest Pain Cath VF arrest RCA Present Illness ( obtained admission ) : Pt 48yo male h/o DMII , , Bipolar d/o , depression began substernal day prior admission car presyncope + profound weakness . CP minimal , weakness made pull . repeat symptoms day admission . EKG c/w 2/ showed flattened Twave V2and TWI V3and flattened Twaves , aVL . trop negative , MB index elevated . Due Twave flattening , history elevated index decided start heparin ASA take cath lab . Cath showed right dominant system prox Cx 40 % , LAD clear , RCA prox % lesion ostial PDA 90 % . final dye injection , pt VF arrest 2shocks . Pt regained puls AF ( new ) RVR . Pt started amio . Pt began experience discomfort RVR decided intervene . POBA done ostial PDA . first noeluting stent placed prox RCA pt dissection thus 2cd stent placed . admission CCU , pt still AF RVR ( 120 's ) . amio drip , BB , loaded plavix , ASA , lipitor , integrilin placed Avandia study . complaint mild chest pain ( anginal pain day ) thought .Past Medical History : DMII , hyperchol , bipolar , HTN , depression ( s/p ECT ) Medications admission : ASA , Lipitor 20 , lopressor 50bid , folate , norvasc 5qd ; lithium , 300bid ; depakote 500bid ; sonata 10mg qhs , doxylamine 25qhs , mirtazapine 45qd Meds Transfer : please see green sheets Medications : ASA , Lipitor 20 , lopressor 50bid , folate , norvasc 5qd ; lithium , 300bid ; depakote 500bid ; sonata 10mg qhs , doxylamine 25qhs , mirtazapine 45qd Allergies : NKDAFamily History : family h/o CADSocial History : EtOH , tob , Systems : per : NKDAFamily History : family h/o CADSocial History : EtOH , tob , Systems : per HPICCU course + plan:1 ) Cardsa.Rhythm night admission patient started esmolol drip well amio bloused rhythm converted NSR . Esmolol drip well amio stopped BB escalated patient remained NSR.i.Ramp lopressor tolerated BPb.Pump patient remained euvolemic Echo EF 84 % aortic stenosisc.Ischemia stented x 2to prox RCA lesion integrilin x 24hrs prior . started plavix.i.Cont plavix , lopressor , lisinopril , lipitor , ASA2 ) Psych patient long history bipolar disorder + depression . depakote , lithium remeron outpt . seen psychiatry here.a.Continue depakote lithium ( level psych thought likely due .b.Continue remeron qhsc.F/u TSH3 ) DM Blood sugars originally elevated amio drip originally AF contained dextrose . remained NPH RISS.a.NPH , RISS4 ) Prophy Fragmin nexiumLABS + PE see today 's progress noteEKG AFIB RVR , diffuse twave : 48yo male h/o DMII , , Bipolar d/o , depression CAD p/w CP presycope found RCA prox % lesion ostial PDA 90 % ( stents RCA POBA PDA ) . Cath c/b VF arrest dye load resultant afib RVR.Plan : outlined CCU course.Victor Shepard MD Record date : CMF ADMISSION NOTEName : VALDEZ , HarlanDOB:119MR # : Date : , 9:30pmFOR DETAILS , PLEASE SEE AALIYAH IRAHETA 'S NOTEID/CC : 49yoRHM w/ PMH signif CAD , Afib , DM , Bipolar , presents w/ RLE weakness/decreased sensation x 24hrsHPI : 49yoRHM w/ PMH signif CAD , Afib , DM , Bipolar , USOH 10d ago , weakness L arm . Thought 2/2sleeping . Involved whole arm shoulder . still ADL 's felt like arm slightly weaker , took 45days get back normal . , , eating dinner w/ friend , got go BR , found RLE weakness/stiffness . Noticed hard climb stairs . Went sleep woke 2am nearly fell 2/2weakness leg . addition , sensory loss RLE , esp foot felt cold dead . sensory loss worst distally , extended thigh . Never trouble speaking others , facial droop , problems w/ RUE . Stayed bed , called PCP , told come ED . note , OctNov , cardiac cath x 2 . NSTEMI Oct 8 , taken cardiac cath cath showed significant RCA disease 40 % prox LCx c/b Vfib needing defib stent placed stenotic RCA lesion , c/b dissection needing 2nd stent placed . Post cath Afib requiring amiodarone . subsequent cath Nov 1after episode CP , stents patent , vessels stable . inpt cardiac , admitted psych 1wk suicidality/depression . Previously , spring , L eye visual disturbance nearly blind went GHIC dx'ed w/ retinal venous occlusion , treated w/ cortisone laser surgery , w/ mild improvement vision ( blurred eye ) .No HA , tinnitus , vertigo . blurry vision except L eye venous occlusion , diplopia . problems w/ speech.PMH : DM insulin x years CAD s/p NSTEMI disorder lithium depakote , required inpt ECT pastAfib : Insulin 70/ units bidAsa 81Plavix 75Lithium ? dose ( 600bid prior note ) Norvasc ? dose ( 5on prior note ) Mirtazapine ? dose ( 45on prior note ) Naltrexone Nexium ( 20on prior note ) Lipitor ( 40on prior note ) Cozaar ? ? dose ( 500bid prior note ) ( Lopressor 75po q8per prior notes ) Folate : Lisinopril coughSH : Tob : occas cigarEtOH : 0 ( used EtOH problems ) IVDA : 0Lives w/ 17yo son . 13yo daughter lives w/ sister . Widower x 3yrs . FH : Mom w/ PM age 50 , died MI 71 . Father w/ EtOH , HTN . Sister w/ . VS : 129/ % RAGeneral : WNWD , NADHEENT : NC/AT . scleral icterus . MMM . OP benign . Neck : Supple , carotid bruits . CV : RRR S1 , S2 . IIIII/VI sys murmur best RUSB rad clav neck. ? second murmur axilla systolic well vs. Galiverdin 's sign.Resp : CTAB . r/w/rAbd : +BS . Soft/NT/ND.Ext : C/C/E , DP 2+ bilat . Skin : rashes , intact . Neuro : MS : intact.CN : II , III Pupils 5mm , round , reactive light 3mm ; visual field full ; optic discs sharpIII , IV , movements full , w/o nystagmus , L eyelid slightly weaker since L eye LT pinprick intact expression muscles symmetric without weaknessIX , Xpalate elevates 5/5XIItongue protrudes : normal bulk tone ; tremor . pronator drift . +55Sensory : endorses difference sensation LT temp RLE vs. LLE . Zone difference ( feels colder ) pronounced R lateral calf area , particular dermatomal , also w/ toe position sense mildly decreased R great toe . Mild vibratory loss ankles bilat symmetrical . Reflex : +22+Neg32Up ? Coord : dysmetria : favors L leg slightly : normal LABS/STUDIES : Chemistry Lytes/Renal/Glucose Sodium mmol/L 133 ( L ) Potassium 3.63.44.8mmol/L 4.9 ( H ) Chloride 109H 100108mmol/L 109 ( H ) Carbon Dioxide 27.023.031.9mmol/L 22.9 ( L ) BUN mg/dl Creatinine 1.00.61.5mg/dl Glucose 216H 70110mg/dl 216 ( H ) General Chemistries Calcium 9.48.510.5mg/dl 8.3 ( L ) Phosphorus 3.22.64.5mg/dl 2.4 ( L ) Magnesium 1.61.42.0meq/L Lipid Tests Cholesterol 173mg/dl 228 ( H ) 547H 40150mg/dl 547 ( H ) HDL Cholesterol 25L 35100mg/dl 25 ( L ) LDL Cholesterol mg/dl Chol/HDL Ratio 6.9Chemistry Calc Mean Bld ... 254mg % Chemistry Com ... see detail Hemoglobin A1C 10.20H 3.806.40 % 10.20 ( H ) 10 : Complete Blood Count WBC 6.14.511.0th/cmm 11.1 ( H ) RBC 4.514.505.90mil/cm 4.37 ( L ) Hgb 13.713.517.5gm/dl 13.1 ( L ) HCT 39.4L 41.053.0 % 39.4 ( L ) MCV fl MCH 30.426.034.0pg/rbc MCHC 34.931.037.0g/dl PLT th/cumm RDW 13.511.514.5 % Hematology ESR mm/hr 19 : Drugs Therapeutic Drug Monitoring Lithium < 0.10L 0.501.50mmol/L < 0.10 ( L ) 20:40MRI brain : Acute/subacute infarcts ( DWI bright/ADC dark/FLAIR bright ) L cerebellum ( punctate ) R precentral gyrus ( small elliptical area ) . CTA head/neck : aberrant origin R vert CCA , ACA 's come L carotid , also w/ bilateral fetal PCA 's likely small vessels . significant focal stenoses . EKG : pendingMRI L/S spine : :49yoRHM w/ PMH signif CAD , DM , Bipolar disorder , afib , s/p recent cardiac cath presents w/ LUE weakness 10d prior admission resolving 45days , RLE weakness ? /sensory deficit ? Imaging reveals R precentral gyrus small infarct , L cerebellar infarct , significant vessel stenoses . Neuro exam w/ brisker reflexes R , equivocal RLE weakness , R sensory sx , ? R upgoing toe . Clinical picture imaging consistent . Unclear pt poor historian current sx sensory findings meaningful event LUE weakness 45d ago.NEURO : stroke w/u including TTE/Holter , lipids/lipoprotein/ . also send hypercoag w/u ( including hypercoag panel , PT20210 , Factor V Leiden , APLA , lupus ) given hx retinal venous thrombus young age.Will also send BCx2given recent cardiac cath , although ESR wnl reassuring ? .Will check A1c ? adequate DM control.Given psych hx , check tox screen LFT's.Unclear afib context postcath , post Vfib . look LAE , holter abnl . Could make case regardless documented afib . CV : hold antihtn meds . Continue lipitor outpt dose . Allow SBP 180.PSYCH : continue w/ depakote lithium . Mood ok , need monitored.FEN : IVF , ada low chol/low fat diet . ENDO : NPH 20bid , titrate needed , RISS . Checking A1c.PPX : put sc fragmin , nexium . Pneumoboots . Anna V. WendyBird , MDHPC Neuro Resident # Case discussed w/ Vern Snow , senior resident ."

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
scores

[{'rouge-1': {'r': 0.027834351663272233,
   'p': 0.8913043478260869,
   'f': 0.05398288288865072},
  'rouge-2': {'r': 0.01309880239520958, 'p': 0.7, 'f': 0.025716384650392672},
  'rouge-l': {'r': 0.027834351663272233,
   'p': 0.8913043478260869,
   'f': 0.05398288288865072}}]

In [70]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AdamW
from tqdm import tqdm

# Load the T5 model and tokenizer
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Define custom dataset class
class ClinicalDataset(Dataset):
    def __init__(self, text, tokenizer, max_source_len = 2048, max_target_len = 300):
        self.text = text
        self.tokenizer = tokenizer
        self.max_source_length = max_source_len
        self.max_target_length = max_target_len

    def __len__(self):
        return len(self.text)

#     def __getitem__(self, index):
#         original_text = self.data.iloc[index]['clinical_text']
#         summarized_text = self.data.iloc[index]['summary']
        
    def __getitem__(self, index):
        original_text = self.text.iloc[index]['filtered_text']
        summarized_text = self.text.iloc[index]['count_vec_Summary']

        original_text_encoding = tokenizer(
            original_text,
            max_length = self.max_source_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        summarized_text_encoding = tokenizer(
            summarized_text,
            max_length=self.max_target_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "original_text": original_text,
            "original_input_ids": original_text_encoding.input_ids.flatten(),
            "source_attention_mask": original_text_encoding.attention_mask.flatten(),
            "summarized_text": summarized_text,
            "summarized_text_input_ids": summarized_text_encoding.input_ids.flatten(),
            "target_attention_mask": summarized_text_encoding.attention_mask.flatten()
        }

# Split dataset into train and test sets
train_data, test_data = train_test_split(data_df, test_size=0.2, random_state=42)

# Create dataset instances
train_dataset = ClinicalDataset(train_data, tokenizer)
test_dataset = ClinicalDataset(test_data, tokenizer)

# Define optimizer
optimizer = AdamW(model.parameters(), 0.001)

# Define data loaders
train_dataloader = DataLoader(train_dataset, 8, shuffle=True)
test_dataloader = DataLoader(test_dataset, 8)

# Define training parameters
epochs = 3

# # Training loop
# for epoch in range(epochs):
#     model.train()
#     total_loss = 0
#     for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
#         optimizer.zero_grad()
#         input_ids = batch["original_input_ids"]
#         attention_mask = batch["source_attention_mask"]
#         labels = batch["summarized_text_input_ids"]
#         outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
#         loss = outputs.loss
#         total_loss += loss.item()
#         loss.backward()
#         optimizer.step()

#     # Print average loss for the epoch
#     print(f"Average Loss for Epoch {epoch+1}: {total_loss / len(train_dataloader)}")

#     # Validation (optional)
#     # Evaluate the model on the validation set

# # Save trained model
# model.save_pretrained("clinical_text_summarization_model")


from sklearn.metrics import precision_recall_fscore_support

# Training loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    predictions = []
    true_labels = []
    
    for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
        optimizer.zero_grad()
        input_ids = batch["original_input_ids"]
        attention_mask = batch["source_attention_mask"]
        labels = batch["summarized_text_input_ids"]
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        # Store predictions and true labels for computing metrics
        predictions.extend(torch.argmax(outputs.logits, dim=-1).flatten().cpu().numpy())
        true_labels.extend(labels.flatten().cpu().numpy())
    
    # Compute precision, recall, and F1-score
    precision, recall, f1_score, _ = precision_recall_fscore_support(true_labels, predictions, average='macro')
    
    # Print average loss and metrics for the epoch
    print(f"Epoch {epoch+1}:")
    print(f"Average Loss: {total_loss / len(train_dataloader)}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-score: {f1_score}")

    # Validation (optional)
    # Evaluate the model on the validation set

# Save trained model
model.save_pretrained("C:/Users/almas/OneDrive/Desktop/Spring 2024/NLP/project/clinical_text_summarization_model")



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Epoch 1: 100%|██████████████████████████████████████████████████████████████████████| 21/21 [1:35:24<00:00, 272.59s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1:
Average Loss: 2.4748470215570357
Precision: 0.6628085291400424
Recall: 0.5796280683718465
F1-score: 0.59769655666794


Epoch 2: 100%|██████████████████████████████████████████████████████████████████████| 21/21 [1:31:45<00:00, 262.16s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2:
Average Loss: 1.0172609970683144
Precision: 0.8231156102345704
Recall: 0.7957190253070008
F1-score: 0.7964919382477526


Epoch 3: 100%|██████████████████████████████████████████████████████████████████████| 21/21 [1:32:37<00:00, 264.63s/it]
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 3:
Average Loss: 0.8209119382358733
Precision: 0.8449932233760508
Recall: 0.8236283257432091
F1-score: 0.8234534303152019
