In [None]:
import pandas as pd

# Input file locations and the diagnosis code used by the rest of the notebook.
# Every path names a gzip-compressed CSV (".csv.gz"); the "../data/zipped" paths
# are relative, so they resolve against the directory the kernel is started in.
# NOTE(review): the table names look like a MIMIC-III export — confirm the data
# source and version.
_discharge_summary_csv = 'ICD9-V4511_Patients_DischargeSummary.csv.gz'  # not loaded by the cells visible in this section
_diagnoses_csv = '../data/zipped/DIAGNOSES_ICD.csv.gz'
_notes_csv = '../data/zipped/NOTEEVENTS.csv.gz'
_admission_csv = '../data/zipped/ADMISSIONS.csv.gz'  # not loaded by the cells visible in this section
_patient_csv = '../data/zipped/PATIENTS.csv.gz'
_prescriptions_csv = '../data/zipped/PRESCRIPTIONS.csv.gz'
# ICD-9 code for ADHD. Kept as a string because it is compared with == against
# the icd9_code column when the cohort is selected.
_icd9_code = '31401' # (ADHD ICD-9 Code)

In [None]:
# Load Data
def _read_gzipped_table(csv_path, **read_options):
    """Read one gzip-compressed CSV and normalise its column labels.

    Column names are stripped of surrounding whitespace and lower-cased so the
    later cells can address them uniformly (e.g. ``subject_id``).

    Args:
        csv_path: Path of the ``.csv.gz`` file to read.
        **read_options: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with normalised column labels.
    """
    table = pd.read_csv(csv_path, compression="gzip", **read_options)
    table.columns = table.columns.str.strip().str.lower()
    return table


# low_memory=False is passed only for the notes and prescriptions tables,
# exactly as before; the other two use pandas' default.
diagnoses_icd_df = _read_gzipped_table(_diagnoses_csv)
noteevents_df = _read_gzipped_table(_notes_csv, low_memory=False)
prescription_df = _read_gzipped_table(_prescriptions_csv, low_memory=False)
patient_df = _read_gzipped_table(_patient_csv)

In [36]:
# Select the diagnosis rows that carry the ADHD code
is_adhd_row = diagnoses_icd_df["icd9_code"] == _icd9_code
adhd_df = diagnoses_icd_df.loc[is_adhd_row]

# Join the ADHD diagnosis rows with the patient table, then with the notes.
# Both joins are pandas' default inner join on subject_id, so merge2_df holds
# one row per (diagnosis row, note) pair of the same patient.
merge_df = adhd_df.merge(patient_df, on="subject_id")
merge2_df = merge_df.merge(noteevents_df, on="subject_id")

# Row counts of each stage, one per line
for frame in (adhd_df, patient_df, merge_df, merge2_df):
    print(len(frame))

141
46520
141
7954


In [37]:
# Find the ADHD patients whose notes mention hallucinations or OCD.
# The search is a case-insensitive substring match on the note text.
# na=False makes a note with missing text count as "no match"; without it the
# mask would contain NaN and the boolean indexing below would raise.
# NOTE(review): "ocd" also matches inside longer words and "hallucinations"
# misses the singular form — confirm whether a word-boundary pattern is wanted.
mentions_hallucinations = merge2_df["text"].str.contains("hallucinations", case=False, na=False)
mentions_ocd = merge2_df["text"].str.contains("ocd", case=False, na=False)

# One row per matching patient, so the printed counts are patient counts.
hallucinations_df = merge2_df[mentions_hallucinations].drop_duplicates(subset=["subject_id"])
ocd_df = merge2_df[mentions_ocd].drop_duplicates(subset="subject_id")

print(f"Hallucinations: {len(hallucinations_df)}")
print(f"OCD: {len(ocd_df)}")

# Add a boolean tag for hallucinations & OCD to the patient/diagnosis table.
# isin is the vectorized membership test; the previous row-wise apply rebuilt
# the id list for every row and breaks on an empty frame.
merge_df["hallucinations"] = merge_df["subject_id"].isin(hallucinations_df["subject_id"])
merge_df["ocd"] = merge_df["subject_id"].isin(ocd_df["subject_id"])

Hallucinations: 15
OCD: 5


In [38]:
# Keep the patients who were given a prescription: the inner join on
# subject_id yields one row per (patient/diagnosis row, prescription) pair.
prescription_patients_df = pd.merge(merge_df, prescription_df, on="subject_id")

# Columns that are not needed in the per-patient drug tables. Defined once so
# both tables are guaranteed to be trimmed identically.
_unwanted_prescription_columns = [
    "row_id_x", "row_id_y", "hadm_id_x", "seq_num", "icd9_code", "dod_ssn",
    "expire_flag", "row_id", "hadm_id_y", "startdate", "enddate", "gsn", "ndc",
    "route", "form_unit_disp", "form_val_disp", "dose_unit_rx", "dose_val_rx",
    "prod_strength", "formulary_drug_cd", "drug_name_poe", "drug_name_generic",
    "drug_type", "dob", "dod", "dod_hosp",
]


def _first_prescription_per_patient(tagged_prescriptions, flag):
    """Return one prescription row per patient for whom ``flag`` is set.

    Args:
        tagged_prescriptions: Patient/prescription join that contains the
            boolean column named by ``flag`` and a ``subject_id`` column.
        flag: Name of the boolean tag column to filter on.

    Returns:
        A DataFrame holding, for each flagged patient, the first matching row
        in the input's order, with the unwanted columns removed.
    """
    flagged = tagged_prescriptions[tagged_prescriptions[flag]]
    # drop_duplicates keeps the first row per subject_id, so the reported drug
    # is simply the first prescription row encountered for that patient.
    return (
        flagged
        .drop_duplicates(subset="subject_id")
        .drop(columns=_unwanted_prescription_columns)
    )


patients_drugged_and_hallucinations = _first_prescription_per_patient(prescription_patients_df, "hallucinations")
patients_drugged_and_ocd = _first_prescription_per_patient(prescription_patients_df, "ocd")

In [39]:
# Display the prescription table for patients tagged with hallucinations
patients_drugged_and_hallucinations

Unnamed: 0,subject_id,gender,hallucinations,ocd,icustay_id,drug
216,2945,F,True,True,,Olanzapine
7773,27799,M,True,False,221948.0,Potassium Chloride
8561,28551,F,True,False,286543.0,Potassium Chloride
9206,30243,M,True,False,293578.0,Lorazepam
10643,44677,M,True,False,227847.0,0.9% Sodium Chloride
11708,58706,M,True,False,240714.0,OxycoDONE (Immediate Release)
11881,57207,M,True,False,286604.0,Diazepam
12762,61809,M,True,False,246414.0,Diazepam
13402,66706,M,True,False,264231.0,Thiamine
14945,78647,M,True,False,,Lorazepam


In [40]:
# Display the prescription table for patients tagged with OCD
patients_drugged_and_ocd

Unnamed: 0,subject_id,gender,hallucinations,ocd,icustay_id,drug
48,715,F,False,True,,DiphenhydrAMINE
216,2945,F,True,True,,Olanzapine
1277,5571,F,False,True,239647.0,Phenytoin Sodium
1478,23707,F,False,True,215476.0,Ranitidine (Liquid)
9708,42683,F,False,True,209735.0,Oxcarbazepine


In [41]:
# Stack the two per-condition drug tables into one.
# A patient tagged with both conditions is present in both inputs, so after
# concatenation they would appear twice; keep only their first row. Each input
# already holds one row per subject_id, so the result does too.
final = (
    pd.concat([patients_drugged_and_hallucinations, patients_drugged_and_ocd], ignore_index=True)
    .drop_duplicates(subset="subject_id")
    .reset_index(drop=True)
)
final

Unnamed: 0,subject_id,gender,hallucinations,ocd,icustay_id,drug
0,2945,F,True,True,,Olanzapine
1,27799,M,True,False,221948.0,Potassium Chloride
2,28551,F,True,False,286543.0,Potassium Chloride
3,30243,M,True,False,293578.0,Lorazepam
4,44677,M,True,False,227847.0,0.9% Sodium Chloride
5,58706,M,True,False,240714.0,OxycoDONE (Immediate Release)
6,57207,M,True,False,286604.0,Diazepam
7,61809,M,True,False,246414.0,Diazepam
8,66706,M,True,False,264231.0,Thiamine
9,78647,M,True,False,,Lorazepam


In [42]:
# Reduce the tagged patient/diagnosis table to the columns used from here on.
# In the recorded output these are subject_id, gender, hallucinations and ocd.
# ("dod_ssn" used to be listed twice in the drop list; once is enough.)
adhd_patients = merge_df.drop(columns=[
    "icd9_code", "row_id_x", "row_id_y", "hadm_id", "seq_num",
    "dod_ssn", "expire_flag", "dod", "dob", "dod_hosp",
])

adhd_patients

Unnamed: 0,subject_id,gender,hallucinations,ocd
0,303,M,False,False
1,715,F,False,True
2,1590,M,False,False
3,2945,F,True,True
4,2170,M,False,False
...,...,...,...,...
136,94075,M,False,False
137,97974,F,False,False
138,96463,M,True,False
139,98177,M,False,False


In [44]:
# Attach the drug information to every ADHD patient.
# The outer join keeps patients without a drug row (their icustay_id and drug
# are left empty). The columns shared by both inputs come back suffixed: the
# right-hand "_y" copies are discarded and the left-hand "_x" copies get their
# plain names back. Finally only the first row per subject_id is kept.
subjects_drug_with_hallucination_and_ocd_tags = (
    pd.merge(adhd_patients, final, on=["subject_id"], how="outer")
    .drop(columns=["gender_y", "ocd_y", "hallucinations_y"])
    .rename(columns={
        "gender_x": "gender",
        "ocd_x": "ocd",
        "hallucinations_x": "hallucinations",
    })
    .drop_duplicates(subset=["subject_id"])
)

# A missing tag is treated as "not tagged"
for tag in ("hallucinations", "ocd"):
    subjects_drug_with_hallucination_and_ocd_tags[tag] = subjects_drug_with_hallucination_and_ocd_tags[tag].fillna(False)

subjects_drug_with_hallucination_and_ocd_tags

Unnamed: 0,subject_id,gender,hallucinations,ocd,icustay_id,drug
0,303,M,False,False,,
1,715,F,False,True,,DiphenhydrAMINE
2,1590,M,False,False,,
3,2945,F,True,True,,Olanzapine
5,2170,M,False,False,,
...,...,...,...,...,...,...
137,94075,M,False,False,,
138,97974,F,False,False,,
139,96463,M,True,False,285758.0,Propofol
140,98177,M,False,False,,
