# Diagnoses and Medications
Lists / prints out a bunch of information regarding medications and diagnoses

In [201]:
import numpy as np
import pandas as pd
import json

# subjects
subj_df = pd.read_csv('subject_list.txt', sep='\t', index_col = 0, header = 0)

In [203]:
phenotype_dir = '/projects/f_mc1689_1/ClinicalActFlow/data/ds000030_R105/phenotype/'

# load in dataframes
scid = pd.read_csv(phenotype_dir+'phenotype_scid.tsv',delimiter='\t')
meds = pd.read_csv(phenotype_dir+'phenotype_medication.tsv',delimiter='\t')

#load in metadata
med_dict = pd.read_csv('Medication_dict.csv')

In [204]:
df_out = pd.DataFrame(columns=['subj','primary_diagnosis','secondary_diagnosis'])
n_subj = len(subj_df[subj_df['group']=='SCHZ'].index)
for subj in subj_df[subj_df['group']=='SCHZ'].index:
    
    data = scid[scid['participant_id']==subj]
    df = pd.DataFrame(columns=['subj','primary_diagnosis','secondary_diagnosis'])
    df['primary_diagnosis'] = data['scid_dx1']
    df['secondary_diagnosis'] = data['scid_dx2']
    df['subj'] = subj
    df_out = pd.concat([df_out,df])
print('Primary Diag:')
print(df_out['primary_diagnosis'].value_counts())
print(df_out['primary_diagnosis'].value_counts() / n_subj*100)
print('')
print('Secondary Diag:')
print(df_out['secondary_diagnosis'].value_counts())
print(df_out['secondary_diagnosis'].value_counts() / n_subj*100)

Primary Diag:
295.30 Schizophrenia, Paranoid Type            16
295.70 Schizoaffective Disorder                 9
295.90 Schizophrenia, Undifferentiated Type     6
295.60 Schizophrenia, Residual Type             4
295.10 Schizophrenia, Disorganized Type         1
Name: primary_diagnosis, dtype: int64
295.30 Schizophrenia, Paranoid Type            44.444444
295.70 Schizoaffective Disorder                25.000000
295.90 Schizophrenia, Undifferentiated Type    16.666667
295.60 Schizophrenia, Residual Type            11.111111
295.10 Schizophrenia, Disorganized Type         2.777778
Name: primary_diagnosis, dtype: float64

Secondary Diag:
311  Depressive Disorder NOS                                                                 9
303.90 Alcohol Dependence                                                                    8
314.01 Attention-Deficit/Hyperactivity Disorder, Combined Type                               3
314.00 Attention-Deficit/Hyperactivity Disorder, Predominantly Inattent

In [273]:
df_out = pd.DataFrame(columns=['subj','med_name','med_class'])
df_ind = pd.DataFrame(columns=['subj','use','Antipsychotic','SHA','Antidepressant','Mood stabilizer','Other','Stimulant'])
for subj in subj_df[subj_df['group']=='SCHZ'].index:
    df_ind1 = pd.DataFrame()
    df_ind1['subj'] = [subj]
    count=0
    
    data = meds[meds['participant_id']==subj]
    for i in range(1,21):
        if data['med_use'+str(i)].values==1:
            df = pd.DataFrame()
            med_name = data['med_name'+str(i)].values[0]
            med_class = med_dict['Class'][med_dict['Drug'] == med_name].values[0]
            
            df['med_name'] = [med_name]
            df['med_class'] = [med_class]
            df['subj'] = [subj]
            df_out = pd.concat([df_out,df],sort=False)
            
            # apply to individual data
            count = count+1
            df_ind1[med_class] = [1]
                   
    df_ind1['use'] = [count]
    df_ind = pd.concat([df_ind,df_ind1],sort=False)

display(df_out.head())
display(df_ind.head())

Unnamed: 0,subj,med_name,med_class
0,sub-50007,Risperdal/ Risperidone,Antipsychotic
0,sub-50007,Artane/Trihexyphenidyl,Other
0,sub-50008,Abilify/ Aripiprazole,Antipsychotic
0,sub-50013,Zyprexa/ Olanzapine,Antipsychotic
0,sub-50013,Restoril/ Temazepam,SHA


Unnamed: 0,subj,use,Antipsychotic,SHA,Antidepressant,Mood stabilizer,Other,Stimulant
0,sub-50007,2,1,,,,1.0,
0,sub-50008,1,1,,,,,
0,sub-50013,3,1,1.0,,,,
0,sub-50014,2,1,,,,1.0,
0,sub-50015,1,1,,,,,


In [274]:
for med_class in ['Antipsychotic','SHA','Antidepressant','Mood stabilizer','Other','Stimulant']:
    print(df_ind[med_class].value_counts())

1    31
Name: Antipsychotic, dtype: int64
1    11
Name: SHA, dtype: int64
1    14
Name: Antidepressant, dtype: int64
1    7
Name: Mood stabilizer, dtype: int64
1    5
Name: Other, dtype: int64
1    1
Name: Stimulant, dtype: int64


In [165]:
print(df_out['med_class'].value_counts())

Unnamed: 0,subj,med_name
133,sub-50007,Risperdal/ Risperidone
133,sub-50007,Artane/Trihexyphenidyl
134,sub-50008,Abilify/ Aripiprazole
136,sub-50013,Zyprexa/ Olanzapine
136,sub-50013,Restoril/ Temazepam
136,sub-50013,Ativan/ Lorazepam
137,sub-50014,Abilify/ Aripiprazole
137,sub-50014,Cogentin/ Benztropine
138,sub-50015,Abilify/ Aripiprazole
141,sub-50021,Geodon/ Ziprasidone
