In [1]:
# Basic Imports
import numpy as np
import h5py as h5
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
import pickle
import re
import collections
import tables as tb

In [2]:
#Paths
# Original (unfiltered) file, kept for reference:
#   /projects/nikhil/ADNI_prediction/input_datasets/ADNI_Merge_filter.csv

# Longitudinal file holding the selected columns only.
in_file = '/projects/nikhil/ADNI_prediction/input_datasets/ADNI_Merge_filter_Longitudinal.csv'

# header=0: the first row of the CSV supplies the column names.
in_data = pd.read_csv(in_file, header=0)

In [None]:
#Some Defs
def save_dictionary(_dict,save_path):
    """Pickle ``_dict`` to the file at ``save_path``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.

    Parameters
    ----------
    _dict : object
        Any picklable object (the function does not require an actual dict).
    save_path : str
        Destination path of the pickle file.

    Raises
    ------
    IOError / OSError
        If ``save_path`` cannot be opened for writing.
    pickle.PicklingError (or the error raised by the object's reducer)
        If ``_dict`` cannot be pickled.
    """
    # The context manager closes the handle even when pickle.dump raises;
    # the previous open()/close() pair leaked it on error.
    with open(save_path, 'wb') as f:
        pickle.dump(_dict, f)
    

In [6]:
#Extract various cohorts

# One frame per visit code for a single cohort.
#
# The rename and merge cells further down relabel three columns positionally
# and then use the score column, so all three columns are selected here.
# With only ['PTID'] selected, the rename cell fails with a length-mismatch
# error on a fresh Restart & Run All.
# NOTE(review): assumes the CSV contains 'DX_bl' and 'ADAS13' (taken from the
# previously commented-out col_list) -- confirm against the input file.
# Code that needs subject IDs only should select data_bl[['PTID']].
col_list = ['PTID','DX_bl','ADAS13']
cohort = 'ADNI2'

# Rows whose ORIGPROT and COLPROT both equal the cohort; computed once
# instead of being re-evaluated for every visit.
cohort_rows = in_data[(in_data.ORIGPROT == cohort) & (in_data.COLPROT == cohort)]


def _visit_subset(viscode):
    """Return the `col_list` columns of the cohort rows whose VISCODE equals `viscode`.

    A copy is returned so that later column relabelling acts on an
    independent frame rather than on a slice of `in_data`.
    """
    return cohort_rows.loc[cohort_rows.VISCODE == viscode, col_list].copy()


data_bl  = _visit_subset('bl')
data_m06 = _visit_subset('m06')
data_m12 = _visit_subset('m12')
data_m18 = _visit_subset('m18')
data_m24 = _visit_subset('m24')
data_m30 = _visit_subset('m30')
data_m36 = _visit_subset('m36')

In [None]:
#Save data subsets
# Persist the baseline subject IDs. Only the PTID column is written, so the
# pickle content matches its file name no matter which columns the extraction
# cell selected when it was last run.
# NOTE: the pickled object is a pandas DataFrame, not a dict, despite the
# names used here.
save_ADNI2_dict_path = '/projects/nikhil/ADNI_prediction/input_datasets/ADNI2_subjects_bl_PTIDs_dict.pkl'
save_dictionary(data_bl[['PTID']],save_ADNI2_dict_path)

In [3]:
#Rename Columns
# Positional relabelling: every per-visit frame must hold exactly three
# columns, otherwise the assignment raises a length-mismatch error.
# The third column gets a visit-specific name so the frames can be joined
# side by side later.
relabel_plan = [
    (data_bl,  ['PTID','DX_bl','bl_score']),
    (data_m06, ['PTID','DX_bl','m06_score']),
    (data_m12, ['PTID','DX_bl','m12_score']),
    (data_m18, ['PTID','DX_bl','m18_score']),
    (data_m24, ['PTID','DX_bl','m24_score']),
    (data_m30, ['PTID','DX_bl','m30_score']),
    (data_m36, ['PTID','DX_bl','m36_score']),
]
for visit_frame, new_names in relabel_plan:
    visit_frame.columns = new_names

In [4]:
#Compute Joins / Unions / Intersections
# Inner-join the visits on PTID so that only subjects present at baseline and
# at every follow-up up to month 24 remain, then add the score change relative
# to baseline for each follow-up.
#
# Only the score column of each follow-up frame is merged. Every per-visit
# frame also carries DX_bl; merging them whole makes pandas suffix the
# clashing column (DX_bl_x / DX_bl_y), and by the third merge those suffixed
# names already exist -- duplicate column labels on old pandas, a MergeError
# on pandas >= 2.0. DX_bl is therefore kept from the baseline frame alone.
followup_frames = [
    ('m06', data_m06),
    ('m12', data_m12),
    ('m18', data_m18),
    ('m24', data_m24),
]

test_merge = data_bl
for visit, visit_frame in followup_frames:
    score_col = visit + '_score'
    # pd.merge returns a new frame, so data_bl itself is never modified.
    test_merge = pd.merge(test_merge, visit_frame[['PTID', score_col]], on='PTID', how='inner')

# Diffs are added after all merges so the score columns stay grouped together.
for visit, _ in followup_frames:
    test_merge[visit + '_diff'] = test_merge[visit + '_score'] - test_merge['bl_score']


In [5]:
#Plot configs
%matplotlib inline
plt.rcParams['figure.figsize'] = (15, 10)
plt.style.use('ggplot')

In [42]:
#Plots
# Left panel: mean score per visit. Right panel: mean change from baseline per
# follow-up visit. Error bars show the spread statistic computed below.
plot_datasets = ['bl_score','m06_score','m12_score','m18_score','m24_score','m06_diff','m12_diff','m18_diff','m24_diff']
n_timepts = 5                 # the first n_timepts entries are raw scores
delta_timepts = n_timepts-1   # the remaining entries are diffs vs. baseline
dx='All'                      # 'All' pools every subject, otherwise a DX_bl value to filter on

# Select the subjects once instead of re-filtering for every column.
if dx == 'All':
    plot_rows = test_merge
else:
    plot_rows = test_merge[test_merge.DX_bl==dx]

y_mean = []
y_err = []
for plt_ds in plot_datasets:
    # Drop missing values first: a single NaN would otherwise turn the mean
    # and the error of that timepoint into NaN and blank it in the plot.
    y = np.asarray(plot_rows[plt_ds].dropna(), dtype=float)

    y_mean.append(np.mean(y))
    # The original line called an undefined name, `v(y)`, which raises
    # NameError. The standard error of the mean is used instead.
    # NOTE(review): assumed to be the intended statistic (scipy.stats is
    # imported by this notebook and otherwise unused) -- switch to np.std(y)
    # if the standard deviation was meant.
    y_err.append(stats.sem(y))

#Plot the stats
font_med = 16


def _errorbar_panel(ax, x, first, last, ylabel, title):
    """Draw one mean +/- error panel for plot_datasets[first:last] on `ax`."""
    ax.errorbar(x, y_mean[first:last], yerr=y_err[first:last], linewidth=2, label='bl_dx: {}'.format(dx))
    # Same x-range on both panels so the visits line up visually.
    ax.set_xlim([-1, n_timepts])
    ax.set_xticks(x)
    ax.set_xticklabels(plot_datasets[first:last], fontsize=font_med)
    ax.set_xlabel('Timepoint', fontsize=font_med)
    ax.set_ylabel(ylabel, fontsize=font_med)
    ax.legend(fontsize=font_med, loc=2)
    ax.set_title(title, fontsize=font_med)


fig, (ax_score, ax_diff) = plt.subplots(1, 2)

_errorbar_panel(ax_score, np.arange(n_timepts), 0, n_timepts,
                'ADAS13 Score', 'Longitudinal ADAS_score')

# The diffs start at the first follow-up, hence x = 1..delta_timepts.
# The y-label now states that this panel shows a change, not a raw score.
_errorbar_panel(ax_diff, np.arange(1, delta_timepts+1, 1), n_timepts, len(plot_datasets),
                'Change in ADAS13 Score from Baseline', 'Longitudinal Change in ADAS_score')

# Explicit show() keeps the Text repr of the last call out of the cell output.
plt.show()


<matplotlib.text.Text at 0x7f1a73aea050>

In [None]:

# Overlaid histograms of the score change from baseline, one per follow-up.
# The 18-month series is currently disabled.
diff_histograms = [
    ('m06_diff', '06'),
    ('m12_diff', '12'),
    #('m18_diff', '18'),
    ('m24_diff', '24'),
]
for diff_col, visit_label in diff_histograms:
    # alpha < 1 keeps overlapping distributions visible.
    plt.hist(test_merge[diff_col], bins=50, alpha=0.5, label=visit_label)
plt.legend()
plt.show()