# Keep a record of all scans preprocessed and QCed
* **ADNI1-1.5T** 
* **ADNI1-3.0T** 
* **ADNI2-3.0T** 
* **ADNIGO** 
* **AIBL** 
    

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import collections
from scipy import stats
import seaborn as sns
import pickle
from os import listdir

In [20]:
def standarize_filenames(file_list,preproc,suffix):
    """Rewrite pipeline-specific names into a common ``<stem><suffix>`` form.

    Parameters
    ----------
    file_list : iterable of str
        Names as they appear in one pipeline's output listing.
    preproc : str
        Selects how the stem is extracted from each name:

        * ``'bpipe'``  -- keep the text before the first ``'.'``.
        * ``'civet'`` / ``'qc'`` -- keep the name unchanged.
        * ``'qc_cic'`` -- drop everything up to and including the first
          ``'_'``, then drop the last ``'_'`` and everything after it.
    suffix : str
        Text appended to every extracted stem (e.g. ``'.mnc'``).

    Returns
    -------
    list of str
        One standardized name per input name, in input order. An empty
        list is returned (and a message printed) when ``preproc`` is not
        one of the modes above.

    Raises
    ------
    IndexError
        In ``'qc_cic'`` mode, if a name contains no ``'_'``.
    """
    if preproc == 'bpipe':
        return [f.split('.')[0] + suffix for f in file_list]

    if preproc in ('civet', 'qc'):
        return [f + suffix for f in file_list]

    if preproc == 'qc_cic':
        return [f.split('_', 1)[1].rsplit('_', 1)[0] + suffix
                for f in file_list]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('unknown preproc: {}'.format(preproc))
    return []

def print_dataset_stats(df, dataset):
    """Print scan counts for one cohort of the manifest.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``cohort``, ``bpipe``, ``civet`` and ``QC``.
    dataset : str
        Cohort label to summarise (compared against ``df.cohort``).

    Prints the cohort label, then one line with: the number of rows in the
    cohort, the number whose ``bpipe`` is ``'Complete'``, the number whose
    ``civet`` is ``'Complete'``, and the number whose ``QC`` is ``'Pass'``.
    Returns ``None``.
    """
    # Build the cohort mask once and reuse it for every count.
    in_cohort = df.cohort == dataset

    total_scans = int(in_cohort.sum())
    bpipe_scans = int((in_cohort & (df.bpipe == 'Complete')).sum())
    civet_scans = int((in_cohort & (df.civet == 'Complete')).sum())
    QC_scans = int((in_cohort & (df.QC == 'Pass')).sum())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(dataset)
    print('total_scans: {}, bpipe_scans: {}, civet_scans: {}, QC_scans: {}'.format(total_scans,bpipe_scans,civet_scans,QC_scans))
    

In [3]:
# Locations of the raw scans and of the per-dataset bookkeeping files.
# Path prefixes shared by several entries are named once here.
_ADNI_COLLECTIONS = '/external/ADNI/Standardized_Datasets/Image_Collections/'
_DATASETS = '/projects/nikhil/datasets/'
_QC_DATASETS = '/projects/nbhagwat/datasets/'

# --- Directories that hold the scans themselves ---
scan_dir_adni1_1_5 = _ADNI_COLLECTIONS + 'ADNI1_Complete_3Yr_1.5T_11_15_2012/'
scan_dir_adni1_3_0 = _ADNI_COLLECTIONS + 'ADNI1_Complete_3Yr_3T_11_15_2012/'
# ADNI2 needs to be updated based on COBRA lab list
scan_dir_adni2 = _DATASETS + 'ADNI2/ADNI2_3yr_Complete_Jan2017/'
scan_dir_aibl = _DATASETS + 'AIBL/aibl_mnc/'
scan_dir_adnigo = '/home/m/mchakrav/nikhil/scratch/ADNIGO/mnc_files/'

# --- Lists of all available scan files, one per dataset ---
all_files_adni1_1_5 = _DATASETS + 'ADNI1_1_5/list_of_all_files_1yr_3yr'
all_files_adni1_3_0 = _DATASETS + 'ADNI1_3_0/list_of_all_files'
all_files_adni2 = _DATASETS + 'ADNI2/all_ADNI2_subject_filenames'
all_files_aibl = _DATASETS + 'AIBL/list_of_3+_tps_files.txt'
all_files_adnigo = _DATASETS + 'ADNIGO/list_of_all_files_on_scinet'

# --- bpipe output lists (empty string: no list set for ADNI1 3T) ---
bpipe_output_list_adni1_1_5 = _DATASETS + 'ADNI1_1_5/bpipe_output_uniq.txt'
bpipe_output_list_adni1_3_0 = ''
bpipe_output_list_adni2 = _DATASETS + 'ADNI2/bpipe_output/list_of_all_files_from_bpipe_out'
bpipe_output_list_aibl = _DATASETS + 'AIBL/bpipe_out_subjects_uniq.txt'
bpipe_output_list_adnigo = _DATASETS + 'ADNIGO/list_of_all_files_from_bpipe_out'

# --- civet output lists (empty string: no list set for ADNI1 3T) ---
civet_output_list_adni1_1_5 = _DATASETS + 'ADNI1_1_5/civet_out_passed.txt'
civet_output_list_adni1_3_0 = ''
civet_output_list_adni2 = _DATASETS + 'ADNI2/civet_out/civet_out_passed.txt'
civet_output_list_aibl = _DATASETS + 'AIBL/civet_out_passed.txt'
civet_output_list_adnigo = _DATASETS + 'ADNIGO/civet_out_passed.txt'

# --- QC output csv files (note: these live under a different root) ---
qc_output_csv_adni1_1_5 = _QC_DATASETS + 'ADNI1_1_5/ADNI1_1.5T_QC.csv'
qc_output_csv_aibl = _QC_DATASETS + 'AIBL/aibl_QC.csv'

# ADNI2 QC comes from an earlier run
qc_output_csv_adni2 = (
    '/projects/nikhil/ADNI_prediction/input_datasets/'
    'longitudinal_trajectories/input_csv/adni2-qcvols_nodups.csv'
)


In [14]:
# Number of entries in the ADNI1-1.5T civet output list.
# NOTE: adni1_1_5_civet_output_files is assigned in a later cell of this
# file, so that cell has to be run before this one.
len(adni1_1_5_civet_output_files)

3131

In [4]:
def _first_column(list_path):
    """Return the first column of a header-less csv/list file as a list."""
    return list(pd.read_csv(list_path, header=None)[0])


def _nonzero_rows(qc_df, score_col, name_col):
    """Return the ``name_col`` values of rows whose ``score_col`` is not 0."""
    return list(qc_df.loc[qc_df[score_col] != 0, name_col])


#List total available files
adni1_1_5_files = _first_column(all_files_adni1_1_5)
adni1_3_0_files = _first_column(all_files_adni1_3_0)
adni2_files = _first_column(all_files_adni2)
aibl_files = _first_column(all_files_aibl)
adnigo_files = _first_column(all_files_adnigo)

#List bpipe output files (names mapped onto the scan-file naming scheme)
adni1_1_5_bpipe_output_files = standarize_filenames(
    _first_column(bpipe_output_list_adni1_1_5), 'bpipe', '.mnc')
adni2_bpipe_output_files = standarize_filenames(
    _first_column(bpipe_output_list_adni2), 'bpipe', '.mnc')
# AIBL is the only cohort mapped with a '.0.mnc' suffix.
aibl_bpipe_output_files = standarize_filenames(
    _first_column(bpipe_output_list_aibl), 'bpipe', '.0.mnc')
adnigo_bpipe_output_files = standarize_filenames(
    _first_column(bpipe_output_list_adnigo), 'bpipe', '.mnc')

#List civet output files
adni1_1_5_civet_output_files = standarize_filenames(
    _first_column(civet_output_list_adni1_1_5), 'civet', '.mnc')
adni2_civet_output_files = standarize_filenames(
    _first_column(civet_output_list_adni2), 'civet', '.mnc')
aibl_civet_output_files = standarize_filenames(
    _first_column(civet_output_list_aibl), 'civet', '.mnc')
adnigo_civet_output_files = standarize_filenames(
    _first_column(civet_output_list_adnigo), 'civet', '.mnc')

#List QC output files: keep only rows whose QC score is non-zero
adni1_1_5_qc_output_df = pd.read_csv(qc_output_csv_adni1_1_5)
adni1_1_5_qc_output_files = standarize_filenames(
    _nonzero_rows(adni1_1_5_qc_output_df, 'score', 'filename'),
    'qc_cic', '.mnc')

aibl_qc_output_df = pd.read_csv(qc_output_csv_aibl)
aibl_qc_output_files = standarize_filenames(
    _nonzero_rows(aibl_qc_output_df, 'score', 'filename'),
    'qc_cic', '.mnc')

# The ADNI2 csv uses different column names ('HPC_QC' / 'magetbase') and
# its names only need the suffix appended ('qc' mode).
adni2_qc_output_df = pd.read_csv(qc_output_csv_adni2)
adni2_qc_output_files = standarize_filenames(
    _nonzero_rows(adni2_qc_output_df, 'HPC_QC', 'magetbase'),
    'qc', '.mnc')

In [5]:
# Sizes of the AIBL and ADNI1-1.5T QC filename lists, i.e. the rows whose
# QC 'score' column is non-zero.
len(aibl_qc_output_files),len(adni1_1_5_qc_output_files)

(483, 1922)

In [6]:
# Empty manifest: one row per scan, filled in by the cells below.
scan_info_cols = [
    'filename',   # scan file name
    'cohort',     # dataset label
    'basedir',    # directory holding the scan
    'bpipe',      # set to 'Complete' for scans found in a bpipe output list
    'civet',      # set to 'Complete' for scans found in a civet output list
    'QC',         # set to 'Pass' for scans found in a QC list
    'comments',   # not populated anywhere in the visible code
]
scan_manifest = pd.DataFrame(columns=scan_info_cols)

In [7]:
# Populate dataframe: one row per available scan, cohort after cohort.
_cohort_sources = [
    ('adni1_1_5', scan_dir_adni1_1_5, adni1_1_5_files),
    ('adni1_3_0', scan_dir_adni1_3_0, adni1_3_0_files),
    ('adni2', scan_dir_adni2, adni2_files),
    ('aibl', scan_dir_aibl, aibl_files),
    ('adnigo', scan_dir_adnigo, adnigo_files),
]

_all_filenames, _all_basedirs, _all_cohorts = [], [], []
for _label, _scan_dir, _files in _cohort_sources:
    _all_filenames += _files
    # Repeat the directory / label once per file of this cohort.
    _all_basedirs += list(np.tile(_scan_dir, len(_files)))
    _all_cohorts += list(np.tile(_label, len(_files)))

scan_manifest['filename'] = _all_filenames
scan_manifest['basedir'] = _all_basedirs
scan_manifest['cohort'] = _all_cohorts
    

In [8]:
# Update bpipe values: flag every scan whose filename appears in any of the
# standardized bpipe output lists.
for _bpipe_done in (adni1_1_5_bpipe_output_files,
                    adni2_bpipe_output_files,
                    aibl_bpipe_output_files,
                    adnigo_bpipe_output_files):
    scan_manifest.loc[scan_manifest['filename'].isin(_bpipe_done), 'bpipe'] = 'Complete'

In [9]:
# Update civet values: flag every scan whose filename appears in any of the
# standardized civet output lists.
for _civet_done in (adni1_1_5_civet_output_files,
                    adni2_civet_output_files,
                    aibl_civet_output_files,
                    adnigo_civet_output_files):
    scan_manifest.loc[scan_manifest['filename'].isin(_civet_done), 'civet'] = 'Complete'

In [10]:
# Update QC values: flag scans listed in the ADNI1-1.5T and AIBL QC lists.
for _qc_passed in (adni1_1_5_qc_output_files, aibl_qc_output_files):
    scan_manifest.loc[scan_manifest['filename'].isin(_qc_passed), 'QC'] = 'Pass'

In [11]:
# Update values from previously processed ADNI2 data: a scan listed in the
# earlier ADNI2 QC list is marked as done for bpipe, civet and QC alike.
_in_adni2_qc_list = scan_manifest['filename'].isin(adni2_qc_output_files)
for _column, _status in (('bpipe', 'Complete'),
                         ('civet', 'Complete'),
                         ('QC', 'Pass')):
    scan_manifest.loc[_in_adni2_qc_list, _column] = _status

In [21]:
#Get summary
# Prints the total, bpipe-complete, civet-complete and QC-pass scan counts
# for the rows whose cohort is 'adni1_1_5'.
print_dataset_stats(scan_manifest,'adni1_1_5')

adni1_1_5
total_scans: 3131, bpipe_scans: 2181, civet_scans: 3131, QC_scans: 1922


In [92]:
# Preview the first 10 filenames of one cohort.
# `df` is a second name for scan_manifest, not a copy.
# NOTE(review): 'adni1' is not one of the cohort labels written to
# scan_manifest['cohort'] in this file ('adni1_1_5', 'adni1_3_0', 'adni2',
# 'aibl', 'adnigo'), so as written this filter matches no rows -- confirm
# which cohort label was intended.
df = scan_manifest
df[(df.cohort=='adni1')]['filename'].values[:10]

array(['ADNI_002_S_4171_MR_MT1__N3m_Br_20111216162826334_S130098_I272710.mnc',
       'ADNI_002_S_4171_MR_MT1__N3m_Br_20120319163808038_S142180_I291059.mnc',
       'ADNI_002_S_4171_MR_MT1__N3m_Br_20121001130644753_S162683_I337465.mnc',
       'ADNI_002_S_4213_MR_MT1__N3m_Br_20110910135704514_S121168_I255409.mnc',
       'ADNI_002_S_4213_MR_MT1__N3m_Br_20120110145450835_S133946_I277054.mnc',
       'ADNI_002_S_4213_MR_MT1__N3m_Br_20120327112043431_S144143_I293719.mnc',
       'ADNI_002_S_4213_MR_MT1__N3m_Br_20130313104621437_S168245_I362924.mnc',
       'ADNI_002_S_4213_MR_MT1__N3m_Br_20130926153549136_S201579_I392163.mnc',
       'ADNI_002_S_4219_MR_MT1__N3m_Br_20110928093601592_S122143_I258694.mnc',
       'ADNI_002_S_4219_MR_MT1__N3m_Br_20120125144856150_S133915_I280658.mnc'], dtype=object)

In [22]:
# For ADNI1-1.5T and then ADNI2, print "<number of scans> <number of distinct
# subject tokens>". The subject token is the 4th '_'-separated field of the
# filename. After the loop, sub_ptids holds the ADNI2 tokens.
for _cohort_files in (adni1_1_5_files, adni2_files):
    sub_ptids = [f.split('_')[3] for f in _cohort_files]
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('{} {}'.format(len(sub_ptids), len(set(sub_ptids))))

3131 639
4049 777


In [23]:
# NOTE(review): ad-hoc sum; neither operand is derived from a value shown
# elsewhere in the visible part of this file -- confirm what is being added.
546+75

621