In [3]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
import os.path as op
from glob import glob
import json

# Add MRIQC T1 metric names
classifier_keys = ['cjv', 'cnr', 'efc', 'fber', 'fwhm_avg', 'fwhm_x', 'fwhm_y',
 'fwhm_z', 'icvs_csf', 'icvs_gm', 'icvs_wm', 'inu_med', 'inu_range', 'qi_1',
 'qi_2', 'rpve_csf', 'rpve_gm', 'rpve_wm', 'size_x', 'size_y', 'size_z', 'snr_csf',
 'snr_gm', 'snr_total', 'snr_wm', 'snrd_csf', 'snrd_gm', 'snrd_total', 'snrd_wm',
 'spacing_x', 'spacing_y', 'spacing_z', 'summary_bg_k', 'summary_bg_mad', 'summary_bg_mean',
 'summary_bg_median', 'summary_bg_n', 'summary_bg_p05', 'summary_bg_p95', 'summary_bg_stdv',
 'summary_csf_k', 'summary_csf_mad', 'summary_csf_mean', 'summary_csf_median', 'summary_csf_n',
 'summary_csf_p05', 'summary_csf_p95', 'summary_csf_stdv', 'summary_gm_k', 'summary_gm_mad',
 'summary_gm_mean', 'summary_gm_median', 'summary_gm_n','summary_gm_p05', 'summary_gm_p95',
 'summary_gm_stdv', 'summary_wm_k', 'summary_wm_mad', 'summary_wm_mean', 'summary_wm_median',
 'summary_wm_n', 'summary_wm_p05', 'summary_wm_p95', 'summary_wm_stdv', 'tpm_overlap_csf',
 'tpm_overlap_gm', 'tpm_overlap_wm', 'wm2max']
classifier_keys.sort()

# Load the Model
bids = "/om2/user/mabdel03/files/Ravi_ISO_MRI/reformatted/"
t1_qc_model = op.join(bids, "code", "mriqc_t1_model.json")
model=XGBClassifier()
model.load_model(t1_qc_model)

# Define important paths
bids = "/om2/user/mabdel03/files/Ravi_ISO_MRI/reformatted/"
mriqc_dir = op.join(bids, "derivatives", "mriqc_22.0.6")
qsiprep_dir = op.join(bids, "derivatives", "qsiprep_0.20.0")
xcp_d_dir = op.join(bids, "derivatives", "xcp_d_0.6.1")

# Load the first qc data
df = pd.read_csv(op.join(bids, "code", "slm_data3.csv"), index_col=0)
subs = df.index

In [4]:
for sub in subs:
    # T1w stuff
    try:
        df_classifier = pd.DataFrame(columns=classifier_keys)
        t1_json_path = glob(op.join(mriqc_dir, sub, "ses*", "anat","*T1w.json"))[0]
        # populate df_classifier with T1 metric values
        with open(t1_json_path) as f:
            t1_json = json.load(f)
            for key in classifier_keys:
                df_classifier[key] = [t1_json[key]]
        # Run T1 QC model
        mriqc_model_input = df_classifier.values
        xgbscore = model.predict_proba(mriqc_model_input)[:, 1]
        # Decide if image passes threshold
        df.loc[sub, 'anat_xgbscore'] = xgbscore
        if xgbscore > 0.5:
            df.loc[sub, 'anat_qc'] = 1
        else:
            df.loc[sub, 'anat_qc'] = 0
    except Exception as e:
        print('sub:', sub, ', ANAT:',e)
        df.loc[sub, 'anat_xgbscore'] = np.nan
        df.loc[sub, 'anat_qc'] = 0
        continue
    
    # DWI stuff
    try:
        # Load neighbor corrleation score and decide if it passes threshold
        dwi_qc_csv_path = glob(op.join(qsiprep_dir, sub, "ses*", "dwi", "*ImageQC*.csv"))[0]
        dwi_qc_df = pd.read_csv(dwi_qc_csv_path)
        t1_neighbor_corr = dwi_qc_df['t1_neighbor_corr'].values
        df.loc[sub, 'dwi_neighbor_corr'] = t1_neighbor_corr
        if t1_neighbor_corr > 0.4:
            df.loc[sub, 'dwi_qc'] = 1
        else:
            df.loc[sub, 'dwi_qc'] = 0
    except Exception as e:
        print('sub:', sub, ', DWI:',e)
        df.loc[sub, 'dwi_neighbor_corr'] = np.nan
        df.loc[sub, 'dwi_qc'] = 0
        
    # fMRI Stuff
    try:
        # Get mean FD
        fmri_mriqc_json_path = glob(op.join(mriqc_dir, sub, "ses*", "func", "*bold.json"))[0]
        with open(fmri_mriqc_json_path) as f:
            fmri_mriqc_json = json.load(f)
            mean_fd = fmri_mriqc_json['fd_mean']
            df.loc[sub, 'bold_fd_mean'] = mean_fd
        # Get number of outliers
        outlier_tsv_path = glob(op.join(xcp_d_dir, sub, 'ses-*', 'func', '*outliers.tsv'))[0]
        outlier_tsv = pd.read_csv(outlier_tsv_path, sep="\t")
        n_outliers = sum(outlier_tsv['framewise_displacement'])
        df.loc[sub, 'bold_n_outliers'] = n_outliers
    except Exception as e:
        print('sub:', sub, ', fMRI:',e)
        df.loc[sub, 'fmri_qc'] = 0
        df.loc[sub, 'bold_fd_mean'] = np.nan
        df.loc[sub, 'bold_n_outliers'] = np.nan


sub: sub-10371937 , DWI: list index out of range
sub: sub-16854481 , fMRI: list index out of range
sub: sub-19924524 , fMRI: list index out of range
sub: sub-27419385 , fMRI: list index out of range
sub: sub-31183595 , DWI: list index out of range
sub: sub-48480640 , DWI: list index out of range
sub: sub-62693953 , fMRI: list index out of range
sub: sub-64505110 , fMRI: list index out of range
sub: sub-65580391 , fMRI: list index out of range
sub: sub-72277580 , fMRI: list index out of range
sub: sub-74536966 , fMRI: list index out of range
sub: sub-81185560 , fMRI: list index out of range
sub: sub-85520889 , fMRI: list index out of range
sub: sub-99911705 , fMRI: list index out of range


In [5]:
# Save dataframe
df.to_csv(op.join(bids, "code", "slm_data3_with_qc.csv"))