# Defacing pre-registration - Statistical analysis on image quality metrics (IQMs) in Python

## Load IQMs from MRIQC output

In [None]:
# Number of subjects available in the dataset.
# NOTE: this must be a plain Python assignment; the R-style `n_sub <- 580`
# is parsed by Python as the comparison `n_sub < -580` and never defines n_sub.
n_sub = 580

# Folder holding the JSON dictionaries that generated the anonymization
randomization_path = "../randomization/"
# Folder holding the MRIQC output for the shuffled (anonymized) images
data_path = "/data/derivatives/mriqc/RoyalCarpetPlot/DefacingPilotData/shuffled"

So that human raters cannot match the images with and without face of a single individual, participant identifiers were randomized by assigning 1160 randomly drawn unique identifiers. To compare the IQMs from the two conditions, we need to retrieve the participants' original identifiers using the dictionary that generated the anonymization.

In [None]:
#Load the dictionaries used to map an anonymized id back to the participant's original identifier
blind_dict_path = os.path.join(randomization_path, "DefacingPilotData_blind_dict.json")
with open(blind_dict_path) as blind_file:
    blind_dict = json.load(blind_file)

pos_dict_path = os.path.join(randomization_path, "DefacingPilotData_pos_dict.json")
with open(pos_dict_path) as pos_file:
    pos_dict = json.load(pos_file)

def get_key(my_dict,val):
    """Look up the key under which a known value is stored in a dictionary

    Parameters
    ----------
    my_dict : dictionary to search the value in
    val : value to search for

    Returns
    -------
    key : key of the first entry (in iteration order) whose value equals
        ``val``; ``None`` when no entry matches

    """
    # Lazily scan the entries so the search stops at the first match
    matching_keys = (key for key, value in my_dict.items() if val == value)
    return next(matching_keys, None)

Now that we have the tools to map MRIQC output back to the participants' original identifier, we load the IQMs into two arrays: one for original and one for defaced data.

In [None]:
## Load IQMs
# One row per participant, one column per IQM (61 columns are allocated)
iqms_original = np.zeros((n_sub, 61))
iqms_defaced = np.zeros((n_sub, 61))

# Entries of the MRIQC JSON output that are not IQMs
non_iqm_keys = (
    'bids_meta',
    'provenance',
    'size_x',
    'size_y',
    'size_z',
    'spacing_x',
    'spacing_y',
    'spacing_z',
)

# Every participant appears twice (original + defaced), hence 2 * n_sub anonymized ids
for s in range(n_sub * 2):
    iqms_path = os.path.join(
        data_path,
        'sub-{}'.format(s),
        "ses-V1",
        "anat",
        "sub-{}_ses-V1_run-1_T1w.json".format(s),
    )
    with open(iqms_path) as json_file:
        iqms = json.load(json_file)

    #Drop non-IQMs keys (remove() raises if one of them is unexpectedly absent)
    iqms_keys = list(iqms.keys())
    for non_iqm in non_iqm_keys:
        iqms_keys.remove(non_iqm)

    #Retrieve participant's original identifier
    sub = get_key(blind_dict, s)
    # The first 8 characters of the original name are looked up in pos_dict to get the row index
    pos = int(get_key(pos_dict, sub[0:8]))

    #Pick the array matching the condition encoded in the original name
    if "non_deface" in sub:
        target = iqms_original
    elif "pydeface" in sub:
        target = iqms_defaced
    else:
        raise ValueError("{} is an invalid name".format(sub))

    for i, key in enumerate(iqms_keys):
        target[pos, i] = iqms[key]

## Run MANOVA

In [None]:
# Build dataframe
# Append a condition column to each IQM array: 0 = original, 1 = defaced.
# The column height is read from the array itself rather than hard-coded, because
# np.hstack requires both operands to have the same number of rows.
i_o = np.hstack((iqms_original, np.zeros((iqms_original.shape[0], 1))))
i_d = np.hstack((iqms_defaced, np.ones((iqms_defaced.shape[0], 1))))
print(i_o.shape)
print(i_d.shape)
# Stack both conditions on top of each other: original rows first, defaced rows second
i_merge = np.vstack((i_o, i_d))
#Verify shape matches expectation
print(i_merge.shape)

iqms_df = pd.DataFrame(i_merge, columns = iqms_keys + ['Defaced'])
print(iqms_keys)

#Construct formula: all IQMs are the dependent variables, joined by '+'
dep_var = ' + '.join(iqms_keys)

#Run MANOVA with the defacing condition as the only predictor
maov = MANOVA.from_formula('{} ~ Defaced'.format(dep_var), data=iqms_df)

print(maov.mv_test())