# Masking ABA data using anatomy

In [29]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
%matplotlib inline
%run nifti_tools.ipynb

In [57]:
# Loading the neuroanatomy: read the annotation NIfTI file into a vector.
# nifti_to_vector is not defined in this notebook; presumably it is provided
# by the `%run nifti_tools.ipynb` in the imports cell -- TODO confirm.
anat_path = '/data/bioprotean/ABA/PCA/80_variance/allen_annot200.nii'
anat_vec = nifti_to_vector(anat_path)

In [58]:
# Getting indices of non-zero values.
# np.nonzero returns a tuple of index arrays (one per dimension), so
# `indices` is a tuple rather than a plain array.
indices = np.nonzero(anat_vec)

In [65]:
# Masking the anatomy: keep only the entries at the non-zero positions
anat_masked = anat_vec[indices]
# Display the shape of the masked vector (one entry per non-zero position)
anat_masked.shape

(63113,)

In [69]:
# Reconstructing the anatomy: scatter the masked values back into a zero
# vector. The length is taken from anat_vec instead of a hard-coded constant
# so the check keeps working if a different annotation file is loaded.
rc = np.zeros(anat_vec.shape)
rc[indices] = anat_masked

# Checking the reconstruction: True means masking followed by scattering
# reproduces the original anatomy vector exactly
np.array_equal(rc, anat_vec)

True

In [26]:
# Saving indices to file so the mask can be re-used later
# (reconstruct_ABA loads this same path).
# `indices` is the tuple returned by np.nonzero; np.save converts it to an
# array before writing.
np.save('/data/bioprotean/ABA/MEMMAP/genes_list/mask_indices.npy', indices)

In [23]:
# Open the main voxel * gene matrix as a read-only float32 memmap
# (159326 rows, 2941 columns); nothing is copied into memory here.
X = np.memmap(
    '/data/bioprotean/ABA/MEMMAP/genes_list/finalgenes_T.mymemmap',
    dtype='float32',
    mode='r',
    shape=(159326, 2941),
)

In [24]:
# Masking by the indices: select the rows of X at the non-zero anatomy
# positions (all columns are kept)
X_mask = X[indices]
# Display the shape of the masked matrix
X_mask.shape

(63113, 2941)

In [25]:
# Saving the masked gene matrix into a new float32 memmap on disk.
# The shape is taken from X_mask so the file always matches the data.
X_save = np.memmap('/data/bioprotean/ABA/MEMMAP/genes_list/finalgenes_mask.mymemmap',
                   dtype='float32', mode='w+', shape=X_mask.shape)

X_save[:] = X_mask[:]

# Write the changes to disk now; without this they are only flushed when the
# memmap object is deleted.
X_save.flush()

In [27]:
# Replace every negative entry of the masked matrix with 0;
# all other entries are kept unchanged
X_mask_pos = np.where(
    X_mask < 0,
    0,
    X_mask,
)

In [28]:
# Saving the masked, non-negative gene matrix into a new float32 memmap.
# The shape is taken from X_mask_pos so the file always matches the data.
X_save = np.memmap('/data/bioprotean/ABA/MEMMAP/genes_list/finalgenes_mask_pos.mymemmap',
                   dtype='float32', mode='w+', shape=X_mask_pos.shape)

X_save[:] = X_mask_pos[:]

# Write the changes to disk now; without this they are only flushed when the
# memmap object is deleted.
X_save.flush()

In [30]:
# Standardizing the matrix with StandardScaler (default settings)
X_std = StandardScaler().fit_transform(X_mask_pos)

# Saving the standardized matrix into a new float32 memmap whose shape is
# taken from X_std rather than hard-coded
X_save = np.memmap('/data/bioprotean/ABA/MEMMAP/genes_list/finalgenes_mask_pos_std.mymemmap',
                   dtype='float32', mode='w+', shape=X_std.shape)

X_save[:] = X_std[:]

# Write the changes to disk now; without this they are only flushed when the
# memmap object is deleted.
X_save.flush()

In [31]:
# Saving the standardized matrix (X_std) as an NPY file
np.save('/data/bioprotean/ABA/MEMMAP/genes_list/ABA_mask_pos_std.npy', X_std)

In [34]:
# L2 normalizing the matrix along axis=1 (each row is scaled independently);
# copy=True leaves X_mask_pos untouched
X_norm = normalize(X_mask_pos, norm='l2', axis=1, copy=True)

# Saving the normalized matrix into a new float32 memmap whose shape is
# taken from X_norm rather than hard-coded
X_save = np.memmap('/data/bioprotean/ABA/MEMMAP/genes_list/finalgenes_mask_pos_L2.mymemmap',
                   dtype='float32', mode='w+', shape=X_norm.shape)

X_save[:] = X_norm[:]

# Write the changes to disk now. X_save is not rebound after this cell, so
# without an explicit flush the data would stay buffered until the object is
# deleted.
X_save.flush()

In [35]:
# Saving the L2-normalized matrix (X_norm) as an NPY file
np.save('/data/bioprotean/ABA/MEMMAP/genes_list/ABA_mask_pos_L2.npy', X_norm)

In [78]:
def reconstruct_ABA (vector, array_3D = False,
                     indices_path = '/data/bioprotean/ABA/MEMMAP/genes_list/mask_indices.npy',
                     volume_shape = (67,58,41)):
    '''
    This function reconstructs the masked vector to the original, unmasked
    shape: (159326,) with the default volume_shape.
    
    Positions listed in the index file receive the values of `vector`;
    every other position is left at 0.
    
    Args:
        vector: vec
            The masked vector (one value per saved mask index).
        array_3D: bool, default = False
            if True, the output is reshaped to volume_shape,
            i.e. (67,58,41) by default.
        indices_path: str, default = path of mask_indices.npy
            NPY file holding the mask indices.
        volume_shape: tuple of int, default = (67,58,41)
            Shape of the full volume; its product is the length of the
            flat output vector.
    
    Returns:
        output: array
            output vector/array (float64, as created by np.zeros)
    '''
    
    # Loading the indices from file.
    # NOTE(review): the notebook saves the tuple returned by np.nonzero, so
    # the loaded array is presumably of shape (1, n); the assignment below
    # then relies on broadcasting a length-n vector -- confirm if the index
    # file is ever regenerated differently.
    indices = np.load(indices_path)
    
    # Reconstructing the array: start from zeros over the whole volume and
    # scatter the masked values into their original positions
    output = np.zeros(int(np.prod(volume_shape)),)
    output[indices] = vector
    
    # If 3D array is favored
    if array_3D:
        output = output.reshape(volume_shape)
    
    return output