# Computing the overlap of Jacobian and brain regions
Running on RAG2 Jacobian average and Allen Brain Annotations atlas.

In [1]:
# Importing libraries
import numpy as np
import nibabel as nib
import os
import pandas as pd
%run nifti_tools.ipynb

In [24]:
def overlap_with_jacobian(jac_file, ant_file, regions_file, sort_values=False, output_csv=False):
    """
    Computes the ratio of (overlapping brain regions and Jacobian)/Jacobian voxels,
    and returns a dataframe including these ratios and saves it as csv file (optional).
    
    Args:
        jac_file (str): The path to the Jacobian nii file.
        ant_file (str): The path to the Annotations nii file.
        regions_file (str): The path to the Pandas dataframe including names
            of brain regions and correspoing values.
        output_csv (str, optional): The path for ouputting csv file if wanted.
            Default set to False.
    
    Returns:
        output_df = new Pandas dataframe with a column for the ratios.
    """
    
    # Loading input files
    regions_df = pd.read_csv(regions_file)
    jac_vec = nifti_to_vector(jac_file)
    ant_vec = nifti_to_vector(ant_file)
    
    # Making the ouput dataframe by copying the input list of regions
    output_df = regions_df.copy()
    output_df['jac_overlap_ratio'] = np.nan
    
    # Converting Jacobian vector to a binary vector in case it is not
    jac_binary = np.where(jac_vec!=0, 1, 0)
    jac_binary_count = np.sum(jac_binary)
    
    # Looping over rows of regions dataframe to count the ratio of overlap
    for count, ant_id in enumerate(output_df['ant_id']):
        
        # Masking the ant_vec for the selected region only
        single_region_vec = np.where(ant_vec == ant_id, 1, 0)
        
        # Masking the selected region for the overlapping voxels with Jacobian
        region_overlap_vec = np.where((single_region_vec != 0) & (jac_binary != 0), 1, 0)
        
        # Counting the ratio over total Jacobian voxels
        ratio = np.sum(region_overlap_vec) / jac_binary_count
        output_df.jac_overlap_ratio[count] = ratio
    
    if sort_values:
        # Sorting the brain regions by overlap ratio
        output_df.sort_values(by=['jac_overlap_ratio'], ascending=False, na_position='last', inplace=True)

        # Resetting the index
        output_df.reset_index(drop=True, inplace=True)
    
    # Saving the Pandas Dataframe to file
    try:
        output_df.to_csv(output_csv)
    except:
        print("I can save it in CSV if you want...")
    
    return output_df

In [25]:
def overlap_with_brain_region(jac_file, ant_file, regions_file, sort_values=False, output_csv=False):
    """
    Computes the ratio of (overlapping brain regions and Jacobians)/brain region,
    and returns a dataframe including these ratios as well as saving them as csv file.
    
    Args:
        jac_file (str): The address to the Jacobian nii file.
        ant_file (str): The address to the Annotations nii file.
        regions_file (str): The address to the Pandas dataframe including names
            of brain regions and correspoing values.
        output_csv (str, optional): The address for ouputting csv file if wanted.
            Default set to False.
    
    Returns:
        output_df = new Pandas dataframe with a column for the ratios
    """
    
    # Loading input files
    regions_df = pd.read_csv(regions_file)
    jac_vec = nifti_to_vector(jac_file)
    ant_vec = nifti_to_vector(ant_file)
    
    # Making the ouput dataframe by copying the input list of regions
    output_df = regions_df.copy()
    output_df['region_overlap_ratio'] = np.nan
    
    # Converting Jacobian vector to a binary vector in case it is not
    jac_binary = np.where(jac_vec!=0, 1, 0)
    
    # Looping over rows of regions dataframe to count the ratio of overlap
    for count, ant_id in enumerate(output_df['ant_id']):
        
        # Masking the ant_vec for the selected region only
        single_region_vec = np.where(ant_vec == ant_id, 1, 0)
        single_region_count = np.sum(single_region_vec)
        
        # Masking the selected region for the overlapping voxels with Jacobian
        region_overlap_vec = np.where((single_region_vec != 0) & (jac_binary != 0), 1, 0)
        
        # Counting the ratio over total Jacobian voxels
        ratio = np.sum(region_overlap_vec) / single_region_count
        output_df.region_overlap_ratio[count] = ratio
    
    if sort_values:
        # Sorting the brain regions by overlap ratio
        output_df.sort_values(by=['region_overlap_ratio'], ascending=False, na_position='last', inplace=True)

        # Resetting the index
        output_df.reset_index(drop=True, inplace=True)
    
    # Saving the Pandas Dataframe to file
    try:
        output_df.to_csv(output_csv)
    except:
        print("I can save it in CSV if you want...")
    
    return output_df

In [62]:
def combine_overlap_df (jac_overlap_df_file, region_overlap_df_file, n_top, output_csv=False):
    """
    Combines the Pandas Dataframes of overlapping jacobian and brain regions
    ratios computed over Jacobian and Brain region for top n regions,
    and removes duplicate regions.
    
    Args:
        jac_overlap_df_file (str): The address to Jacobian overlap input file.
        region_overlap_df_file (str): The address to Brain region overlalp input file.
        n_top (int): Number of top brain regions from each file to keep.
    
    Returns:
        combined_df (DataFrame): Pandas dataframe including top choices from two analyses.
    """
    
    # Loading the Pandas Datframes from the input addresses
    jac_overlap_df = pd.read_csv(jac_overlap_df_file)
    region_overlap_df = pd.read_csv(region_overlap_df_file)
    
    # Selecting top n rows of each
    topn_jac_df = jac_overlap_df.iloc[:n_top]
    topn_region_df = region_overlap_df.iloc[:n_top]
    
    # Merging the Dataframes
    combined_df = pd.concat([topn_jac_df, topn_region_df], ignore_index=True, axis=0)
    
    try:
        combined_df.drop(columns=['Unnamed: 0'], inplace=True)
    except:
        pass
    
    # Removing duplicate brain regions
    combined_df.drop_duplicates(subset=['structure'], ignore_index=True, inplace=True)
    
    # Sorting the rows by both overlap ratios
    combined_df.sort_values(by=['jac_overlap_ratio', 'region_overlap_ratio'],\
    ascending=False, inplace=True, na_position='last', ignore_index=True)
    
    # Saving the Pandas Dataframe to file
    try:
        combined_df.to_csv(output_file)
    except:
        print("I can save it in CSV if you want...")
    
    return combined_df

In [60]:
def combined_overlap_analysis (jac_file, ant_file, regions_file, n_top, output_file=False):
    """
    Computes the ratio of overlap of the Jacobian and brain regions both divided by
    th Jacobian and the Brain region and outputs a csv file including top n brain regions
    of both analyses.
    
    Args:
        jac_file (str): The address to the Jacobian nii file.
        ant_file (str): The address to the Annotations nii file.
        regions_file (str): The address to the Pandas dataframe including names
            of brain regions and correspoing values
        output_file (str, optional): The address for ouputting csv file if wanted.
            Default set to False.
    
    Returns:
        combined_df (DataFrame): Pandas dataframe including top choices from two analyses
    """
    
    # Running analyses separately
    region_overlap_df = overlap_with_brain_region(jac_file, ant_file, regions_file, output_csv=False)
    jac_overlap_df = overlap_with_jacobian(jac_file, ant_file, regions_file, output_csv=False)
    
    # Selecting top n rows of each
    topn_jac_df = jac_overlap_df.iloc[:n_top]
    topn_region_df = region_overlap_df.iloc[:n_top]
    
    # Merging the Dataframes
    combined_df = pd.concat([topn_jac_df, topn_region_df], ignore_index=True, axis=0)
    
    # Dropping the column of old indices
    try:
        combined_df.drop(columns=['Unnamed: 0'], inplace=True)
    except:
        pass
    
    # Removing duplicate brain regions
    combined_df.drop_duplicates(subset=['structure'], ignore_index=True, inplace=True)
    
    # Sorting the rows by both overlap ratios
    combined_df.sort_values(by=['jac_overlap_ratio', 'region_overlap_ratio'],\
    ascending=False, inplace=True, na_position='last', ignore_index=True)
    
    # Saving the Pandas Dataframe to file
    try:
        combined_df.to_csv(output_file)
    except:
        print("I can save it in CSV if you want...")
    
    return combined_df

In [3]:
regions_file = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/ant200_list.csv'
regions_df = pd.read_csv(regions_file)
regions_df.head()

Unnamed: 0,full_ant_index,structure,acronym,ish_id,ant_id,in_200_atlas
0,0,"""root""","""root""",-1.0,997,True
1,7,"""Frontal pole, layer 1""","""FRP1""",998.0,68,True
2,8,"""Frontal pole, layer 2/3""","""FRP2/3""",1073.0,667,True
3,19,"""Primary motor area, Layer 1""","""MOp1""",888.0,320,True
4,20,"""Primary motor area, Layer 2/3""","""MOp2/3""",966.0,943,True


In [33]:
jac_file = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/\
MKO_MWTavg_invjcb_avg_thrsh_1set_qw_masked_neg_ctrst_clust_binary_200.nii'

ant_file = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/allen_annot200.nii'

regions_file = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/ant200_list.csv'

output_csv = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/RAG2_neg_overlap_anat.csv'

jac_df = overlap_with_jacobian (jac_file, ant_file, regions_file)
region_df = overlap_with_brain_region (jac_file, ant_file, regions_file)

complete_df = region_df.copy()
complete_df['jac_overlap_ratio'] = jac_df ['jac_overlap_ratio']

complete_df.to_csv(output_csv)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


I can save it in CSV if you want...
I can save it in CSV if you want...


In [34]:
complete_df.head()

Unnamed: 0,full_ant_index,structure,acronym,ish_id,ant_id,in_200_atlas,region_overlap_ratio,jac_overlap_ratio
0,0,"""root""","""root""",-1.0,997,True,0.069196,0.010389
1,7,"""Frontal pole, layer 1""","""FRP1""",998.0,68,True,0.3,0.002011
2,8,"""Frontal pole, layer 2/3""","""FRP2/3""",1073.0,667,True,0.25,0.002346
3,19,"""Primary motor area, Layer 1""","""MOp1""",888.0,320,True,0.0,0.0
4,20,"""Primary motor area, Layer 2/3""","""MOp2/3""",966.0,943,True,0.0,0.0


In [43]:
dice_file = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/RAG2_neg_DICE_anat.csv'
dice_df = pd.read_csv(dice_file)
dice_df.head()

Unnamed: 0.1,Unnamed: 0,full_ant_index,structure,acronym,ish_id,ant_id,in_200_atlas,DICE_neg_jcb_anat
0,0,0,"""root""","""root""",-1.0,997,True,0.018065
1,1,7,"""Frontal pole, layer 1""","""FRP1""",998.0,68,True,0.003995
2,2,8,"""Frontal pole, layer 2/3""","""FRP2/3""",1073.0,667,True,0.004648
3,3,19,"""Primary motor area, Layer 1""","""MOp1""",888.0,320,True,0.0
4,4,20,"""Primary motor area, Layer 2/3""","""MOp2/3""",966.0,943,True,0.0


In [44]:
complete_df['DICE_neg_jcb_anat'] = dice_df['DICE_neg_jcb_anat']

In [45]:
complete_df.head()

Unnamed: 0,full_ant_index,structure,acronym,ish_id,ant_id,in_200_atlas,region_overlap_ratio,jac_overlap_ratio,DICE_neg_jcb_anat
0,0,"""root""","""root""",-1.0,997,True,0.069196,0.010389,0.018065
1,7,"""Frontal pole, layer 1""","""FRP1""",998.0,68,True,0.3,0.002011,0.003995
2,8,"""Frontal pole, layer 2/3""","""FRP2/3""",1073.0,667,True,0.25,0.002346,0.004648
3,19,"""Primary motor area, Layer 1""","""MOp1""",888.0,320,True,0.0,0.0,0.0
4,20,"""Primary motor area, Layer 2/3""","""MOp2/3""",966.0,943,True,0.0,0.0,0.0


In [46]:
output_csv = '/data/bioprotean/RAG2/AVG/MWT_avg/to_allen/overlap/200um/RAG2_neg_comp_overlap_anat.csv'
complete_df.to_csv(output_csv)