In [2]:
import sys
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
%run nifti_tools.ipynb
%matplotlib inline

**Modifying the annotations list**

In [None]:
# Load the list of annotation structures and their acronyms from CSV into a DataFrame.
annotations_df = pd.read_csv('/data/bioprotean/ABA/PCA/similarity/neuroantomy/annotations_list.csv')
# Bare name as the last expression: the notebook renders the frame as this cell's output.
annotations_df

**Adding a new True/False column indicating whether each region is used in the 200 um annotations file**

In [None]:
# Load the annotations NIFTI file and keep its contents as a vector.
# NOTE(review): nifti_to_vector is not defined in this notebook; presumably it is provided by
# nifti_tools.ipynb, which the first cell executes via %run -- confirm.
annotations_file = '/data/bioprotean/ABA/PCA/similarity/neuroantomy/allen_annot200.nii'
annotations_vector = nifti_to_vector(annotations_file)

In [None]:
# Add the result column, initialised to False for every region; a later cell switches
# the entries to True for regions whose ID is found in the annotation vector.
annotations_df['in_200_atlas'] = False
# Display the frame with the new column.
annotations_df

In [None]:
# Flag every region of the dataframe whose ID occurs in the annotation vector.
# The distinct labels of the volume are extracted once, instead of scanning the
# whole volume again for every region.
atlas_ids = np.unique(annotations_vector)

# Compare in the volume's own dtype so the result does not depend on NumPy's
# implicit int/float promotion rules.
# NOTE(review): if the volume stores its labels as floats, very large region IDs may
# have been rounded when the file was written; casting the listed IDs the same way
# keeps them matching -- confirm the dtype returned by nifti_to_vector.
region_ids = annotations_df['ant_id'].to_numpy().astype(atlas_ids.dtype)

# Assign the whole column in one step. The previous chained form
# `annotations_df.in_200_atlas[i] = True` can end up writing to a temporary copy
# (pandas SettingWithCopyWarning) and also assumed a default 0..n-1 row index.
annotations_df['in_200_atlas'] = np.isin(region_ids, atlas_ids)

In [None]:
# NOTE(review): `copy_df` is not assigned in any visible cell of this notebook, so on a
# fresh kernel this cell raises NameError. Presumably a leftover from a deleted cell --
# confirm, then remove the cell or define the variable.
copy_df

In [None]:
# Display the dataframe to inspect the in_200_atlas column.
annotations_df

In [None]:
# Count how many regions are flagged True vs. False in the in_200_atlas column.
annotations_df.in_200_atlas.value_counts()

In [22]:
# Save the annotated list (including the in_200_atlas column) to a new CSV file.
# NOTE(review): to_csv is called with its default index=True, so the row index is written
# as an extra unnamed first column; pass index=False if that column is not wanted.
annotations_df.to_csv('/data/bioprotean/ABA/PCA/similarity/neuroantomy/new_list.csv')

# Overlap with one brain region: "IC"

In [28]:
# What cluster has the highest overlap with this region?
def max_overlap_cluster_region(ant_path, cluster_path, ant_id):
    """
    Find the cluster that has the highest overlap with the given brain region.

    The overlap of a cluster is the number of voxels that carry both the region ID
    (in the annotations file) and the cluster label (in the cluster file), divided
    by the total number of voxels of the region.

    Every label that actually occurs in the cluster file is considered; labels do
    not have to be the consecutive integers 0..n-1.

    Arguments:
        ant_path (str): Path to the NIFTI annotations file
        cluster_path (str): Path to the NIFTI clustered file
        ant_id (int): ID of the region to measure overlap with

    Returns:
        max_cluster_id (int): ID of the cluster with the maximum overlap with the region
            (the lowest label wins when several clusters tie)
        max_overlap_ratio (float): Ratio of the maximum overlap

    Raises:
        ValueError: If the two files do not yield arrays of the same shape, or if
            no voxel of the annotations file carries ``ant_id`` (the ratio would
            be a division by zero).
    """

    # Loading the annotations and cluster file
    ant_vec = np.asarray(nifti_to_vector(ant_path))
    cluster_vec = np.asarray(nifti_to_vector(cluster_path))

    # The voxel-wise comparison below only makes sense on aligned arrays
    if ant_vec.shape != cluster_vec.shape:
        raise ValueError(
            "Annotation and cluster data have different shapes: "
            "{} vs {}".format(ant_vec.shape, cluster_vec.shape)
        )

    # Boolean mask of the voxels belonging to the brain region
    region_mask = ant_vec == ant_id

    # Number of voxels within that region
    count_region_voxels = int(np.count_nonzero(region_mask))
    if count_region_voxels == 0:
        raise ValueError(
            "Region ID {} does not occur in the annotations file".format(ant_id)
        )

    # Restrict the cluster labels to the region and count each label once.
    # This replaces one full pass over the volume per cluster and, unlike
    # iterating over range(number_of_labels), uses the real label values.
    labels_in_region, overlap_counts = np.unique(
        cluster_vec[region_mask], return_counts=True
    )

    # np.unique returns labels sorted ascending and argmax returns the first
    # maximum, so ties resolve to the lowest label.
    best = int(np.argmax(overlap_counts))
    max_cluster_id = int(labels_in_region[best])
    max_overlap_ratio = float(overlap_counts[best] / count_region_voxels)

    return max_cluster_id, max_overlap_ratio

In [67]:
# Cluster volume to compare against the annotation atlas; `annotations_file` is the
# atlas path assigned in an earlier cell of this notebook.
cluster_path = '/data/bioprotean/ABA/PCA/clusters/nclusters_fixed160.nii'
# NOTE(review): 811 is passed as the annotation ID of the region of interest; the section
# heading names the region "IC" -- confirm that 811 is the intended ID.
result = max_overlap_cluster_region(annotations_file, cluster_path, 811)
# Displays the (cluster ID, overlap ratio) tuple returned by the function.
result

(80, 0.924812030075188)

In [None]:
'''
4/9/2021
Momo
Inconsistency seen in the number of regions existing in the 200 um resolution atlas.
Re-doing the check for all the values existing and saving to a new csv file.
'''

In [4]:
# Load the list of annotation structures and acronyms from CSV into a DataFrame.
ant_df = pd.read_csv('/data/bioprotean/ABA/PCA/similarity/neuroantomy/allen_structures.csv')
# Show only the first rows as the cell output.
ant_df.head()

Unnamed: 0,structure,acronym,ish_id,ant_id
0,"""root""","""root""",-1.0,997
1,"""Basic cell groups and regions""","""grey""",0.0,8
2,"""Cerebrum""","""CH""",70.0,567
3,"""Cerebral cortex""","""CTX""",85.0,688
4,"""Cortical plate""","""CTXpl""",86.0,695


In [5]:
# Load the annotations NIFTI file and keep its contents as a vector.
# This is the same file path that an earlier cell stores in `annotations_file`.
# NOTE(review): nifti_to_vector is presumably provided by nifti_tools.ipynb (run in the
# first cell) -- confirm.
ant_path = '/data/bioprotean/ABA/PCA/similarity/neuroantomy/allen_annot200.nii'
ant_vec = nifti_to_vector(ant_path)

In [7]:
# Add the result column, initialised to False for every region; a later cell switches
# the entries to True for regions whose ID occurs in the annotation vector.
ant_df['in_200_atlas'] = False
# Show the first rows to confirm the new column is present.
ant_df.head()

Unnamed: 0,structure,acronym,ish_id,ant_id,in_200_atlas
0,"""root""","""root""",-1.0,997,False
1,"""Basic cell groups and regions""","""grey""",0.0,8,False
2,"""Cerebrum""","""CH""",70.0,567,False
3,"""Cerebral cortex""","""CTX""",85.0,688,False
4,"""Cortical plate""","""CTXpl""",86.0,695,False


In [10]:
# Collect the distinct values present in the annotation vector
# (np.unique returns them sorted in ascending order).
IDs_list = np.unique(ant_vec)
# The shape shows how many distinct values the annotation vector contains.
IDs_list.shape

(594,)

In [11]:
# Changing False to True if the value exists in the IDs_list
for i, ant_id in enumerate(ant_df['ant_id']):
    if ant_id in IDs_list:
        ant_df['in_200_atlas'][i] = True

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [12]:
# Show the first rows to check the updated in_200_atlas flags.
ant_df.head()

Unnamed: 0,structure,acronym,ish_id,ant_id,in_200_atlas
0,"""root""","""root""",-1.0,997,True
1,"""Basic cell groups and regions""","""grey""",0.0,8,False
2,"""Cerebrum""","""CH""",70.0,567,False
3,"""Cerebral cortex""","""CTX""",85.0,688,False
4,"""Cortical plate""","""CTXpl""",86.0,695,False


In [None]:
'''
There are some values in annotation.nii that do not exist in the list?
'''

In [15]:
# Making a new dataframe where only regions in 200 um exist
ant200_df = ant_df[ant_df['in_200_atlas']==True]
ant200_df.reset_index(drop=True, inplace=True)

In [16]:
# Show the first rows of the filtered dataframe.
ant200_df.head()

Unnamed: 0,structure,acronym,ish_id,ant_id,in_200_atlas
0,"""root""","""root""",-1.0,997,True
1,"""Frontal pole, layer 1""","""FRP1""",998.0,68,True
2,"""Frontal pole, layer 2/3""","""FRP2/3""",1073.0,667,True
3,"""Primary motor area, Layer 1""","""MOp1""",888.0,320,True
4,"""Primary motor area, Layer 2/3""","""MOp2/3""",966.0,943,True


In [19]:
# Distinct annotation IDs kept in ant200_df, as a sorted NumPy array.
regions_of_df = np.unique(ant200_df['ant_id'])

In [20]:
# Print every value of the annotation volume that has no matching ID in the dataframe.
# The listed IDs are cast to the dtype of IDs_list before comparing, so both sides are
# compared in the same numeric type.
# NOTE(review): the atlas values print as floats; if they are stored as float32, large
# region IDs are rounded in the volume and an int-vs-float comparison reports them as
# missing even though the region is listed -- confirm the dtype of the atlas data.
listed_ids = np.asarray(regions_of_df).astype(IDs_list.dtype)
for ID in IDs_list[~np.isin(IDs_list, listed_ids)]:
    print(ID)

0.0
182305700.0
182305710.0
312782560.0
312782600.0
312782660.0
484682460.0
526157200.0
527697000.0
549009200.0
560581570.0
563807400.0
576073700.0
589508400.0
589508500.0
599626940.0
606826600.0
606826700.0
607344830.0
614454300.0
