In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
#
# -- load the structure graph tables
#    (one CSV per table, named aibs_lims_<table>.csv, read into the dict `df`)
#

input_directory = "../cell_locator_atlas_packages/csv"

tables = ["structures","structure_graphs","structure_edges"]
df = {}

for table_name in tables :

    # echo the table name so progress is visible in the cell output
    print(table_name)

    csv_name = "aibs_lims_%s.csv" % table_name
    input_file = os.path.join( input_directory, csv_name )
    df[table_name] = pd.read_csv( input_file )


structures
structure_graphs
structure_edges


In [3]:
#
# -- CCF uses structure graph id = 1
#    (kept in one place so the graph and edge filters below cannot drift apart)
#
structure_graph_id = 1

#
# --- initialize substructure and superstructure columns
#     (None = "not populated yet"; the columns carry through the merge below)
#
df["structures"]['substructures'] = None
df["structures"]['superstructures'] = None

# -- find the root node of the selected graph
idx = ( df['structure_graphs']['id'] == structure_graph_id )
filtered_graphs = df['structure_graphs'][idx]
if filtered_graphs.empty :
    # fail with a readable message instead of an IndexError on the lookup below
    raise ValueError("structure graph id %d not found in structure_graphs table" % structure_graph_id)
root_node_id = filtered_graphs['root_struct_id'].iloc[0]

# -- find all child nodes in the graph
idx = ( df['structure_edges']['structure_graph_id'] == structure_graph_id )
filtered_edges = df['structure_edges'][idx]
nodes = list(filtered_edges['child_id'].values)

# -- all nodes = {root node} union {all child nodes}
nodes.append( root_node_id )

# -- filter structures table to just nodes in graph
idx = np.isin( df['structures']['id'], nodes)
filtered_structures = df['structures'][idx]

# -- join structures with edges
#    left join: a structure without an edge row (e.g. the root) gets NaN parent_id.
#    The structure id comes out of the merge as 'id_x' and becomes the index.
joined = pd.merge( filtered_structures, filtered_edges, left_on="id", right_on="child_id", how="left" )
joined = joined.set_index('id_x')

# -- the per-structure lookups that follow need exactly one row per structure id
if not joined.index.is_unique :
    duplicated_ids = sorted(set(joined.index[joined.index.duplicated()]))
    raise ValueError("structures with more than one row after joining edges: %s" % duplicated_ids)

In [4]:
#
# -- Populate the substructure and superstructure arrays
#

#
# for each structure S,
# -- add S to S's list of substructures
# -- for each superstructure SuperS of S (walking parent_id upwards),
# -- -- add S to SuperS's list of substructures
# -- -- add SuperS to S's list of superstructures
#
# The lists are accumulated in plain dicts and written to the two columns in
# one assignment at the end. Re-running this cell therefore recomputes the
# columns from scratch instead of appending duplicates to lists left over
# from an earlier run.

parent_of = joined['parent_id'].to_dict()
substructures = { node : [] for node in joined.index }
superstructures = { node : [] for node in joined.index }

for node in joined.index :

    # every structure is a member of its own substructure list
    substructures[node].append(node)

    visited = {node}
    parent = parent_of[node]

    # stop on a missing parent (NaN from the left join) or a falsy parent id
    while parent and not pd.isna(parent) :

        # parent_id is read back as a float when the column contains NaN
        parent = int(parent)

        if parent in visited :
            # a malformed edge table would otherwise make this loop run forever
            raise ValueError("cycle detected in structure graph at structure %d" % parent)
        if parent not in substructures :
            raise KeyError("parent structure %d of structure %d is not in the graph" % (parent, node))
        visited.add(parent)

        substructures[parent].append(node)
        superstructures[node].append(parent)

        parent = parent_of[parent]

# -- write the results back; a structure with no superstructure keeps None
joined['substructures'] = pd.Series(
    [ substructures[node] for node in joined.index ],
    index=joined.index, dtype=object )
joined['superstructures'] = pd.Series(
    [ superstructures[node] or None for node in joined.index ],
    index=joined.index, dtype=object )
        
        

In [5]:
def arr_to_string ( input ) :
    """Join the items of `input` into a single '/'-separated string.

    Each item is converted with str() before joining. If `input` is None,
    None is returned unchanged.
    """
    return None if input is None else '/'.join( map(str, input) )

In [6]:
# -- turn each id list into a '/'-separated string (None entries stay None)
for list_column in ('substructures', 'superstructures') :
    cc = joined[list_column].values
    joined[list_column] = [ arr_to_string(id_list) for id_list in cc ]

In [7]:
# -- move the structure id back from the index into a regular column named 'id'
joined = joined.reset_index().rename(columns={'id_x': 'id'})

In [8]:
# -- keep only the columns that go into the exported table, drop everything else
keep_columns = ['id','name', 'acronym', 'red', 'green', 'blue','substructures','superstructures']
dropped_columns = [ name for name in joined.columns if name not in keep_columns ]
joined = joined.drop(columns=dropped_columns)

In [9]:
# -- preview the first rows of the trimmed table
joined.head()

Unnamed: 0,id,name,acronym,red,green,blue,substructures,superstructures
0,1,"Tuberomammillary nucleus, ventral part",TMv,255,76,62,1,557/331/467/1097/1129/343/8/997
1,2,"Primary somatosensory area, mouth, layer 6b",SSp-m6b,24,128,100,2,345/322/453/315/695/688/567/8/997
2,3,secondary fissure,sec,170,170,170,3,1040/1024/997
3,4,Inferior colliculus,IC,255,122,255,4/811/820/828,339/313/343/8/997
4,6,internal capsule,int,204,204,204,6,784/983/1009/997


In [10]:
# -- write the table to structure_properties.csv in the working directory;
#    index=False leaves the row index out of the file
joined.to_csv('structure_properties.csv',index=False)