# pySONATA
A Python-based API for working with SONATA files

In [1]:
import glob
import pprint
import numpy as np
import pandas as pd

from sonata.io import File

## Reading SONATA Network files

In [2]:
# Open a single network: one HDF5 data file plus the CSV holding its node types.
nodes_h5_path = '../../../examples/300_cells/network/internal_nodes.h5'
node_types_csv_path = '../../../examples/300_cells/network/internal_node_types.csv'
net = File(data_files=nodes_h5_path, data_type_files=node_types_csv_path)

# Report which kinds of data the opened file exposes.
print('Contains nodes: {}'.format(net.has_nodes))
print('Contains edges: {}'.format(net.has_edges))

Contains nodes: True
Contains edges: False


In [3]:
# Read in multiple files at a time by passing lists of paths instead of single paths.
data_paths = [
    '../../../examples/300_cells/network/internal_nodes.h5',
    '../../../examples/300_cells/network/internal_internal_edges.h5',
]
type_paths = [
    '../../../examples/300_cells/network/internal_node_types.csv',
    '../../../examples/300_cells/network/internal_internal_edge_types.csv',
]
net = File(data_files=data_paths, data_type_files=type_paths)

# With an edges file included, both checks are expected to report True.
print('Contains nodes: {}'.format(net.has_nodes))
print('Contains edges: {}'.format(net.has_edges))

Contains nodes: True
Contains edges: True


## Working with nodes 
SONATA files are divided into nodes (e.g. cells) and edges (e.g. synapses, junctions). The nodes themselves are further divided into different **populations**. Different populations can be assumed to have been independently created, with their own independent set of node ids, models, etc.


In [4]:
# Get the /nodes/ root once and reuse it below instead of fetching net.nodes again.
file_nodes = net.nodes

# In this example there is only one node population that we've called "internal"
print('Node populations in File: {}'.format(file_nodes.population_names))

Node populations in File: [u'internal']


In [5]:
# Look up the node population named "internal", then report its size and node ids.
internal_nodes = net.nodes['internal']
n_internal = len(internal_nodes)
print('Population "internal" contains {} nodes'.format(n_internal))
print('node ids: {}'.format(internal_nodes.node_ids))

Population "internal" contains 300 nodes
node ids: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233


Use the *to_dataframe* method to quickly analyze the node data

In [6]:
# Convert the population to a table and display its first rows.
internal_nodes_df = internal_nodes.to_dataframe()
internal_nodes_df.head()

Unnamed: 0,node_type_id,node_id,rotation_angle_yaxis,x,y,z,ei,model_processing,morphology,model_template,model_type,model_name
0,100,0,0.986542,22.343432,-79.627003,-1.534002,e,aibs_perisomatic,Scnn1a_473845048_m,nml:Cell_472363762.cell.nml,biophysical,Scnn1a
1,100,1,2.22122,-2.594341,-43.376118,24.234707,e,aibs_perisomatic,Scnn1a_473845048_m,nml:Cell_472363762.cell.nml,biophysical,Scnn1a
2,100,2,3.906942,24.310012,48.652922,11.593498,e,aibs_perisomatic,Scnn1a_473845048_m,nml:Cell_472363762.cell.nml,biophysical,Scnn1a
3,100,3,0.269737,7.004998,-85.896637,-4.738925,e,aibs_perisomatic,Scnn1a_473845048_m,nml:Cell_472363762.cell.nml,biophysical,Scnn1a
4,100,4,2.950101,26.716275,15.561911,-23.239033,e,aibs_perisomatic,Scnn1a_473845048_m,nml:Cell_472363762.cell.nml,biophysical,Scnn1a


You can use the *get_node_id* method to fetch information about a single node. Each node will be returned as a *sonata.Node* object with attributes and properties that can be accessed like a dictionary

In [7]:
# Fetch the node in the population whose node_id is 1.
node1 = internal_nodes.get_node_id(1)
node1_class = type(node1)
print('class: {}'.format(node1_class))
print('node_id: {}'.format(node1.node_id))
print('node props: {}'.format(node1))

# Individual properties are read with dictionary-style indexing.
coords = tuple(node1[axis] for axis in ('x', 'y', 'z'))
print(coords)

class: <class 'sonata.io.node.Node'>
node_id: 1
node props: {'node_type_id': 100, 'ei': 'e', y: -43.37611757606959, 'model_processing': 'aibs_perisomatic', 'model_type': 'biophysical', 'node_id': 1, 'model_template': 'nml:Cell_472363762.cell.nml', x: -2.5943407870091946, 'morphology': 'Scnn1a_473845048_m', rotation_angle_yaxis: 2.2212202206885547, z: 24.23470695483876, 'model_name': 'Scnn1a'}
(-2.5943407870091946, -43.37611757606959, 24.23470695483876)


You can also use the *get_row* method to fetch the i'th row in the table. But it's important to note that node_ids do not have to be ordered or contiguous - so the i'th row may not always contain the node with node_id i.

In [8]:
# Fetch the node stored at row 1 and check whether it is the node with node_id 1.
node_row1 = internal_nodes.get_row(1)
same_node_id = node1.node_id == node_row1.node_id
print(same_node_id)

True


### Iterating through Nodes
To iterate through all nodes in the population:

In [9]:
# Scan every node in the population and remember the one farthest from the
# origin. numpy (np) is already imported in the notebook's first cell, so it
# is not imported again here.
max_dist = 0.0
max_dist_node = None
for node in internal_nodes:
    # Sanity check: the ids in this 300-node example population run 0..299.
    assert 0 <= node.node_id < 300
    # Euclidean distance of the node's (x, y, z) position from the origin.
    ndist = np.sqrt(node['x']**2 + node['y']**2 + node['z']**2)
    if ndist > max_dist:
        max_dist = ndist
        max_dist_node = node

print('Node #{} has maximum distance {}um from origin'.format(max_dist_node.node_id, max_dist))


Node #127 has maximum distance 119.942558428um from origin


Using the get_rows method we can pass in a list or range of the rows we want to analyze

In [10]:
# Visit every other row of the table (rows 0, 2, 4, ...).
every_other_row = range(0, len(internal_nodes), 2)
n_count = 0
for node in internal_nodes.get_rows(every_other_row):
    n_count += 1
    assert node.node_id % 2 == 0
print('processed {} nodes'.format(n_count))

# An explicit list of row indices is accepted as well.
selected_rows = [0, 100, 200]
n_count = 0
for node in internal_nodes.get_rows(selected_rows):
    assert node.node_id % 100 == 0
    n_count += 1
print('processed {} nodes'.format(n_count))

processed 150 nodes
processed 3 nodes


### Filtering by attributes
Use the filter method to fetch nodes with certain properties

In [11]:
# Count the nodes whose 'morphology' attribute equals the requested value.
wanted_morphology = 'Nr5a1_471087815_m'
n_count = 0
for node in internal_nodes.filter(morphology=wanted_morphology):
    assert node['morphology'] == wanted_morphology
    n_count += 1

print('Found {} nodes that use Nr5a1_471087815_m morphology'.format(n_count))

Found 80 nodes that use Nr5a1_471087815_m morphology


### Vectorized node attributes

SONATA stores all data into one or more **node groups**. Every node belongs to one node group, with every node group containing the same attributes. Thus nodes with different models (and therefore different attributes) will be split into different groups. To show the available node_groups:

In [12]:
# List the node-group ids of the population; this example has a single group, #0.
print('Model groups in internal_nodes population: {}'.format(internal_nodes.group_ids))

# Fetch the first group and show the attribute columns it stores.
first_group_id = internal_nodes.group_ids[0]
grp0 = internal_nodes.get_group(first_group_id)
print('Group {} nodes contains the following attributes \n\t {}'.format(grp0.group_id, grp0.all_columns))


Model groups in internal_nodes population: [0]
Group 0 nodes contains the following attributes 
	 [rotation_angle_yaxis, x, y, z, ei, model_processing, morphology, model_template, model_type, model_name]


Instead of looping through and finding attributes of every individual node, we can use the *get_values* method to get all attributes at once as a list or numpy array. This allows us to use vectorization to speed up our analysis:

In [13]:
# Pull each coordinate column once, then compute every distance in a single vectorized step.
xs, ys, zs = (grp0.get_values(axis) for axis in ('x', 'y', 'z'))
distances = np.sqrt(xs**2 + ys**2 + zs**2)

# Position of the largest distance, used to look up the matching node id.
max_indx = np.argmax(distances)
farthest_node_id = grp0.node_ids[max_indx]
print('Node #{} has maximum distance {}um from origin'.format(farthest_node_id, distances[max_indx]))

Node #127 has maximum distance 119.942558428um from origin


## Working with edges

Edges share a similar data structure as our nodes. Within a file there may be zero, one, or more edge populations each identified by a unique user defined population name.

In [14]:
# Edge-side counterpart of net.nodes: gives access to the file's edge populations.
file_edges = net.edges
edge_population_names = file_edges.population_names
print('Edge populations in file: {}'.format(edge_population_names))

# Select the single edge population of this example by name.
recurrent_edges = file_edges['internal_to_internal']

Edge populations in file: [u'internal_to_internal']


Every edge population contains directed connections between the nodes of one node-population to another. The attributes *target_population* and *source_population* will be important to determine what set of nodes will be used for pre-synaptic/post-synaptic connections:

In [15]:
# Source and target node populations are the same here, i.e. these are recurrent connections.
summary = 'Edge population {} contains {} connections from "{}" --> "{}"'
print(summary.format(recurrent_edges.name,
                     len(recurrent_edges),
                     recurrent_edges.source_population,
                     recurrent_edges.target_population))

Edge population internal_to_internal contains 88316 connections from "internal" --> "internal"


### Iterating over edges

When we iterate over all the edges in the edge population we are returned Edge objects. Every edge must contain a source (pre-synaptic) and target (post-synaptic) node-id, and may also contain edge attributes, which we fetch like a dictionary:

In [16]:
# Inspect only the first edge: print its endpoints, then its attributes.
for edge in recurrent_edges:
    print('{} --> {}'.format(edge.source_node_id, edge.target_node_id))
    # Function-call form of print, as used in every other cell; the bare
    # `print edge` statement is a SyntaxError on Python 3.
    print(edge)
    # Edge attributes are read with dictionary-style indexing.
    assert isinstance(edge['syn_weight'], float)

    break  # Iterating through all the edges can take a long time, do so at your own risk

1 --> 0
{pos_z: 276.13055401859, pos_x: 60.45881079218742, pos_y: 3.356650750251845, 'dynamics_params': 'AMPA_ExcToExc.json', 'edge_type_id': 100, 'target_query': "ei=='e'", 'delay': 2.0, syn_weight: 6e-05, 'model_template': 'Exp2Syn', sec_id: 28, sec_x: 0.5, dist: 98.29584776371554, 'source_query': "ei=='e'", type: 3}


Often we may want only those edges that connect to or from a given node or set of nodes. SONATA uses special indexing, so selecting by target or source can be achieved significantly faster than iterating through all possible edges. To find all edges with a given target (or list of targets) we use the *get_target* (*get_targets*) method:

In [17]:
# Count the edges that terminate on one particular target node.
target_id = 100
con_count = 0
for edge in recurrent_edges.get_target(target_id):  # we can also use get_targets([id0, id1, ...])
    assert edge.target_node_id == target_id
    con_count += 1

print('There are {} connections onto target node #{}'.format(con_count, target_id))

There are 182 connections onto target node #100


Or we can pass a list of target node ids to *get_targets* to select the edges onto several nodes at once:

In [18]:
# get_targets selects edges by their *target* node id (the assertion below
# checks exactly that), so the total is a count of incoming connections.
# The printed message previously called them "outgoing".
n_targets = 5
con_count = 0
for edge in recurrent_edges.get_targets(range(n_targets)):
    assert edge.target_node_id < n_targets
    con_count += 1

# Divide by a float so the average is not truncated under Python 2.
print('The first {} nodes have on average {} incoming connections'.format(n_targets, con_count / float(n_targets)))

The first 5 nodes have on average 190.6 outgoing connections


### Filtering by attributes

We can use the *filter* method to find only those edges with a given matching attribute value:

In [19]:
# May take some time to complete: every edge matching the filter is visited once.
n_ampa = 0
for ampa_edge in recurrent_edges.filter(dynamics_params='AMPA_ExcToExc.json'):
    n_ampa += 1
print('{} out of {} edges use AMPA_ExcToExc parameters'.format(n_ampa, len(recurrent_edges)))

45610 out of 88316 edges use AMPA_ExcToExc parameters
