# Single Cell Optimisation Tutorial

____________


Authors of this script:

Elisabetta Iavarone @ Blue Brain Project

Werner Van Geit @ Blue Brain Project

___
### Overview:

* In this exercise we will use the **Blue Brain Python Optimisation Library** [BluePyOpt](https://github.com/BlueBrain/BluePyOpt) to create a model template for the [NEURON simulator](https://www.neuron.yale.edu/neuron/) and to constrain the model parameters.


We first import some useful Python modules.

In [4]:
%load_ext autoreload
%autoreload

import numpy
import json
import IPython, os
from json2html import *

import matplotlib.pyplot as plt
%matplotlib notebook
plt.rcParams['figure.figsize'] = 6, 6

import collections

import efel
#import neurom
#import neurom.viewer

import bluepyopt as bpopt
import bluepyopt.ephys as ephys

import pprint
pp = pprint.PrettyPrinter(indent=2)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 1. Electrophysiology data
In this section we will use the **electrophysiological data** we have seen in the eFEL tutorial.

In [2]:
data_dir = "../eFEL/data/"

# Protocol tag found in the file name -> step name used in this tutorial.
# A list of pairs (instead of a dict literal) fixes the insertion order of
# the OrderedDicts below, and therefore the order of the subplots later on.
protocol_to_step = [('IV', 'LongStepNeg'),
                    ('APWaveform', 'ShortStepPos'),
                    ('IDRest', 'LongStepPos')]

# Store voltage data in a dictionary step_name : [list of repetitions]
steps_v_dict = collections.OrderedDict(
    [(step, []) for _tag, step in protocol_to_step])

# Store current data in a dictionary step_name : [list of repetitions]
steps_i_dict = collections.OrderedDict(
    [(step, []) for _tag, step in protocol_to_step])

# Import the glob Python module to interact with the data directory
import glob

# Use the documented glob.glob() and sort the names so that the order of the
# repetitions (and hence "Rep. N" / trace [0]) is reproducible across systems
files_list = sorted(os.path.basename(path)
                    for path in glob.glob(os.path.join(data_dir, "*.dat")))

for file_name in files_list:
    # Get channel and trace number from the file_name
    channel = int(file_name[:-4].split('_')[2][2:])
    tracenum = int(file_name[:-4].split('_')[-1])

    # Even channel numbers are voltage traces, odd ones are current traces
    target_dict = steps_v_dict if channel % 2 == 0 else steps_i_dict

    # Route the file to the step whose protocol tag appears in its name
    for protocol_tag, step_name in protocol_to_step:
        if protocol_tag in file_name:
            target_dict[step_name].append(
                numpy.fromfile(os.path.join(data_dir, file_name)))

We can now plot these data.

In [5]:
# Initialize a figure with one row per step protocol
fig1, axes = plt.subplots(len(steps_v_dict), sharey=True)

# Plot the voltage traces
for idx, step_name in enumerate(steps_v_dict):
    step_traces = steps_v_dict[step_name]
    for rep, trace in enumerate(step_traces):
        # Each trace is a flat array that is viewed as two columns
        # (time, voltage). reshape(-1, 2) lets numpy infer the row count and
        # avoids len(trace)/2, which is a float on Python 3 and is rejected
        # by reshape.
        data = trace.reshape(-1, 2)
        axes[idx].plot(data[:, 0], data[:, 1], label='Rep. ' + str(rep + 1))
    # Decorate each axis once (only when it has traces, as before)
    if step_traces:
        axes[idx].set_ylabel('Voltage (mV)')
        axes[idx].legend(loc='best')
        axes[idx].set_title(step_name)
axes[-1].set_xlabel('Time (ms)')
plt.tight_layout()

<IPython.core.display.Javascript object>

# 2. Electrophysiological features
To build a detailed neuron model, we need to quantify the electrical behavior we want to reproduce. The metrics we use are the eFeatures, that measure parameters describing for instance the shape of the action potential or the firing properties of a neuron (for examples, see [here](http://bluebrain.github.io/eFEL/eFeatures.html)).

In this particular example, we extract distinct features from the responses to negative and positive current steps.
The eFeatures extracted from the data and later from the model will be used to evaluate the results of the simulations.

In [7]:
# Extract features
import efel

# Stimulus [start, end] times for each step protocol, on the same time axis
# as the recorded traces
steps_info = {'LongStepNeg': [250, 3250], 'ShortStepPos': [250, 700], 'LongStepPos': [250, 2950]}


def get_features(data):
    """Extract eFEL features per step protocol and summarise them.

    Args:
        data: mapping of step name ('LongStepNeg', 'ShortStepPos',
            'LongStepPos') to a list of flat numpy arrays, each viewed as
            two columns (time, voltage).

    Returns:
        Nested defaultdict of the form
        {step_name: {efeature_name: {'mean': float, 'std': float}}},
        where mean and std are taken over the repetitions of a step and
        rounded to 4 decimals.
    """
    # All the traces converted in eFEL format
    efel_traces = {'LongStepNeg': [], 'ShortStepPos': [], 'LongStepPos': []}
    for step_name, step_traces in data.items():
        for rep in step_traces:
            # reshape(-1, 2) infers the number of rows; len(rep)/2 is a
            # float on Python 3 and would make reshape fail. A distinct
            # local name is used so the `data` argument is not rebound.
            samples = rep.reshape(-1, 2)
            stim_start, stim_end = steps_info[step_name]
            # A single eFEL trace
            efel_traces[step_name].append({
                'T': samples[:, 0],
                'V': samples[:, 1],
                'stim_start': [stim_start],
                'stim_end': [stim_end],
                'name': step_name,
            })

    # One list of {efeature_name: value} dicts (one dict per trace) per step
    features_values = collections.defaultdict(dict)

    features_values['LongStepNeg'] = efel.getMeanFeatureValues(
        efel_traces['LongStepNeg'],
        ['time_constant', 'voltage_deflection_begin', 'voltage_deflection'])

    features_values['LongStepPos'] = efel.getMeanFeatureValues(
        efel_traces['LongStepPos'],
        ['mean_frequency', 'adaptation_index2', 'ISI_CV', 'doublet_ISI'])

    features_values['ShortStepPos'] = efel.getMeanFeatureValues(
        efel_traces['ShortStepPos'],
        ['time_to_first_spike', 'AHP_depth', 'AP_width', 'AP_height'])

    # Mean and standard deviation of every feature across repetitions
    mean_std_features = collections.defaultdict(
        lambda: collections.defaultdict(dict))
    for step_name, per_trace_values in features_values.items():
        # The feature names are read from the first trace of the step
        for efeature in per_trace_values[0]:
            efeature_values = [x[efeature] for x in per_trace_values]
            mean_std_features[step_name][efeature] = {
                'mean': round(numpy.mean(efeature_values), 4),
                'std': round(numpy.std(efeature_values), 4)}

    return mean_std_features


mean_std_features = get_features(steps_v_dict)
IPython.display.HTML(json2html.convert(json=dict(mean_std_features)))

0,1
LongStepPos,adaptation_index2std0.0004mean0.0044mean_frequencystd0.4627mean21.6174ISI_CVstd0.0014mean0.0891doublet_ISIstd0.7348mean17.9
ShortStepPos,AP_widthstd0.1089mean2.1648AP_heightstd0.499mean19.599AHP_depthstd2.343mean27.5805time_to_first_spikestd0.3859mean8.1667
LongStepNeg,time_constantstd0.153mean9.7063voltage_deflectionstd2.5923mean-26.8739voltage_deflection_beginstd2.2305mean-27.6073

0,1
adaptation_index2,std0.0004mean0.0044
mean_frequency,std0.4627mean21.6174
ISI_CV,std0.0014mean0.0891
doublet_ISI,std0.7348mean17.9

0,1
std,0.0004
mean,0.0044

0,1
std,0.4627
mean,21.6174

0,1
std,0.0014
mean,0.0891

0,1
std,0.7348
mean,17.9

0,1
AP_width,std0.1089mean2.1648
AP_height,std0.499mean19.599
AHP_depth,std2.343mean27.5805
time_to_first_spike,std0.3859mean8.1667

0,1
std,0.1089
mean,2.1648

0,1
std,0.499
mean,19.599

0,1
std,2.343
mean,27.5805

0,1
std,0.3859
mean,8.1667

0,1
time_constant,std0.153mean9.7063
voltage_deflection,std2.5923mean-26.8739
voltage_deflection_begin,std2.2305mean-27.6073

0,1
std,0.153
mean,9.7063

0,1
std,2.5923
mean,-26.8739

0,1
std,2.2305
mean,-27.6073


# 3. Model definition
In this section we will define the neuron model template and the parameters that we will optimize later on. 

In the following steps we will use some BluePyOpt functionalities to set-up the model template and optimize the free parameters. First of all, we use the "ephys" module to load the morphology. This function is based on the NEURON utilities to load 3D morphologies we have seen before.

In [8]:
# Path of the SWC morphology file, relative to the notebook directory
morphology_file = 'simple.swc'
# Morphology object that is later passed to the cell model as `morph`
morphology = ephys.morphologies.NrnFileMorphology(morphology_file)

Now we define the parameters, some of them are "**frozen**" (i.e. they are not going to be optimised), some are "**free**" (i.e. they are going to be optimised). In a typical biophysically detailed model the free parameters are the maximal conductances of the ionic currents (Na$^+$, K$^+$ and leak in this case).

We are going to use the HH mechanism that we have seen before.

The membrane potential in the somatic location we are recording from is given by:

$$C_{m}\frac{dV}{dt} = -(I_{l} + I_{Na} + I_{K}) $$

$$C_{m}\frac{dV}{dt} = -\left[\overline{g}_{l}(V-E_{l}) + \overline{g}_{Na}m^{3}h(V-E_{Na}) + \overline{g}_{K}n^{4}(V-E_{K})\right] $$

Where $\overline{g}_{l}$, $\overline{g}_{Na}$ and $\overline{g}_{K}$ are the maximal conductances.

In this example, we are going to fit the passive properties of the cell model, by using the eFeatures extracted from the "LongStepNeg" stimulus.

First of all, we need to distribute the HH mechanism in the somatic section. 

In [9]:
# Location object referring to the section list named "somatic"
somatic_loc = ephys.locations.NrnSeclistLocation(
    'somatic',
    seclist_name='somatic')

# Insert the 'hh' mechanism at the somatic location
hh_mech = ephys.mechanisms.NrnMODMechanism(
    name='hh',
    suffix='hh',
    locations=[somatic_loc])

# Leak conductance: the only free (non-frozen) parameter, searched in [0, 1]
gl_param = ephys.parameters.NrnSectionParameter(
    name='gl_hh',
    param_name='gl_hh',
    bounds=[0, 1],
    frozen=False,
    locations=[somatic_loc])

# Na+ conductance: frozen, so a fixed value is given instead of bounds
gnabar_param = ephys.parameters.NrnSectionParameter(
    name='gnabar_hh',
    param_name='gnabar_hh',
    value=0.0001,
    frozen=True,
    locations=[somatic_loc])

# K+ conductance: frozen, so a fixed value is given instead of bounds
gkbar_param = ephys.parameters.NrnSectionParameter(
    name='gkbar_hh',
    param_name='gkbar_hh',
    value=0.0003,
    frozen=True,
    locations=[somatic_loc])

# Reversal potential of the leak conductance (frozen)
el_param = ephys.parameters.NrnSectionParameter(
    name='el_hh',
    param_name='el_hh',
    value=-83,
    frozen=True,
    locations=[somatic_loc])

# Specific membrane capacitance (frozen)
cm_param = ephys.parameters.NrnSectionParameter(
    name='cm',
    param_name='cm',
    value=2.0,
    frozen=True,
    locations=[somatic_loc])

# Temperature in celsius: a global parameter, so it has no location
celsius_param = ephys.parameters.NrnGlobalParameter(
    name='celsius',
    param_name='celsius',
    value=34.0,
    frozen=True)

# Voltage at time 0: a global parameter, same value as el_hh above
vinit_param = ephys.parameters.NrnGlobalParameter(
    name='v_init',
    param_name='v_init',
    value=-83,
    frozen=True)

To define the cell model, we specify the morphology, ion channels and parameters

In [11]:
# Assemble the cell model from the morphology, the mechanism and all the
# parameters (frozen and free) defined above
simple_cell = ephys.models.CellModel(
        name='simple_cell',
        morph=morphology,
        mechs=[hh_mech],
        params=[cm_param, gnabar_param, gkbar_param,
                gl_param, el_param, celsius_param, vinit_param])
# print() with a single argument behaves the same on Python 2 and 3
print(simple_cell)

simple_cell:
  morphology:
    simple.swc
  mechanisms:
    hh: hh at ['somatic']
  params:
    cm: ['somatic'] cm = 2.0
    gnabar_hh: ['somatic'] gnabar_hh = 0.0001
    gkbar_hh: ['somatic'] gkbar_hh = 0.0003
    gl_hh: ['somatic'] gl_hh = [0, 1]
    el_hh: ['somatic'] el_hh = -83
    celsius: celsius = 34.0
    v_init: v_init = -83



We define an object that points to the location of the soma

In [12]:
# Single-compartment location: section 0 of the "somatic" list, at relative
# position 0.5 along the section. It is used below both as the stimulation
# site and as the recording site.
soma_loc = ephys.locations.NrnSeclistCompLocation(
    name='soma',
    seclist_name='somatic',
    sec_index=0,
    comp_x=0.5,
)

# 4. Setting-up a simulation
To evaluate the behavior of the model, we need to setup a simulation. We first try to fit the passive trace (negative current injection).

In [13]:
# Simulator object handed to every protocol run
nrn = ephys.simulators.NrnSimulator()

# Current amplitudes (nA) used for the different protocols,
# we have obtained them in the previous tutorial
LongStepPos = .98
LongStepNeg = -0.46
ShortStepPos = 0.85

# Only the negative long step is simulated here, under the name 'step1'
sweep_protocols = []
for protocol_name, amplitude in [('step1', LongStepNeg)]:
    # Square current pulse injected at the soma
    stim = ephys.stimuli.NrnSquarePulse(
        step_amplitude=amplitude,
        step_delay=250,
        step_duration=3000,
        location=soma_loc,
        total_duration=3500)
    # Record variable 'v' at the same location
    rec = ephys.recordings.CompRecording(
        name='%s.soma.v' % protocol_name,
        location=soma_loc,
        variable='v')
    protocol = ephys.protocols.SweepProtocol(protocol_name, [stim], [rec])
    sweep_protocols.append(protocol)

twostep_protocol = ephys.protocols.SequenceProtocol(
    'twostep',
    protocols=sweep_protocols)

# Starting value for the only free parameter; gnabar_hh and gkbar_hh are
# frozen in the model definition, so they are not passed here
default_params = {'gl_hh': 5e-4}
responses = twostep_protocol.run(
    cell_model=simple_cell,
    param_values=default_params,
    sim=nrn)


In [17]:
def plot_responses(responses):
    """Overlay the simulated 'step1' soma voltage on the recorded trace.

    Args:
        responses: mapping of recording name to a response that can be
            indexed with 'time' and 'voltage'; it must contain the key
            'step1.soma.v'.

    The experimental curve is the first 'LongStepNeg' repetition stored in
    the module-level `steps_v_dict`.
    """
    # Always create a single axes. Passing len(responses) as the number of
    # rows returns an array of axes as soon as there is more than one
    # response, and the ax.plot(...) calls below would then fail.
    fig1, ax = plt.subplots(1, figsize=(5, 3))

    model = responses['step1.soma.v']
    ax.plot(model['time'], model['voltage'], label='Model')

    # reshape(-1, 2) infers the row count; len(...)/2 is a float on Python 3
    # and is rejected by reshape.
    data = steps_v_dict['LongStepNeg'][0].reshape(-1, 2)
    ax.plot(data[:, 0], data[:, 1], label='Experiment')

    ax.set_xlabel("Time (ms)")
    ax.set_ylabel("Voltage (mV)")
    ax.legend(loc="best")


plot_responses(responses)


<IPython.core.display.Javascript object>

The "ephys" module contains helper functionality to define the objective functions and a scoring system that will be used by the genetic algorithm.

In [18]:
# Experimental targets ({'mean': ..., 'std': ...}) per protocol and feature
efel_feature_means = {'step1': {'voltage_deflection':
                                mean_std_features['LongStepNeg']['voltage_deflection']}}

objectives = []

for protocol in sweep_protocols:
    # The stimulus window is taken from the protocol's first stimulus
    stim_start = protocol.stimuli[0].step_delay
    stim_end = stim_start + protocol.stimuli[0].step_duration
    # .items() exists on both Python 2 and 3 (iteritems() is Python 2 only)
    for efel_feature_name, mean_std_dict in efel_feature_means[protocol.name].items():
        mean = mean_std_dict['mean']
        std = mean_std_dict['std']
        feature_name = '%s.%s' % (protocol.name, efel_feature_name)
        feature = ephys.efeatures.eFELFeature(
                    feature_name,
                    efel_feature_name=efel_feature_name,
                    recording_names={'': '%s.soma.v' % protocol.name},
                    stim_start=stim_start,
                    stim_end=stim_end,
                    exp_mean=mean,
                    exp_std=std)
        # One objective per feature
        objective = ephys.objectives.SingletonObjective(
            feature_name,
            feature)
        objectives.append(objective)

score_calc = ephys.objectivescalculators.ObjectivesCalculator(objectives)

# The evaluator ties together model, free parameters, protocols and scoring
cell_evaluator = ephys.evaluators.CellEvaluator(
        cell_model=simple_cell,
        param_names=['gl_hh'],
        fitness_protocols={twostep_protocol.name: twostep_protocol},
        fitness_calculator=score_calc,
        sim=nrn)

# A single pre-formatted argument prints identically on Python 2 and 3
print('Score of the feature: {}'.format(
    cell_evaluator.evaluate_with_dicts(default_params)))


Score of the feature:
 {'step1.voltage_deflection': 9.237101968216427}


# 5. Parameters optimization 
We are now ready to run an optimisation. This procedure could require supercomputing resources and can take from some hours to days. As a proof of concept, we will run an optimization with a small population of individuals for a small number of generations.

In [19]:
# Optimisation driver built on the cell evaluator; 15 offspring are requested
# per generation to keep this proof-of-concept run short
optimisation = bpopt.optimisations.DEAPOptimisation(
    evaluator=cell_evaluator,
    offspring_size=15,
)

After 14 generations, the model passive responses are fairly satisfying.

In [22]:
# Run the optimisation with max_ngen=14. The four returned values are bound
# to final_pop, hall_of_fame, logs and hist; hall_of_fame and logs are used
# by the following cells.
final_pop, hall_of_fame, logs, hist = optimisation.run(max_ngen=14)               


In [24]:
# Re-run the protocol with the parameters of the first hall-of-fame entry
# and compare the resulting trace with the experiment
best_params = cell_evaluator.param_dict(hall_of_fame[0])
responses = twostep_protocol.run(
    cell_model=simple_cell,
    param_values=best_params,
    sim=nrn)
plot_responses(responses)

<IPython.core.display.Javascript object>

After running an optimisation, we can inspect the 10 best individuals, or "Hall of Fame". In our case it contains values for the parameter $\bar{g}_{leak}$

In [26]:
print('Hall of fame: \n')
# Each individual holds one value per free parameter (only gl_hh here), so
# tuple(ind) supplies exactly the one value the format string expects
for ind in hall_of_fame:
    # Function-call form, consistent with the line above and valid on
    # Python 2 and 3
    print('g_leak=%f' % tuple(ind))

Hall of fame: 

g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054
g_leak=0.000054


The best individual from the optimisation is the first one in the hall of fame.

In [30]:
# The hall of fame is read best-first, so entry 0 is the best individual
best_ind = hall_of_fame[0]
# print() with a single argument behaves the same on Python 2 and 3
print('Best individual: {} '.format(best_ind))

Best individual: [5.446917032862581e-05] 


We can evaluate this individual and make use of a convenience function of the cell evaluator to return us a dictionary of the scores (the lower, the better).

In [31]:
# Convert the individual (a list of values) to a {param_name: value} dict
best_ind_dict = cell_evaluator.param_dict(best_ind)
# print() with a single argument behaves the same on Python 2 and 3
print(cell_evaluator.evaluate_with_dicts(best_ind_dict))

{'step1.voltage_deflection': 0.004056167867689003}


We can also plot the evolution of the feature values from generation to generation. We can observe a progressive convergence of the model feature values toward the experimental ones.

In [33]:
# Per-generation statistics stored in `logs` by the optimisation run
gen_numbers = logs.select('gen')
min_fitness = numpy.array(logs.select('min'))
max_fitness = logs.select('max')
mean_fitness = numpy.array(logs.select('avg'))
std_fitness = numpy.array(logs.select('std'))

fig, ax = plt.subplots(1, figsize=(5, 5), facecolor='white')

# Short aliases and the one-standard-deviation band around the average
std, mean, minimum = std_fitness, mean_fitness, min_fitness
stdminus = mean - std
stdplus = mean + std

# Population average
ax.plot(gen_numbers, mean,
        color='black', linewidth=2, label='population average')

# Shaded band: average +/- one standard deviation of the population
ax.fill_between(gen_numbers, stdminus, stdplus,
                color='lightgray', linewidth=2,
                label=r'population standard deviation')

# Best (lowest) score in each generation
ax.plot(gen_numbers, minimum,
        color='red', linewidth=2, label='population minimum')

# Leave one generation of margin on each side; scores start at 0
ax.set_xlim(min(gen_numbers) - 1, max(gen_numbers) + 1)
ax.set_xlabel('Generation #')
ax.set_ylabel('# Experimental standard deviations')
ax.set_ylim([0, max(stdplus)])
ax.legend(loc="best")

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x10eca8390>