# Train custom model

### Imports and check keras/tensorflow version

In [1]:
import os
# When the notebook is started from inside a 'notebooks' folder, step one
# level up so that the relative paths used below start from the main directory.
if 'notebooks' in os.getcwd():
    os.chdir('..')

current_directory = os.getcwd()
print('Current directory: {}'.format(current_directory))

import os
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import time

import keras
import tensorflow

import ruamel.yaml     # make sure it is installed, if not: pip install ruamel.yaml

from helper_scripts import config
from helper_scripts import utils

# Report the library versions; the same values are stored in the model config below.
print('Keras version: {}'.format(keras.__version__))
print('Tensorflow version: {}'.format(tensorflow.__version__))

Current directory: /home/adrian/coding/Calibrated-inference-of-spiking


Using TensorFlow backend.


Keras version: 2.3.1
Tensorflow version: 2.1.0


In [2]:
# Interactive plotting: IPython magic that selects matplotlib's 'notebook' backend
# for the figures created in this session.
%matplotlib notebook

### Define model parameters as `cfg` dictionary

In [3]:
# Complete description of one training run. The dictionary is written to
# config.yaml inside the model folder and read back by the training cell.
cfg = {
    # --- Model identity -----------------------------------------------------
    'model_name': 'GCaMP6s_5Hz',   # model name (also the name of the save folder)
    'sampling_rate': 5,            # sampling rate in Hz (round to next integer)

    # --- Ground-truth datasets used for training ----------------------------
    'training_datasets': [
        'DS13-GCaMP6s-m-V1',
        'DS14-GCaMP6s-m-V1',
        'DS15-GCaMP6s-m-V1',
        'DS11-GCaMP6s-m-V1-neuropil-corrected',
        'DS12-GCaMP6s-m-V1-neuropil-corrected',
    ],

    # One ensemble of models is trained per noise level.
    # Keep these as plain Python ints (do not use numpy here!).
    'noise_levels': list(range(1, 10)),

    # --- Input window -------------------------------------------------------
    'smoothing': 0.2,      # std of Gaussian smoothing in time (sec)
    'windowsize': 64,      # in timepoints
    'before_frac': 0.5,    # fraction of window before timepoint (0.5 symmetric)

    # --- Network architecture -----------------------------------------------
    'conv_filter': 'Conv1D',
    'filter_sizes': [31, 19, 5],     # one entry per conv layer
    'filter_numbers': [30, 40, 50],  # one entry per conv layer
    'dense_expansion': 30,           # for the dense layer

    # --- Gradient descent learning ------------------------------------------
    'loss_function': 'mean_squared_error',
    'optimizer': 'Adagrad',
    'nr_of_epochs': 10,

    # --- Ensemble learning --------------------------------------------------
    'ensemble_size': 5,
    'batch_size': 2048,

    # --- Bookkeeping --------------------------------------------------------
    'keras_version': keras.__version__,
    'tensorflow_version': tensorflow.__version__,
    'training_date': str(date.today()),
    'training_finished': 'No',   # the training cell sets this to 'Running', then 'Yes'
}

### Create model folder and save config file

In [5]:
# Every model gets its own sub-folder of 'Pretrained_models', named after the model.
model_folder = os.path.join('Pretrained_models', cfg['model_name'])

if not os.path.exists( model_folder ):
    # Create the folder. os.makedirs (unlike os.mkdir) also creates a missing
    # 'Pretrained_models' parent directory instead of raising FileNotFoundError.
    os.makedirs(model_folder)

    # save config file into the folder
    config.write_config(cfg, os.path.join(model_folder, 'config.yaml') )

else:
    # An existing folder is left untouched here; nothing is created or written.
    print('There is already a model called {}.'.format(cfg['model_name']),
          'Please rename your model.')

Created file pretrained_models/GCaMP6s_5Hz/config.yaml


### Load config (in case model is already defined)

In [6]:
# Re-open a model that was defined earlier: rebuild its folder path from the
# model name and read the stored configuration back into cfg.
model_name = 'GCaMP6s_5Hz'
model_folder = os.path.join('Pretrained_models', model_name)

config_file = os.path.join(model_folder, 'config.yaml')
cfg = config.read_config(config_file)

# Train model and save results

In [7]:
# Train one ensemble of models per noise level and save every model as .h5
# into model_folder. The status field 'training_finished' in config.yaml is
# set to 'Running' before training and to 'Yes' after all models are saved.

# wall-clock start time, used for the runtime report at the end of the cell
start = time.time()

print('Used configuration for model fitting:\n{}'.format(cfg))
print('Models will be saved into this folder: ', model_folder)

# add base folder to selected training datasets
training_folders = [os.path.join('Ground_truth', ds) for ds in cfg['training_datasets']]

# turn string of convolution filter into keras.layer.FILTER
if cfg['conv_filter'] == 'Conv1D':
    conv_filter = keras.layers.Conv1D
elif cfg['conv_filter'] == 'YOUR_FILTER':
    # Placeholder for a custom filter: uncomment and adapt the line below.
    # NOTE(review): as written, this branch does not assign conv_filter.
    # conv_filter = keras.layers.YOUR_FILTER
    pass
else:
    raise Exception('Unknown convolution filter: {}'.format(cfg['conv_filter']))
    
# Update model fitting status
# NOTE(review): if the loop below fails or is interrupted, config.yaml keeps
# the status 'Running', because 'Yes' is only written after the loop completes.
cfg['training_finished'] = 'Running'
config.write_config(cfg, os.path.join( model_folder,'config.yaml' ))

for noise_level in cfg['noise_levels']:
    for ensemble in range( cfg['ensemble_size'] ):
        # train 'ensemble_size' (e.g. 5) models for each noise level
        
        # TODO: reduce number of printed lines by using verbose parameter
        
        # preprocess dataset to get uniform dataset for training
        # (called again for every ensemble member, so each model gets its own X, Y)
        X,Y = utils.preprocess_groundtruth_artificial_noise_balanced(
                            ground_truth_folders = training_folders,
                            before_frac = cfg['before_frac'],
                            windowsize = cfg['windowsize'],
                            # remaining fraction of the window, after the timepoint
                            after_frac = 1 - cfg['before_frac'],
                            noise_level = noise_level,
                            sampling_rate = cfg['sampling_rate'],
                            # cfg['smoothing'] is given in seconds; multiplied by the
                            # sampling rate (Hz) it is presumably in timepoints - TODO confirm
                            smoothing = cfg['smoothing'] * cfg['sampling_rate'],
                            omission_list = [],
                            permute = 1)
        
        # build a fresh, untrained network for this ensemble member
        model = utils.define_model(
                            filter_sizes = cfg['filter_sizes'],
                            filter_numbers = cfg['filter_numbers'],
                            dense_expansion = cfg['dense_expansion'],
                            windowsize = cfg['windowsize'],
                            conv_filter = conv_filter,
                            loss_function = cfg['loss_function'],
                            optimizer = cfg['optimizer']
                                    )
        
        # NOTE(review): define_model already receives loss_function and optimizer;
        # this compile call may be redundant - confirm against utils.define_model.
        model.compile( loss = cfg['loss_function'],
                       optimizer = cfg['optimizer'] )
        
        model.fit(X,Y,
                  batch_size = cfg['batch_size'],
                  epochs = cfg['nr_of_epochs'],
                  verbose = 1)
        
        # save model; the file name encodes the noise level and the ensemble index
        file_name = 'Model_NoiseLevel_{}_Ensemble_{}.h5'.format(int(noise_level), ensemble)
        model.save( os.path.join( model_folder,file_name ) )

# Update model fitting status (only reached when all models were trained and saved)
cfg['training_finished'] = 'Yes'
config.write_config(cfg, os.path.join( model_folder,'config.yaml' ))

print('\n\nDone!')
# elapsed wall-clock time since the start of the cell, converted from seconds to minutes
print('Runtime: {:.0f} min'.format((time.time() - start)/60))



Used configuration for model fitting:
ordereddict([('model_name', 'GCaMP6s_5Hz'), ('sampling_rate', 5), ('training_datasets', ['GT_dataset_GC6s_Chen', 'GT_dataset_Theis_3', 'GT_dataset_Theis_5', 'GT_dataset_Allen_tetOs_neuropil_corrected', 'GT_dataset_Allen_Emx1s_neuropil_corrected']), ('noise_levels', [1, 2, 3, 4, 5, 6, 7, 8, 9]), ('smoothing', 0.2), ('windowsize', 64), ('before_frac', 0.5), ('conv_filter', 'Conv1D'), ('filter_sizes', [31, 19, 5]), ('filter_numbers', [30, 40, 50]), ('dense_expansion', 30), ('loss_function', 'mean_squared_error'), ('optimizer', 'Adagrad'), ('nr_of_epochs', 10), ('ensemble_size', 5), ('batch_size', 2048), ('keras_version', '2.3.1'), ('tensorflow_version', '2.1.0'), ('training_date', '2020-04-03'), ('training_finished', 'No')])
Models will be saved into this folder:  pretrained_models/GCaMP6s_5Hz
Created file pretrained_models/GCaMP6s_5Hz/config.yaml
Resampling neuron 1 from a total of 7 neurons.
Resampling neuron 2 from a total of 7 neurons.
Resampling 



KeyboardInterrupt: 