In [1]:
# Append system path to include the config scripts
import sys
import os
from copy import deepcopy

print('importing lanfactory')
import lanfactory

print('importing ssms')
import ssms

# Put the parent directory of sys.path[0] on the module search path
# (presumably the location of the `config` module imported next -- TODO confirm).
sys.path.insert(1, os.path.join(sys.path[0], '..'))
# NOTE(review): later cells use make_data_generator_configs, make_train_network_configs,
# np and pickle without importing them explicitly; they presumably arrive through this
# wildcard import -- confirm, and consider importing them by name.
from config import *

import tensorflow
import torch

importing sys
importing os
importing copy
importing lanfactory
importing ssms


# DATA GENERATOR CONFIGS
#### Note: See the `ssms` package documentation for the config options required by each kind of training data.


In [2]:
# Display the data generator config stored in ssms under ['lan']['mlp'].
# Presumably these are the entries that data_generator_arg_dict overrides further down -- TODO confirm.
ssms.config.data_generator_config['lan']['mlp']

{'output_folder': 'data/lan_mlp/',
 'dgp_list': 'ddm',
 'nbins': 0,
 'n_samples': 100000,
 'n_parameter_sets': 10000,
 'n_parameter_sets_rejected': 100,
 'n_training_samples_by_parameter_set': 1000,
 'max_t': 20.0,
 'delta_t': 0.001,
 'pickleprotocol': 4,
 'n_cpus': 'all',
 'kde_data_mixture_probabilities': [0.8, 0.1, 0.1],
 'simulation_filters': {'mode': 20,
  'choice_cnt': 5,
  'mean_rt': 15,
  'std': 0,
  'mode_cnt_rel': 0.6},
 'negative_rt_cutoff': -66.77497,
 'n_subruns': 10,
 'bin_pointwise': False,
 'separate_response_channels': False}

### Define Metadata

In [10]:
# Model for which training data is generated
model = 'full_ddm'

# Destination folder for the config file
config_save_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files/'

# Likelihood approximator the training data is generated for
generator_approach = 'lan'

# Network type that will be trained (might affect the training data representation needed)
generator_network_type = 'mlp'

# Data generator arguments to adjust
data_generator_arg_dict = dict(dgp_list = model,
                               n_samples = 200000,
                               n_parameter_sets = 1000,
                               delta_t = 0.001,
                               n_training_samples_by_parameter_set = 2000,
                               n_subruns = 5)

# Model config arguments to adjust; left empty here.
# Example of a 'param_bounds' override:
# model_config_arg_dict = {'param_bounds': [[-2.5, 0.2, 0.1, 0.0],
#                                           [2.5, 2.2, 0.9, 2.0]]}
model_config_arg_dict = dict()

# Config file name, assembled from the model name and the generator settings above
data_config_save_name = '{}_nsim_{}_dt_{}_nps_{}_npts_{}.pickle'.format(
    model,
    data_generator_arg_dict['n_samples'],
    data_generator_arg_dict['delta_t'],
    data_generator_arg_dict['n_parameter_sets'],
    data_generator_arg_dict['n_training_samples_by_parameter_set'])

Found folder:  /users
Moving on...
Found folder:  /users/afengler
Moving on...
Found folder:  /users/afengler/data
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files
Moving on...


{'model_config': {'name': 'full_ddm',
  'params': ['v', 'a', 'z', 't', 'sz', 'sv', 'st'],
  'param_bounds': [[-3.0, 0.3, 0.3, 0.25, 0.001, 0.001, 0.001],
   [3.0, 2.5, 0.7, 2.25, 0.2, 2.0, 0.25]],
  'boundary': <function ssms.basic_simulators.boundary_functions.constant(t=0)>,
  'n_params': 7,
  'default_params': [0.0, 1.0, 0.5, 0.25, 0.001, 0.001, 0.001],
  'hddm_include': ['z', 'st', 'sv', 'sz'],
  'nchoices': 2},
 'data_config': {'output_folder': 'data/lan_mlp/',
  'dgp_list': 'full_ddm',
  'nbins': 0,
  'n_samples': 200000,
  'n_parameter_sets': 1000,
  'n_parameter_sets_rejected': 100,
  'n_training_samples_by_parameter_set': 2000,
  'max_t': 20.0,
  'delta_t': 0.001,
  'pickleprotocol': 4,
  'n_cpus': 'all',
  'kde_data_mixture_probabilities': [0.8, 0.1, 0.1],
  'simulation_filters': {'mode': 20,
   'choice_cnt': 5,
   'mean_rt': 15,
   'std': 0,
   'mode_cnt_rel': 0.6},
  'negative_rt_cutoff': -66.77497,
  'n_subruns': 5,
  'bin_pointwise': False,
  'separate_response_channels':

### Generate the Config File

In [None]:
# Create the data generator config from the metadata defined in the cell above.
# The approach and network type are passed through their variables rather than
# repeated as literals, so changing the metadata cell is enough to change the config.
make_data_generator_configs(model = model,
                            generator_approach = generator_approach,
                            generator_network_type = generator_network_type,
                            data_generator_arg_dict = data_generator_arg_dict,
                            model_config_arg_dict = model_config_arg_dict,
                            save_name = data_config_save_name,
                            save_folder = config_save_folder)

# NETWORK AND TRAIN CONFIGS

### Define Metadata

In [None]:
# Where do you want to save config files?
network_train_config_save_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files/'

# Specify training data folder:
training_data_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000'

# Provide a unique identifier for the particular files you need from the training_data_folder
training_file_identifier = 'ddm_mic2_adj_weibull_no_bias'

# Hardware and dl_backend
# These must be assigned before the config file name below, which is built from them;
# otherwise the cell raises a NameError on a fresh kernel.
hardware = 'gpu'
dl_backend = 'torch'

# Specify the name of the config file
network_train_config_save_name = hardware + '_' + dl_backend + '_network_train_config_' + training_file_identifier + '_nsim_200000_dt_0.001_nps_500_npts_2000_architecture_search.pickle'

# Get list of relevant training files
# os.listdir returns entries in arbitrary order, so the listing is sorted: any
# train / validation split obtained by slicing valid_file_list is then reproducible
# across runs and machines.
file_list = sorted(os.listdir(training_data_folder))
# Keep only files whose name contains the identifier, as full paths
valid_file_list = np.array([training_data_folder + '/' + file_
                            for file_ in file_list if training_file_identifier in file_])
# Wrapped in a list: one entry per training-set size to build configs for
n_training_files = [len(valid_file_list)]
print(n_training_files)

# Training config hyperparameters

# Hardware: pick the batch size according to the device the network is trained on
if hardware == 'gpu':
    print('config for gpu --> use large batch size')
    batch_size = 100000
elif hardware == 'cpu':
    print('config for cpu --> use small batch size')
    batch_size = 1000
else:
    # Fail loudly instead of leaving batch_size undefined (or silently reusing a
    # stale value from an earlier run of this cell).
    raise ValueError("hardware must be 'gpu' or 'cpu', got: " + repr(hardware))

# How many epochs to train?
n_epochs = 20

# Network architectures
# Grid of 3 hidden-layer widths x 3 depths (width varies slowest), each followed by
# a single output unit.
layer_sizes = [[width] * depth + [1]
               for width in (100, 120, 150)
               for depth in (3, 4, 5)]
# Every layer, including the output layer, is dense
layer_types = [['dense'] * len(sizes) for sizes in layer_sizes]
# tanh on all hidden layers, linear on the output layer
activations = [['tanh'] * (len(sizes) - 1) + ['linear'] for sizes in layer_sizes]

# Train / validations split (same fraction for every architecture)
train_val_split = [0.98] * len(layer_sizes)

### Create the Config file

In [5]:
# Loop objects
config_dict = {}
network_arg_dicts = {}
train_arg_dicts = {}
cnt = 0

# Build one config per (architecture i, training-set size j) pair and store it in
# config_dict under a running integer key.
for i in range(len(layer_sizes)):
    for j in range(len(n_training_files)):
        # Files before this index are used for training, the rest for validation
        val_idx_cutoff = int(train_val_split[i] * n_training_files[j])

        # Specify the arguments which you want to adjust in the network and train configs
        # Check: lanfactory.config.network_config_mlp
        #        lanfactory.config.train_config_mlp for details

        network_arg_dict = {'layer_types': layer_types[i],
                            'layer_sizes': layer_sizes[i],
                            'activations': activations[i],
                            'loss': ['huber'],
                            'model_id': training_file_identifier
                            }

        train_arg_dict = {'batch_size': batch_size,
                          'n_epochs': n_epochs,
                          'training_files': valid_file_list[:val_idx_cutoff],
                          'validation_files': valid_file_list[val_idx_cutoff:n_training_files[j]],
                          'shuffle_files': True,
                          'label_prelog_cutoff_low': 1e-7,
                          'label_prelog_cutoff_high': None,
                          'save_history': True,
                          'callbacks': ['checkpoint', 'earlystopping', 'reducelr'],
                          }

        # save_name = None: the individual config is only collected here; the whole
        # collection is written to disk once, after the loop.
        config_dict[cnt] = make_train_network_configs(save_folder = network_train_config_save_folder,
                                                      network_arg_dict = network_arg_dict,
                                                      train_arg_dict = train_arg_dict,
                                                      save_name = None)

        print('NEW PRINT')
        print(cnt)
        cnt += 1

print('Now saving')
# Use a context manager so the file handle is flushed and closed deterministically
with open(network_train_config_save_folder + network_train_config_save_name, 'wb') as config_file:
    pickle.dump(config_dict, config_file)

[500]
NEW PRINT
0
NEW PRINT
1
NEW PRINT
2
NEW PRINT
3
NEW PRINT
4
NEW PRINT
5
NEW PRINT
6
NEW PRINT
7
NEW PRINT
8
Now saving


In [13]:
# Reload the saved config dictionary from disk.
# The context manager closes the file handle once loading is done.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open(network_train_config_save_folder + network_train_config_save_name, 'rb') as config_file:
    x = pickle.load(config_file)

In [14]:
# Inspect the entry stored under key 0 of the reloaded config dictionary
x[0]

{'network_config': {'layer_types': ['dense', 'dense', 'dense'],
  'layer_sizes': [100, 100, 1],
  'activations': ['tanh', 'tanh', 'linear'],
  'loss': ['huber'],
  'callbacks': ['checkpoint', 'earlystopping', 'reducelr'],
  'model_id': 'race_no_bias_4'},
 'train_config': {'batch_size': 50000,
  'n_epochs': 200,
  'optimizer': 'adam',
  'learning_rate': 0.002,
  'loss': 'huber',
  'metrics': [<tensorflow.python.keras.losses.MeanSquaredError at 0x7fafc1ae55d0>,
   <tensorflow.python.keras.losses.Huber at 0x7fb3f13ad850>],
  'callbacks': ['checkpoint', 'earlystopping', 'reducelr'],
  'training_files': array(['/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000/training_data_race_no_bias_4_9353d924fc6311eb95df0cc47afdbf59.pickle',
         '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000/training_data_race_no_bias_4_3843ab7afbcf11eba97d0cc47afdbfb9.pickle',
         '/users/afengler/data/proj_lan_