In [1]:
# Append system path to include the config scripts
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from copy import deepcopy

from config import *
import lanfactory
import ssms

# DATA GENERATOR CONFIGS
#### Note: Look into the ssms package documentation to get a better idea about the kinds of configs that you need for different kinds of training data


In [3]:
ssms.config.data_generator_config['lan']['mlp']

{'output_folder': 'data/lan_mlp/',
 'dgp_list': 'ddm',
 'nbins': 0,
 'n_samples': 100000,
 'n_parameter_sets': 10000,
 'n_parameter_sets_rejected': 100,
 'n_training_samples_by_parameter_set': 1000,
 'max_t': 20.0,
 'delta_t': 0.001,
 'pickleprotocol': 4,
 'n_cpus': 'all',
 'kde_data_mixture_probabilities': [0.8, 0.1, 0.1],
 'simulation_filters': {'mode': 20,
  'choice_cnt': 10,
  'mean_rt': 15,
  'std': 0,
  'mode_cnt_rel': 0.5},
 'negative_rt_cutoff': -66.77497,
 'n_subruns': 10,
 'bin_pointwise': False,
 'separate_response_channels': False}

In [3]:
# Specify model
model = 'race_no_bias_4'

# Where do you want to save the config file?
config_save_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files/'

# Name of the config file
data_config_save_name = 'race_no_bias_4_nsim_200000_dt_001_nps_1000_npts_2000.pickle'

# What kind of likelihood approximator are we generating training data for?
generator_approach = 'lan'

# Specific network type we train? (Might affect training data representation needed)
generator_network_type = 'mlp'

# Specify arguments which you want to adjust in the data generator
data_generator_arg_dict = {'dgp_list': model,
                           'n_samples': 200000,
                           'n_parameter_sets': 1000,
                           'delta_t': 0.001,
                           'n_training_samples_by_parameter_set': 2000,
                           'n_subruns': 5}

# model_config_arg_dict = {'param_bounds': [[-2.5, 0.2, 0.1, 0.0],
#                                           [2.5, 2.2, 0.9, 2.0]]}
model_config_arg_dict = {}

make_data_generator_configs(model = model,
                            generator_approach = 'lan',
                            generator_network_type = 'mlp',
                            data_generator_arg_dict = data_generator_arg_dict,
                            model_config_arg_dict = model_config_arg_dict,
                            save_name = data_config_save_name,
                            save_folder = config_save_folder)

Found folder:  /users
Moving on...
Found folder:  /users/afengler
Moving on...
Found folder:  /users/afengler/data
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files
Moving on...


{'model_config': {'name': 'race_no_bias_4',
  'params': ['v0', 'v1', 'v2', 'v3', 'a', 'z', 'ndt'],
  'param_bounds': [[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
   [2.5, 2.5, 2.5, 2.5, 3.0, 0.9, 2.0]],
  'boundary': <function ssms.basic_simulators.boundary_functions.constant(t=0)>,
  'n_params': 7,
  'default_params': [0.0, 0.0, 0.0, 0.0, 2.0, 0.5, 0.001],
  'hddm_include': ['v0', 'v1', 'v2', 'a', 'z', 'ndt'],
  'nchoices': 4},
 'data_config': {'output_folder': 'data/lan_mlp/',
  'dgp_list': 'race_no_bias_4',
  'nbins': 0,
  'n_samples': 200000,
  'n_parameter_sets': 1000,
  'n_parameter_sets_rejected': 100,
  'n_training_samples_by_parameter_set': 2000,
  'max_t': 20.0,
  'delta_t': 0.001,
  'pickleprotocol': 4,
  'n_cpus': 'all',
  'kde_data_mixture_probabilities': [0.8, 0.1, 0.1],
  'simulation_filters': {'mode': 20,
   'choice_cnt': 5,
   'mean_rt': 15,
   'std': 0,
   'mode_cnt_rel': 0.6},
  'negative_rt_cutoff': -66.77497,
  'n_subruns': 5,
  'bin_pointwise': False,
  'separate_respons

# NETWORK AND TRAIN CONFIGS

In [5]:
# Provide a unique identifier for the particular files you need from the training_data_folder
training_file_identifier = 'ddm'

# Specify training data folder:
training_data_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000'

# Where do you want to save config files?
network_train_config_save_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files/'

# Name of the config file
network_train_config_save_name = 'network_train_config_ddm_nsim_200000_dt_0005_nps_500_npts_2000.pickle'

# Get list of training files
train_val_split = 0.9
file_list = os.listdir(training_data_folder)
valid_file_list = np.array([training_data_folder + '/' + \
                       file_ for file_ in file_list if training_file_identifier in file_])
val_idx_cutoff = int(0.9 * len(valid_file_list))

# Specify the arguments which you want to adjust in the network and train configs
# Check lanfactory.config.network_config_mlp
#.      lanfactor.config.train_config_mlp for details

network_arg_dict = {'layer_types': ['dense' for i in range(4)],
                    'layer_sizes': [100, 100, 100, 1],
                    'activations': ['tanh', 'tanh', 'tanh', 'linear'],
                    'loss': ['huber'],
                    'model_id': training_file_identifier
                   }
                    
train_arg_dict = {'batch_size': 50000,
                  'n_epochs': 200,
                  'training_files': valid_file_list[:val_idx_cutoff],
                  'validation_files': valid_file_list[val_idx_cutoff:],
                  'shuffle_files': True,
                  'label_prelog_cutoff_low': 1e-7,
                  'label_prelog_cutoff_high': None,
                  'save_history': True,
                  'callbacks': ['checkpoint', 'earlystopping', 'reducelr']
                 }

make_train_network_configs(save_folder = network_train_config_save_folder,
                           network_arg_dict = network_arg_dict,
                           train_arg_dict = train_arg_dict,
                           save_name = network_train_config_save_name)

Found folder:  /users
Moving on...
Found folder:  /users/afengler
Moving on...
Found folder:  /users/afengler/data
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts
Moving on...
Found folder:  /users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files
Moving on...


{'network_config': {'layer_types': ['dense', 'dense', 'dense'],
  'layer_sizes': [100, 100, 1],
  'activations': ['tanh', 'tanh', 'linear'],
  'loss': ['huber'],
  'callbacks': ['checkpoint', 'earlystopping', 'reducelr']},
 'train_config': {'batch_size': 50000,
  'n_epochs': 200,
  'optimizer': 'adam',
  'learning_rate': 0.002,
  'loss': 'huber',
  'metrics': [<tensorflow.python.keras.losses.MeanSquaredError at 0x7f9db2b8f450>,
   <tensorflow.python.keras.losses.Huber at 0x7fa06a9a5490>],
  'callbacks': ['checkpoint', 'earlystopping', 'reducelr'],
  'training_files': array(['/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000/training_data_ddm_75daaf9ef64b11ebb86cac1f6b627e10.pickle',
         '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000/training_data_ddm_36d0d238f64b11eb83ba0cc47afe4c38.pickle',
         '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbin

In [2]:
# Name of the config file
network_train_config_save_name = 'network_train_config_lca_no_bias_4_architecture_search_test.pickle'

# Where do you want to save config files?
network_train_config_save_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/config_files/'

# Specify training data folder:
training_data_folder = '/users/afengler/data/proj_lan_pipeline/LAN_scripts/data/lan_mlp/training_data_0_nbins_0_n_200000'

# Provide a unique identifier for the particular files you need from the training_data_folder
training_file_identifier = 'lca_no_bias_4'

# Get list of relevant training files
train_val_split = 0.9
file_list = os.listdir(training_data_folder)
valid_file_list = np.array([training_data_folder + '/' + \
                     file_ for file_ in file_list if training_file_identifier in file_])
val_idx_cutoff = int(0.9 * len(valid_file_list))

# Training config hyperparameters
layer_sizes = [[100, 100, 1], [100, 100, 100, 1], [100, 100, 100, 100, 1],
               [100, 100, 1], [100, 100, 100, 1], [100, 100, 100, 100, 1],
               [100, 100, 1], [100, 100, 100, 1], [100, 100, 100, 100, 1],
               [200, 200, 1], [200, 200, 200, 1], [200, 200, 200, 200, 1],
               [200, 200, 1], [200, 200, 200, 1], [200, 200, 200, 200, 1],
               [200, 200, 1], [200, 200, 200, 1], [200, 200, 200, 200, 1]
              ]
layer_types = [['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
               ['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
               ['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
               ['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
               ['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
               ['dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense'], ['dense', 'dense', 'dense', 'dense', 'dense'],
              ]
activations = [['tanh', 'tanh', 'linear'], ['tanh', 'tanh', 'tanh', 'linear'], ['tanh', 'tanh', 'tanh', 'tanh', 'linear'],
               ['relu', 'tanh', 'linear'], ['relu', 'relu', 'tanh', 'linear'], ['relu', 'relu', 'relu', 'tanh', 'linear'],
               ['relu', 'relu', 'linear'], ['relu', 'relu', 'relu', 'linear'], ['relu', 'relu', 'relu', 'relu', 'linear'],
               ['tanh', 'tanh', 'linear'], ['tanh', 'tanh', 'tanh', 'linear'], ['tanh', 'tanh', 'tanh', 'tanh', 'linear'],
               ['relu', 'tanh', 'linear'], ['relu', 'relu', 'tanh', 'linear'], ['relu', 'relu', 'relu', 'tanh', 'linear'],
               ['relu', 'relu', 'linear'], ['relu', 'relu', 'relu', 'linear'], ['relu', 'relu', 'relu', 'relu', 'linear'],
              ]

batch_size = 50000
n_epochs = 200
n_training_files = [50, 100, 200, 300, val_idx_cutoff]

# Loop objects
config_dict = {}
network_arg_dicts = {}
train_arg_dicts = {}    
cnt = 0

for i in range(len(layer_sizes)):
    for j in range(len(n_training_files)):
        # Specify the arguments which you want to adjust in the network and train configs
        # Check lanfactory.config.network_config_mlp
        #.      lanfactor.config.train_config_mlp for details

        network_arg_dict = {'layer_types': layer_types[i],
                            'layer_sizes': layer_sizes[i],
                            'activations': activations[i],
                            'loss': ['huber'],
                            'model_id': training_file_identifier
                            }

        train_arg_dict = {'batch_size': batch_size,
                          'n_epochs': n_epochs,
                          'training_files': valid_file_list[:n_training_files[j]],
                          'validation_files': valid_file_list[val_idx_cutoff:],
                          'shuffle_files': True,
                          'label_prelog_cutoff_low': 1e-7,
                          'label_prelog_cutoff_high': None,
                          'save_history': True,
                          'callbacks': ['checkpoint', 'earlystopping', 'reducelr']  # ['checkpoint', 'earlystopping', 'reducelr']
                          }

        config_dict[cnt] = make_train_network_configs(save_folder = network_train_config_save_folder,
                                                      network_arg_dict = network_arg_dict,
                                                      train_arg_dict = train_arg_dict,
                                                      save_name = None)
        print('NEW PRINT')
        print(cnt)
        #print(config_dict[0]['network_config'])
        cnt += 1

pickle.dump(config_dict, open(network_train_config_save_folder + network_train_config_save_name, 'wb'))

NEW PRINT
0
NEW PRINT
1
NEW PRINT
2
NEW PRINT
3
NEW PRINT
4
NEW PRINT
5
NEW PRINT
6
NEW PRINT
7
NEW PRINT
8
NEW PRINT
9
NEW PRINT
10
NEW PRINT
11
NEW PRINT
12
NEW PRINT
13
NEW PRINT
14
NEW PRINT
15
NEW PRINT
16
NEW PRINT
17
NEW PRINT
18
NEW PRINT
19
NEW PRINT
20
NEW PRINT
21
NEW PRINT
22
NEW PRINT
23
NEW PRINT
24
NEW PRINT
25
NEW PRINT
26
NEW PRINT
27
NEW PRINT
28
NEW PRINT
29
NEW PRINT
30
NEW PRINT
31
NEW PRINT
32
NEW PRINT
33
NEW PRINT
34
NEW PRINT
35
NEW PRINT
36
NEW PRINT
37
NEW PRINT
38
NEW PRINT
39
NEW PRINT
40
NEW PRINT
41
NEW PRINT
42
NEW PRINT
43
NEW PRINT
44
NEW PRINT
45
NEW PRINT
46
NEW PRINT
47
NEW PRINT
48
NEW PRINT
49
NEW PRINT
50
NEW PRINT
51
NEW PRINT
52
NEW PRINT
53
NEW PRINT
54
NEW PRINT
55
NEW PRINT
56
NEW PRINT
57
NEW PRINT
58
NEW PRINT
59
NEW PRINT
60
NEW PRINT
61
NEW PRINT
62
NEW PRINT
63
NEW PRINT
64
NEW PRINT
65
NEW PRINT
66
NEW PRINT
67
NEW PRINT
68
NEW PRINT
69
NEW PRINT
70
NEW PRINT
71
NEW PRINT
72
NEW PRINT
73
NEW PRINT
74
NEW PRINT
75
NEW PRINT
76
NEW PRINT

In [4]:
x = pickle.load(open(network_train_config_save_folder + network_train_config_save_name, 'rb'))

In [12]:
for i in range(90):
    print(x[i]['train_config']['training_files'].shape[0])


50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
50
100
200
300
463
