19 changes: 11 additions & 8 deletions README.md
@@ -16,9 +16,9 @@ PyTorchPipe (PTP) is a component-oriented framework that facilitates development
PTP frames training and testing procedures as _pipelines_ consisting of many components communicating through data streams.
Each such pipeline can consist of several components, including a single problem instance (providing batches of data), any number of trainable components (models), and additional components providing the required transformations and computations.

As a result, the training & testing procedures are no longer pinned to a specific problem or model, and built-in mechanisms for compatibility checking (handshaking), configuration management & statistics collection facilitate running diverse experiments.
As a result, the training & testing procedures are no longer pinned to a specific problem or model, and built-in mechanisms for compatibility checking (handshaking), configuration and global variables management & statistics collection facilitate rapid development of complex pipelines and running diverse experiments.

At its core, to _accelerate the computations_, PTP relies on PyTorch and extensively uses its mechanisms for distributing computations on CPUs/GPUs, including multi-threaded data loaders and multi-GPU data parallelism.
At its core, to _accelerate the computations_, PTP relies on PyTorch and extensively uses its mechanisms for distributing computations on CPUs/GPUs, including multi-process data loaders and multi-GPU data parallelism.
The models are _agnostic_ to those operations; one indicates whether to use them in the configuration files (data loaders) or by passing adequate run-time arguments (e.g. `--gpu`).
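
For example, enabling multi-process data loading is purely a matter of configuration; a minimal sketch (the key names follow the default worker configurations shown later in this diff, the values are illustrative):

```yaml
training:
  dataloader:
    num_workers: 4   # four data-loading worker processes
    pin_memory: True # speeds up host-to-GPU transfers
```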

**Datasets:**
@@ -31,13 +31,16 @@ PTP focuses on multi-modal reasoning combining vision and language. Currently it
* ANKI (Machine Translation)

Aside from providing batches of samples, the Problem class will automatically download the files associated with a given dataset (as long as the dataset is publicly available).
The diversity of those problems proves the flexibility of the framework; we are working on incorporating new ones into PTP.
The diversity of those problems (and associated models) proves the flexibility of the framework; we are working on incorporating new ones into PTP.

**Model Zoo:**
What people typically define as a _model_ in PTP is decomposed into components, with _Model_ being a derived class that contains trainable elements.
Those components are loosely coupled and care only about the inputs they retrieve and outputs they produce.
The framework offers full flexibility and it is up to the programmer to choose the _granularity_ of his/her components/models.
However, PTP provides several ready-to-use, out-of-the-box components, from ones of general usage to very specialized ones:
**Pipelines:**
What people typically define as a _model_ in PTP is framed as a _pipeline_, consisting of many inter-connected components, with one or more _Models_ containing trainable elements.
Those components are loosely coupled and care only about the _input streams_ they retrieve and the _output streams_ they produce.
The framework offers full flexibility and it is up to the programmer to choose the _granularity_ of his/her components/models/pipelines.
Such a decomposition enables one to easily combine many components and models into pipelines (as in the sketch below), while the framework supports loading pretrained models, freezing them during training, saving them to checkpoints, etc.
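
For instance, a small classification pipeline could be configured as follows (a minimal sketch; the component names and types are illustrative, not necessarily the exact PTP identifiers):

```yaml
pipeline:
  # Trainable model: encodes images into feature vectors.
  image_encoder:
    priority: 1
    type: ConvNetEncoder
  # Trainable model: maps features to class predictions.
  image_classifier:
    priority: 2
    type: FeedForwardNetwork
  # Non-trainable component: computes the classification loss.
  nllloss:
    priority: 3
    type: NLLLoss
```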

**Model/Component Zoo:**
PTP provides several ready-to-use, out-of-the-box components, ranging from ones of general usage to very specialized ones:

* Feed Forward Network (Fully Connected layers with activation functions and dropout, variable number of hidden layers, general usage)
* Torch Vision Wrapper (wrapping several models from Torch Vision, e.g. VGG-16, ResNet-50, ResNet-152, DenseNet-121, general usage)
114 changes: 114 additions & 0 deletions configs/default/workers/online_trainer.yml
@@ -0,0 +1,114 @@
####################################################################
# Section defining all the default values of parameters used during training.
# If you want to use a different section for training, pass its name as the command line argument '--training_section_name' to the trainer (DEFAULT: training)
# Note: in such a case remember to define all the required parameters in the new section.
training:
  # Set the random seeds: -1 means that they will be picked randomly.
  # Note: their final values will be stored in the final training_configuration.yml saved to the log dir.
  seed_numpy: -1
  seed_torch: -1

  # Default batch size.
  batch_size: 64

  # Definition of the problem (Mandatory!)
  #problem:
  #  # One must define its type (Mandatory!)
  #  type: ?
  #  # The rest of the content of that section is problem-specific...

  # Section describing curriculum learning (Optional)
  #curriculum_learning:
  #  # Flag indicating whether curriculum learning has to finish before the (eventual) termination of the training.
  #  must_finish: True
  #  # The rest of the content of that section is problem-specific...

  # Definition of the optimizer (Mandatory!)
  #optimizer:
  #  # Type - generally all optimizers from torch.optim are allowed (Mandatory!)
  #  type: Adam
  #  # Options:
  #  lr: 0.0001
  #  # The rest of the content of that section is optimizer-specific...

  # Set a default configuration section for the data loader.
  dataloader:
    # Shuffle set by default.
    shuffle: True
    batch_sampler: None
    # Do not use multiprocessing by default.
    num_workers: 0
    pin_memory: False
    # Do not drop the last (incomplete) batch by default.
    drop_last: False
    timeout: 0
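    # Editor's note (assumption): the keys above mirror the keyword arguments of
    # torch.utils.data.DataLoader (shuffle, batch_sampler, num_workers, pin_memory,
    # drop_last, timeout), so presumably they are forwarded to the loader directly.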

  # Definition of the sampler (Optional)
  # When this section is not present, the worker will use "standard" sampling (please refer to 'shuffle' in 'dataloader')
  #sampler:
  #  # Type - generally all samplers from PyTorch (plus some new ones) are allowed (Mandatory!)
  #  # Options:
  #  type: RandomSampler
  #  # The rest of the content of that section is sampler-specific...

  # Terminal conditions that will be used during training.
  # They can (and often should) be overwritten.
  terminal_conditions:
    # Terminal condition I: loss threshold; going below it will terminate the training.
    loss_stop: 0.00001 # 1e-5
    # Terminal condition II: maximal number of epochs (optional, -1 means that this condition is disabled)
    epoch_limit: -1
    # Terminal condition III: maximal number of episodes (Mandatory for this trainer! Must be > 0)
    episode_limit: 100000
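    # Editor's sketch of the resulting logic (an assumption based on the comments above):
    #   stop when  loss < loss_stop
    #          or  (epoch_limit > 0 and epoch >= epoch_limit)
    #          or  episode >= episode_limit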



####################################################################
# Section defining all the default values of parameters used during validation.
# If you want to use a different section for validation, pass its name as the command line argument '--validation_section_name' to the trainer (DEFAULT: validation)
# Note: in such a case remember to define all the required parameters in the new section.
validation:
  # Defines how often the partial validation will be performed.
  # In this trainer Partial Validation is mandatory, hence the interval must be > 0.
  partial_validation_interval: 100
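  # Editor's note (assumption): an "episode" is a single training step on one batch,
  # so with this default a partial validation (on a single batch) runs every 100 steps.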

  # Definition of the problem (Mandatory!)
  #problem:
  #  # One must define its type (Mandatory!)
  #  type: ?
  #  # The rest of the content of that section is problem-specific...

  # Set a default configuration section for the data loader.
  dataloader:
    # Shuffle set by default.
    shuffle: True
    # Do not use multiprocessing by default.
    num_workers: 0
    pin_memory: False
    # Do not drop the last (incomplete) batch by default.
    drop_last: False
    timeout: 0

  # Definition of the sampler (Optional)
  # When this section is not present, the worker will use "standard" sampling (please refer to 'shuffle' in 'dataloader')
  #sampler:
  #  # Type - generally all samplers from PyTorch (plus some new ones) are allowed (Mandatory!)
  #  # Options:
  #  type: RandomSampler
  #  # The rest of the content of that section is sampler-specific...



####################################################################
# Section defining the pipeline.
# If you want to use a different section for the pipeline, pass its name as the command line argument '--pipeline_section_name' to the trainer (DEFAULT: pipeline)
pipeline:
  # Pipeline must contain at least one component.
  #name_1:
  #  # Each component must have its priority defined... (Mandatory!)
  #  priority: 0.1 # Can be a float. Smaller means higher priority, down to zero.
  #  # ... and type (Mandatory!)
  #  type: ?
  #  # The rest of the content of that section is component-specific...


51 changes: 51 additions & 0 deletions configs/default/workers/processor.yml
@@ -0,0 +1,51 @@
####################################################################
# Section defining all the default values of parameters used during testing.
# If you want to use a different section for testing, pass its name as the command line argument '--section_name' to the processor (DEFAULT: test)
# Note: in such a case remember to define all the required parameters in the new section.
test:
  # Set the random seeds: -1 means that they will be picked randomly.
  # Note: their final values will be stored in the final configuration file saved to the log dir.
  seed_numpy: -1
  seed_torch: -1

  # Default batch size.
  batch_size: 64

  # Definition of the problem (Mandatory!)
  #problem:
  #  # One must define its type (Mandatory!)
  #  type: ?
  #  # The rest of the content of that section is problem-specific...

  # Set a default configuration section for the data loader.
  dataloader:
    # Shuffle set by default.
    shuffle: True
    batch_sampler: None
    # Do not use multiprocessing by default.
    num_workers: 0
    pin_memory: False
    # Do not drop the last (incomplete) batch by default.
    drop_last: False
    timeout: 0

  # Definition of the sampler (Optional)
  # When this section is not present, the worker will use "standard" sampling (please refer to 'shuffle' in 'dataloader')
  #sampler:
  #  # Type - generally all samplers from PyTorch (plus some new ones) are allowed (Mandatory!)
  #  # Options:
  #  type: RandomSampler
  #  # The rest of the content of that section is sampler-specific...


####################################################################
# Section defining the pipeline.
# If you want to use a different section for the pipeline, pass its name as the command line argument '--pipeline_section_name' to the processor (DEFAULT: pipeline)
pipeline:
  # Pipeline must contain at least one component.
  #name_1:
  #  # Each component must have its priority defined... (Mandatory!)
  #  priority: 0.1 # Can be a float. Smaller means higher priority, down to zero.
  #  # ... and type (Mandatory!)
  #  type: ?
  #  # The rest of the content of that section is component-specific...
8 changes: 4 additions & 4 deletions configs/mnist/default_mnist.yml
@@ -7,11 +7,11 @@ training:
#resize: [32, 32]
# Use sampler that operates on a subset.
#sampler:
# name: SubsetRandomSampler
# type: SubsetRandomSampler
# indices: [0, 55000]
# optimizer parameters:
optimizer:
name: Adam
type: Adam
lr: 0.0001
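# Editor's note (assumption): the remaining keys (e.g. lr) are passed as keyword arguments to the torch.optim class named by 'type'.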
# settings parameters
terminal_conditions:
@@ -29,11 +29,11 @@ validation:
#resize: [32, 32]
# Use sampler that operates on a subset.
#sampler:
# name: SubsetRandomSampler
# type: SubsetRandomSampler
# indices: [55000, 60000]

# Testing parameters:
testing:
test:
problem:
type: MNIST
batch_size: *b
1 change: 0 additions & 1 deletion configs/mnist/mnist_classification_convnet_softmax.yml
@@ -2,7 +2,6 @@
default_configs: mnist/default_mnist.yml

pipeline:
name: mnist_convnet_softmax_classifier

# Model consisting of two components.
image_encoder:
7 changes: 3 additions & 4 deletions configs/mnist/mnist_classification_kfold_softmax.yml
@@ -10,11 +10,11 @@ training:
#resize: [32, 32]
# Use k-fold cross-validation random sampler.
sampler:
name: kFoldRandomSampler
type: kFoldRandomSampler
folds: 7 # Each with size of 6000
# optimizer parameters:
optimizer:
name: Adam
type: Adam
lr: 0.0001
# settings parameters
terminal_conditions:
@@ -32,11 +32,10 @@ validation:
#resize: [32, 32]
# Use k-fold cross-validation random sampler.
sampler:
name: kFoldRandomSampler
type: kFoldRandomSampler
folds: 7 # Each with size of 6000
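# Editor's note (assumption): with k folds the sampler trains on k-1 folds and holds out the remaining fold for validation, rotating the folds as training proceeds.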

pipeline:
name: mnist_softmax_classifier

# Reshapes tensors.
reshaper:
3 changes: 1 addition & 2 deletions configs/mnist/mnist_classification_lenet5.yml
@@ -12,13 +12,12 @@ validation:
resize_image: [32, 32]

# Testing parameters - overwrite defaults:
testing:
test:
problem:
resize_image: [32, 32]

# Definition of the pipeline.
pipeline:
name: mnist_lenet5_classifier

# Image classifier.
image_classifier:
1 change: 0 additions & 1 deletion configs/mnist/mnist_classification_softmax.yml
@@ -2,7 +2,6 @@
default_configs: mnist/default_mnist.yml

pipeline:
name: mnist_softmax_classifier

# Reshapes tensors.
reshaper:
4 changes: 2 additions & 2 deletions configs/mnist/mnist_classification_vf_2lenet5.yml
@@ -12,13 +12,13 @@ validation:
resize_image: [32, 32]

# Testing parameters - overwrite defaults:
testing:
test:
problem:
resize_image: [32, 32]

# Definition of the pipeline.
pipeline:
name: mnist_variational_flow_2lenet5

# Disable components for "default" flow.
disable: nllloss, precision_recall

4 changes: 2 additions & 2 deletions configs/mnist/mnist_classification_vf_lenet5.yml
@@ -12,13 +12,13 @@ validation:
resize_image: [32, 32]

# Testing parameters - overwrite defaults:
testing:
test:
problem:
resize_image: [32, 32]

# Definition of the pipeline.
pipeline:
name: mnist_variational_flow_lenet5

# Disable components for "default" flow.
disable: nllloss, precision_recall

4 changes: 2 additions & 2 deletions configs/mnist/mnist_classification_vf_lenet5_2losses.yml
@@ -12,13 +12,13 @@ validation:
resize_image: [32, 32]

# Testing parameters - overwrite defaults:
testing:
test:
problem:
resize_image: [32, 32]

# Definition of the pipeline.
pipeline:
name: mnist_variational_flow_lenet5

# Disable components for "default" flow.
disable: nllloss, precision_recall

@@ -7,7 +7,7 @@ training:
#resize_image: [32, 32]
batch_size: 64
#optimizer:
# #name: Adam
# #type: Adam
# lr: 0.001
#terminal_conditions:
# loss_stop: 0.08
@@ -19,13 +19,13 @@ training:
# resize_image: [32, 32]

# Testing parameters - overwrite defaults:
#testing:
#test:
# problem:
# resize_image: [32, 32]

# Definition of the pipeline.
pipeline:
name: mnist_variational_flow_shared_convnet_2softmaxes

# Disable components for "default" flow.
disable: nllloss, precision_recall

5 changes: 2 additions & 3 deletions configs/translation/eng_fra_translation_enc_attndec.yml
@@ -14,7 +14,7 @@ training:

# optimizer parameters:
optimizer:
name: Adam
type: Adam
lr: 1.0e-3

# settings parameters
@@ -35,7 +35,7 @@ validation:
batch_size: 64

# Testing parameters:
testing:
test:
problem:
type: *p_type
data_folder: *data_folder
@@ -45,7 +45,6 @@ testing:
batch_size: 64

pipeline:
name: eng_fra_translation_enc_attndec

# Source encoding - model 1.
source_sentence_embedding:
@@ -2,7 +2,6 @@
default_configs: vqa_med_2019/c1_classification/default_c1_classification.yml

pipeline:
name: vqa_med_c1_classification_all_bow_vgg16_concat

global_publisher:
type: GlobalVariablePublisher