diff --git a/.github/actions/install-pnl/action.yml b/.github/actions/install-pnl/action.yml index dedae754886..a0a6bdde249 100644 --- a/.github/actions/install-pnl/action.yml +++ b/.github/actions/install-pnl/action.yml @@ -49,7 +49,9 @@ runs: sed -i /modeci_mdf/d requirements.txt # pywinpty is a transitive dependency and v1.0+ removed support for x86 wheels # terminado >= 0.10.0 pulls in pywinpty >= 1.1.0 - [[ ${{ runner.os }} = Windows* ]] && pip install "pywinpty<1" "terminado<0.10" + # scipy >=1.9.2 doesn't provide win32 wheel and GA doesn't have working fortran on windows + # scikit-learn >= 1.1.3 doesn't provide win32 wheel + [[ ${{ runner.os }} = Windows* ]] && pip install "pywinpty<1" "terminado<0.10" "scipy<1.9.2" "scikit-learn<1.1.3" "statsmodels<0.13.3" "jupyter-server<2" -c requirements.txt fi - name: Install updated package @@ -57,10 +59,21 @@ runs: shell: bash id: new_package run: | - export NEW_PACKAGE=`echo '${{ github.head_ref }}' | cut -f 4 -d/ | sed 's/-gt.*//' | sed 's/-lt.*//'` - echo "::set-output name=new_package::$NEW_PACKAGE" - pip install "`echo $NEW_PACKAGE | sed 's/[-_]/./g' | xargs grep *requirements.txt -h -e | head -n1`" - pip show "$NEW_PACKAGE" | grep 'Version' | tee new_version.deps + # The branch name pattern is: "dependabot/pip/$TARGET/$PACKAGE{-gt-$MINVERSION,,}{-lt-$MAXVERSION,}{-$VERSION,} + # The expression below extracts just the $PACKAGE part + export NEW_PACKAGE=$(echo '${{ github.head_ref }}' | cut -f 4 -d/ | sed 's/-gt.*//' | sed 's/-lt.*//' | sed 's/-[0-9\.]*$//' ) + if grep "$NEW_PACKAGE" *requirements.txt; then + echo "new_package=$NEW_PACKAGE" >> $GITHUB_OUTPUT + # save a list of all installed packages (including pip, wheel; it's never empty) + pip freeze --all > orig + pip install "$(echo $NEW_PACKAGE | sed 's/[-_]/./g' | xargs grep *requirements.txt -h -e | head -n1)" + pip show "$NEW_PACKAGE" | grep 'Version' | tee new_version.deps + # uninstall new packages but skip those from previous steps (pywinpty, terminado on windows x86) + # the 'orig' list is not empty (includes at least pip, wheel) + pip uninstall -y $(pip freeze -r orig | sed '1,/## /d') + else + echo "new_package=''" >> $GITHUB_OUTPUT + fi - name: Python dependencies shell: bash @@ -81,9 +94,11 @@ runs: done - name: Check updated package - if: ${{ startsWith(github.head_ref, 'dependabot/pip') && matrix.pnl-version != 'base' }} + if: ${{ startsWith(github.head_ref, 'dependabot/pip') && matrix.pnl-version != 'base' && steps.new_package.outputs.new_package != '' }} shell: bash run: | - pip show ${{ steps.new_package.outputs.new_package }} | grep 'Version' | tee installed_version.deps - cmp -s new_version.deps installed_version.deps || echo "::error::Package version restricted by dependencies: ${{ steps.new_package.outputs.new_package }}" - diff new_version.deps installed_version.deps + if [ $(pip list | grep -o ${{ steps.new_package.outputs.new_package }} | wc -l) != "0" ] ; then + pip show ${{ steps.new_package.outputs.new_package }} | grep 'Version' | tee installed_version.deps + cmp -s new_version.deps installed_version.deps || echo "::error::Package version restricted by dependencies: ${{ steps.new_package.outputs.new_package }}" + diff new_version.deps installed_version.deps + fi diff --git a/.github/actions/on-branch/action.yml b/.github/actions/on-branch/action.yml index 770018ba85a..a4dcfd5ec3a 100644 --- a/.github/actions/on-branch/action.yml +++ b/.github/actions/on-branch/action.yml @@ -25,4 +25,4 @@ runs: git describe --always --tags export ON_BRANCH=$(git branch -a 
--contains ${{ github.ref }} | grep -q '^ remotes/origin/${{ inputs.branch }}$' && echo "${{ inputs.branch }}" || echo "") echo "Found out: ${ON_BRANCH}" - echo "::set-output name=on_branch::$ON_BRANCH" + echo "on_branch=$ON_BRANCH" >> $GITHUB_OUTPUT diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 146255ce292..9bcf97d976f 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -3,11 +3,20 @@ name: "CodeQL" on: push: branches: [ "master", "devel" ] + paths-ignore: + - 'docs/**' pull_request: - branches: [ "master" ] + branches: [ "master", "devel" ] schedule: - cron: "14 21 * * 5" +# run only the latest instance of this workflow job for the current branch/PR +# cancel older runs +# fall back to run id if not available (run id is unique -> no cancellations) +concurrency: + group: ci-${{ github.ref || github.run_id }}-${{ github.workflow }} + cancel-in-progress: true + jobs: analyze: name: Analyze diff --git a/.github/workflows/pnl-ci-docs.yml b/.github/workflows/pnl-ci-docs.yml index a37c9e7a250..e85043e7f24 100644 --- a/.github/workflows/pnl-ci-docs.yml +++ b/.github/workflows/pnl-ci-docs.yml @@ -8,6 +8,13 @@ on: - 'v*' pull_request: +# run only the latest instance of this workflow job for the current branch/PR +# cancel older runs +# fall back to run id if not available (run id is unique -> no cancellations) +concurrency: + group: ci-${{ github.ref || github.run_id }}-${{ github.workflow }} + cancel-in-progress: true + jobs: docs-build: runs-on: ${{ matrix.os }} @@ -76,14 +83,14 @@ jobs: run: | python -m pip install -U pip python -m pip --version - echo ::set-output name=pip_cache_dir::$(python -m pip cache dir) + echo "pip_cache_dir=$(python -m pip cache dir)" | tee -a $GITHUB_OUTPUT - name: Wheels cache uses: actions/cache@v3 with: path: ${{ steps.pip_cache.outputs.pip_cache_dir }}/wheels - key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-v2-${{ github.sha }} - restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-v2 + key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ hashFiles('requirements.txt', 'doc_requirements.txt') }}-${{ github.sha }} + restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ hashFiles('requirements.txt', 'doc_requirements.txt') }} # We need to install all PNL deps since docs config imports psyneulink module - name: Install local, editable PNL package diff --git a/.github/workflows/pnl-ci.yml b/.github/workflows/pnl-ci.yml index b97eaa55ecf..19775508b56 100644 --- a/.github/workflows/pnl-ci.yml +++ b/.github/workflows/pnl-ci.yml @@ -11,29 +11,60 @@ on: - 'v**' pull_request: +env: + SELF_HOSTED_MACOS: ${{ secrets.SELF_HOSTED_MACOS }} + SELF_HOSTED_LINUX: ${{ secrets.SELF_HOSTED_LINUX }} + SELF_HOSTED_WINDOWS: ${{ secrets.SELF_HOSTED_WINDOWS }} + +# run only the latest instance of this workflow job for the current branch/PR +# cancel older runs +# fall back to run id if not available (run id is unique -> no cancellations) +concurrency: + group: ci-${{ github.ref || github.run_id }}-${{ github.workflow }} + cancel-in-progress: true + jobs: + # A job to select self-hosted runner if requested by an env var + select-runner: + runs-on: ubuntu-latest + + outputs: + self_hosted_macos: ${{ steps.is_self_hosted.outputs.macos && 'macos' || '' }} + self_hosted_linux: ${{ 
steps.is_self_hosted.outputs.linux && 'linux' || '' }} + self_hosted_windows: ${{ steps.is_self_hosted.outputs.windows && 'windows' || '' }} + + steps: + - name: Add macos + id: is_self_hosted + run: | + echo "macos=$SELF_HOSTED_MACOS" | tee -a $GITHUB_OUTPUT + echo "linux=$SELF_HOSTED_LINUX" | tee -a $GITHUB_OUTPUT + echo "windows=$SELF_HOSTED_WINDOWS" | tee -a $GITHUB_OUTPUT + + # the main build job build: - runs-on: ${{ matrix.os }} + needs: select-runner + runs-on: ${{ (contains(needs.select-runner.outputs.*, matrix.os) && fromJSON(format('[ "self-hosted","{0}", "X64" ]', matrix.os))) || format('{0}-latest', matrix.os) }} strategy: fail-fast: false matrix: python-version: [3.7, 3.8, 3.9] python-architecture: ['x64'] extra-args: [''] - os: [ubuntu-latest, macos-latest, windows-latest] + os: [ubuntu, macos, windows] include: # add 32-bit build on windows - python-version: 3.8 python-architecture: 'x86' - os: windows-latest + os: windows # code-coverage build on macos python 3.9 - python-version: 3.9 - os: macos-latest + os: macos extra-args: '--cov=psyneulink' exclude: # 3.7 is broken on macos-11, https://github.com/actions/virtual-environments/issues/4230 - python-version: 3.7 - os: macos-latest + os: macos steps: # increased fetch-depth and tag checkout needed to get correct @@ -61,14 +92,14 @@ jobs: run: | python -m pip install -U pip python -m pip --version - echo ::set-output name=pip_cache_dir::$(python -m pip cache dir) + echo "pip_cache_dir=$(python -m pip cache dir)" | tee -a $GITHUB_OUTPUT - name: Wheels cache uses: actions/cache@v3 with: path: ${{ steps.pip_cache.outputs.pip_cache_dir }}/wheels - key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-v2-${{ github.sha }} - restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-v2 + key: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ hashFiles('requirements.txt', 'dev_requirements.txt') }}-${{ github.sha }} + restore-keys: ${{ runner.os }}-python-${{ matrix.python-version }}-${{ matrix.python-architecture }}-pip-wheels-${{ hashFiles('requirements.txt', 'dev_requirements.txt') }} - name: Install local, editable PNL package uses: ./.github/actions/install-pnl diff --git a/.github/workflows/test-release.yml b/.github/workflows/test-release.yml index 0b7887ea5ee..8f1822de2e0 100644 --- a/.github/workflows/test-release.yml +++ b/.github/workflows/test-release.yml @@ -34,8 +34,8 @@ jobs: python setup.py sdist python setup.py bdist_wheel cd dist - echo ::set-output name=sdist::$(ls *.tar.gz) - echo ::set-output name=wheel::$(ls *.whl) + echo "sdist=$(ls *.tar.gz)" >> $GITHUB_OUTPUT + echo "wheel=$(ls *.whl)" >> $GITHUB_OUTPUT - name: Upload Python dist files uses: actions/upload-artifact@v3 diff --git a/.gitignore b/.gitignore index 84bfbe22d2a..4993664944c 100644 --- a/.gitignore +++ b/.gitignore @@ -196,4 +196,5 @@ tests/*.pdf # mypy cache .mypy_cache -/tests/json/*.json +/tests/mdf/*.json +/tests/mdf/*.yml diff --git a/Scripts/Models (Under Development)/N-back.py b/Scripts/Models (Under Development)/N-back.py deleted file mode 100644 index 716962ffb60..00000000000 --- a/Scripts/Models (Under Development)/N-back.py +++ /dev/null @@ -1,116 +0,0 @@ -from psyneulink import * - -# TODO: -# Nback:: -# - separate out stim/context external inputs from those from EM into FFN -# - figure out how to specify feedback from DDM to EM: -# - figure out how to execute EM twice: -# > first, 
at beginning of trial, to retrieve item based on current stimulus & context -# (with prob retrieval = 1, prob storage = 0) -# > second time, at end of trial (under influence of ControlMechanism) to encode current stimulus & context -# (with prob storage = 1; prob of retrieval = 0) -# scheduler.add_condition(A, pnl.AfterNCalls(CM, 1)) -# scheduler.add_condition(CM, pnl.Always()) -# composition.run(...termination_conds={pnl.TimeScale.TRIAL: pnl.And(pnl.AfterNCalls(CM, 2), pnl.JustRan(CM))}) -# - implement circular drift as function for an input mechanism -# - ADD PNL FEATURE: should be able to use InputPort as spec for a pathway (if there is nothing after it); -# same for OutputPort (if there is nothing before it) - - -#region N-BACK MODEL -def n_back_model(): - - # Input Mechs - stim = TransferMechanism(name='STIM', size=5) - context = TransferMechanism(name='CONTEXT', size=5) - - # Feedforward Network: - stim_input_layer = TransferMechanism(name='STIM INPUT LAYER', size=5) - context_input_layer = TransferMechanism(name='CONTEXT INPUT LAYER', size=5) - match_output_layer = TransferMechanism(name='MATCH LAYER', size=1) - # ffn = AutodiffComposition(name='FFN', pathways=[[stim_input,match_output], [context_input, match_output]]) - ffn = Composition(name='FFN', pathways=[[stim_input_layer, match_output_layer], - [context_input_layer, match_output_layer]]) - - # Episodic Memory, Decision and Control - # em = EpisodicMemoryMechanism(name='EM', content_size=5, assoc_size=5) - em = EpisodicMemoryMechanism(name='EM', size=5, - # function=DictionaryMemory(initializer=[[[0,0,0,0,0],[0,0,0,0,0]]]) - ) - ctl = ControlMechanism(control=(STORAGE_PROB, em)) - decision = DDM(name='DECISION') - - resp_decision = Pathway([match_output_layer, (decision, NodeRole.OUTPUT)]) - # FIX: ENHANCE add_linear_processing_pathway TO SUPPORT InputPort at end, or OutputPort at beginning: - # stimulus_encoding = [stim, em.input_ports[KEY_INPUT]] - # context_encoding = [context, em.input_ports[VALUE_INPUT]] - - # MappingProjection(sender=stim, receiver=stim_input_layer) - # MappingProjection(sender=stim, receiver=em.input_ports[KEY_INPUT]) - # MappingProjection(sender=context, receiver=context_input_layer) - # MappingProjection(sender=context, receiver=em.input_ports[VALUE_INPUT]) - # MappingProjection(sender=em.output_ports[KEY_OUTPUT], receiver=stim_input_layer) - # MappingProjection(sender=em.output_ports[VALUE_OUTPUT], receiver=context_input_layer) - # stim_processing = Pathway([stim, ffn]) - # context_processing = Pathway([context, ffn]) - # stim_encoding = Pathway([stim, em]) - # context_encoding = Pathway([context, em]) - # stim_retrieval = Pathway([em, stim_input_layer]) - # context_retrieval = Pathway([em, context_input_layer]) - # storage = Pathway([(decision, NodeRole.OUTPUT), (ctl, NodeRole.FEEDBACK_SENDER), em]) - # # FIX: show_graph NOT RECOGNIZING STIM->STIM_INPUT_LAYER AND CONTEXT->CONTEXT_INPUT_LAYER - # comp = Composition(pathways=[stim_processing, - # context_processing, - # ffn, - # context_encoding, - # stim_encoding, - # resp_decision, - # stim_retrieval, - # context_retrieval, - # storage]) - # FIX: show_graph NOT RECOGNIZING STIM->STIM_INPUT_LAYER AND CONTEXT->CONTEXT_INPUT_LAYER - # comp = Composition(pathways=[[stim, ffn], - # [stim,em], - # [context,ffn], - # [context,em], - # [em,ffn], - # [ffn, em], - # [ffn, decision, ctl, em]]) - - # comp = Composition(pathways=[ffn, - # [stim, stim_input_layer], - # [stim, MappingProjection(stim, em.input_ports[KEY_INPUT]), em], - # [context, 
context_input_layer], - # [context, MappingProjection(context, em.input_ports[VALUE_INPUT]), em], - # [em,stim_input_layer], - # [em,context_input_layer], - # [ffn, decision, ctl, em]]) - - comp = Composition() - comp.add_nodes([stim, context, ffn, em, (decision, NodeRole.OUTPUT), ctl]) - comp.add_projection(MappingProjection(), stim, stim_input_layer) - comp.add_projection(MappingProjection(), context, context_input_layer) - comp.add_projection(MappingProjection(), stim, em.input_ports[KEY_INPUT]) - comp.add_projection(MappingProjection(), context, em.input_ports[VALUE_INPUT]) - comp.add_projection(MappingProjection(), em.output_ports[KEY_OUTPUT], stim_input_layer) - comp.add_projection(MappingProjection(), em.output_ports[VALUE_OUTPUT], context_input_layer) - comp.add_projection(MappingProjection(), match_output_layer, decision) - comp.add_projection(MappingProjection(), decision, ctl) - # comp.add_projection(MappingProjection(), decision, stim_input_layer) - - # comp._analyze_graph() - comp.show_graph() - # comp.show_graph(show_cim=True, - # show_node_structure=ALL, - # show_projection_labels=True, - # show_dimensions=True) - # comp.show_graph(show_cim=True, - # show_node_structure=ALL, - # show_projection_labels=True, - # show_dimensions=True) - # comp.run(inputs={stim:[1,2,3,4,5], - # context:[6,7,8,9,10]}, - # report_output=ReportOutput.ON) - # comp.run(inputs={a:2.5}, report_output=ReportOutput.FULL) -#endregion -n_back_model() diff --git a/Scripts/Models (Under Development)/Nback/SphericalDrift Tests.py b/Scripts/Models (Under Development)/Nback/SphericalDrift Tests.py new file mode 100644 index 00000000000..3fb2cbed191 --- /dev/null +++ b/Scripts/Models (Under Development)/Nback/SphericalDrift Tests.py @@ -0,0 +1,34 @@ +import numpy as np +from psyneulink import * + +NUM_TRIALS = 48 + +stims = np.array([x[0] for x in em.memory]) +contexts = np.array([x[1] for x in em.memory]) +cos = Distance(metric=COSINE) +dist = Distance(metric=EUCLIDEAN) +diffs = [np.sum([contexts[i+1] - contexts[1]]) for i in range(NUM_TRIALS)] +diffs_1 = [np.sum([contexts[i+1] - contexts[i]]) for i in range(NUM_TRIALS)] +diffs_2 = [np.sum([contexts[i+2] - contexts[i]]) for i in range(NUM_TRIALS-1)] +dots = [[contexts[i+1] @ contexts[1]] for i in range(NUM_TRIALS)] +dot_diffs_1 = [[contexts[i+1] @ contexts[i]] for i in range(NUM_TRIALS)] +dot_diffs_2 = [[contexts[i+2] @ contexts[i]] for i in range(NUM_TRIALS-1)] +angle = [cos([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] +angle_1 = [cos([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +angle_2 = [cos([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +euclidean = [dist([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] +euclidean_1 = [dist([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +euclidean_2 = [dist([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +print("STIMS:", stims, "\n") +print("DIFFS:", diffs, "\n") +print("DIFFS 1:", diffs_1, "\n") +print("DIFFS 2:", diffs_2, "\n") +print("DOT PRODUCTS:", dots, "\n") +print("DOT DIFFS 1:", dot_diffs_1, "\n") +print("DOT DIFFS 2:", dot_diffs_2, "\n") +print("ANGLE: ", angle, "\n") +print("ANGLE_1: ", angle_1, "\n") +print("ANGLE_2: ", angle_2, "\n") +print("EUCLIDEAN: ", euclidean, "\n") +print("EUCLIDEAN 1: ", euclidean_1, "\n") +print("EUCLIDEAN 2: ", euclidean_2, "\n") diff --git a/Scripts/Models (Under Development)/Nback/__init__.py b/Scripts/Models (Under Development)/Nback/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/Scripts/Models (Under Development)/Nback/nback.ipynb b/Scripts/Models (Under Development)/Nback/nback.ipynb new file mode 100644 index 00000000000..dddf6748da5 --- /dev/null +++ b/Scripts/Models (Under Development)/Nback/nback.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from nback import construct_model, train_network, run_model, analyze_results\n", + "from psyneulink import *" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Model Parameters:\n", + "\n", + "##### Fixed (structural) parameters:" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "MAX_NBACK_LEVELS = 3\n", + "NUM_STIM = 8 # number of different stimuli in stimulus set - QUESTION: WHY ISN\"T THIS EQUAL TO STIM_SIZE OR VICE VERSA?\n", + "FFN_TRANSFER_FUNCTION = ReLU" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "##### Constructor parameters: (values are from nback-paper)" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "source": [ + "STIM_SIZE = 8 # length of stimulus vector\n", + "CONTEXT_SIZE = 25 # length of context vector\n", + "HIDDEN_SIZE = STIM_SIZE*4 # dimension of hidden units in ff\n", + "NBACK_LEVELS = [2,3] # Currently restricted to these\n", + "NUM_NBACK_LEVELS = len(NBACK_LEVELS)\n", + "CONTEXT_DRIFT_NOISE = 0.0 # noise used by DriftOnASphereIntegrator (function of Context mech)\n", + "RANDOM_WEIGHTS_INITIALIZATION=\\\n", + " RandomMatrix(center=0.0, range=0.1) # Matrix spec used to initialize all Projections\n", + "RETRIEVAL_SOFTMAX_TEMP = 1/8 # express as gain # precision of retrieval process\n", + "RETRIEVAL_HAZARD_RATE = 0.04 # rate of re=sampling of em following non-match determination in a pass through ffn\n", + "RETRIEVAL_STIM_WEIGHT = 0.05 # weighting of stimulus field in retrieval from em\n", + "RETRIEVAL_CONTEXT_WEIGHT \\\n", + " = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em\n", + "DECISION_SOFTMAX_TEMP=1" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##### Training parameters:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "source": [ + "NUM_EPOCHS = 6250 # nback-paper: 400,000 @ one trial per epoch = 6,250 @ 64 trials per epoch\n", + "LEARNING_RATE =0.001 # nback-paper: .001\n", + "\n", + "#### Execution parameters:\n", + "CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial\n", + "NUM_TRIALS = 48 # number of stimuli presented in a trial sequence\n", + "REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run\n", + "REPORT_PROGRESS = ReportProgress.OFF # Sets console progress bar during run\n", + "REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training\n", + "ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##### Names of Compositions and Mechanisms:" + ], + "metadata": { + "collapsed": false + } + }, + { 
+ "cell_type": "code", + "source": [ + "NBACK_MODEL = \"N-back Model\"\n", + "FFN_COMPOSITION = \"WORKING MEMORY (fnn)\"\n", + "FFN_STIMULUS_INPUT = \"CURRENT STIMULUS\"\n", + "FFN_CONTEXT_INPUT = \"CURRENT CONTEXT\"\n", + "FFN_STIMULUS_RETRIEVED = \"RETRIEVED STIMULUS\"\n", + "FFN_CONTEXT_RETRIEVED = \"RETRIEVED CONTEXT\"\n", + "FFN_TASK = \"CURRENT TASK\"\n", + "FFN_HIDDEN = \"HIDDEN LAYER\"\n", + "FFN_OUTPUT = \"DECISION LAYER\"\n", + "MODEL_STIMULUS_INPUT ='STIM'\n", + "MODEL_CONTEXT_INPUT = 'CONTEXT'\n", + "MODEL_TASK_INPUT = \"TASK\"\n", + "EM = \"EPISODIC MEMORY (dict)\"\n", + "CONTROLLER = \"READ/WRITE CONTROLLER\"" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Construct the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "constructing 'WORKING MEMORY (fnn)'...\n", + "'constructing N-back Model'...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jdc/PycharmProjects/PsyNeuLink/psyneulink/core/globals/utilities.py:443: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " if reference is not None and (candidate == reference):\n", + "/Users/jdc/PycharmProjects/PsyNeuLink/psyneulink/core/globals/utilities.py:443: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. 
If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n", + " if reference is not None and (candidate == reference):\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "full model constructed\n" + ] + } + ], + "source": [ + "clear_registry()\n", + "nback_model = construct_model(stim_size=10 # Size of stimulus input layer\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "10" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(nback_model.nodes['STIM'].variable[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Display the model:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "", + "image/svg+xml": "\n\n\n\n\n\nN-back Model\n\nN-back Model\n\ncluster_WORKING MEMORY (fnn)\n\nWORKING MEMORY (fnn)\n\n\n\nTASK\n\nTASK\n\n\n\nCURRENT TASK\n\nCURRENT TASK\n\n\n\nTASK->CURRENT TASK\n\n\n\n\n\nCONTEXT\n\nCONTEXT\n\n\n\nEPISODIC MEMORY (dict)\n\nEPISODIC MEMORY (dict)\n\n\n\nCONTEXT->EPISODIC MEMORY (dict)\n\n\n\n\n\nCURRENT CONTEXT\n\nCURRENT CONTEXT\n\n\n\nCONTEXT->CURRENT CONTEXT\n\n\n\n\n\nSTIM\n\nSTIM\n\n\n\nCURRENT STIMULUS\n\nCURRENT STIMULUS\n\n\n\nSTIM->CURRENT STIMULUS\n\n\n\n\n\nSTIM->EPISODIC MEMORY (dict)\n\n\n\n\n\nHIDDEN LAYER\n\nHIDDEN LAYER\n\n\n\nCURRENT STIMULUS->HIDDEN LAYER\n\n\n\n\n\nCURRENT TASK->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED STIMULUS\n\nRETRIEVED STIMULUS\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED STIMULUS\n\n\n\n\n\nRETRIEVED CONTEXT\n\nRETRIEVED CONTEXT\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED CONTEXT\n\n\n\n\n\nRETRIEVED STIMULUS->HIDDEN LAYER\n\n\n\n\n\nCURRENT CONTEXT->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED CONTEXT->HIDDEN LAYER\n\n\n\n\n\nREAD/WRITE CONTROLLER\n\nREAD/WRITE CONTROLLER\n\n\n\nREAD/WRITE CONTROLLER->EPISODIC MEMORY (dict)\n\n\n\n\n\n\nOBJECTIVE MECHANISM\n\nOBJECTIVE MECHANISM\n\n\n\nOBJECTIVE MECHANISM->READ/WRITE CONTROLLER\n\n\n\n\n\nDECISION LAYER\n\nDECISION LAYER\n\n\n\nDECISION LAYER->OBJECTIVE MECHANISM\n\n\n\n\n\nHIDDEN LAYER->DECISION LAYER\n\n\n\n\n\n" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nback_model.show_graph(output_fmt='jupyter')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Train the model:" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ffn = nback_model.nodes['WORKING MEMORY (fnn)']\n", + "train_network(ffn, num_epochs=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the model:" + ] + }, + { + "cell_type": "code", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "source": [ + "results = run_model(nback_model)" + ], + "execution_count": 11, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nback_model' is not defined", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", + 
"\u001B[0;32m/var/folders/_8/09rzl01902954fwz0xrgrx7h0000gp/T/ipykernel_57864/313089602.py\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mresults\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mrun_model\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mnback_model\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 2\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/PycharmProjects/PsyNeuLink/Scripts/Models (Under Development)/N-Back/nback.py\u001B[0m in \u001B[0;36mrun_model\u001B[0;34m(model, load_weights_from, context_drift_rate, num_trials, report_output, report_progress, animate, save_results_to)\u001B[0m\n\u001B[1;32m 627\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mthose\u001B[0m \u001B[0mare\u001B[0m \u001B[0mreturned\u001B[0m \u001B[0mby\u001B[0m \u001B[0mcall\u001B[0m \u001B[0mbut\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0msaved\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 628\u001B[0m \"\"\"\n\u001B[0;32m--> 629\u001B[0;31m \u001B[0mffn\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mnback_model\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnodes\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mFFN_COMPOSITION\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 630\u001B[0m \u001B[0mem\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mmodel\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mnodes\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mEM\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 631\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mload_weights_from\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mNameError\u001B[0m: name 'nback_model' is not defined" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Analyze the results:" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "coded_responses, stats = analyze_results(results,\n", + " num_trials=NUM_TRIALS,\n", + " nback_levels=NBACK_LEVELS)\n", + "\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/Scripts/Models (Under Development)/Nback/nback.py b/Scripts/Models (Under Development)/Nback/nback.py new file mode 100644 index 00000000000..daf3c503956 --- /dev/null +++ b/Scripts/Models (Under Development)/Nback/nback.py @@ -0,0 +1,822 @@ +""" +This implements a model of the `Nback task `_ +described in `Beukers et al. (2022) `_. The model uses a simple implementation of episodic +(content-addressable) memory to store previous stimuli and the temporal context in which they occured, +and a feedforward neural network to evaluate whether the current stimulus is a match to the n'th preceding stimulus +(n-back level). 
This model is an example of proposed interactions between working memory (e.g., in neocortex) and +episodic memory (e.g., in hippocampus and/or cerebellum) in the performance of tasks that demand sequential processing +and control, and along the lines of models emerging from machine learning that augment the use of recurrent neural networks +(e.g., long short-term memory mechanisms; LSTMs) for active memory and control with an external memory capable of +rapid storage and content-based retrieval, such as the Neural Turing Machine (NTM; `Graves et al., 2016 +`_), Episodic Planning Networks (EPN; `Ritter et al., 2020 +`_), and Emergent Symbols through Binding Networks (ESBN; `Webb et al., 2021 +`_). + +There are three primary methods in the script: + +* construct_model(args): + takes as arguments parameters used to construct the model; for convenience, defaults are defined below + (under "Construction parameters"). + +* train_network(args): + takes as arguments the feedforward neural network Composition (FFN_COMPOSITION) and number of epochs to train. + Note: learning_rate is set at construction (can specify using LEARNING_RATE under "Training parameters" below). + +* run_model(): + takes the context drift rate to be applied on each trial and the number of trials to execute as args, as well as + reporting and animation specifications (see "Execution parameters" below). + +See "Settings for running the script" to specify whether the model is trained and/or executed when the script is run, +and whether a graphic display of the network is generated when it is constructed. + +Sequences of stimuli are constructed to match those used in the study by `Kane et al., +2007 `_ + + +TODO: + - from Andre + - network architecture; in particular, size of hidden layer and projection patterns to and from it + - the stim+context input vector (length 90) projects to a hidden layer (length 80); + - the task input vector (length 2) projects to a different hidden layer (length 80); + - those two hidden layers project (over fixed, nonlearnable, one-one-projections?) to a third hidden layer (length 80) that simply sums them; + - the third hidden layer projects to the length 2 output layer; + - a softmax is taken over the output layer to determine the response. + - fix: were biases trained? + - training: + - learning rate: 0.001; epoch: 1 trial per epoch of training + - fix: state_dict with weights (still needed) + - get empirical stimulus sequences (still needed) + - put Nback script (with pointer to latest version on PNL) in nback-paper repo + - train_network() and run_model(): refactor to take inputs and trial_types, and training_set, respectively + - fix: get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...)) + - fix: warnings on run + - complete documentation in BeukersNbackModel.rst + - validate against nback-paper results + - after validation: + - try with STIM_SIZE = NUM_STIMS rather than 20 (as in nback-paper) + - refactor generate_stim_sequence() to use actual empirical stimulus sequences + - replace get_input_sequence and get_training_inputs with generators passed to nback_model.run() and ffn.learn() + - build version that *can* maintain in WM, and uses EVC to decide which would be easier: + maintenance in WM vs. 
storage/retrieval from EM (and the fit to Jarrod's data) +""" + +import random +import timeit +from enum import IntEnum +import warnings + +import numpy as np +from graph_scheduler import * +from psyneulink import * + +# Settings for running script: +DISPLAY_MODEL = False # show visual graphic of model +TRAIN = True +RUN = True +ANALYZE = True # Analyze results of run +REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run +REPORT_PROGRESS = ReportProgress.ON # Sets console progress bar during run +REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training +ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution + +#region ========================================= PARAMETERS =========================================================== + +# Fixed (structural) parameters: +MAX_NBACK_LEVELS = 3 +NUM_STIM = 8 # number of different stimuli in stimulus set - QUESTION: WHY ISN"T THIS EQUAL TO STIM_SIZE OR VICE VERSA? +FFN_TRANSFER_FUNCTION = ReLU + +# Constructor parameters: (values are from nback-paper) +STIM_SIZE=8 # length of stimulus vector +CONTEXT_SIZE=25 # length of context vector +HIDDEN_SIZE=STIM_SIZE*4 # dimension of hidden units in ff +NBACK_LEVELS = [2,3] # Currently restricted to these +NUM_NBACK_LEVELS = len(NBACK_LEVELS) +CONTEXT_DRIFT_NOISE=0.0 # noise used by DriftOnASphereIntegrator (function of Context mech) +RANDOM_WEIGHTS_INITIALIZATION=RandomMatrix(center=0.0, range=0.1) # Matrix spec used to initialize all Projections +RETRIEVAL_SOFTMAX_TEMP=1/8 # express as gain # precision of retrieval process +RETRIEVAL_HAZARD_RATE=0.04 # rate of re=sampling of em following non-match determination in a pass through ffn +RETRIEVAL_STIM_WEIGHT=.05 # weighting of stimulus field in retrieval from em +RETRIEVAL_CONTEXT_WEIGHT = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em +# DECISION_SOFTMAX_TEMP=1 + +# Training parameters: +NUM_EPOCHS= 6250 # nback-paper: 400,000 @ one trial per epoch = 6,250 @ 64 trials per epoch +LEARNING_RATE=0.001 # nback-paper: .001 + +# Execution parameters: +CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial +NUM_TRIALS = 48 # number of stimuli presented in a trial sequence + +# Names of Compositions and Mechanisms: +NBACK_MODEL = "Nback Model" +FFN_COMPOSITION = "WORKING MEMORY (fnn)" +FFN_STIMULUS_INPUT = "CURRENT STIMULUS" +FFN_CONTEXT_INPUT = "CURRENT CONTEXT" +FFN_STIMULUS_RETRIEVED = "RETRIEVED STIMULUS" +FFN_CONTEXT_RETRIEVED = "RETRIEVED CONTEXT" +FFN_TASK = "CURRENT TASK" +FFN_HIDDEN = "HIDDEN LAYER" +FFN_OUTPUT = "DECISION LAYER" +MODEL_STIMULUS_INPUT ='STIM' +MODEL_CONTEXT_INPUT = 'CONTEXT' +MODEL_TASK_INPUT = "TASK" +EM = "EPISODIC MEMORY (dict)" +CONTROLLER = "READ/WRITE CONTROLLER" + +class trial_types(IntEnum): + """Trial types explicitly assigned and counter-balanced in get_run_inputs() + In notation below, "A" is always current stimulus. + Foils are only explicitly assigned to items immediately following nback item. 
+ Subseq designated below as "not explicitly assigned" may still appear in the overall stimulus seq, + either within the subseq through random assignment, + and/or through cross-subseq relationships that are not controlled in this design + """ + MATCH_NO_FOIL = 0 # ABA (2-back) or ABCA (3-back); not explicitly assigned: ABBA + MATCH_WITH_FOIL = 1 # AAA (2-back) or AABA (3-back); not explicitly assigned: ABAA or AAAA + NO_MATCH_NO_FOIL = 2 # ABB (2-back) or BCDA (3-back); not explicitly assigned: BBCA, BCCA or BBBA + NO_MATCH_WITH_FOIL = 3 # BAA (2-back) or BACA (3-back); not explicitly assigned: BCAA or BAAA +num_trial_types = len(trial_types) +#endregion + +#region ===================================== MODEL CONSTRUCTION ======================================================= + +def construct_model(stim_size = STIM_SIZE, + context_size = CONTEXT_SIZE, + hidden_size = HIDDEN_SIZE, + num_nback_levels = NUM_NBACK_LEVELS, + context_drift_noise = CONTEXT_DRIFT_NOISE, + retrievel_softmax_temp = RETRIEVAL_SOFTMAX_TEMP, + retrieval_hazard_rate = RETRIEVAL_HAZARD_RATE, + retrieval_stimulus_weight = RETRIEVAL_STIM_WEIGHT, + retrieval_context_weight = RETRIEVAL_CONTEXT_WEIGHT, + # decision_softmax_temp = DECISION_SOFTMAX_TEMP + ): + """Construct nback_model + Arguments + --------- + context_size: int : default CONTEXT_SIZE + hidden_size: int : default HIDDEN_SIZE + num_nback_levels: int : default NUM_NBACK_LEVELS + context_drift_noise: float : default CONTEXT_DRIFT_NOISE + retrievel_softmax_temp: float : default RETRIEVAL_SOFTMAX_TEMP + retrieval_hazard_rate: float : default RETRIEVAL_HAZARD_RATE + retrieval_stimulus_weight: float : default RETRIEVAL_STIM_WEIGHT + retrieval_context_weight: float : default RETRIEVAL_CONTEXT_WEIGHT + # decision_softmax_temp: float : default DECISION_SOFTMAX_TEMP) + + Returns + ------- + Composition implementing Nback model + """ + + print(f"constructing '{FFN_COMPOSITION}'...") + + # FEED FORWARD NETWORK ----------------------------------------- + + # inputs: encoding of current stimulus and context, retrieved stimulus and retrieved context, + # output: decision: match [1,0] or non-match [0,1] + # Must be trained to detect match for specified task (1-back, 2-back, etc.) 
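+    # In effect (a sketch of the computation, not the PNL API), the network constructed below implements:
+    #     hidden   = ReLU(W_in @ concat(curr_stim, curr_context, retr_stim, retr_context, task))
+    #     decision = ReLU(W_out @ hidden)   # 2 units; target coding per comment above: [1,0]=match, [0,1]=non-match
+    # with weight matrices initialized from RANDOM_WEIGHTS_INITIALIZATION and trained via ffn.learn()
+    # in train_network() below.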
+ input_current_stim = TransferMechanism(name=FFN_STIMULUS_INPUT, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_current_context = TransferMechanism(name=FFN_CONTEXT_INPUT, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_stim = TransferMechanism(name=FFN_STIMULUS_RETRIEVED, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_context = TransferMechanism(name=FFN_CONTEXT_RETRIEVED, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_task = TransferMechanism(name=FFN_TASK, + size=num_nback_levels, + function=FFN_TRANSFER_FUNCTION) + hidden = TransferMechanism(name=FFN_HIDDEN, + size=hidden_size, + function=FFN_TRANSFER_FUNCTION) + decision = ProcessingMechanism(name=FFN_OUTPUT, + size=2, + function=ReLU) + + ffn = AutodiffComposition(([{input_current_stim, + input_current_context, + input_retrieved_stim, + input_retrieved_context, + input_task}, + hidden, decision], + RANDOM_WEIGHTS_INITIALIZATION), + name=FFN_COMPOSITION, + learning_rate=LEARNING_RATE, + loss_spec=Loss.CROSS_ENTROPY + # loss_spec=Loss.MSE + ) + + # FULL MODEL (Outer Composition, including input, EM and control Mechanisms) ------------------------ + + print(f"constructing '{NBACK_MODEL}'...") + + # Stimulus Encoding: takes STIM_SIZE vector as input + stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=stim_size) + + # Context Encoding: takes scalar as drift step for current trial + context = ProcessingMechanism(name=MODEL_CONTEXT_INPUT, + function=DriftOnASphereIntegrator( + initializer=np.random.random(context_size-1), + noise=context_drift_noise, + dimension=context_size)) + + # Task: task one-hot indicating n-back (1, 2, 3 etc.) - must correspond to what ffn has been trained to do + task = ProcessingMechanism(name=MODEL_TASK_INPUT, + size=num_nback_levels) + + # Episodic Memory: + # - entries: stimulus (field[0]) and context (field[1]); randomly initialized + # - uses Softmax to retrieve best matching input, subject to weighting of stimulus and context by STIM_WEIGHT + em = EpisodicMemoryMechanism(name=EM, + input_ports=[{NAME:"STIMULUS_FIELD", + SIZE:stim_size}, + {NAME:"CONTEXT_FIELD", + SIZE:context_size}], + function=ContentAddressableMemory( + initializer=[[[0]*stim_size, [0]*context_size]], + distance_field_weights=[retrieval_stimulus_weight, + retrieval_context_weight], + # equidistant_entries_select=NEWEST, + selection_function=SoftMax(output=MAX_INDICATOR, + gain=retrievel_softmax_temp)), + ) + + # Control Mechanism + # Ensures current stimulus and context are only encoded in EM once (at beginning of trial) + # by controlling the storage_prob parameter of em: + # - if outcome of decision signifies a match or hazard rate is realized: + # - set EM[store_prob]=1 (as prep encoding stimulus in EM on next trial) + # - this also serves to terminate trial (see nback_model.termination_processing condition) + # - if outcome of decision signifies a non-match + # - set EM[store_prob]=0 (as prep for another retrieval from EM without storage) + # - continue trial + control = ControlMechanism(name=CONTROLLER, + default_variable=[[1]], # Ensure EM[store_prob]=1 at beginning of first trial + # --------- + # VERSION *WITH* ObjectiveMechanism: + objective_mechanism=ObjectiveMechanism(name="OBJECTIVE MECHANISM", + monitor=decision, + # Outcome=1 if match, else 0 + function=lambda x: int(x[0][0]>x[0][1])), + # Set ControlSignal for EM[store_prob] + function=lambda outcome: int(bool(outcome) + or (np.random.random() > retrieval_hazard_rate)), + # --------- + # # 
VERSION *WITHOUT* ObjectiveMechanism: + # monitor_for_control=decision, + # # Set Evaluate outcome and set ControlSignal for EM[store_prob] + # # - outcome is received from decision as one hot in the form: [[match, no-match]] + # function=lambda outcome: int(int(outcome[0][1]>outcome[0][0]) + # or (np.random.random() > retrieval_hazard_rate)), + # --------- + control=(STORAGE_PROB, em)) + + nback_model = Composition(name=NBACK_MODEL, + nodes=[stim, context, task, ffn, em, control], + # Terminate trial if value of control is still 1 after first pass through execution + termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), + AfterPass(0, TimeScale.TRIAL))}, + ) + # # Terminate trial if value of control is still 1 after first pass through execution + nback_model.add_projection(MappingProjection(), stim, input_current_stim) + nback_model.add_projection(MappingProjection(), context, input_current_context) + nback_model.add_projection(MappingProjection(), task, input_task) + nback_model.add_projection(MappingProjection(), em.output_ports["RETRIEVED_STIMULUS_FIELD"], input_retrieved_stim) + nback_model.add_projection(MappingProjection(), em.output_ports["RETRIEVED_CONTEXT_FIELD"], input_retrieved_context) + nback_model.add_projection(MappingProjection(), stim, em.input_ports["STIMULUS_FIELD"]) + nback_model.add_projection(MappingProjection(), context, em.input_ports["CONTEXT_FIELD"]) + + if DISPLAY_MODEL: + nback_model.show_graph( + # show_cim=True, + # show_node_structure=ALL, + # show_dimensions=True + ) + + print(f'full model constructed') + return nback_model +#endregion + +#region =====================================STIMULUS GENERATION ======================================================= + +def get_stim_set(num_stim=STIM_SIZE): + """Construct an array of unique stimuli for use in an experiment, used by train_network() and run_model()""" + # For now, use one-hots + return np.eye(num_stim) + +def get_task_input(nback_level): + """Construct input to task Mechanism for a given nback_level, used by train_network() and run_model()""" + task_input = list(np.zeros_like(NBACK_LEVELS)) + task_input[nback_level-NBACK_LEVELS[0]] = 1 + return task_input + +def get_training_inputs(network, num_epochs, nback_levels): + """Construct set of training stimuli used by ffn.learn() in train_network() + Construct one example of each condition: + match: stim_current = stim_retrieved and context_current = context_retrieved + stim_lure: stim_current = stim_retrieved and context_current != context_retrieved + context_lure: stim_current != stim_retrieved and context_current == context_retrieved + non_lure: stim_current != stim_retrieved and context_current != context_retrieved + """ + assert is_iterable(nback_levels) and all([0` specified in construction of the network. If None is specified + here, either the value specified at construction, or the default for `AutodiffComposition + ` is used. + num_epochs: int : default NUM_EPOCHS, + specifies number of training epochs (i.e., sets of minibatchs) to execute during training. + save_weights_to: Path : default None + specifies location to store weights at end of training. + + Returns + ------- + Path containing saved weights for matrices of feedforward Projections in network. 
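+
+    Example (an illustrative sketch, mirroring the call in the script-execution region at the end of this file;
+    'ffn.wts.pnl' is a hypothetical filename):
+        ffn = nback_model.nodes[FFN_COMPOSITION]
+        saved_weights = train_network(ffn, num_epochs=NUM_EPOCHS, save_weights_to='ffn.wts.pnl')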
+ """ + print(f"constructing training set for '{network.name}'...") + if training_set == None: + training_set, minibatch_size = get_training_inputs(network=network, + num_epochs=num_epochs, + nback_levels=NBACK_LEVELS) + print(f'num training stimuli per training set (minibatch size): {minibatch_size}') + print(f'num weight updates (num_epochs): {num_epochs}') + print(f'total num trials: {num_epochs*minibatch_size}') + print(f"\ntraining '{network.name}'...") + start_time = timeit.default_timer() + network.learn(inputs=training_set, + minibatch_size=minibatch_size, + report_output=REPORT_OUTPUT, + report_progress=REPORT_PROGRESS, + # report_learning=REPORT_LEARNING, + learning_rate=learning_rate, + # execution_mode=ExecutionMode.LLVMRun + # execution_mode=ExecutionMode.Python + execution_mode=ExecutionMode.PyTorch + ) + stop_time = timeit.default_timer() + print(f"'{network.name}' trained") + training_time = stop_time-start_time + if training_time <= 60: + training_time_str = f'{int(training_time)} seconds' + else: + training_time_str = f'{int(training_time/60)} minutes {int(training_time%60)} seconds' + print(f'training time: {training_time_str} for {num_epochs} epochs') + path = network.save(filename=save_weights_to) + print(f'max weight: {np.max(nback_model.nodes[FFN_COMPOSITION].nodes[FFN_HIDDEN].efferents[0].matrix.base)}') + print(f'saved weights to: {save_weights_to}') + return path + # print(f'saved weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') + # network.load(path) + # print(f'loaded weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') + +def run_model(model, + # load_weights_from=None, + load_weights_from='ffn.wts_nep_6250_lr_001.pnl', + context_drift_rate=CONTEXT_DRIFT_RATE, + num_trials=NUM_TRIALS, + report_output=REPORT_OUTPUT, + report_progress=REPORT_PROGRESS, + animate=ANIMATE, + save_results_to=None + ): + """Run model for all nback levels with a specified context drift rate and number of trials + Arguments + -------- + load_weights_from: Path : default None + specifies file from which to load pre-trained weights for matrices of FFN_COMPOSITION. + context_drift_rate: float : CONTEXT_DRIFT_RATE + specifies drift rate as input to CONTEXT_INPUT, used by DriftOnASphere function of FFN_CONTEXT_INPUT. + num_trials: int : default 48 + number of trials (stimuli) to run. + report_output: REPORT_OUTPUT : default REPORT_OUTPUT.OFF + specifies whether to report results during execution of run (see `Report_Output` for additional details). + report_progress: REPORT_PROGRESS : default REPORT_PROGRESS.OFF + specifies whether to report progress of execution during run (see `Report_Progress` for additional details). + animate: dict or bool : default False + specifies whether to generate animation of execution (see `ShowGraph_Animation` for additional details). + save_results_to: Path : default None + specifies location to save results of the run along with trial_type_sequences for each nback level; + if None, those are returned by call but not saved. 
+ """ + ffn = model.nodes[FFN_COMPOSITION] + em = model.nodes[EM] + if load_weights_from: + print(f"nback_model loading '{FFN_COMPOSITION}' weights from {load_weights_from}...") + ffn.load(filename=load_weights_from) + print(f'max weight: {np.max(nback_model.nodes[FFN_COMPOSITION].nodes[FFN_HIDDEN].efferents[0].matrix.base)}') + print(f"'{model.name}' executing...") + trial_type_seqs = [None] * NUM_NBACK_LEVELS + start_time = timeit.default_timer() + for i, nback_level in enumerate(NBACK_LEVELS): + # Reset episodic memory for new task using first entry (original initializer) + em.function.reset(em.memory[0]) + inputs, trial_type_seqs[i] = get_run_inputs(model, nback_level, context_drift_rate, num_trials) + model.run(inputs=inputs, + report_output=report_output, + report_progress=report_progress, + animate=animate + ) + # print("Number of entries in EM: ", len(model.nodes[EM].memory)) + stop_time = timeit.default_timer() + assert len(model.nodes[EM].memory) == NUM_TRIALS + 1 # extra one is for initializer + if REPORT_PROGRESS == ReportProgress.ON: + print('\n') + print(f"'{model.name}' done: {len(model.results)} trials executed") + execution_time = stop_time - start_time + if execution_time <= 60: + execution_time_str = f'{int(execution_time)} seconds' + else: + execution_time_str = f'{int(execution_time/60)} minutes {int(execution_time%60)} seconds' + print(f'execution time: {execution_time_str}') + results = np.array([model.results, trial_type_seqs]) + if save_results_to: + np.save(save_results_to, results) + # print(f'results: \n{model.results}') + return results +#endregion + +#region ================================= MODEL PERFORMANCE ANALYSIS =================================================== + +def analyze_results(results, num_trials=NUM_TRIALS, nback_levels=NBACK_LEVELS): + responses_and_trial_types = [None] * len(nback_levels) + stats = np.zeros((len(nback_levels),num_trial_types)) + MATCH = 'match' + NON_MATCH = 'non-match' + + for i, nback_level in enumerate(nback_levels): + # Code responses for given nback_level as 1 (match) or 0 (non-match) + relevant_responses = [int(r[0][0]) for r in results[0][i*num_trials:i*num_trials+num_trials]] + relevant_responses = [MATCH if r == 1 else NON_MATCH for r in relevant_responses] + responses_and_trial_types[i] = list(zip(relevant_responses, results[1][i])) + # x = zip(relevant_responses, results[1][i]) + for trial_type in trial_types: + # relevant_data = [[response,condition] for response,condition in x if condition == trial_type] + relevant_data = [[response,condition] for response,condition in zip(relevant_responses, results[1][i]) + if condition == trial_type] + if trial_type in {trial_types.MATCH_NO_FOIL, trial_types.MATCH_WITH_FOIL}: + # is the correct response for a match trial + stats[i][trial_type] = [d[0] for d in relevant_data + if d[0] is not None].count(MATCH) / (len(relevant_data)) + else: + # [0,1] is the correct response for a match trial + stats[i][trial_type] = [d[0] for d in relevant_data + if d[0] is not None].count(NON_MATCH) / (len(relevant_data)) + for i, nback_level in enumerate(nback_levels): + print(f"nback level {nback_level}:") + for j, performance in enumerate(stats[i]): + print(f"\t{list(trial_types)[j].name}: {performance:.1f}") + + data_dict = {k:v for k,v in zip(nback_levels, responses_and_trial_types)} + stats_dict = {} + for i, nback_level in enumerate(nback_levels): + stats_dict.update({nback_level: {trial_type.name:stat for trial_type,stat in zip (trial_types, stats[i])}}) + + return data_dict, 
stats_dict + + + + + + + + +def compute_dprime(hit_rate, fa_rate): + """ returns dprime and sensitivity + """ + def clamp(n, minn, maxn): + return max(min(maxn, n), minn) + # hit_rate = clamp(hit_rate, 0.01, 0.99) + # fa_rate = clamp(fa_rate, 0.01, 0.99) + + dl = np.log(hit_rate * (1 - fa_rate) / ((1 - hit_rate) * fa_rate)) + c = 0.5 * np.log((1 - hit_rate) * (1 - fa_rate) / (hit_rate * fa_rate)) + return dl, c + + +def plot_results(response_and_trial_types, stats): + hits_stderr = np.concatenate((score.mean(2).std(-1)/np.sqrt(neps))[:,(0,1)]) + correj_stderr = np.concatenate((score.mean(2).std(-1)/np.sqrt(neps))[:,(2,3)]) + d,s = compute_dprime( + np.concatenate(score.mean(2)[:,(0,1)]), + np.concatenate(score.mean(2)[:,(2,3)]) + ) + print(d.shape,s.shape) + dprime_stderr = d.std(-1)/np.sqrt(neps) + bias_stderr = s.std(-1)/np.sqrt(neps) + #%% + # 2back-target, 2back-lure, 3back-target, 3back-lure + hits = np.concatenate(acc[:,(0,1)]) + correj = np.concatenate(acc[:,(2,3)]) + dprime = np.zeros(4) + bias = np.zeros(4) + for i in range(4): + d,s = compute_dprime(hits[i], 1-correj[i]) + dprime[i]=d + bias[i]=s + + #%% + f,axar = plt.subplots(2,2,figsize=(15,8));axar=axar.reshape(-1) + cL = ['blue','darkblue','lightgreen','forestgreen'] + labL = ['2b,ctrl','2b,lure','3b,ctrl','3b,lure'] + + # correct reject + ax = axar[0] + ax.set_title('correct rejection') + ax.bar(range(4),correj,color=cL,yerr=correj_stderr) + + # hits + ax = axar[1] + ax.set_title('hits') + ax.bar(range(4),hits,color=cL,yerr=hits_stderr) + + # + ax = axar[2] + ax.set_title('dprime') + ax.bar(range(4),dprime,color=cL,yerr=dprime_stderr) + + # + ax = axar[3] + ax.set_title('bias') + ax.bar(range(4),bias,color=cL,yerr=bias_stderr) + + ## + for ax in axar[:2]: + ax.set_xticks(np.arange(4)) + ax.set_xticklabels(labL) + ax.set_ylim(0,1) + + plt.savefig('figures/EMmetrics-%s-t%i.jpg'%(mtag,tstamp)) + plt.savefig('figures/EMmetrics_yerr-%s-t%i.svg'%(mtag,tstamp)) + + + + + + + + +#endregion + + +#region ===================================== SCRIPT EXECUTION ========================================================= +# Construct, train and/or run model based on settings at top of script + +nback_model = construct_model() + +if TRAIN: + weights_filename = f'ffn.wts_nep_{NUM_EPOCHS}_lr_{str(LEARNING_RATE).split(".")[1]}.pnl' + saved_weights = train_network(nback_model.nodes[FFN_COMPOSITION], + save_weights_to=weights_filename) +if RUN: + from pathlib import Path + import os + results_filename = f'nback.results_nep_{NUM_EPOCHS}_lr_{str(LEARNING_RATE).split(".")[1]}.pnl' + results = run_model(nback_model, + # load_weights_from=Path(os.path.join(os.getcwd(),'ffn.wts_nep_1_lr_01.pnl')), + # load_weights_from=Path(os.path.join(os.getcwd(),'ffn.wts_nep_6250_lr_01.pnl')), + # load_weights_from=INITIALIZER + save_results_to= results_filename) +if ANALYZE: + coded_responses, stats = analyze_results(results, + num_trials=NUM_TRIALS, + nback_levels=NBACK_LEVELS) +#endregion \ No newline at end of file diff --git a/Scripts/Models (Under Development)/Nback/results/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl b/Scripts/Models (Under Development)/Nback/results/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl new file mode 100644 index 00000000000..2a46665f63c Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl differ diff --git a/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under 
Development)/Nback/results/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..4903636b03d Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_1_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_001.pnl b/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_001.pnl new file mode 100644 index 00000000000..57838e60968 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_001.pnl differ diff --git a/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_01.pnl b/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_01.pnl new file mode 100644 index 00000000000..81c92a32c59 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/ffn.wts_nep_6250_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/Nback/results/nback.results_nep_1_lr_01.pnl.npy b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_1_lr_01.pnl.npy new file mode 100644 index 00000000000..dc1b2a21074 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_1_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_001.pnl.npy b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_001.pnl.npy new file mode 100644 index 00000000000..271d26badc3 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_001.pnl.npy differ diff --git a/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_01.pnl.npy b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_01.pnl.npy new file mode 100644 index 00000000000..767924e2f46 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/results/nback.results_nep_6250_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/Nback/stim/Archive.zip b/Scripts/Models (Under Development)/Nback/stim/Archive.zip new file mode 100644 index 00000000000..1677306b6af Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/Archive.zip differ diff --git a/Scripts/Models (Under Development)/Nback/stim/Kane stimuli.py b/Scripts/Models (Under Development)/Nback/stim/Kane stimuli.py new file mode 100644 index 00000000000..ea1f28283dc --- /dev/null +++ b/Scripts/Models (Under Development)/Nback/stim/Kane stimuli.py @@ -0,0 +1,50 @@ +r""" +Stimuli from `Kane et al., 2007 `_ +Constructed from unique stimuli: B, F, H, K, M, Q, R, X + +From Kane et al: +Control targets are designated with 1\1\0 and there are 6 of them +Experimental targets are designated with 1\2\0 and there are 2 of them +Control foils are designated with 2\1\0 and there are 34 of them +All experimental foils are designated with 2\2\0 and there are 6 of them + +*** 48 trials per block +*** Each letter should appear as a target once (i.e., 8 targets; 6 control, 2 experimental) +*** The 2 experimental target letters should not be used as experimental target letters in lists b, c, or d (the 2 experimental target letters in this list are B and F) +*** Each letter should appear as an experimental foil once if possible (the 6 in this list are B, F, H, K,M, R, and X) +*** Each letter should appear in the list 6 times +*** There should not be any 3-back lures + +""" + +Nback2_stims_a = ['Q', 'F', 'B', 'R', 'X', 'X', 'X', 'M', 'M', 'K', 'B', 'B', 'M', 'Q', 'M', 'X', + 'H', 'B', 'H', 'X', 
'K', 'Q', 'F', 'F', 'F', 'K', 'K', 'M', 'R', 'H', 'H', 'M', + 'B', 'R', 'B', 'F', 'Q', 'H', 'Q', 'R', 'F', 'R', 'H', 'K', 'X', 'K', 'R', 'Q'] +Nback2_stims_b = ['R', 'Q', 'H', 'K', 'F', 'F', 'R', 'B', 'B', 'B', 'F', 'M', 'K', 'H', 'X', 'B', + 'X', 'H', 'Q', 'H', 'F', 'K', 'Q', 'Q', 'Q', 'K', 'M', 'K', 'R', 'X', 'R', 'B', + 'M', 'H', 'M', 'R', 'R', 'F', 'X', 'F', 'B', 'H', 'K', 'M', 'M', 'Q', 'X', 'X'] +Nback2_stims_c = ['F', 'X', 'H', 'M', 'F', 'X', 'X', 'M', 'H', 'F', 'Q', 'R', 'Q', 'B', 'B', 'M', + 'X', 'M', 'F', 'H', 'F', 'K', 'M', 'H', 'H', 'H', 'B', 'Q', 'B', 'K', 'K', 'K', + 'R', 'B', 'R', 'X', 'Q', 'X', 'M', 'K', 'R', 'R', 'F', 'Q', 'Q', 'K', 'R', 'B'] +Nback2_stims_d = ['K', 'F', 'X', 'B', 'R', 'H', 'Q', 'Q', 'K', 'F', 'K', 'M', 'R', 'R', 'R', 'X', + 'B', 'X', 'Q', 'R', 'Q', 'K', 'K', 'X', 'R', 'B', 'H', 'B', 'F', 'F', 'H', 'B', + 'H', 'M', 'M', 'M', 'Q', 'F', 'X', 'F', 'B', 'H', 'H', 'M', 'K', 'Q', 'X', 'M'] +Nback2_stims_e = ['F', 'M', 'Q', 'H', 'B', 'R', 'B', 'F', 'M', 'F', 'X', 'R', 'R', 'F', 'X', 'B', + 'X', 'Q', 'K', 'K', 'H', 'Q', 'B', 'Q', 'K', 'X', 'K', 'Q', 'Q', 'R', 'M', 'R', + 'H', 'H', 'H', 'B', 'B', 'K', 'F', 'X', 'M', 'M', 'M', 'R', 'X', 'F', 'H', 'K'] +Nback2_stims_f = ['Q', 'H', 'K', 'M', 'Q', 'Q', 'F', 'K', 'F', 'X', 'X', 'M', 'R', 'M', 'H', 'B', + 'H', 'M', 'K', 'K', 'K', 'B', 'R', 'B', 'M', 'X', 'Q', 'X', 'R', 'B', 'H', 'H', + 'X', 'B', 'F', 'Q', 'H', 'Q', 'F', 'F', 'B', 'X', 'K', 'R', 'R', 'F', 'R', 'M'] +Nback2_stims_g = ['R', 'B', 'Q', 'F', 'X', 'X', 'X', 'K', 'B', 'K', 'X', 'H', 'R', 'H', 'F', 'M', + 'F', 'H', 'B', 'B', 'M', 'H', 'M', 'Q', 'F', 'F', 'K', 'M', 'Q', 'Q', 'Q', 'R', + 'X', 'K', 'K', 'R', 'H', 'R', 'M', 'M', 'X', 'B', 'Q', 'B', 'H', 'K', 'F', 'R'] +Nback2_stims_h = ['R', 'X', 'K', 'Q', 'R', 'M', 'M', 'H', 'B', 'H', 'F', 'F', 'F', 'X', 'K', 'Q', + 'R', 'R', 'K', 'B', 'K', 'R', 'H', 'R', 'Q', 'F', 'Q', 'K', 'H', 'H', 'F', 'X', + 'X', 'M', 'B', 'M', 'H', 'Q', 'B', 'B', 'B', 'M', 'F', 'X', 'K', 'X', 'Q', 'M'] + +Nback2_conds_a = ['2\1\0', '2\1\0', '2\1\0', '2\1\0', '2\1\0', '2\2\0', '1\2\0', '2\1\0', + '2\2\0', '2\1\0', '2\1\0', '2\2\0', '2\1\0', '2\1\0', '1\1\0', '2\1\0', + '2\1\0', '2\1\0', '1\1\0', '2\1\0', '2\1\0', '2\1\0', '2\1\0', '2\2\0', + '1\2\0', '2\1\0', '2\2\0', '2\1\0', '2\1\0', '2\1\0', '2\2\0', '2\1\0', + '2\1\0', '2\1\0', '1\1\0', '2\1\0', '2\1\0', '2\1\0', '1\1\0', '2\1\0', + '2\1\0', '1\1\0', '2\1\0', '2\1\0', '2\1\0', '1\1\0', '2\1\0', '2\1\0'] \ No newline at end of file diff --git a/Scripts/Models (Under Development)/Nback/stim/__init__.py b/Scripts/Models (Under Development)/Nback/stim/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_a.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_a.doc new file mode 100644 index 00000000000..a98c118ac7f Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_a.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_b.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_b.doc new file mode 100644 index 00000000000..0325c032c36 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_b.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_c.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_c.doc new file mode 100644 index 00000000000..735aee691bf Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_c.doc differ diff --git 
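One caveat about the condition codes above: they are written as plain string literals, so Python parses sequences like \1 and \0 as octal escapes rather than the literal backslashes used in the docstring's 1\1\0 notation. A minimal illustration (not part of the diff):

    plain = '2\1\0'   # parsed as '2' + chr(1) + chr(0) -> length 3
    raw = r'2\1\0'    # backslashes preserved -> length 5
    assert plain != raw
    assert (len(plain), len(raw)) == (3, 5)
    # if literal backslashes are intended, the lists need r'...' prefixes (or doubled backslashes)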
a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_d.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_d.doc new file mode 100644 index 00000000000..1889d7a7d16 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_d.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_e.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_e.doc new file mode 100644 index 00000000000..6862c069b56 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_e.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_f.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_f.doc new file mode 100644 index 00000000000..7a2d4e60b8f Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_f.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_g.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_g.doc new file mode 100644 index 00000000000..689f2ebcc17 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_g.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_h.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_h.doc new file mode 100644 index 00000000000..bad93c4c49a Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_2_back_h.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_a.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_a.doc new file mode 100644 index 00000000000..3c8d650415d Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_a.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_b.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_b.doc new file mode 100644 index 00000000000..91015070841 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_b.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_c.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_c.doc new file mode 100644 index 00000000000..93f2066ba5e Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_c.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_d.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_d.doc new file mode 100644 index 00000000000..b53802bfb41 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_d.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_e.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_e.doc new file mode 100644 index 00000000000..1488ec28e5f Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_e.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_f.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_f.doc new file mode 100644 index 00000000000..409ef523ac5 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_f.doc differ diff --git a/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_g.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_g.doc new file mode 100644 index 00000000000..da4eb94ddb8 Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_g.doc differ diff --git a/Scripts/Models (Under 
Development)/Nback/stim/ckm_3_back_h.doc b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_h.doc new file mode 100644 index 00000000000..e1fc8bb549d Binary files /dev/null and b/Scripts/Models (Under Development)/Nback/stim/ckm_3_back_h.doc differ diff --git a/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl new file mode 100644 index 00000000000..f94b91cb028 Binary files /dev/null and b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts.pnl b/Scripts/Models (Under Development)/ffn.wts.pnl new file mode 100644 index 00000000000..072920a24fe Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_01.pnl b/Scripts/Models (Under Development)/ffn.wts_01.pnl new file mode 100644 index 00000000000..20016bdf831 Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts_01.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..f5f9c4d160b Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/nback.results_nep_6250_lr_01.pnl.npy b/Scripts/Models (Under Development)/nback.results_nep_6250_lr_01.pnl.npy new file mode 100644 index 00000000000..35604e7c555 Binary files /dev/null and b/Scripts/Models (Under Development)/nback.results_nep_6250_lr_01.pnl.npy differ diff --git a/autodiff_composition_matrix_wts.pnl b/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/autodiff_composition_matrix_wts.pnl differ diff --git a/conftest.py b/conftest.py index 94a4de81cc4..85c6f2eea8c 100644 --- a/conftest.py +++ b/conftest.py @@ -1,8 +1,10 @@ +import contextlib import doctest +import io +import numpy as np import psyneulink import pytest -import numpy as np - +import re from psyneulink import clear_registry, primary_registries from psyneulink.core import llvm as pnlvm @@ -36,15 +38,6 @@ def pytest_addoption(parser): parser.addoption('--fp-precision', action='store', default='fp64', choices=['fp32', 'fp64'], help='Set default fp precision for the runtime compiler. 
Default: fp64') -def pytest_sessionstart(session): - precision = session.config.getvalue("--fp-precision") - if precision == 'fp64': - pnlvm.LLVMBuilderContext.default_float_ty = pnlvm.ir.DoubleType() - elif precision == 'fp32': - pnlvm.LLVMBuilderContext.default_float_ty = pnlvm.ir.FloatType() - else: - assert False, "Unsupported precision parameter: {}".format(precision) - def pytest_runtest_setup(item): # Check that all 'cuda' tests are also marked 'llvm' assert 'llvm' in item.keywords or 'cuda' not in item.keywords @@ -84,11 +77,36 @@ def pytest_generate_tests(metafunc): if "autodiff_mode" in metafunc.fixturenames: auto_modes = [ - pnlvm.ExecutionMode.Python, + # pnlvm.ExecutionMode.Python, + pnlvm.ExecutionMode.PyTorch, pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm) ] metafunc.parametrize("autodiff_mode", auto_modes) + +_old_register_prefix = None + +# Collection hooks +def pytest_sessionstart(session): + """Initialize session with the right floating point precision and component name prefix.""" + + precision = session.config.getvalue("--fp-precision") + if precision == 'fp64': + pnlvm.LLVMBuilderContext.default_float_ty = pnlvm.ir.DoubleType() + elif precision == 'fp32': + pnlvm.LLVMBuilderContext.default_float_ty = pnlvm.ir.FloatType() + else: + assert False, "Unsupported precision parameter: {}".format(precision) + + global _old_register_prefix + _old_register_prefix = psyneulink.core.globals.registry._register_auto_name_prefix + psyneulink.core.globals.registry._register_auto_name_prefix = "__pnl_pytest_" + +def pytest_collection_finish(session): + """Restore component prefix at the end of test collection.""" + psyneulink.core.globals.registry._register_auto_name_prefix = _old_register_prefix + +# Runtest hooks def pytest_runtest_call(item): # seed = int(item.config.getoption('--pnl-seed')) seed = 0 @@ -112,6 +130,29 @@ def comp_mode_no_llvm(): # dummy fixture to allow 'comp_mode' filtering pass +class FirstBench(): + def __init__(self, benchmark): + super().__setattr__("benchmark", benchmark) + + def __call__(self, f, *args, **kwargs): + res = [] + # Compute the first result if benchmark is enabled + if self.benchmark.enabled: + res.append(f(*args, **kwargs)) + + res.append(self.benchmark(f, *args, **kwargs)) + return res[0] + + def __getattr__(self, attr): + return getattr(self.benchmark, attr) + + def __setattr__(self, attr, val): + return setattr(self.benchmark, attr, val) + +@pytest.fixture +def benchmark(benchmark): + return FirstBench(benchmark) + @pytest.helpers.register def llvm_current_fp_precision(): float_ty = pnlvm.LLVMBuilderContext.get_current().float_ty @@ -125,6 +166,7 @@ def llvm_current_fp_precision(): @pytest.helpers.register def get_comp_execution_modes(): return [pytest.param(pnlvm.ExecutionMode.Python), + # pytest.param(pnlvm.ExecutionMode.PyTorch, marks=pytest.mark.pytorch), pytest.param(pnlvm.ExecutionMode.LLVM, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), pytest.param(pnlvm.ExecutionMode.LLVMRun, marks=pytest.mark.llvm), @@ -161,6 +203,14 @@ def mech_wrapper(x): else: assert False, "Unknown mechanism mode: {}".format(mech_mode) +@pytest.helpers.register +def numpy_uses_avx512(): + out = io.StringIO() + with contextlib.redirect_stdout(out): + np.show_config() + + return re.search(' found = .*AVX512.*', out.getvalue()) is not None + @pytest.helpers.register def expand_np_ndarray(arr): # this will fail on an input containing a float (not np.ndarray) diff --git a/dev_requirements.txt 
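The FirstBench wrapper in the conftest.py hunk above computes the function's result once before handing it to pytest-benchmark, so a test still receives a usable return value even when benchmarking reruns the function. A hypothetical test using the overridden fixture:

    def test_sum(benchmark):
        # FirstBench guarantees a real return value whether or not benchmarking is enabled
        result = benchmark(sum, [1, 2, 3])
        assert result == 6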
b/dev_requirements.txt index ad283dfc78d..3e2f63b1c7b 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,9 +1,10 @@ jupyter<=1.0.0 -pytest<7.1.3 -pytest-benchmark<3.4.2 -pytest-cov<3.0.1 +packaging<24.0 +pytest<7.2.1 +pytest-benchmark<4.0.1 +pytest-cov<4.0.1 pytest-helpers-namespace<2021.12.30 pytest-profiling<=1.7.0 pytest-pycodestyle<2.4.0 pytest-pydocstyle<2.4.0 -pytest-xdist<2.6.0 +pytest-xdist<3.2.0 diff --git a/docs/source/BeukersNBackModel.rst b/docs/source/BeukersNBackModel.rst new file mode 100644 index 00000000000..ea2a3adf26e --- /dev/null +++ b/docs/source/BeukersNBackModel.rst @@ -0,0 +1,81 @@ + +N-Back Model (Beukers et al., 2022) +================================================================== +`"When Working Memory is Just Working, Not Memory" `_ + +Overview +-------- +This implements a model of the `N-back task `_ +described in `Beukers et al. (2022) `_. The model uses a simple implementation of episodic +memory (EM, as a form of content-retrieval memory) to store previous stimuli along with the temporal context in which +they occurred, and a feedforward neural network (FFN) to evaluate whether the current stimulus is a match to the n'th +preceding stimulus (nback-level) retrieved from episodic memory. The temporal context is provided by a randomly +drifting high dimensional vector that maintains a constant norm (i.e., drifts on a sphere). The FFN is +trained, given an n-back level of *n*, to identify when the current stimulus matches one stored in EM +with a temporal context vector that differs by an amount corresponding to *n* time steps of drift. During n-back +performance, the model encodes the current stimulus and temporal context, retrieves an item from EM that matches the +current stimulus, weighted by the similarity of its temporal context vector (i.e., most recent), and then uses the +FFN to evaluate whether it is an n-back match. The model responds "match" if the FFN detects a match; otherwise, it +either responds "non-match" or, with a fixed probability (hazard rate), it uses the current stimulus and temporal +context to retrieve another sample from EM and repeat the evaluation. + +This model is an example of proposed interactions between working memory (e.g., in neocortex) and episodic memory +(e.g., in hippocampus and/or cerebellum) in the performance of tasks that demand sequential processing and control, +and along the lines of models emerging from machine learning that augment the use of recurrent neural networks (e.g., long +short-term memory mechanisms; LSTMs) for active memory and control with an external memory capable of rapid storage +and content-based retrieval, such as the Neural Turing Machine (NTM; +`Graves et al., 2016 `_), Episodic Planning Networks (EPN; +`Ritter et al., 2020 `_), and Emergent Symbols through Binding Networks (ESBN; +`Webb et al., 2021 `_). + +The script defines the following functions, used respectively to construct, train and run the model: + +* construct_model(args): + takes as arguments parameters used to construct the model; for convenience, defaults are defined toward the top + of the script (see "Construction parameters"). +.. +* train_network(args) + takes as arguments the feedforward neural network Composition (FFN_COMPOSITION) and the number of epochs to train. + Note: learning_rate is set at construction (which can be specified using LEARNING_RATE under "Training parameters"). +.. 
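A minimal, self-contained sketch of the drifting temporal-context vector described in the Overview above (dimension and drift rate are illustrative, not the model's defaults):

    import numpy as np

    rng = np.random.default_rng(0)
    context = rng.standard_normal(25)
    context /= np.linalg.norm(context)        # start on the unit sphere
    for _ in range(10):                       # one drift step per trial
        context += 0.1 * rng.standard_normal(25)
        context /= np.linalg.norm(context)    # re-normalize: constant norm, i.e. drift on the sphere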
+* run_model() + takes as arguments the drift rate in the temporal context vector to be applied on each trial, + and the number of trials to execute, as well as reporting and animation specifications + (see "Execution parameters"). + +The default parameters are ones that have been fit to empirical data concerning human performance +(taken from `Kane et al., 2007 `_). + + +The Model +--------- + +The model is composed of two `Compositions `: an outer one that contains the full model (nback_model), +and an `AutodiffComposition` (ffn), nested within nback_model (see red box in Figure), that implements the +feedforward neural network (ffn). + +nback_model +~~~~~~~~~~~ + +This contains three input Mechanisms (CURRENT STIMULUS, CURRENT CONTEXT and CURRENT TASK). + +Both Compositions are constructed in the construct_model function. +The ffn Composition is trained using the train_network function. + +.. _nback_Fig: + +.. figure:: _static/N-Back_Model_movie.gif + :align: left + :alt: N-Back Model Animation + + +Training +-------- + + +Execution +--------- + + +Script: :download:`N-back.py <../../Scripts/Models (Under Development)/Beukers_N-Back_2022.py>` +.. Script: :download:`N-back.py <../../psyneulink/library/models/Beukers -Back.py>` diff --git a/docs/source/Function.rst b/docs/source/Function.rst index eb23c588103..2dc5b4ca902 100644 --- a/docs/source/Function.rst +++ b/docs/source/Function.rst @@ -12,6 +12,6 @@ Function :maxdepth: 3 .. automodule:: psyneulink.core.components.functions.function - :members: Function_Base, ArgumentTherapy + :members: Function_Base, ArgumentTherapy, RandomMatrix :private-members: :exclude-members: Parameters diff --git a/docs/source/Functions.rst b/docs/source/Functions.rst index 4148855aa37..7388dfd3541 100644 --- a/docs/source/Functions.rst +++ b/docs/source/Functions.rst @@ -9,6 +9,6 @@ Functions UserDefinedFunction .. automodule:: psyneulink.core.components.functions.function - :members: Function_Base, ArgumentTherapy, + :members: Function_Base, ArgumentTherapy, RandomMatrix :private-members: :exclude-members: Parameters \ No newline at end of file diff --git a/docs/source/Keywords.rst b/docs/source/Keywords.rst index 6ec329ee96c..d4e5e2488bc 100644 --- a/docs/source/Keywords.rst +++ b/docs/source/Keywords.rst @@ -2,6 +2,6 @@ Keywords ======== .. automodule:: psyneulink.core.globals.keywords - :members: MechanismRoles, MatrixKeywords, DistanceMetrics + :members: MechanismRoles, MatrixKeywords, DistanceMetrics, Loss :private-members: :exclude-members: random, LinearCombination, Parameters diff --git a/docs/source/LLVM.rst b/docs/source/LLVM.rst new file mode 100644 index 00000000000..7c1da2a6ffd --- /dev/null +++ b/docs/source/LLVM.rst @@ -0,0 +1,7 @@ +LLVM Compilation +================ + +.. automodule:: psyneulink.core.llvm.__init__ + :members: ExecutionMode + :private-members: + :exclude-members: random, LLVMBBinaryFunction diff --git a/docs/source/Models.rst b/docs/source/Models.rst index 8e8eed5db7d..24e4e3889fc 100644 --- a/docs/source/Models.rst +++ b/docs/source/Models.rst @@ -17,3 +17,6 @@ illustrate principles of neural and/or psychological function. 
• `BotvinickConflictMonitoringModel` • `BustamanteStroopXORLVOCModel` + +• `BeukersNBackModel` + diff --git a/docs/source/_static/N-Back-Model_fig.svg b/docs/source/_static/N-Back-Model_fig.svg new file mode 100644 index 00000000000..836e7b87bc9 --- /dev/null +++ b/docs/source/_static/N-Back-Model_fig.svg @@ -0,0 +1 @@ +N-Back ModelWORKING MEMORY (fnn)TASKCURRENT TASKCONTEXTEPISODIC MEMORY (dict)CURRENT CONTEXTSTIMCURRENT STIMULUSRETRIEVED CONTEXTRETRIEVED STIMULUSREAD/WRITE CONTROLLERHIDDEN LAYEROBJECTIVE MECHANISMDECISION LAYER \ No newline at end of file diff --git a/docs/source/_static/N-Back_Model_movie.gif b/docs/source/_static/N-Back_Model_movie.gif new file mode 100644 index 00000000000..3a11c1f8eeb Binary files /dev/null and b/docs/source/_static/N-Back_Model_movie.gif differ diff --git a/psyneulink/__init__.py b/psyneulink/__init__.py index 14911f370e7..09c949c7170 100644 --- a/psyneulink/__init__.py +++ b/psyneulink/__init__.py @@ -84,10 +84,8 @@ def filter(self, record): primary_registries = [ CompositionRegistry, - ControlMechanismRegistry, DeferredInitRegistry, FunctionRegistry, - GatingMechanismRegistry, MechanismRegistry, PathwayRegistry, PortRegistry, diff --git a/psyneulink/core/components/functions/function.py b/psyneulink/core/components/functions/function.py index 968cd52a77c..abcb8cdb5bd 100644 --- a/psyneulink/core/components/functions/function.py +++ b/psyneulink/core/components/functions/function.py @@ -157,21 +157,22 @@ ARGUMENT_THERAPY_FUNCTION, AUTO_ASSIGN_MATRIX, EXAMPLE_FUNCTION_TYPE, FULL_CONNECTIVITY_MATRIX, FUNCTION_COMPONENT_CATEGORY, FUNCTION_OUTPUT_TYPE, FUNCTION_OUTPUT_TYPE_CONVERSION, HOLLOW_MATRIX, IDENTITY_MATRIX, INVERSE_HOLLOW_MATRIX, NAME, PREFERENCE_SET_NAME, RANDOM_CONNECTIVITY_MATRIX, VALUE, VARIABLE, - MODEL_SPEC_ID_METADATA, MODEL_SPEC_ID_MDF_VARIABLE + MODEL_SPEC_ID_MDF_VARIABLE ) +from psyneulink.core.globals.mdf import _get_variable_parameter_name from psyneulink.core.globals.parameters import Parameter, check_user_specified from psyneulink.core.globals.preferences.basepreferenceset import REPORT_OUTPUT_PREF, is_pref_set from psyneulink.core.globals.preferences.preferenceset import PreferenceEntry, PreferenceLevel from psyneulink.core.globals.registry import register_category from psyneulink.core.globals.utilities import ( convert_to_np_array, get_global_seed, is_instance_or_subclass, object_has_single_value, parameter_spec, parse_valid_identifier, safe_len, - SeededRandomState, contains_type, is_numeric + SeededRandomState, contains_type, is_numeric, random_matrix ) __all__ = [ 'ArgumentTherapy', 'EPSILON', 'Function_Base', 'function_keywords', 'FunctionError', 'FunctionOutputType', 'FunctionRegistry', 'get_param_value_for_function', 'get_param_value_for_keyword', 'is_Function', - 'is_function_type', 'PERTINACITY', 'PROPENSITY' + 'is_function_type', 'PERTINACITY', 'PROPENSITY', 'RandomMatrix' ] EPSILON = np.finfo(float).eps @@ -552,6 +553,7 @@ class Function_Base(Function): classPreferenceLevel = PreferenceLevel.CATEGORY _model_spec_id_parameters = 'args' + _mdf_stateful_parameter_indices = {} _specified_variable_shape_flexibility = DefaultsFlexibility.INCREASE_DIMENSION @@ -674,7 +676,18 @@ def function(self, params=None, target_set=None, **kwargs): - assert True + + # IMPLEMENTATION NOTE: + # The following is a convenience feature that supports specification of params directly in call to function + # by moving them to a params dict, which treats them as runtime_params + if kwargs: + for key in kwargs.copy(): + if key in 
self.parameters.names(): + if not params: + params = {key: kwargs.pop(key)} + else: + params.update({key: kwargs.pop(key)}) + # Validate variable and assign to variable, and validate params variable = self._check_args(variable=variable, context=context, @@ -817,11 +830,22 @@ def _model_spec_parameter_blacklist(self): 'multiplicative_param', 'additive_param', }) - def _get_mdf_noise_function(self): + def _assign_to_mdf_model(self, model, input_id) -> str: + """Adds an MDF representation of this function to MDF object + **model**, including all necessary auxiliary functions. + **input_id** is the input to the singular MDF function or first + function representing this psyneulink Function, if applicable. + + Returns: + str: the identifier of the final MDF function representing + this psyneulink Function + """ import modeci_mdf.mdf as mdf extra_noise_functions = [] + self_model = self.as_mdf_model() + def handle_noise(noise): if is_instance_or_subclass(noise, Component): if inspect.isclass(noise) and issubclass(noise, Component): @@ -834,14 +858,47 @@ def handle_noise(noise): else: return None - noise = handle_noise(self.defaults.noise) + try: + noise_val = handle_noise(self.defaults.noise) + except AttributeError: + noise_val = None - if noise is not None: - return mdf.Function( - id=f'{parse_valid_identifier(self.name)}_noise', + if noise_val is not None: + noise_func = mdf.Function( + id=f'{model.id}_{parse_valid_identifier(self.name)}_noise', value=MODEL_SPEC_ID_MDF_VARIABLE, - args={MODEL_SPEC_ID_MDF_VARIABLE: noise}, - ), extra_noise_functions + args={MODEL_SPEC_ID_MDF_VARIABLE: noise_val}, + ) + self._set_mdf_arg(self_model, 'noise', noise_func.id) + + model.functions.extend(extra_noise_functions) + model.functions.append(noise_func) + + self_model.id = f'{model.id}_{self_model.id}' + self._set_mdf_arg(self_model, _get_variable_parameter_name(self), input_id) + model.functions.append(self_model) + + # assign stateful parameters + for param, index in self._mdf_stateful_parameter_indices.items(): + initializer_name = getattr(self.parameters, param).initializer + + # in this case, parameter gets updated to its function's final value + try: + initializer_value = self_model.args[initializer_name] + except KeyError: + initializer_value = self_model.metadata[initializer_name] + + index_str = f'[{index}]' if index is not None else '' + + model.parameters.append( + mdf.Parameter( + id=param, + default_initial_value=initializer_value, + value=f'{self_model.id}{index_str}' + ) + ) + + return self_model.id def as_mdf_model(self): import modeci_mdf.mdf as mdf @@ -849,7 +906,6 @@ def as_mdf_model(self): parameters = self._mdf_model_parameters metadata = self._mdf_metadata - metadata[MODEL_SPEC_ID_METADATA]['function_stateful_params'] = {} stateful_params = set() # add stateful parameters into metadata for mechanism to get @@ -860,17 +916,6 @@ def as_mdf_model(self): continue if param.initializer is not None: - # in this case, parameter gets updated to its function's final value - try: - initializer_value = parameters[self._model_spec_id_parameters][param.initializer] - except KeyError: - initializer_value = metadata[MODEL_SPEC_ID_METADATA]['initializer'] - - metadata[MODEL_SPEC_ID_METADATA]['function_stateful_params'][name] = { - 'id': name, - 'default_initial_value': initializer_value, - 'value': parse_valid_identifier(self.name) - } stateful_params.add(name) # stateful parameters cannot show up as args or they will not be @@ -1167,6 +1212,48 @@ def __init__(self, ) +class RandomMatrix(): + 
"""Function that returns matrix with random elements distributed uniformly around **center** across **range**. + + The **center** and **range** arguments are passed at construction, and used for all subsequent calls. + Once constructed, the function must be called with two floats, **sender_size** and **receiver_size**, + that specify the number of rows and columns of the matrix, respectively. + + Can be used to specify the `matrix ` parameter of a `MappingProjection + `, and to specify a default matrix for Projections in the + construction of a `Pathway` (see `Pathway_Specification_Projections`) or in a call to a Composition's + `add_linear_processing_pathway` method. + + .. technical_note:: + A call to the class calls `random_matrix `, passing **sender_size** and + **receiver_size** to `random_matrix ` as its **num_rows** and **num_cols** + arguments, respectively, and passing the `center `-0.5 and `range ` + attributes specified at construction to `random_matrix ` as its **offset** + and **scale** arguments, respectively. + + Arguments + ---------- + center : float + specifies the value around which the matrix elements are distributed in all calls to the function. + range : float + specifies range over which all matrix elements are distributed in all calls to the function. + + Attributes + ---------- + center : float + determines the center of the distribution of the matrix elements; + range : float + determines the range of the distribution of the matrix elements; + """ + + def __init__(self, center:float=0.0, range:float=1.0): + self.center=center + self.range=range + + def __call__(self, sender_size:int, receiver_size:int): + return random_matrix(sender_size, receiver_size, offset=self.center - 0.5, scale=self.range) + + def get_matrix(specification, rows=1, cols=1, context=None): """Returns matrix conforming to specification with dimensions = rows x cols or None @@ -1181,6 +1268,7 @@ def get_matrix(specification, rows=1, cols=1, context=None): + INVERSE_HOLLOW_MATRIX: 0's on diagonal, -1's elsewhere (must be square matrix), otherwise generates error + FULL_CONNECTIVITY_MATRIX: all 1's + RANDOM_CONNECTIVITY_MATRIX (random floats uniformly distributed between 0 and 1) + + RandomMatrix (random floats uniformly distributed around a specified center value with a specified range) + 2D list or np.ndarray of numbers Returns 2D array with length=rows in dim 0 and length=cols in dim 1, or none if specification is not recognized @@ -1188,9 +1276,6 @@ def get_matrix(specification, rows=1, cols=1, context=None): # Matrix provided (and validated in _validate_params); convert to array if isinstance(specification, (list, np.matrix)): - # # MODIFIED 4/9/22 OLD: - # return convert_to_np_array(specification) - # MODIFIED 4/9/22 NEW: if is_numeric(specification): return convert_to_np_array(specification) else: @@ -1238,7 +1323,7 @@ def get_matrix(specification, rows=1, cols=1, context=None): return np.random.rand(rows, cols) # Function is specified, so assume it uses random.rand() and call with sender_len and receiver_len - if isinstance(specification, types.FunctionType): + if isinstance(specification, (types.FunctionType, RandomMatrix)): return specification(rows, cols) # (7/12/17 CW) this is a PATCH (like the one in MappingProjection) to allow users to diff --git a/psyneulink/core/components/functions/nonstateful/combinationfunctions.py b/psyneulink/core/components/functions/nonstateful/combinationfunctions.py index be28b1d62eb..adeae5c2dce 100644 --- 
a/psyneulink/core/components/functions/nonstateful/combinationfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/combinationfunctions.py @@ -35,14 +35,15 @@ import numpy as np import typecheck as tc +from typing import Union from psyneulink.core import llvm as pnlvm from psyneulink.core.components.functions.function import Function_Base, FunctionError, FunctionOutputType from psyneulink.core.globals.keywords import \ ADDITIVE_PARAM, ARRANGEMENT, COMBINATION_FUNCTION_TYPE, COMBINE_MEANS_FUNCTION, CONCATENATE_FUNCTION, \ - DEFAULT_VARIABLE, EXPONENTS, LINEAR_COMBINATION_FUNCTION, MULTIPLICATIVE_PARAM, OFFSET, OPERATION, \ + CROSS_ENTROPY, DEFAULT_VARIABLE, EXPONENTS, LINEAR_COMBINATION_FUNCTION, MULTIPLICATIVE_PARAM, OFFSET, OPERATION, \ PREDICTION_ERROR_DELTA_FUNCTION, PRODUCT, REARRANGE_FUNCTION, REDUCE_FUNCTION, SCALE, SUM, WEIGHTS, \ - PREFERENCE_SET_NAME, VARIABLE + PREFERENCE_SET_NAME from psyneulink.core.globals.utilities import convert_to_np_array, is_numeric, np_array_less_than_2d, parameter_spec from psyneulink.core.globals.context import ContextFlags from psyneulink.core.globals.parameters import Parameter, check_user_specified @@ -1026,9 +1027,9 @@ class LinearCombination( and there must be the same number of items as there are in `variable ` (see `exponents ` for details of how exponents are applied). - operation : SUM or PRODUCT : default SUM + operation : SUM, PRODUCT or CROSS_ENTROPY : default SUM specifies whether the `function ` takes the elementwise (Hadamard) - sum or product of the arrays in `variable `. + sum, product or cross entropy of the arrays in `variable `. scale : float or np.ndarray : default None specifies a value by which to multiply each element of the result of `function ` @@ -1078,8 +1079,8 @@ class LinearCombination( ` (if any are specified). operation : SUM or PRODUCT - determines whether the `function ` takes the elementwise (Hadamard) sum or - product of the arrays in `variable `. + determines whether the `function ` takes the elementwise (Hadamard) sum, + product, or cross entropy of the arrays in `variable `. scale : float or np.ndarray value is applied multiplicatively to each element of the array after applying the @@ -1176,7 +1177,8 @@ def __init__(self, # exponents: tc.optional(parameter_spec)=None, weights=None, exponents=None, - operation: tc.optional(tc.enum(SUM, PRODUCT)) = None, + operation: tc.optional(tc.enum(SUM, PRODUCT, CROSS_ENTROPY)) = None, + # operation: Union[SUM, PRODUCT, CROSS_ENTROPY] = None, scale=None, offset=None, params=None, @@ -1391,6 +1393,10 @@ def _function(self, combination = np.sum(variable, axis=0) elif operation == PRODUCT: combination = np.product(variable, axis=0) + elif operation == CROSS_ENTROPY: + v1 = variable[0] + v2 = variable[1] + combination = np.where(np.logical_and(v1 == 0, v2 == 0), 0.0, v1 * np.log(v2)) else: raise FunctionError("Unrecognized operator ({0}) for LinearCombination function". 
format(operation.self.Operation.SUM)) @@ -1429,6 +1435,16 @@ def _gen_llvm_combine(self, builder, index, ctx, vi, vo, params): elif operation == PRODUCT: val = ctx.float_ty(1.0) comb_op = "fmul" + elif operation == CROSS_ENTROPY: + raise FunctionError(f"LinearCombination Function does not (yet) support CROSS_ENTROPY operation.") + # FIX: THIS NEEDS TO BE REPLACED TO GENERATE A VECTOR WITH HADAMARD CROSS-ENTROPY OF vi AND vo + # ptr1 = builder.gep(vi, [index]) + # ptr2 = builder.gep(vo, [index]) + # val1 = builder.load(ptr1) + # val2 = builder.load(ptr2) + # log_f = ctx.get_builtin("log", [ctx.float_ty]) + # log = builder.call(log_f, [val2]) + # prod = builder.fmul(val1, log) else: assert False, "Unknown operation: {}".format(operation) diff --git a/psyneulink/core/components/functions/nonstateful/distributionfunctions.py b/psyneulink/core/components/functions/nonstateful/distributionfunctions.py index b8a64bc1510..904751a9cb8 100644 --- a/psyneulink/core/components/functions/nonstateful/distributionfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/distributionfunctions.py @@ -1378,20 +1378,32 @@ def csch(x): moments["mean_rt_minus"] = noise**2 / (drift_rate**2) * (2 * Z * coth(2 * Z) - (-X + Z) * coth(-X + Z)) moments["var_rt_plus"] = noise**4 / (drift_rate**4) * \ - ((2 * Z)**2 * (csch(2 * Z))**2 + (2 * Z) * coth(2 * Z) - - (Z + X)**2 * (csch(Z + X))**2 - (Z + X) * coth(Z + X)) + (((2 * Z)**2 * csch(2 * Z)**2 - + (Z + X)**2 * csch(Z + X)**2) + + ((2 * Z) * coth(2 * Z) - + (Z + X) * coth(Z + X))) moments["var_rt_minus"] = noise**4 / (drift_rate**4) * \ - ((2 * Z)**2 * (csch(2 * Z))**2 + (2 * Z) * coth(2 * Z) - - (Z - X)**2 * (csch(Z - X))**2 - (Z - X) * coth(Z - X)) + (((2 * Z)**2 * csch(2 * Z)**2 - + (Z - X)**2 * csch(Z - X)**2) + + ((2 * Z) * coth(2 * Z) - + (Z - X) * coth(Z - X))) moments["skew_rt_plus"] = noise**6 / (drift_rate**6) * \ - (3 * (2 * Z)**2 * (csch(2 * Z))**2 + 2 * (2 * Z)**3 * coth(2 * Z) * (csch(2 * Z))**2 + 3 * (2 * Z) * coth(2 * Z) - - 3 * (Z + X)**2 * (csch(Z + X))**2 - 2 * (Z + X)**3 * coth(Z + X) * (csch(Z + X))**2 - 3 * (Z + X) * coth(Z + X)) + (3 * ((2 * Z)**2 * csch(2 * Z)**2 - + (Z + X)**2 * csch(Z + X)**2) + + 2 * ((2 * Z)**3 * coth(2 * Z) * csch(2 * Z)**2 - + (Z + X)**3 * coth(Z + X) * csch(Z + X)**2) + + 3 * ((2 * Z) * coth(2 * Z) - + (Z + X) * coth(Z + X))) moments["skew_rt_minus"] = noise**6 / (drift_rate**6) * \ - (3 * (2 * Z)**2 * (csch(2 * Z))**2 + 2 * (2 * Z)**3 * coth(2 * Z) * (csch(2 * Z))**2 + 3 * (2 * Z) * coth(2 * Z) - - 3 * (Z - X)**2 * (csch(Z - X))**2 - 2 * (Z - X)**3 * coth(Z - X) * (csch(Z - X))**2 - 3 * (Z - X) * coth(Z - X)) + (3 * ((2 * Z)**2 * csch(2 * Z)**2 - + (Z - X)**2 * csch(Z - X)**2) + + 2 * ((2 * Z)**3 * coth(2 * Z) * csch(2 * Z)**2 - + (Z - X)**3 * coth(Z - X) * csch(Z - X)**2) + + 3 * ((2 * Z) * coth(2 * Z) - + (Z - X) * coth(Z - X))) # divide third central moment by var_rt**1.5 to get skewness moments['skew_rt_plus'] /= moments['var_rt_plus']**1.5 @@ -1493,7 +1505,7 @@ def _get_arg_out_ptr(idx): x0tilde = builder.fdiv(y0tilde, drift_rate_normed) exp_f = ctx.get_builtin("exp", [bias_adj.type]) - # Precompute the same values as Python above + # Pre-compute the same values as Python above neg2_x0tilde_atilde = builder.fmul(x0tilde.type(-2), x0tilde) neg2_x0tilde_atilde = builder.fmul(neg2_x0tilde_atilde, atilde) exp_neg2_x0tilde_atilde = builder.call(exp_f, [neg2_x0tilde_atilde]) @@ -1615,9 +1627,9 @@ def _get_arg_out_ptr(idx): ZmX_sqr_csch_ZmX_sqr = builder.fmul(ZmX_sqr, csch_ZmX_sqr) # Variance plus - v_rt_p = 
builder.fadd(Z2_sqr_csch_Z2_sqr, Z2_coth_Z2) - v_rt_p = builder.fsub(v_rt_p, ZpX_sqr_csch_ZpX_sqr) - v_rt_p = builder.fsub(v_rt_p, ZpX_coth_ZpX) + v_rt_pA = builder.fsub(Z2_sqr_csch_Z2_sqr, ZpX_sqr_csch_ZpX_sqr) + v_rt_pB = builder.fsub(Z2_coth_Z2, ZpX_coth_ZpX) + v_rt_p = builder.fadd(v_rt_pA, v_rt_pB) v_rt_p = builder.fmul(noise_q_drift_q, v_rt_p) builder.store(v_rt_p, var_rt_plus_ptr) @@ -1625,9 +1637,9 @@ def _get_arg_out_ptr(idx): v_rt_p_1_5 = builder.call(pow_f, [v_rt_p, v_rt_p.type(1.5)]) # Variance minus - v_rt_m = builder.fadd(Z2_sqr_csch_Z2_sqr, Z2_coth_Z2) - v_rt_m = builder.fsub(v_rt_m, ZmX_sqr_csch_ZmX_sqr) - v_rt_m = builder.fsub(v_rt_m, ZmX_coth_ZmX) + v_rt_mA = builder.fsub(Z2_sqr_csch_Z2_sqr, ZmX_sqr_csch_ZmX_sqr) + v_rt_mB = builder.fsub(Z2_coth_Z2, ZmX_coth_ZmX) + v_rt_m = builder.fadd(v_rt_mA, v_rt_mB) v_rt_m = builder.fmul(noise_q_drift_q, v_rt_m) builder.store(v_rt_m, var_rt_minus_ptr) @@ -1639,38 +1651,43 @@ def _get_arg_out_ptr(idx): drift_rate_6 = builder.fmul(drift_rate_q, drift_rate_sqr) srt_tmp0 = builder.fdiv(noise_6, drift_rate_6) - srt_tmp1a = builder.fmul(Z2_sqr_csch_Z2_sqr.type(3), - Z2_sqr_csch_Z2_sqr) - srt_tmp2a = builder.fmul(Z2_coth_Z2, Z2_sqr_csch_Z2_sqr) - srt_tmp2a = builder.fmul(srt_tmp2a.type(2), srt_tmp2a) - srt_tmp3a = builder.fmul(Z2_coth_Z2.type(3), Z2_coth_Z2) - s_rt = builder.fadd(srt_tmp1a, srt_tmp2a) - s_rt = builder.fadd(s_rt, srt_tmp3a) + + Z2_cub_coth_Z2_csch_Z2_sqr = builder.fmul(Z2_coth_Z2, Z2_sqr_csch_Z2_sqr) + ZpX_cub_coth_ZpX_csch_Z2_sqr = builder.fmul(ZpX_coth_ZpX, ZpX_sqr_csch_ZpX_sqr) + ZmX_cub_coth_ZmX_csch_Z2_sqr = builder.fmul(ZmX_coth_ZmX, ZmX_sqr_csch_ZmX_sqr) # Skew plus - srtp_tmp1b = builder.fmul(ZpX_sqr_csch_ZpX_sqr.type(3), - ZpX_sqr_csch_ZpX_sqr) - srtp_tmp2b = builder.fmul(ZpX_coth_ZpX, ZpX_sqr_csch_ZpX_sqr) - srtp_tmp2b = builder.fmul(srtp_tmp2b.type(2), srtp_tmp2b) - srtp_tmp3b = builder.fmul(ZpX_coth_ZpX.type(3), ZpX_coth_ZpX) - - s_rt_p = builder.fsub(s_rt, srtp_tmp1b) - s_rt_p = builder.fsub(s_rt_p, srtp_tmp2b) - s_rt_p = builder.fsub(s_rt_p, srtp_tmp3b) + s_rt_p_tmpA = builder.fsub(Z2_sqr_csch_Z2_sqr, ZpX_sqr_csch_ZpX_sqr) + s_rt_p_tmpA = builder.fmul(s_rt_p_tmpA, s_rt_p_tmpA.type(3)) + + s_rt_p_tmpB = builder.fsub(Z2_cub_coth_Z2_csch_Z2_sqr, + ZpX_cub_coth_ZpX_csch_Z2_sqr) + s_rt_p_tmpB = builder.fadd(s_rt_p_tmpB, s_rt_p_tmpB) + + s_rt_p_tmpC = builder.fsub(Z2_coth_Z2, ZpX_coth_ZpX) + s_rt_p_tmpC = builder.fmul(s_rt_p_tmpC, s_rt_p_tmpC.type(3)) + + s_rt_p = builder.fadd(s_rt_p_tmpA, s_rt_p_tmpB) + s_rt_p = builder.fadd(s_rt_p, s_rt_p_tmpC) + s_rt_p = builder.fmul(srt_tmp0, s_rt_p) s_rt_p = builder.fdiv(s_rt_p, v_rt_p_1_5) builder.store(s_rt_p, skew_rt_plus_ptr) # Skew minus - srtm_tmp1b = builder.fmul(ZmX_sqr_csch_ZmX_sqr.type(3), - ZmX_sqr_csch_ZmX_sqr) - srtm_tmp2b = builder.fmul(ZmX_coth_ZmX, ZmX_sqr_csch_ZmX_sqr) - srtm_tmp2b = builder.fmul(srtm_tmp2b.type(2), srtm_tmp2b) - srtm_tmp3b = builder.fmul(ZmX_coth_ZmX.type(3), ZmX_coth_ZmX) - - s_rt_m = builder.fsub(s_rt, srtm_tmp1b) - s_rt_m = builder.fsub(s_rt_m, srtm_tmp2b) - s_rt_m = builder.fsub(s_rt_m, srtm_tmp3b) + s_rt_m_tmpA = builder.fsub(Z2_sqr_csch_Z2_sqr, ZmX_sqr_csch_ZmX_sqr) + s_rt_m_tmpA = builder.fmul(s_rt_m_tmpA, s_rt_m_tmpA.type(3)) + + s_rt_m_tmpB = builder.fsub(Z2_cub_coth_Z2_csch_Z2_sqr, + ZmX_cub_coth_ZmX_csch_Z2_sqr) + s_rt_m_tmpB = builder.fadd(s_rt_m_tmpB, s_rt_m_tmpB) + + s_rt_m_tmpC = builder.fsub(Z2_coth_Z2, ZmX_coth_ZmX) + s_rt_m_tmpC = builder.fmul(s_rt_m_tmpC, s_rt_m_tmpC.type(3)) + + s_rt_m = builder.fadd(s_rt_m_tmpA, 
s_rt_m_tmpB) + s_rt_m = builder.fadd(s_rt_m, s_rt_m_tmpC) + s_rt_m = builder.fmul(srt_tmp0, s_rt_m) s_rt_m = builder.fdiv(s_rt_m, v_rt_m_1_5) builder.store(s_rt_m, skew_rt_minus_ptr) diff --git a/psyneulink/core/components/functions/nonstateful/learningfunctions.py b/psyneulink/core/components/functions/nonstateful/learningfunctions.py index c4727f52628..dd1b830a7e2 100644 --- a/psyneulink/core/components/functions/nonstateful/learningfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/learningfunctions.py @@ -37,8 +37,8 @@ from psyneulink.core.globals.context import handle_external_context from psyneulink.core.globals.keywords import \ CONTRASTIVE_HEBBIAN_FUNCTION, TDLEARNING_FUNCTION, LEARNING_FUNCTION_TYPE, LEARNING_RATE, \ - KOHONEN_FUNCTION, GAUSSIAN, LINEAR, EXPONENTIAL, HEBBIAN_FUNCTION, RL_FUNCTION, BACKPROPAGATION_FUNCTION, MATRIX, \ - MSE, SSE + KOHONEN_FUNCTION, GAUSSIAN, LINEAR, EXPONENTIAL, HEBBIAN_FUNCTION, RL_FUNCTION, BACKPROPAGATION_FUNCTION, \ + MATRIX, Loss from psyneulink.core.globals.parameters import Parameter, check_user_specified from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set from psyneulink.core.globals.utilities import is_numeric, scalar_distance, convert_to_np_array @@ -1123,7 +1123,6 @@ def _function(self, # then need to assign it to the default value # If learning_rate was not specified for instance or composition, use default value learning_rate = self._get_current_parameter_value(LEARNING_RATE, context) - # learning_rate = self.learning_rate if learning_rate is None: learning_rate = self.defaults.learning_rate # @@ -1696,6 +1695,7 @@ class BackPropagation(LearningFunction): default_variable=None, \ activation_derivative_fct=Logistic().derivative, \ learning_rate=None, \ + loss_function=None, \ params=None, \ name=None, \ prefs=None) @@ -1703,6 +1703,10 @@ class BackPropagation(LearningFunction): Calculate and return a matrix of weight changes and weighted error signal from arrays of inputs, outputs and error terms. + This implements the standard form of the `backpropagation learning algorithm + `_, using a form of loss determined by the `error_signal + ` of the `LearningMechanism` to which it is assigned. + `function ` calculates a matrix of weight changes using the `backpropagation `_ (`Generalized Delta Rule `_) learning algorithm, computed as: @@ -1799,6 +1803,10 @@ class BackPropagation(LearningFunction): supersedes any specification for the `Process` and/or `System` to which the function's `owner ` belongs (see `learning_rate ` for details). + loss_function : Loss : default None + specifies the operation to apply to the error signal (i.e., method of calculating the derivative of the error + with respect to activation) before computing weight changes. + params : Dict[param keyword: param value] : default None a `parameter dictionary ` that specifies the parameters for the function. Values specified for parameters in the dictionary override any assigned to those parameters in @@ -1854,8 +1862,9 @@ class BackPropagation(LearningFunction): default_learning_rate : float the value used for the `learning_rate ` if it is not otherwise specified. - loss_function : string : default 'MSE' - the operation to apply to the error signal before computing weight changes. + loss_function : Loss or None + the operation to apply to the error signal (i.e., method of calculating the derivative of the error + with respect to activation) before computing weight changes. 
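The weight-change computation that BackPropagation performs (shown further below in _function) can be sketched in plain numpy; shapes and values here are illustrative, and the scaling follows the loss_function branch (L0 by default, times 2 for SSE, times 2/n for MSE):

    import numpy as np

    learning_rate = 0.05
    error_signal = np.array([0.1, -0.2])                   # error from the layer above (len 2)
    error_matrix = np.array([[0.5, 0.1],
                             [0.2, 0.3],
                             [0.0, 0.4]])                  # (monitored outputs = 3) x (error signal = 2)
    activation_input = np.array([1.0, 0.5])                # input to the learned projection (len 2)
    activation_output = np.array([0.6, 0.4, 0.7])          # output of the monitored layer (len 3)

    dE_dA = error_matrix @ error_signal                    # contribution of each output unit to the error
    dA_dW = activation_output * (1 - activation_output)    # Logistic derivative computed from output
    dE_dW = dE_dA * dA_dW                                  # chain rule
    weight_change = learning_rate * np.outer(activation_input, dE_dW)   # (2, 3) delta-rule update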
owner : Component `Mechanism ` to which the Function belongs. @@ -1981,8 +1990,8 @@ def _validate_variable(self, variable, context=None): variable = super()._validate_variable(variable, context) if len(variable) != 3: - raise ComponentError("Variable for {} ({}) must have three items: {}, {}, and {})". - format(self.name, variable, ACTIVATION_INPUT, ACTIVATION_OUTPUT, ERROR_SIGNAL)) + raise ComponentError(f"Variable for '{self.name}' ({variable}) must have three items: " + f"{ACTIVATION_INPUT}, {ACTIVATION_OUTPUT}, and {ERROR_SIGNAL}.") return variable @@ -2006,13 +2015,6 @@ def _validate_params(self, request_set, target_set=None, context=None): or MappingProjection, its current value can be accessed at runtime (i.e., it can be used as a "pointer") """ - # # MODIFIED 3/22/17 OLD: - # # This allows callers to specify None as learning_rate (e.g., _instantiate_learning_components) - # if request_set[LEARNING_RATE] is None: - # request_set[LEARNING_RATE] = 1.0 - # # request_set[LEARNING_RATE] = request_set[LEARNING_RATE] or 1.0 - # # MODIFIED 3/22/17 END - super()._validate_params(request_set=request_set, target_set=target_set, context=context) if LEARNING_RATE in target_set and target_set[LEARNING_RATE] is not None: @@ -2026,26 +2028,25 @@ def _validate_params(self, request_set, target_set=None, context=None): from psyneulink.core.components.ports.parameterport import ParameterPort from psyneulink.core.components.projections.pathway.mappingprojection import MappingProjection if not isinstance(error_matrix, (list, np.ndarray, np.matrix, ParameterPort, MappingProjection)): - raise FunctionError("The {} arg for {} ({}) must be a list, 2d np.array, ParamaterState or " - "MappingProjection".format(ERROR_MATRIX, self.__class__.__name__, error_matrix)) + raise FunctionError(f"The '{ERROR_MATRIX}' arg for {self.__class__.__name__} ({error_matrix}) " + f"must be a list, 2d np.array, ParameterPort or MappingProjection.") if isinstance(error_matrix, MappingProjection): try: error_matrix = error_matrix._parameter_ports[MATRIX].value param_type_string = "MappingProjection's ParameterPort" except KeyError: - raise FunctionError("The MappingProjection specified for the {} arg of {} ({}) must have a {} " - "paramaterState that has been assigned a 2d array or matrix". - format(ERROR_MATRIX, self.__class__.__name__, error_matrix.shape, MATRIX)) + raise FunctionError(f"The MappingProjection specified for the '{ERROR_MATRIX}' arg " + f"of {self.__class__.__name__} ({error_matrix.shape}) must have a " + f"{MATRIX} ParameterPort that has been assigned a 2d array or matrix.") elif isinstance(error_matrix, ParameterPort): try: error_matrix = error_matrix.value param_type_string = "ParameterPort" except KeyError: - raise FunctionError("The value of the {} parameterPort specified for the {} arg of {} ({}) " - "must be a 2d array or matrix". - format(MATRIX, ERROR_MATRIX, self.__class__.__name__, error_matrix.shape)) + raise FunctionError(f"The value of the {MATRIX} ParameterPort specified for the '{ERROR_MATRIX}' " + f"arg of {self.__class__.__name__} ({error_matrix.shape}) must be a 2d array or matrix.") else: param_type_string = "array or matrix" @@ -2057,24 +2058,24 @@ def _validate_params(self, request_set, target_set=None, context=None): error_signal_len = len(self.defaults.variable[LEARNING_ERROR_OUTPUT]) if error_matrix.ndim != 2: - raise FunctionError("The value of the {} specified for the {} arg of {} ({}) " - "must be a 2d array or matrix". 
- format(param_type_string, ERROR_MATRIX, self.name, error_matrix)) + raise FunctionError(f"The value of the {param_type_string} specified for the '{ERROR_MATRIX}' arg " + f"of '{self.name}' ({error_matrix}) must be a 2d array or matrix.") # The length of the sender outputPort.value (the error signal) must be the # same as the width (# columns) of the MappingProjection's weight matrix (# of receivers) # Validate that columns (number of receiver elements) of error_matrix equals length of error_signal if cols != error_signal_len: - raise FunctionError("The width (number of columns, {}) of the \'{}\' arg ({}) specified for {} " - "must match the length of the error signal ({}) it receives". - format(cols, MATRIX, error_matrix.shape, self.name, error_signal_len)) + raise FunctionError(f"The width (number of columns, {cols}) of the '{MATRIX}' arg " + f"({error_matrix.shape}) specified for '{self.name}' must match " + f"the length of the error signal ({error_signal_len}) it receives.") # Validate that rows (number of sender elements) of error_matrix equals length of activity_output, if rows != activity_output_len: - raise FunctionError("The height (number of rows, {}) of \'{}\' arg specified for {} must match the " - "length of the output {} of the activity vector being monitored ({})". - format(rows, MATRIX, self.name, activity_output_len)) + activation_input = self._get_current_parameter_value(ACTIVATION_INPUT, context) + raise FunctionError(f"The height (number of rows, {rows}) of '{MATRIX}' arg specified for " + f"'{self.name}' must match the length of the output {activity_output_len} " + f"of the activity vector being monitored ({activation_input}).") def _function(self, variable=None, @@ -2117,6 +2118,10 @@ def _function(self, self._check_args(variable=variable, context=context, params=params) + # IMPLEMENTATION NOTE: if error_matrix is an arg, it must be in params (put there by super().function()) + if params: + error_matrix = params.pop(ERROR_MATRIX, None) + # Manage error_matrix param # During init, function is called directly from Component (i.e., not from LearningMechanism execute() method), # so need "placemarker" error_matrix for validation @@ -2130,8 +2135,8 @@ def _function(self, owner_string = "" if self.owner: owner_string = " of " + self.owner.name - raise FunctionError("Call to {} function{} must include \'ERROR_MATRIX\' in params arg". 
- format(self.__class__.__name__, owner_string)) + raise FunctionError(f"Call to {self.__class__.__name__} function {owner_string} " + f"must include '{ERROR_MATRIX}' in params arg.") self.parameters.error_matrix._set(error_matrix, context) # self._check_args(variable=variable, context=context, params=params, context=context) @@ -2153,21 +2158,22 @@ def _function(self, # Derivative of error with respect to output activity (contribution of each output unit to the error above) loss_function = self.parameters.loss_function.get(context) - if loss_function == MSE: + if loss_function == Loss.MSE: num_output_units = self._get_current_parameter_value(ERROR_SIGNAL, context).shape[0] dE_dA = np.dot(error_matrix, self._get_current_parameter_value(ERROR_SIGNAL, context)) / num_output_units * 2 - elif loss_function == SSE: + elif loss_function == Loss.SSE: dE_dA = np.dot(error_matrix, self._get_current_parameter_value(ERROR_SIGNAL, context)) * 2 else: + # Use L0 (this applies to hidden layers) dE_dA = np.dot(error_matrix, self._get_current_parameter_value(ERROR_SIGNAL, context)) # Derivative of the output activity activation_output = self._get_current_parameter_value(ACTIVATION_OUTPUT, context) - # FIX: THIS ASSUMES DERIVATIVE CAN BE COMPUTED FROM output OF FUNCTION (AS IT CAN FOR THE Logistic) dA_dW = self.activation_derivative_fct(input=None, output=activation_output, context=context) # Chain rule to get the derivative of the error with respect to the weights dE_dW = dE_dA * dA_dW + # dE_dW = np.matmul(dE_dA,dA_dW) # Weight changes = delta rule (learning rate * activity * error) weight_change_matrix = learning_rate * activation_input * dE_dW diff --git a/psyneulink/core/components/functions/nonstateful/objectivefunctions.py b/psyneulink/core/components/functions/nonstateful/objectivefunctions.py index 1e8ac37f370..931d5671ffb 100644 --- a/psyneulink/core/components/functions/nonstateful/objectivefunctions.py +++ b/psyneulink/core/components/functions/nonstateful/objectivefunctions.py @@ -29,7 +29,7 @@ from psyneulink.core.components.component import DefaultsFlexibility from psyneulink.core.components.functions.function import EPSILON, FunctionError, Function_Base, get_matrix from psyneulink.core.globals.keywords import \ - CORRELATION, COSINE, CROSS_ENTROPY, \ + CORRELATION, COSINE, COSINE_SIMILARITY, CROSS_ENTROPY, \ DEFAULT_VARIABLE, DIFFERENCE, DISTANCE_FUNCTION, DISTANCE_METRICS, DistanceMetrics, \ ENERGY, ENTROPY, EUCLIDEAN, HOLLOW_MATRIX, MATRIX, MAX_ABS_DIFF, \ NORMED_L0_SIMILARITY, OBJECTIVE_FUNCTION_TYPE, SIZE, STABILITY_FUNCTION @@ -992,7 +992,7 @@ def _gen_llvm_function_body(self, ctx, builder, params, _, arg_in, arg_out, *, t inner = functools.partial(self.__gen_llvm_sum_product, **kwargs) elif self.metric == CROSS_ENTROPY: inner = functools.partial(self.__gen_llvm_cross_entropy, **kwargs) - elif self.metric == COSINE: + elif self.metric in {COSINE, COSINE_SIMILARITY}: del kwargs['acc'] numer_acc = builder.alloca(ctx.float_ty) denom1_acc = builder.alloca(ctx.float_ty) @@ -1044,7 +1044,7 @@ def _gen_llvm_function_body(self, ctx, builder, params, _, arg_in, arg_out, *, t ret = builder.call(sqrt, [ret]) elif self.metric == MAX_ABS_DIFF: ret = builder.load(max_diff_ptr) - elif self.metric == COSINE: + elif self.metric in {COSINE, COSINE_SIMILARITY}: numer = builder.load(numer_acc) denom1 = builder.load(denom1_acc) denom1 = builder.call(sqrt, [denom1]) @@ -1111,7 +1111,7 @@ def _gen_llvm_function_body(self, ctx, builder, params, _, arg_in, arg_out, *, t ret = builder.fsub(ctx.float_ty(1), ret) 
# MAX_ABS_DIFF, CORRELATION, and COSINE ignore normalization - ignores = frozenset((MAX_ABS_DIFF, CORRELATION, COSINE)) + ignores = frozenset((MAX_ABS_DIFF, CORRELATION, COSINE, COSINE_SIMILARITY)) if self.normalize and self.metric not in ignores: norm_factor = input_length if self.metric == ENERGY: @@ -1170,7 +1170,7 @@ def _function(self, result = np.linalg.norm(v2 - v1) # Cosine similarity of v1 and v2 - elif self.metric == COSINE: + elif self.metric in {COSINE, COSINE_SIMILARITY}: # result = np.correlate(v1, v2) result = 1.0 - np.fabs(Distance.cosine(v1, v2)) return self.convert_output_type(result) diff --git a/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py b/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py index 6f4ce9f8588..d64947cc55f 100644 --- a/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/optimizationfunctions.py @@ -1652,7 +1652,7 @@ def _gen_llvm_select_min_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:fr if ocm is not None: assert ocm.function is self sample_t = ocm._get_evaluate_alloc_struct_type(ctx) - value_t = ocm._get_evaluate_output_struct_type(ctx) + value_t = ocm._get_evaluate_output_struct_type(ctx, tags=tags) else: obj_func = ctx.import_llvm_function(self.objective_function) sample_t = obj_func.args[2].type.pointee @@ -1751,7 +1751,7 @@ def _gen_llvm_function_body(self, ctx, builder, params, state_features, arg_in, controller = self._get_optimized_controller() if controller is not None: assert controller.function is self - obj_func = ctx.import_llvm_function(controller, tags=tags.union({"evaluate"})) + obj_func = ctx.import_llvm_function(controller, tags=tags.union({"evaluate", "evaluate_type_objective"})) comp_args = builder.function.args[-3:] obj_param_ptr = comp_args[0] obj_state_ptr = comp_args[1] @@ -1845,7 +1845,8 @@ def _gen_llvm_function_body(self, ctx, builder, params, state_features, arg_in, # Check if smaller than current best. # the argument pointers are already offset, so use range <0,1) - select_min_f = ctx.import_llvm_function(self, tags=tags.union({"select_min"})) + min_tags = tags.union({"select_min", "evaluate_type_objective"}) + select_min_f = ctx.import_llvm_function(self, tags=min_tags) b.call(select_min_f, [params, state_features, min_sample_ptr, sample_ptr, min_value_ptr, value_ptr, opt_count_ptr, ctx.int32_ty(0), ctx.int32_ty(1)]) @@ -1997,7 +1998,8 @@ def _function(self, # Reduce array of values to min/max # select_min params are: # params, state, min_sample_ptr, sample_ptr, min_value_ptr, value_ptr, opt_count_ptr, count - bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=frozenset({"select_min"})) + min_tags = frozenset({"select_min", "evaluate_type_objective"}) + bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=min_tags) ct_param = bin_func.byref_arg_types[0](*self._get_param_initializer(context)) ct_state = bin_func.byref_arg_types[1](*self._get_state_initializer(context)) ct_opt_sample = bin_func.byref_arg_types[2](float("NaN")) diff --git a/psyneulink/core/components/functions/nonstateful/transferfunctions.py b/psyneulink/core/components/functions/nonstateful/transferfunctions.py index eef16ca3f36..fdc38698db1 100644 --- a/psyneulink/core/components/functions/nonstateful/transferfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/transferfunctions.py @@ -28,6 +28,11 @@ Functions that transform their variable but maintain its shape. +.. 
_TransferFunction_StandardAttributes: + +Standard Attributes +~~~~~~~~~~~~~~~~~~~ + All TransferFunctions have the following attributes: * **bounds**: specifies the lower and upper limits of the result; if there are none, the attribute is set to @@ -39,6 +44,21 @@ parameters and used by `ModulatoryProjections ` to modulate the output of the TransferFunction's function (see `Function_Modulatory_Params`). +.. _TransferFunction_Derivative: + +Derivatives +~~~~~~~~~~~ + +Most TransferFunctions have a derivative method. These take both an **input** and an **output** argument. In general, +the **input** is used to compute the derivative of the function at that value. If that is not provided, some +Functions can compute the derivative using the function's output, either directly (such as `Logistic.derivative`) or by +inferring the input from the **output** and then computing the derivative for that value (such as `ReLU.derivative`). + + +TransferFunction Class References +-------------------------------- + + """ import numbers @@ -52,11 +72,11 @@ from psyneulink.core import llvm as pnlvm from psyneulink.core.components.component import parameter_keywords -from psyneulink.core.components.functions.nonstateful.combinationfunctions import LinearCombination from psyneulink.core.components.functions.function import ( DEFAULT_SEED, Function, Function_Base, FunctionError, _random_state_getter, _seed_setter, function_keywords, get_matrix, is_function_type, ) +from psyneulink.core.components.functions.nonstateful.combinationfunctions import LinearCombination from psyneulink.core.components.functions.nonstateful.selectionfunctions import OneHot from psyneulink.core.components.functions.stateful.integratorfunctions import SimpleIntegrator from psyneulink.core.components.shellclasses import Projection @@ -961,16 +981,17 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags exp_f = ctx.get_builtin("exp", [ctx.float_ty]) val = builder.load(ptri) - val = builder.fadd(val, bias) - val = builder.fsub(val, x_0) - val = builder.fmul(val, gain) - val = builder.fsub(offset, val) - val = builder.call(exp_f, [val]) - val = builder.fadd(ctx.float_ty(1), val) - val = builder.fdiv(ctx.float_ty(1), val) - val = builder.fmul(val, scale) + if "derivative_out" not in tags: + val = builder.fadd(val, bias) + val = builder.fsub(val, x_0) + val = builder.fmul(val, gain) + val = builder.fsub(offset, val) + val = builder.call(exp_f, [val]) + val = builder.fadd(ctx.float_ty(1), val) + val = builder.fdiv(ctx.float_ty(1), val) + val = builder.fmul(val, scale) - if "derivative" in tags: + if "derivative" in tags or "derivative_out" in tags: # f(x) = g * s * o * (1-o) function_val = val val = builder.fsub(ctx.float_ty(1), function_val) @@ -1023,9 +1044,18 @@ def derivative(self, input=None, output=None, context=None): Derivative of `function ` at either **input** or **output**. - Either **input** or **ouput** must be specified. If **output** is not specified, it is computed from **input**. + COMMENT: RESTORE WHEN TEST IN DERIVATIVE IS RESTORED + Either **input** or **output** must be specified. + If **output** is not specified, it is computed from **input**. If both are specified, **input** is ignored unless paramValidationPref is set, in which case an error is generated if **output** does not correspond to `function `\\(**input**). + COMMENT + Either **input** or **output** must be specified. + If **output** is not specified, derivative is computed from **input**. 
+        If both are specified, **input** is ignored and derivative is computed from **output**.
+        .. technical_note::
+           allowing both to be specified is supported for consistency with the `BackPropagation` `LearningFunction`,
+           which uses the Logistic's output to compute its derivative

         Arguments
         ---------
@@ -1042,20 +1072,11 @@ def derivative(self, input=None, output=None, context=None):

             Derivative of logistic transform at output:  number or array
         """
-        if output is not None and input is not None and self.prefs.paramValidationPref:
-            if isinstance(input, numbers.Number):
-                valid = output == self.function(input, context=context)
-            else:
-                valid = all(output[i] == self.function(input, context=context)[i] for i in range(len(input)))
-            if not valid:
-                raise FunctionError("Value of {} arg passed to {} ({}) "
-                                    "does not match the value expected for specified {} ({})".
-                                    format(repr('output'), self.__class__.__name__ + '.' + 'derivative', output,
-                                           repr('input'), input))

         gain = self._get_current_parameter_value(GAIN, context)
         scale = self._get_current_parameter_value(SCALE, context)

+        # Favor use of output: compute it from input if it is not provided
         if output is None:
             output = self.function(input, context=context)

@@ -1426,7 +1447,7 @@ class ReLU(TransferFunction):  # -----------------------------------------------
         specifies a value by which to multiply `variable ` after `bias `
         is subtracted from it.

     bias : float : default 0.0
-        specifies a value to subtract from each element of `variable `.
+        specifies a value to subtract from each element of `variable `; functions as threshold.

     leak : float : default 0.0
         specifies a scaling factor between 0 and 1 when (variable - bias) is less than or equal to 0.

     params : Dict[param keyword: param value] : default None
@@ -1451,7 +1472,7 @@ class ReLU(TransferFunction):  # -----------------------------------------------
         from it.

     bias : float : default 0.0
-        value to subtract from each element of `variable `.
+        value to subtract from each element of `variable `; functions as threshold.

     leak : float : default 0.0
         scaling factor between 0 and 1 when (variable - bias) is less than or equal to 0.

@@ -1569,12 +1590,15 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags
         # Maxnum for some reason needs full function prototype
         max_f = ctx.get_builtin("maxnum", [ctx.float_ty])
         var = builder.load(ptri)
+        if "derivative_out" in tags:
+            val = builder.fdiv(var, gain)
+        else:
+            val = builder.fsub(var, bias)

-        if "derivative" in tags:
-            predicate = builder.fcmp_ordered('>', var, var.type(0))
+        if "derivative" in tags or "derivative_out" in tags:
+            predicate = builder.fcmp_ordered('>', val, val.type(0))
             val = builder.select(predicate, gain, builder.fmul(gain, leak))
         else:
-            val = builder.fsub(var, bias)
             val1 = builder.fmul(val, gain)
             val2 = builder.fmul(val1, leak)

@@ -1583,11 +1607,13 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags
         builder.store(val, ptro)

     @handle_external_context()
-    def derivative(self, input, output=None, context=None):
+    def derivative(self, input=None, output=None, context=None):
         """
-        derivative(input)
+        derivative(input or output)

-        Derivative of `function ` at **input**.
+        Derivative of `function ` at **input** or **output**.  If **input** is specified, that
+        is used to compute the derivative; if **input** is not specified, it is inferred from the **output**
+        and then used to compute the derivative.
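[Editorial note -- not part of the patch]  The derivative-from-output paths added above can be checked outside of
PsyNeuLink.  A minimal NumPy sketch (standalone, hypothetical helper names; assumes the default scale = 1 and
offset = 0 for the Logistic, and gain > 0, leak > 0 for ReLU)::

    import numpy as np

    def logistic(x, gain=1.0, bias=0.0, x_0=0.0):
        return 1.0 / (1.0 + np.exp(-gain * (x + bias - x_0)))

    def logistic_derivative_from_output(o, gain=1.0):
        # f'(x) = gain * o * (1 - o): the derivative needs only the output o
        return gain * o * (1.0 - o)

    def relu_derivative_from_output(o, gain=1.0, leak=0.1):
        # sign(o) == sign(x - bias) when gain > 0 and leak > 0,
        # so the branch can be decided from the output alone (o / gain), as in the patch
        return np.where(np.asarray(o) / gain > 0, gain, gain * leak)

    x = np.linspace(-2.0, 2.0, 5)
    o = logistic(x, gain=2.0)
    print(logistic_derivative_from_output(o, gain=2.0))
    print(relu_derivative_from_output(np.array([-0.3, 0.0, 0.7]), gain=1.5, leak=0.1))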
 Arguments
         ---------

@@ -1599,17 +1625,21 @@ def derivative(self, input=None, output=None, context=None):
         -------
         derivative :  number or array
         """
+
         gain = self._get_current_parameter_value(GAIN, context)
         leak = self._get_current_parameter_value(LEAK, context)
+        bias = self._get_current_parameter_value(BIAS, context)

-        input = np.asarray(input).copy()
-        input[input>0] = gain
-        input[input<=0] = gain * leak
-
-        return input
+        if input is not None:
+            # Use input if provided
+            variable = np.array(input) - bias
+        else:
+            # Infer input from output
+            variable = np.array(output) / gain

+        value = np.where(variable > 0, gain, gain * leak)
+        return value

# **********************************************************************************************************************
#                                                    Angle
# **********************************************************************************************************************

@@ -1726,27 +1756,6 @@ def __init__(self,
                          prefs=prefs,
                          )

-    # def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset):
-    #     ptri = builder.gep(vi, [ctx.int32_ty(0), index])
-    #     ptro = builder.gep(vo, [ctx.int32_ty(0), index])
-    #     slope_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, SLOPE)
-    #     intercept_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, INTERCEPT)
-    #
-    #     slope = pnlvm.helpers.load_extract_scalar_array_one(builder, slope_ptr)
-    #     intercept = pnlvm.helpers.load_extract_scalar_array_one(builder, intercept_ptr)
-    #
-    #
-    #     if "derivative" in tags:
-    #         # f'(x) = m
-    #         val = slope
-    #     else:
-    #         # f(x) = mx + b
-    #         val = builder.load(ptri)
-    #         val = builder.fmul(val, slope)
-    #         val = builder.fadd(val, intercept)
-    #
-    #     builder.store(val, ptro)
-
     def _function(self,
                   variable=None,
                   context=None,
@@ -1800,16 +1809,62 @@ def _angle(self, value):
         value = np.squeeze(value)
         dim = len(value) + 1
         angle = np.zeros(dim)
-        angle[0] = np.cos(value[0])
-        prod = np.product([np.sin(value[k]) for k in range(1, dim - 1)])
-        n_prod = prod
-        for j in range(dim - 2):
-            n_prod /= np.sin(value[j + 1])
-            amt = n_prod * np.cos(value[j + 1])
-            angle[j + 1] = amt
-        angle[dim - 1] = prod
+        sin_value = np.sin(value)
+        cos_value = np.cos(value)
+        angle[0] = cos_value[0]
+        prod_a = np.cumprod(np.flip(sin_value))[:-1]
+        angle[dim - 1] = prod_a[-1]
+        prod_a[-1] = 1.
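[Editorial note -- not part of the patch]  The cumprod above replaces the removed per-element division loop with
suffix products of sin(value).  A standalone NumPy check of the equivalence (hypothetical function names)::

    import numpy as np

    def angle_old(value):
        dim = len(value) + 1
        angle = np.zeros(dim)
        angle[0] = np.cos(value[0])
        prod = np.prod([np.sin(value[k]) for k in range(1, dim - 1)])
        n_prod = prod
        for j in range(dim - 2):
            n_prod /= np.sin(value[j + 1])
            angle[j + 1] = n_prod * np.cos(value[j + 1])
        angle[dim - 1] = prod
        return angle

    def angle_new(value):
        dim = len(value) + 1
        angle = np.zeros(dim)
        sin_value, cos_value = np.sin(value), np.cos(value)
        angle[0] = cos_value[0]
        prod_a = np.cumprod(np.flip(sin_value))[:-1]
        angle[dim - 1] = prod_a[-1]
        prod_a[-1] = 1.
        for j in range(1, dim - 1):
            angle[j] = prod_a[dim - 3 - j] * cos_value[j]
        return angle

    value = np.random.default_rng(0).uniform(0.1, 3.0, size=7)
    assert np.allclose(angle_old(value), angle_new(value))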
+
+        # prod_a holds suffix products of sin(value); prod_a[dim - 3 - j] is the
+        # product of sin over elements j+1 and above (the two edge values, and the
+        # extra output element, are handled separately above and below)
+        for j in range(1, dim - 1):
+            angle[j] = prod_a[dim - 3 - j] * cos_value[j]

         return angle

+    def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, *, tags:frozenset):
+        assert isinstance(arg_in.type.pointee, pnlvm.ir.ArrayType)
+        assert isinstance(arg_out.type.pointee, pnlvm.ir.ArrayType)
+        assert len(arg_in.type.pointee) + 1 == len(arg_out.type.pointee)
+
+        # The first cos
+        res0_ptr = builder.gep(arg_out, [ctx.int32_ty(0), ctx.int32_ty(0)])
+        val0_ptr = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(0)])
+        val0 = builder.load(val0_ptr)
+        cos_f = ctx.get_builtin("cos", [val0.type])
+        cos_val0 = builder.call(cos_f, [val0])
+        builder.store(cos_val0, res0_ptr)
+
+        # calculate suffix product
+        sin_f = ctx.get_builtin("sin", [val0.type])
+        prod_ptr = builder.alloca(val0.type)
+        builder.store(prod_ptr.type.pointee(1.0), prod_ptr)
+
+        dim_m1 = ctx.int32_ty(len(arg_out.type.pointee) - 1)
+        with pnlvm.helpers.for_loop(builder, dim_m1.type(1), dim_m1, dim_m1.type(1), id="suff_prod") as (b, idx):
+            # reverse the index to traverse from the end
+            idx = b.sub(dim_m1, idx)
+
+            prod = b.load(prod_ptr)
+            val_ptr = b.gep(arg_in, [ctx.int32_ty(0), idx])
+            val = b.load(val_ptr)
+
+            # calculate suffix product of sin(input)
+            val_sin = b.call(sin_f, [val])
+            new_prod = b.fmul(prod, val_sin)
+            b.store(new_prod, prod_ptr)
+
+            # output value is suffix product * cos(val)
+            val_cos = b.call(cos_f, [val])
+            res = b.fmul(prod, val_cos)
+            res_ptr = b.gep(arg_out, [ctx.int32_ty(0), idx])
+            b.store(res, res_ptr)
+
+        # The last element is just the suffix product * 1
+        last_ptr = builder.gep(arg_out, [ctx.int32_ty(0), dim_m1])
+        builder.store(builder.load(prod_ptr), last_ptr)
+
+        return builder
+
    # @handle_external_context()
    # def derivative(self, input=None, output=None, context=None):
    #     """
@@ -2462,7 +2517,7 @@ class SoftMax(TransferFunction):
         0 for all others.

     per_item : boolean : default True
-        for 2d variables, determines whether the SoftMax function will be applied to the entire variable (per_item =
+        for 2d variables, determines whether the SoftMax function is applied to the entire variable (per_item =
         False), or applied to each item in the variable separately (per_item = True).
 bounds : None if `output ` == MAX_VAL, else (0,1) : default (0,1)

@@ -2611,7 +2666,7 @@ def __gen_llvm_exp_div(self, builder, index, ctx, vi, vo, gain, exp_sum):

         builder.store(val, ptro)

-    def __gen_llvm_apply(self, ctx, builder, params, state, arg_in, arg_out, tags:frozenset):
+    def __gen_llvm_apply(self, ctx, builder, params, state, arg_in, arg_out, output_type, tags:frozenset):
         exp_sum_ptr = builder.alloca(ctx.float_ty)
         builder.store(exp_sum_ptr.type.pointee(0), exp_sum_ptr)

@@ -2624,7 +2679,7 @@ def __gen_llvm_apply(self, ctx, builder, params, state, arg_in, arg_out, tags:fr

         exp_sum = builder.load(exp_sum_ptr)

-        if self.output == ALL:
+        if output_type == ALL:
             with pnlvm.helpers.array_ptr_loop(builder, arg_in, "exp_div") as args:
                 self.__gen_llvm_exp_div(ctx=ctx, vi=arg_in, vo=arg_out,
                                         gain=gain, exp_sum=exp_sum, *args)
@@ -2638,14 +2693,14 @@
             one_hot_out = arg_out
             one_hot_in = builder.alloca(one_hot_f.args[2].type.pointee)

-            if self.output in {MAX_VAL, MAX_INDICATOR}:
+            if output_type in {MAX_VAL, MAX_INDICATOR}:
                 with pnlvm.helpers.array_ptr_loop(builder, arg_in, "exp_div") as (b, i):
                     self.__gen_llvm_exp_div(ctx=ctx, vi=arg_in, vo=one_hot_in,
                                             gain=gain, exp_sum=exp_sum, builder=b, index=i)

                 builder.call(one_hot_f, [one_hot_p, one_hot_s, one_hot_in, one_hot_out])

-            elif self.output == PROB:
+            elif output_type == PROB:
                 one_hot_in_data = builder.gep(one_hot_in, [ctx.int32_ty(0), ctx.int32_ty(0)])
                 one_hot_in_dist = builder.gep(one_hot_in, [ctx.int32_ty(0), ctx.int32_ty(1)])
@@ -2660,21 +2715,78 @@
                 builder.call(one_hot_f, [one_hot_p, one_hot_s, one_hot_in, one_hot_out])
             else:
-                assert False, "Unsupported output in {}: {}".format(self, self.output)
+                assert False, "Unsupported output in {}: {}".format(self, output_type)

         return builder

-    def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, *, tags:frozenset):
+    def _gen_llvm_function_derivative_body(self, ctx, builder, params, state, arg_in, arg_out, *, tags:frozenset):
+        assert "derivative" in tags or "derivative_out" in tags
+        assert arg_in.type == arg_out.type
+        forward_tags = tags.difference({"derivative", "derivative_out"})
+
+        # SoftMax derivative is calculated from the "ALL" results.
+        # Those can be provided from outside, but we don't support receiving data in arg_out
+        if "derivative_out" in tags:
+            all_out = arg_in
+        else:
+            all_out = builder.alloca(arg_out.type.pointee)
+            builder = self._gen_llvm_function_body(ctx, builder, params, state, arg_in, all_out, output_type=ALL, tags=forward_tags)
+
+        # The rest of the algorithm is for MAX_VAL and MAX_INDICATOR only
+        assert self.output in {MAX_VAL, MAX_INDICATOR}, \
+            "Derivative of SoftMax is only implemented for MAX_VAL and MAX_INDICATOR! ({})".format(self.output)
+
+        if not pnlvm.helpers.is_scalar(arg_out.type.pointee.element):
+            assert len(arg_out.type.pointee) == 1
+            arg_out = builder.gep(arg_out, [ctx.int32_ty(0), ctx.int32_ty(0)])
+            all_out = builder.gep(all_out, [ctx.int32_ty(0), ctx.int32_ty(0)])
+
+        max_pos_ptr = builder.alloca(ctx.int32_ty)
+        builder.store(max_pos_ptr.type.pointee(-1), max_pos_ptr)
+        max_val_ptr = builder.alloca(arg_out.type.pointee.element)
+        builder.store(max_val_ptr.type.pointee(float("NaN")), max_val_ptr)
+
+        with pnlvm.helpers.array_ptr_loop(builder, all_out, id="max") as (b, idx):
+            val_ptr = b.gep(all_out, [ctx.int32_ty(0), idx])
+            val = b.load(val_ptr)
+            max_val = b.load(max_val_ptr)
+            new_max = b.fcmp_unordered(">", val, max_val)
+            with b.if_then(new_max):
+                b.store(val, max_val_ptr)
+                b.store(idx, max_pos_ptr)
+
+        max_val = builder.load(max_val_ptr)
+        max_pos = builder.load(max_pos_ptr)
+
+        with pnlvm.helpers.array_ptr_loop(builder, all_out, id="derivative") as (b, idx):
+            val_ptr = b.gep(all_out, [ctx.int32_ty(0), idx])
+            val = b.load(val_ptr)
+            is_max_pos = b.icmp_unsigned("==", idx, max_pos)
+
+            d = b.select(is_max_pos, val.type(1), val.type(0))
+            dv = b.fsub(d, max_val)
+            val = b.fmul(val, dv)
+
+            out_ptr = b.gep(arg_out, [ctx.int32_ty(0), idx])
+            b.store(val, out_ptr)
+
+        return builder
+
+    def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, output_type=None, *, tags:frozenset):
+        output_type = self.output if output_type is None else output_type
+        if "derivative" in tags or "derivative_out" in tags:
+            return self._gen_llvm_function_derivative_body(ctx, builder, params, state, arg_in, arg_out, tags=tags)
+
         if self.parameters.per_item.get():
             assert isinstance(arg_in.type.pointee.element, pnlvm.ir.ArrayType)
             assert isinstance(arg_out.type.pointee.element, pnlvm.ir.ArrayType)
             for i in range(arg_in.type.pointee.count):
                 inner_in = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(i)])
                 inner_out = builder.gep(arg_out, [ctx.int32_ty(0), ctx.int32_ty(i)])
-                builder = self.__gen_llvm_apply(ctx, builder, params, state, inner_in, inner_out, tags=tags)
+                builder = self.__gen_llvm_apply(ctx, builder, params, state, inner_in, inner_out, output_type, tags=tags)
             return builder
         else:
-            return self.__gen_llvm_apply(ctx, builder, params, state, arg_in, arg_out, tags=tags)
+            return self.__gen_llvm_apply(ctx, builder, params, state, arg_in, arg_out, output_type, tags=tags)

     def apply_softmax(self, input_value, gain, output_type):
         # Modulate input_value by gain
@@ -2735,36 +2847,50 @@ def _function(self,

         return self.convert_output_type(output)

     @handle_external_context()
-    def derivative(self, output, input=None, context=None):
+    def derivative(self, input=None, output=None, context=None):
         """
         derivative(output)

+        .. technical_note::
+           If MAX_VAL is specified for the `output ` parameter, and there is a tie for the maximum
+           value, the element with the lower index is used to compute the derivative (see IMPLEMENTATION NOTE below).
+
         Returns
         -------
         derivative of values returned by SoftMax :  1d or 2d array (depending on *OUTPUT_TYPE* of SoftMax)
         """
-        output_type = self.output_type
-        size = len(output)
-        sm = self.function(output, params={OUTPUT_TYPE: ALL}, context=context)
+        if output is None:
+            output = self.function(input, params={OUTPUT_TYPE: ALL}, context=context)
+        else:
+            assert not np.any(np.equal(0, output))
+
+        sm = np.squeeze(output)
+        size = len(sm)
+        assert (len(output) == 1 and len(output[0]) == size) or len(output) == size

+        output_type = self._get_current_parameter_value(OUTPUT_TYPE, context)
         if output_type == ALL:
-            # Return full Jacobian matrix of derivatives
+            # Return full Jacobian matrix of derivatives using the Kronecker delta:
             derivative = np.empty([size, size])
-            for j in range(size):
-                for i, val in zip(range(size), output):
-                    if i == j:
-                        d = 1
-                    else:
-                        d = 0
-                    derivative[j, i] = sm[i] * (d - sm[j])
-
+            for i, j in np.ndindex(size, size):
+                if i == j:
+                    d = 1
+                else:
+                    d = 0
+                derivative[j, i] = sm[i] * (d - sm[j])
         elif output_type in {MAX_VAL, MAX_INDICATOR}:
             # Return 1d array of derivatives for max element (i.e., the one chosen by SoftMax)
             derivative = np.empty(size)
-            # Get the element of output returned as non-zero when output_type is not ALL
-            index_of_max = int(np.where(output == np.max(output))[0])
+            # Get the element of output returned as non-zero (max val) when output_type is not ALL
+            # IMPLEMENTATION NOTES:
+            #    if there is a tie for max, this chooses the item in sm with the lowest index in sm:
+            index_of_max = int(np.where(sm == np.max(sm))[0][0])
+            # the following would randomly choose a value in case of a tie,
+            #    but may cause problems with compilation:
+            # index_of_max = np.where(sm == np.max(sm))[0]
+            # if len(index_of_max) > 1:
+            #    index_of_max = int(np.random.choice(index_of_max))
             max_val = sm[index_of_max]
             for i in range(size):
                 if i == index_of_max:
@@ -2772,7 +2898,6 @@
                 else:
                     d = 0
                 derivative[i] = sm[i] * (d - max_val)
-
         else:
             raise FunctionError("Can't assign derivative for SoftMax function{} since OUTPUT_TYPE is PROB "
                                 "(and therefore the relevant element is ambiguous)".format(self.owner_name))
diff --git a/psyneulink/core/components/functions/stateful/integratorfunctions.py b/psyneulink/core/components/functions/stateful/integratorfunctions.py
index 7764829ffdb..83613c87f40 100644
--- a/psyneulink/core/components/functions/stateful/integratorfunctions.py
+++ b/psyneulink/core/components/functions/stateful/integratorfunctions.py
@@ -33,7 +33,7 @@

 from psyneulink.core import llvm as pnlvm
 from psyneulink.core.components.component import DefaultsFlexibility
-from psyneulink.core.components.functions.nonstateful.distributionfunctions import DistributionFunction
+from psyneulink.core.components.functions.nonstateful.distributionfunctions import DistributionFunction, NormalDist
 from psyneulink.core.components.functions.function import (
     DEFAULT_SEED, FunctionError, _random_state_getter, _seed_setter, _noise_setter
 )
@@ -51,7 +51,7 @@
 from psyneulink.core.globals.parameters import Parameter, check_user_specified
 from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set
 from psyneulink.core.globals.utilities import parameter_spec, all_within_range, \
-    convert_all_elements_to_np_array
+    convert_all_elements_to_np_array, parse_valid_identifier

 __all__ = ['SimpleIntegrator', 'AdaptiveIntegrator', 'DriftDiffusionIntegrator', 'DriftOnASphereIntegrator',
            'OrnsteinUhlenbeckIntegrator',
            'FitzHughNagumoIntegrator', 'AccumulatorIntegrator',
@@ -429,7 +429,7 @@ class AccumulatorIntegrator(IntegratorFunction):  # ----------------------------
     so that, with each call to `function `, the accumulated value increases by:

     .. math::
-        increment \\cdot rate^{time\\ step}.
+        increment \\cdot rate^{time\\_step}.

     Thus, accumulation increases linearly in steps of `increment `
     if `rate `\\=1.0, and exponentially otherwise.
@@ -2216,7 +2216,7 @@ class DriftDiffusionIntegrator(IntegratorFunction):  # -------------------------

     offset : float, list or 1d array : default 0.0
         specifies constant value added to integral in each call to `function `
-        if it's absolute value is below `threshold `\;
+        if its absolute value is below `threshold `;
         if it is a list or array, it must be the same length as `variable `
         (see `offset ` for details).
@@ -2339,6 +2339,10 @@ class DriftDiffusionIntegrator(IntegratorFunction):  # -------------------------
     """

     componentName = DRIFT_DIFFUSION_INTEGRATOR_FUNCTION

+    _mdf_stateful_parameter_indices = {
+        'previous_value': 0,
+        'previous_time': 1,
+    }
+
     class Parameters(IntegratorFunction.Parameters):
         """
             Attributes
@@ -2407,7 +2411,7 @@ class Parameters(IntegratorFunction.Parameters):
         non_decision_time = Parameter(0.0, modulable=True)
         threshold = Parameter(100.0, modulable=True)
         time_step_size = Parameter(1.0, modulable=True)
-        previous_time = Parameter(None, initializer='non_decision_time', pnl_internal=True)
+        previous_time = Parameter(0.0, initializer='non_decision_time')
         random_state = Parameter(None, loggable=False, getter=_random_state_getter, dependencies='seed')
         seed = Parameter(DEFAULT_SEED, modulable=True, fallback_default=True, setter=_seed_setter)
         enable_output_type_conversion = Parameter(
@@ -2417,6 +2421,8 @@ class Parameters(IntegratorFunction.Parameters):
             pnl_internal=True,
             read_only=True
         )
+        # used only to allow putting random_draw in runtime_params for MDF tests
+        random_draw = Parameter()

     def _parse_initializer(self, initializer):
         if initializer.ndim > 1:
@@ -2504,7 +2510,11 @@ def _function(self,
         variable = self.parameters._parse_initializer(variable)
         previous_value = self.parameters.previous_value._get(context)

-        random_draw = np.array([random_state.normal() for _ in list(variable)])
+        try:
+            random_draw = params['random_draw']
+        except (KeyError, TypeError):
+            random_draw = np.array([random_state.normal() for _ in list(variable)])
+
         value = previous_value + rate * variable * time_step_size \
                 + noise * np.sqrt(time_step_size) * random_draw

@@ -2585,6 +2595,50 @@ def reset(self, previous_value=None, previous_time=None, context=None):
             context=context
         )

+    def _assign_to_mdf_model(self, model, input_id):
+        import modeci_mdf.mdf as mdf
+
+        self_id = parse_valid_identifier(self.name)
+        parameters = self._mdf_model_parameters
+        parameters[self._model_spec_id_parameters][MODEL_SPEC_ID_MDF_VARIABLE] = input_id
+        threshold = parameters[self._model_spec_id_parameters]['threshold']
+
+        random_draw_func = mdf.Function(
+            id=f'{self_id}_random_draw',
+            function=NormalDist._model_spec_generic_type_name,
+            args={'shape': (len(self.defaults.variable),), 'seed': self.defaults.seed}
+        )
+        unclipped_func = mdf.Function(
+            id=f'{self_id}_unclipped',
+            value=f'(previous_value + rate * {MODEL_SPEC_ID_MDF_VARIABLE} * time_step_size + noise * math.sqrt(time_step_size) * {random_draw_func.id}) + offset',
+            args={
+                k: v for k, v in parameters[self._model_spec_id_parameters].items()
+                if (k not in self.parameters or getattr(self.parameters, k).initializer is None)
+            }
+        )
+        lower_clipped = 
mdf.Function( + id=f'{self_id}_lower_clipped', + value=f'max({unclipped_func.id}, -1 * threshold)', + args={'threshold': np.full_like(self.defaults.previous_value, threshold)} + ) + result = mdf.Function( + id=f'{self_id}_value_result', + value=f'min({lower_clipped.id}, threshold)', + args={'threshold': np.full_like(self.defaults.previous_value, threshold)} + ) + + model.functions.extend([ + random_draw_func, + unclipped_func, + lower_clipped, + result, + ]) + + return super()._assign_to_mdf_model(model, input_id) + + def as_expression(self): + return f'{parse_valid_identifier(self.name)}_value_result, previous_time' + class DriftOnASphereIntegrator(IntegratorFunction): # ----------------------------------------------------------------- """ @@ -2782,8 +2836,8 @@ class DriftOnASphereIntegrator(IntegratorFunction): # ------------------------- `dimension `. angle_function : TransferFunction - determines the function used to compute angle (reproted as result) from coordinates on sphere specified by - coordinates in `previous_value ` displace by `variable + determines the function used to compute angle (reported as result) from coordinates on sphere specified by + coordinates in `previous_value ` displaced by `variable ` and possibly `noise `. previous_time : float @@ -2822,7 +2876,7 @@ class Parameters(IntegratorFunction.Parameters): dimension see `dimension ` - :default value: 2 + :default value: 3 :type: ``int`` enable_output_type_conversion @@ -2894,9 +2948,9 @@ class Parameters(IntegratorFunction.Parameters): starting_point = 0.0 # threshold = Parameter(100.0, modulable=True) time_step_size = Parameter(1.0, modulable=True) - previous_time = Parameter(None, initializer='starting_point', pnl_internal=True) + previous_time = Parameter(0.0, initializer='starting_point', pnl_internal=True) dimension = Parameter(3, stateful=False, read_only=True) - initializer = Parameter([0], initalizer='variable', stateful=True) + initializer = Parameter([0], initalizer='variable', dependencies=dimension, stateful=True) angle_function = Parameter(None, stateful=False, loggable=False) random_state = Parameter(None, loggable=False, getter=_random_state_getter, dependencies='seed') seed = Parameter(DEFAULT_SEED, modulable=True, fallback_default=True, setter=_seed_setter) diff --git a/psyneulink/core/components/functions/stateful/memoryfunctions.py b/psyneulink/core/components/functions/stateful/memoryfunctions.py index c6fb7d67731..32e58130ea0 100644 --- a/psyneulink/core/components/functions/stateful/memoryfunctions.py +++ b/psyneulink/core/components/functions/stateful/memoryfunctions.py @@ -466,7 +466,7 @@ class ContentAddressableMemory(MemoryFunction): # ------------------------------ An entry is stored and retrieved as an array containing a set of `fields ` each of which is a 1d array. An array containing such entries can be used to initialize the contents of `memory ` by providing it in the **initializer** argument of the ContentAddressableMemory's - constructor, or in a call to its `reset ` method. The current contents of `memory + constructor, or in a call to its `reset ` method. The current contents of `memory ` can be inspected using the `memory ` attribute, which returns a list containing the current entries, each as a list containing all fields for that entry. 
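[Editorial note -- not part of the patch]  The DriftDiffusionIntegrator MDF export in the hunk above chains three
expressions: an unclipped Euler-Maruyama step, a lower clip at -threshold, and an upper clip at +threshold.  The
same update in standalone NumPy (hypothetical helper; default values taken from the Parameters shown above)::

    import numpy as np

    def ddm_step(previous_value, variable, rate=1.0, noise=1.0,
                 time_step_size=1.0, offset=0.0, threshold=100.0, rng=None):
        # one step: previous + rate * variable * dt + noise * sqrt(dt) * draw + offset,
        # then clipped to [-threshold, +threshold] like lower_clipped -> result above
        rng = np.random.default_rng() if rng is None else rng
        draw = rng.standard_normal(np.shape(variable))
        unclipped = (previous_value + rate * np.asarray(variable) * time_step_size
                     + noise * np.sqrt(time_step_size) * draw + offset)
        return np.clip(unclipped, -threshold, threshold)

    rng = np.random.default_rng(0)
    x = 0.0
    for _ in range(100):
        x = ddm_step(x, 0.5, noise=0.1, time_step_size=0.01, rng=rng)
    print(x)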
 The `memory_num_fields ` contains the number of fields expected for each
@@ -501,7 +501,7 @@ class ContentAddressableMemory(MemoryFunction):  # ------------------------------
     the entry closest to `variable ` is retrieved from `memory
     `.  The entry is chosen by calling, in order:

-    * `distance_function `\\: generates a list of and compares
+    * `distance_function `: generates and compares a list of
       `distances ` between `variable ` and each entry in
      `memory `, possibly weighted by `distance_field_weights
      `, as follows:
@@ -528,7 +528,7 @@ class ContentAddressableMemory(MemoryFunction):  # ------------------------------
      between `variable ` and entries for those fields are not included
      in the averaging of distances by field.

-    * `selection_function `\\: called with the list of distances
+    * `selection_function `: called with the list of distances
      to determine which entries to select for consideration.  If more than one entry from `memory
      ` is identified, `equidistant_entries_select `
      is used to determine which to retrieve.  If no
@@ -765,7 +765,7 @@ class ContentAddressableMemory(MemoryFunction):  # ------------------------------

     noise : float, list, 2d array, or Function : default 0.0
         specifies random value(s) added to `variable ` before storing in
-        `memory `\\;  if a list or 2d array, it must be the same shape as `variable
+        `memory `;  if a list or 2d array, it must be the same shape as `variable
         ContentAddressableMemory.variable>` (see `noise ` for details).

     initializer : 3d array or list : default None
@@ -1815,6 +1815,18 @@ def _parse_memories(self, entries, method, context=None):

         return memories

+    def store(self, entry, context=None, **kwargs):
+        """Store value in `memory `.
+        Convenience method for storing entry in memory.
+        """
+        return self(entry, retrieval_prob=0.0, context=context, **kwargs)
+
+    def retrieve(self, entry, context=None, **kwargs):
+        """Retrieve value from `memory `.
+        Convenience method for retrieving entry from memory.
+ """ + return self(entry, storage_prob=0.0, context=context, **kwargs) + @property def memory(self): """Return entries in self._memory as lists in an outer np.array; diff --git a/psyneulink/core/components/functions/stateful/statefulfunction.py b/psyneulink/core/components/functions/stateful/statefulfunction.py index 5e22d460526..37156607a43 100644 --- a/psyneulink/core/components/functions/stateful/statefulfunction.py +++ b/psyneulink/core/components/functions/stateful/statefulfunction.py @@ -168,6 +168,11 @@ class StatefulFunction(Function_Base): # -------------------------------------- componentType = STATEFUL_FUNCTION_TYPE componentName = STATEFUL_FUNCTION + # TODO: consider moving this to a Parameter attribute + _mdf_stateful_parameter_indices = { + 'previous_value': None + } + class Parameters(Function_Base.Parameters): """ Attributes diff --git a/psyneulink/core/components/mechanisms/mechanism.py b/psyneulink/core/components/mechanisms/mechanism.py index 567c2f3eeca..b439846e79f 100644 --- a/psyneulink/core/components/mechanisms/mechanism.py +++ b/psyneulink/core/components/mechanisms/mechanism.py @@ -1098,7 +1098,6 @@ REMOVE_PORTS, PORT_SPEC, _parse_port_spec, PORT_SPECIFIC_PARAMS, PROJECTION_SPECIFIC_PARAMS from psyneulink.core.components.shellclasses import Mechanism, Projection, Port from psyneulink.core.globals.context import Context, ContextFlags, handle_external_context -from psyneulink.core.globals.mdf import _get_variable_parameter_name # TODO: remove unused keywords from psyneulink.core.globals.keywords import \ ADDITIVE_PARAM, EXECUTION_PHASE, EXPONENT, FUNCTION_PARAMS, \ @@ -2635,14 +2634,15 @@ def _get_variable_from_input(self, input, context=None): # Call input_port._execute with newly assigned variable and assign result to input_port.value base_error_msg = f"Input to '{self.name}' ({input_item}) is incompatible " \ f"with its corresponding {InputPort.__name__} ({input_port.full_name})" + variable = input_port.parameters.variable.get(context) try: - input_port.parameters.value._set( - input_port._execute(input_port.parameters.variable.get(context), context), - context) - except (RunError,TypeError) as error: + value = input_port._execute(variable, context) + except (RunError, TypeError) as error: raise MechanismError(f"{base_error_msg}: '{error.args[0]}.'") except: raise MechanismError(f"{base_error_msg}.") + else: + input_port.parameters.value._set(value, context) else: raise MechanismError(f"Length ({len(input_item)}) of input ({input_item}) does not match " f"required length ({input_port.default_input_shape.size}) for input " @@ -3089,7 +3089,7 @@ def _gen_llvm_invoke_function(self, ctx, builder, function, f_params, f_state, return f_out, builder - def _gen_llvm_is_finished_cond(self, ctx, builder, m_params, m_state): + def _gen_llvm_is_finished_cond(self, ctx, builder, m_base_params, m_state, m_inputs): return ctx.bool_ty(1) def _gen_llvm_mechanism_functions(self, ctx, builder, m_base_params, m_params, m_state, m_in, @@ -3142,7 +3142,7 @@ def _gen_llvm_function_internal(self, ctx, builder, m_params, m_state, arg_in, # is_finished should be checked after output ports ran is_finished_f = ctx.import_llvm_function(self, tags=tags.union({"is_finished"})) - is_finished_cond = builder.call(is_finished_f, [m_params, m_state, arg_in, + is_finished_cond = builder.call(is_finished_f, [m_base_params, m_state, arg_in, arg_out]) return builder, is_finished_cond @@ -3159,6 +3159,12 @@ def _gen_llvm_function_reset(self, ctx, builder, params, state, arg_in, arg_out, return builder 
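[Editorial note -- not part of the patch]  Expected usage of the store()/retrieve() convenience methods added to
ContentAddressableMemory in the hunk above (a sketch; the entry shapes follow the two-field example in the class
docstring, and psyneulink is assumed importable as pnl)::

    import numpy as np
    import psyneulink as pnl

    cam = pnl.ContentAddressableMemory(initializer=np.array([[[1., 2.], [3., 4.]]]))

    # store() calls the function with retrieval_prob=0.0 (store only) ...
    cam.store([[5., 6.], [7., 8.]])

    # ... and retrieve() calls it with storage_prob=0.0 (retrieve only)
    closest = cam.retrieve([[1., 2.], [3., 4.]])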
 def _gen_llvm_function(self, *, extra_args=[], ctx:pnlvm.LLVMBuilderContext, tags:frozenset):
+        """
+        Overloaded main function LLVM generation method.
+
+        Mechanisms need to support the "is_finished" execution variant (used by scheduling conditions)
+        on top of the variants supported by Component.
+        """
         if "is_finished" not in tags:
             return super()._gen_llvm_function(extra_args=extra_args, ctx=ctx, tags=tags)

@@ -3171,12 +3177,20 @@ def _gen_llvm_function(self, *, extra_args=[], ctx:pnlvm.LLVMBuilderContext, tag
         builder = ctx.create_llvm_function(args, self, return_type=ctx.bool_ty,
                                            tags=tags)

-        params, state = builder.function.args[:2]
-        finished = self._gen_llvm_is_finished_cond(ctx, builder, params, state)
+        params, state, inputs = builder.function.args[:3]
+        finished = self._gen_llvm_is_finished_cond(ctx, builder, params, state, inputs)
         builder.ret(finished)

         return builder.function

     def _gen_llvm_function_body(self, ctx, builder, base_params, state, arg_in, arg_out, *, tags:frozenset):
+        """
+        Overloaded LLVM code generation method.
+
+        Implements the main Mechanism execution loop (while not finished). Calls two other internal
+        Mechanism functions: 'is_finished' to terminate the loop, and '_gen_llvm_function_internal'
+        to generate the body of the loop (invocation of Ports and Functions).
+        """
+
         assert "reset" not in tags

         params, builder = self._gen_llvm_param_ports_for_obj(
@@ -3204,7 +3218,9 @@
         builder.branch(loop_block)
         builder.position_at_end(loop_block)

-        # Get internal function
+        # Get internal function. Use function call to get proper stack manipulation
+        # inside the body of the execution loop. We could use 'stacksave' and
+        # 'stackrestore', but not all LLVM targets support those ops.
         args_t = [a.type for a in builder.function.args]
         args_t[4:4] = [base_params.type]
         internal_builder = ctx.create_llvm_function(args_t, self,
@@ -4194,20 +4210,12 @@ def as_mdf_model(self):

             model.output_ports.append(op_model)

-        function_model = self.function.as_mdf_model()
-
-        for _, func_param in function_model.metadata['function_stateful_params'].items():
-            model.parameters.append(mdf.Parameter(**func_param))
-
         if len(ip.path_afferents) > 1:
             primary_function_input_name = combination_function_dimreduce_id
         else:
             primary_function_input_name = model.input_ports[0].id

-        self.function._set_mdf_arg(
-            function_model, _get_variable_parameter_name(self.function), primary_function_input_name
-        )
-        model.functions.append(function_model)
+        self.function._assign_to_mdf_model(model, primary_function_input_name)

         return model
diff --git a/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py
index b4d82a6662e..cecbe5bb451 100644
--- a/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py
+++ b/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py
@@ -357,18 +357,18 @@
 By default, a ControlMechanism has a single `input_port ` named *OUTCOME*.  If it
 has an `objective_mechanism `, then the *OUTCOME* `input_port
 ` receives a single `MappingProjection` from the `objective_mechanism
-`\\'s *OUTCOME* `OutputPort` (see `ControlMechanism_ObjectiveMechanism` for
-additional details).  If the ControlMechanism has no `objective_mechanism ` then,
-when it is added to a `Composition`, MappingProjections are created from the items specified in `monitor_for_control
-` directly to InputPorts on the ControlMechanism (see
-`ControlMechanism_Monitor_for_Control` for additional details). The number of InputPorts created, and how the items
-listed in `monitor_for_control ` project to them is deterimined by the
-ControlMechanism's `outcome_input_ports_option `. All of the Inports
-that receive Projections from those items, or the `objective_mechanism ` if
-the ControlMechanism has one, are listed in its `outcome_input_ports ` attribute,
-and their values in the `outcome ` attribute.  The latter is used as the input to the
-ControlMechanism's `function ` to determine its `control_allocation
-`.
+`\\'s *OUTCOME* `OutputPort ` (see
+`ControlMechanism_ObjectiveMechanism` for additional details).  If the ControlMechanism has no `objective_mechanism
+` then, when it is added to a `Composition`, MappingProjections are created
+from the items specified in `monitor_for_control ` directly to InputPorts on
+the ControlMechanism (see `ControlMechanism_Monitor_for_Control` for additional details). The number of InputPorts
+created, and how the items listed in `monitor_for_control ` project to them is
+determined by the ControlMechanism's `outcome_input_ports_option `.
+All of the InputPorts that receive Projections from those items, or the `objective_mechanism
+` if the ControlMechanism has one, are listed in its `outcome_input_ports
+` attribute, and their values in the `outcome `
+attribute.  The latter is used as the input to the ControlMechanism's `function ` to
+determine its `control_allocation `.

 .. _ControlMechanism_Function:

@@ -612,7 +612,7 @@
 from psyneulink.core.globals.utilities import ContentAddressableList, convert_to_list, convert_to_np_array, is_iterable

 __all__ = [
-    'CONTROL_ALLOCATION', 'GATING_ALLOCATION', 'ControlMechanism', 'ControlMechanismError', 'ControlMechanismRegistry',
+    'CONTROL_ALLOCATION', 'GATING_ALLOCATION', 'ControlMechanism', 'ControlMechanismError',
 ]

 CONTROL_ALLOCATION = 'control_allocation'
@@ -620,8 +620,6 @@

 MonitoredOutputPortTuple = collections.namedtuple("MonitoredOutputPortTuple", "output_port weight exponent matrix")

-ControlMechanismRegistry = {}
-
 def _is_control_spec(spec):
     from psyneulink.core.components.projections.modulatory.controlprojection import ControlProjection
     if isinstance(spec, tuple):
@@ -857,7 +855,7 @@ class ControlMechanism(ModulatoryMechanism_Base):

     outcome_input_ports_option : SEPARATE, COMBINE, or CONCATENATE
         determines how items specified in `monitor_for_control ` project to
-        the ControlMechanism if not `objective_mechanism ` is specified. If
+        the ControlMechanism if no `objective_mechanism ` is specified.  If
         *SEPARATE* is specified (the default), the `Projection` from each item specified in `monitor_for_control
         ` is assigned its own `InputPort`.  All of the InputPorts are assigned
         to a list in the ControlMechanism's `outcome_input_ports ` attribute.
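[Editorial note -- not part of the patch]  A minimal construction sketch of the behavior described above
(hypothetical example; SEPARATE is the documented default for outcome_input_ports_option, and the Projections
themselves are only created once the ControlMechanism is added to a Composition)::

    import psyneulink as pnl

    a = pnl.TransferMechanism(name='A')
    b = pnl.TransferMechanism(name='B')

    # With no objective_mechanism, each item in monitor_for_control gets its own
    # outcome InputPort under SEPARATE; their values are collected in ctl.outcome
    ctl = pnl.ControlMechanism(monitor_for_control=[a, b],
                               outcome_input_ports_option=pnl.SEPARATE)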
@@ -1300,7 +1298,7 @@ def __init__(self, ) def _validate_params(self, request_set, target_set=None, context=None): - """Validate SYSTEM, monitor_for_control, CONTROL_SIGNALS and GATING_SIGNALS + """Validate monitor_for_control, objective_mechanism, CONTROL_SIGNALS and GATING_SIGNALS """ from psyneulink.core.components.mechanisms.processing.objectivemechanism import ObjectiveMechanism diff --git a/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py index 8aa950f2b4a..c41022d2ef5 100644 --- a/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py @@ -196,11 +196,9 @@ from psyneulink.core.globals.utilities import ContentAddressableList, convert_to_list __all__ = [ - 'GatingMechanism', 'GatingMechanismError', 'GatingMechanismRegistry' + 'GatingMechanism', 'GatingMechanismError', ] -GatingMechanismRegistry = {} - def _is_gating_spec(spec): from psyneulink.core.components.projections.modulatory.gatingprojection import GatingProjection diff --git a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py index 8abba09428e..3070b74719a 100644 --- a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py @@ -3196,7 +3196,8 @@ def evaluate_agent_rep(self, control_allocation, context=None, return_results=Fa context=context ) - def _get_evaluate_output_struct_type(self, ctx): + def _get_evaluate_output_struct_type(self, ctx, *, tags): + assert "evaluate_type_objective" in tags, "Unknown evaluate type: {}".format(tags) # Returns a scalar that is the predicted net_outcome return ctx.float_ty @@ -3210,7 +3211,7 @@ def _gen_llvm_net_outcome_function(self, *, ctx, tags=frozenset()): ctx.get_state_struct_type(self).as_pointer(), self._get_evaluate_alloc_struct_type(ctx).as_pointer(), ctx.float_ty.as_pointer(), - ctx.float_ty.as_pointer()] + self._get_evaluate_output_struct_type(ctx, tags=tags).as_pointer()] builder = ctx.create_llvm_function(args, self, str(self) + "_net_outcome") llvm_func = builder.function @@ -3308,7 +3309,12 @@ def _gen_llvm_evaluate_alloc_range_function(self, *, ctx:pnlvm.LLVMBuilderContex allocation = builder.alloca(evaluate_f.args[2].type.pointee, name="allocation") with pnlvm.helpers.for_loop(builder, start, stop, stop.type(1), "alloc_loop") as (b, idx): - func_out = b.gep(arg_out, [idx]) + if "evaluate_type_objective" in tags: + out_idx = idx + else: + assert False, "Evaluation type not detected in tags, or unknown: {}".format(tags) + + func_out = b.gep(arg_out, [out_idx]) pnlvm.helpers.create_sample(b, allocation, search_space, idx) b.call(evaluate_f, [params, state, allocation, func_out, arg_in, data]) @@ -3321,7 +3327,7 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz args = [ctx.get_param_struct_type(self.agent_rep).as_pointer(), ctx.get_state_struct_type(self.agent_rep).as_pointer(), self._get_evaluate_alloc_struct_type(ctx).as_pointer(), - self._get_evaluate_output_struct_type(ctx).as_pointer(), + self._get_evaluate_output_struct_type(ctx, tags=tags).as_pointer(), ctx.get_input_struct_type(self.agent_rep).as_pointer(), 
 ctx.get_data_struct_type(self.agent_rep).as_pointer()]
@@ -3419,25 +3425,32 @@ def _gen_llvm_evaluate_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags=froz
         builder.store(num_inputs.type.pointee(1), num_inputs)

         # Simulations don't store output
-        comp_output = sim_f.args[4].type(None)
+        if "evaluate_type_objective" in tags:
+            comp_output = sim_f.args[4].type(None)
+        else:
+            assert False, "Evaluation type not detected in tags, or unknown: {}".format(tags)
+
         builder.call(sim_f, [comp_state, comp_params, comp_data, comp_input, comp_output,
                              num_trials, num_inputs])

-        # Extract objective mechanism value
-        idx = self.agent_rep._get_node_index(self.objective_mechanism)
-        # Mechanisms' results are stored in the first substructure
-        objective_os_ptr = builder.gep(comp_data, [ctx.int32_ty(0),
-                                                   ctx.int32_ty(0),
-                                                   ctx.int32_ty(idx)])
-        # Objective mech output shape should be 1 single element 2d array
-        objective_val_ptr = builder.gep(objective_os_ptr,
-                                        [ctx.int32_ty(0), ctx.int32_ty(0),
-                                         ctx.int32_ty(0)], "obj_val_ptr")
-
-        net_outcome_f = ctx.import_llvm_function(self, tags=tags.union({"net_outcome"}))
-        builder.call(net_outcome_f, [controller_params, controller_state,
-                                     allocation_sample, objective_val_ptr,
-                                     arg_out])
+        if "evaluate_type_objective" in tags:
+            # Extract objective mechanism value
+            idx = self.agent_rep._get_node_index(self.objective_mechanism)
+            # Mechanisms' results are stored in the first substructure
+            objective_op_ptr = builder.gep(comp_data, [ctx.int32_ty(0),
+                                                       ctx.int32_ty(0),
+                                                       ctx.int32_ty(idx)])
+            # Objective mech output shape should be 1 single element 2d array
+            objective_val_ptr = builder.gep(objective_op_ptr,
+                                            [ctx.int32_ty(0), ctx.int32_ty(0),
+                                             ctx.int32_ty(0)], "obj_val_ptr")
+
+            net_outcome_f = ctx.import_llvm_function(self, tags=tags.union({"net_outcome"}))
+            builder.call(net_outcome_f, [controller_params, controller_state,
+                                         allocation_sample, objective_val_ptr,
+                                         arg_out])
+        else:
+            assert False, "Evaluation type not detected in tags, or unknown: {}".format(tags)

         builder.ret_void()

diff --git a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py
index 2ae1da4c11b..e8cfca7b532 100644
--- a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py
+++ b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py
@@ -1313,7 +1313,7 @@ def _execute(
         # Get error_signals (from ERROR_SIGNAL InputPorts) and error_matrices relevant for the current execution:
         error_signal_indices = self.error_signal_indices
         error_signal_inputs = variable[error_signal_indices]
-        # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITALIZATION AND UPDTATED AS NECESSARY
+        # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITIALIZATION AND UPDATED AS NECESSARY
         if self.error_matrices is None:
             # KAM 6/28/19 Hack to get the correct shape and contents for initial error matrix in backprop
             if self.function is BackPropagation or isinstance(self.function, BackPropagation):
@@ -1354,7 +1354,6 @@ def _execute(
             ]
         )
         learning_signal, error_signal = super()._execute(variable=function_variable,
-                                                         # MODIFIED CROSS_PATHWAYS 7/22/19 END
                                                          context=context,
                                                          error_matrix=error_matrix,
                                                          runtime_params=runtime_params,
@@ -1368,7 +1367,7 @@ def _execute(
                 and self.initialization_status != ContextFlags.INITIALIZING):
             print("\n{} weight change matrix: \n{}\n".format(self.name, summed_learning_signal))

-        # Durning initialization return zeros so that the 
first "real" trial for Backprop does not start + # During initialization return zeros so that the first "real" trial for Backprop does not start # with the error computed during initialization if (self.in_composition and isinstance(self.function, BackPropagation) and diff --git a/psyneulink/core/components/mechanisms/processing/integratormechanism.py b/psyneulink/core/components/mechanisms/processing/integratormechanism.py index e11dd8b47b4..548d3cd6dfd 100644 --- a/psyneulink/core/components/mechanisms/processing/integratormechanism.py +++ b/psyneulink/core/components/mechanisms/processing/integratormechanism.py @@ -94,7 +94,6 @@ from psyneulink.core.globals.parameters import Parameter, check_user_specified from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set, REPORT_OUTPUT_PREF from psyneulink.core.globals.preferences.preferenceset import PreferenceEntry, PreferenceLevel -from psyneulink.core.globals.utilities import parse_valid_identifier __all__ = [ 'DEFAULT_RATE', 'IntegratorMechanism', 'IntegratorMechanismError' @@ -230,29 +229,3 @@ def _handle_default_variable(self, default_variable=None, size=None, input_ports input_ports=input_ports, function=function, params=params) - - def as_mdf_model(self): - import modeci_mdf.mdf as mdf - - model = super().as_mdf_model() - function_model = [ - f for f in model.functions - if f.id == parse_valid_identifier(self.function.name) - ][0] - assert function_model.id == parse_valid_identifier(self.function.name), (function_model.id, parse_valid_identifier(self.function.name)) - - for _, func_param in function_model.metadata['function_stateful_params'].items(): - model.parameters.append(mdf.Parameter(**func_param)) - - res = self.function._get_mdf_noise_function() - try: - main_noise_function, extra_noise_functions = res - except TypeError: - pass - else: - main_noise_function.id = f'{model.id}_{main_noise_function.id}' - model.functions.append(main_noise_function) - model.functions.extend(extra_noise_functions) - function_model.args['noise'] = main_noise_function.id - - return model diff --git a/psyneulink/core/components/mechanisms/processing/transfermechanism.py b/psyneulink/core/components/mechanisms/processing/transfermechanism.py index ca81117a2c5..3d54250be40 100644 --- a/psyneulink/core/components/mechanisms/processing/transfermechanism.py +++ b/psyneulink/core/components/mechanisms/processing/transfermechanism.py @@ -1531,14 +1531,18 @@ def _clip_result(self, clip, current_input): current_input[maxCapIndices] = np.max(clip) return current_input - def _gen_llvm_is_finished_cond(self, ctx, builder, params, state): - current = pnlvm.helpers.get_state_ptr(builder, self, state, "value") - threshold_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, + def _gen_llvm_is_finished_cond(self, ctx, builder, m_base_params, m_state, m_in): + current = pnlvm.helpers.get_state_ptr(builder, self, m_state, "value") + + m_params, builder = self._gen_llvm_param_ports_for_obj( + self, m_base_params, ctx, builder, m_base_params, m_state, m_in) + threshold_ptr = pnlvm.helpers.get_param_ptr(builder, self, m_params, "termination_threshold") + if isinstance(threshold_ptr.type.pointee, pnlvm.ir.LiteralStructType): # Threshold is not defined, return the old value of finished flag assert len(threshold_ptr.type.pointee) == 0 - is_finished_ptr = pnlvm.helpers.get_state_ptr(builder, self, state, + is_finished_ptr = pnlvm.helpers.get_state_ptr(builder, self, m_state, "is_finished_flag") is_finished_flag = builder.load(is_finished_ptr) return 
 builder.fcmp_ordered("!=", is_finished_flag,
@@ -1564,7 +1568,7 @@ def _gen_llvm_is_finished_cond(self, ctx, builder, params, state):
                 b.store(max_val, cmp_val_ptr)

         elif isinstance(self.termination_measure, Function):
-            prev_val_ptr = pnlvm.helpers.get_state_ptr(builder, self, state, "value", 1)
+            prev_val_ptr = pnlvm.helpers.get_state_ptr(builder, self, m_state, "value", 1)
             prev_val = builder.load(prev_val_ptr)

             expected = np.empty_like([self.defaults.value[0], self.defaults.value[0]])
@@ -1576,8 +1580,8 @@
             self.termination_measure.defaults.variable = expected

             func = ctx.import_llvm_function(self.termination_measure)
-            func_params = pnlvm.helpers.get_param_ptr(builder, self, params, "termination_measure")
-            func_state = pnlvm.helpers.get_state_ptr(builder, self, state, "termination_measure")
+            func_params = pnlvm.helpers.get_param_ptr(builder, self, m_base_params, "termination_measure")
+            func_state = pnlvm.helpers.get_state_ptr(builder, self, m_state, "termination_measure")

             func_in = builder.alloca(func.args[2].type.pointee, name="is_finished_func_in")
             # Populate input
             func_in_current_ptr = builder.gep(func_in, [ctx.int32_ty(0),
@@ -1591,7 +1595,7 @@
             builder.call(func, [func_params, func_state, func_in, cmp_val_ptr])

         elif isinstance(self.termination_measure, TimeScale):
-            ptr = builder.gep(pnlvm.helpers.get_state_ptr(builder, self, state, "num_executions"),
+            ptr = builder.gep(pnlvm.helpers.get_state_ptr(builder, self, m_state, "num_executions"),
                               [ctx.int32_ty(0), ctx.int32_ty(self.termination_measure.value)])
             ptr_val = builder.sitofp(builder.load(ptr), threshold.type)
             pnlvm.helpers.printf(builder, f"TERM MEASURE {self.termination_measure} %d %d\n",ptr_val, threshold)
@@ -1811,47 +1815,21 @@ def _update_default_variable(self, new_default_variable, context=None):
         super()._update_default_variable(new_default_variable, context=context)

     def as_mdf_model(self):
-        import modeci_mdf.mdf as mdf
-
         model = super().as_mdf_model()
         function_model = [
             f for f in model.functions
-            if f.id == parse_valid_identifier(self.function.name)
+            if f.id == f'{model.id}_{parse_valid_identifier(self.function.name)}'
         ][0]
-        assert function_model.id == parse_valid_identifier(self.function.name), (function_model.id, parse_valid_identifier(self.function.name))
+        assert function_model.id == f'{model.id}_{parse_valid_identifier(self.function.name)}', (function_model.id, parse_valid_identifier(self.function.name))

         if self.defaults.integrator_mode:
-            integrator_function_model = self.integrator_function.as_mdf_model()
             primary_input = function_model.args[_get_variable_parameter_name(self.function)]
-            self.integrator_function._set_mdf_arg(
-                integrator_function_model,
-                _get_variable_parameter_name(self.integrator_function),
-                primary_input
-            )
+            integrator_function_id = self.integrator_function._assign_to_mdf_model(model, primary_input)
+
             self.function._set_mdf_arg(
                 function_model,
                 _get_variable_parameter_name(self.function),
-                integrator_function_model.id
+                integrator_function_id
             )

-            for _, func_param in integrator_function_model.metadata['function_stateful_params'].items():
-                model.parameters.append(mdf.Parameter(**func_param))
-
-            model.functions.append(integrator_function_model)
-
-            res = self.integrator_function._get_mdf_noise_function()
-            try:
-                main_noise_function, extra_noise_functions = res
-            except TypeError:
-                pass
-            else:
-                main_noise_function.id = 
f'{model.id}_{main_noise_function.id}' - model.functions.append(main_noise_function) - model.functions.extend(extra_noise_functions) - - self.integrator_function._set_mdf_arg( - integrator_function_model, 'noise', main_noise_function.id - ) - return model diff --git a/psyneulink/core/components/ports/outputport.py b/psyneulink/core/components/ports/outputport.py index 5e1c2bc1eba..7db0e5f05f4 100644 --- a/psyneulink/core/components/ports/outputport.py +++ b/psyneulink/core/components/ports/outputport.py @@ -1288,7 +1288,7 @@ def get_label(self, context=None): def as_mdf_model(self): import modeci_mdf.mdf as mdf - owner_func_name = parse_valid_identifier(self.owner.function.name) + owner_func_name = parse_valid_identifier(f'{self.owner.name}_{self.owner.function.name}') if self._variable_spec == OWNER_VALUE: value = owner_func_name elif isinstance(self._variable_spec, tuple) and self._variable_spec[0] == OWNER_VALUE: @@ -1302,6 +1302,8 @@ def as_mdf_model(self): return mdf.OutputPort( id=parse_valid_identifier(self.name), value=value, + shape=str(self.defaults.value.shape), + type=str(self.defaults.value.dtype), **self._mdf_metadata ) diff --git a/psyneulink/core/components/ports/port.py b/psyneulink/core/components/ports/port.py index d73ab06be0b..5320aabfe4b 100644 --- a/psyneulink/core/components/ports/port.py +++ b/psyneulink/core/components/ports/port.py @@ -779,7 +779,8 @@ def test_multiple_modulatory_projections_with_mech_and_port_Name_specs(self): from psyneulink.core import llvm as pnlvm from psyneulink.core.components.component import ComponentError, DefaultsFlexibility, component_keywords -from psyneulink.core.components.functions.function import Function, get_param_value_for_keyword, is_function_type +from psyneulink.core.components.functions.function import \ + Function, get_param_value_for_keyword, is_function_type, RandomMatrix from psyneulink.core.components.functions.nonstateful.combinationfunctions import CombinationFunction, LinearCombination from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear from psyneulink.core.components.shellclasses import Mechanism, Projection, Port @@ -2953,6 +2954,12 @@ def _parse_port_spec(port_type=None, if isinstance(port_specification, types.FunctionType): port_specification = port_specification() + # RandomMatrix (used for Projection); try to resolve to a matrix + if isinstance(port_specification, RandomMatrix): + rows = len(owner.sender.value) + cols = len(owner.receiver.value) + port_specification = port_specification(rows,cols) + # ModulatorySpecification of some kind if _is_modulatory_spec(port_specification): # If it is a ModulatoryMechanism specification, get its ModulatorySignal class diff --git a/psyneulink/core/components/projections/pathway/mappingprojection.py b/psyneulink/core/components/projections/pathway/mappingprojection.py index 557c1b3dbd4..c6a3871c268 100644 --- a/psyneulink/core/components/projections/pathway/mappingprojection.py +++ b/psyneulink/core/components/projections/pathway/mappingprojection.py @@ -19,7 +19,7 @@ - `MappingProjection_Deferred_Initialization` * `MappingProjection_Structure` - `MappingProjection_Matrix` - - `Mapping_Matrix_ParameterPort` + - `MappingProjection_Matrix_ParameterPort` * `MappingProjection_Execution` - `MappingProjection_Learning` * `MappingProjection_Class_Reference` @@ -98,10 +98,8 @@ ` can be used. .. - * **Random matrix function** (`random_matrix `) -- a convenience function - that provides more flexibility than `RANDOM_CONNECTIVITY_MATRIX`. 
It generates a random matrix sized for a - **sender** and **receiver**, with random numbers drawn from a uniform distribution within a specified **range** and - with a specified **offset**. + * `RandomMatrix` -- assigns a matrix sized appropriately for the **sender** and **receiver**, with random values + drawn from a uniform distribution with a specified **center** and **range**. .. _MappingProjection_Tuple_Specification: @@ -185,14 +183,14 @@ In addition to its `sender `, `receiver `, and `function `, a MappingProjection has the following characteristic attributes: -.. _Mapping_Matrix: +.. _MappingProjection_Matrix: * `matrix ` parameter - used by the MappingProjection's `function ` to carry out a matrix transformation of its input, that is then provided to its `receiver `. It can be specified in a variety of ways, as described `above `. - .. _Mapping_Matrix_Dimensionality + .. _MappingProjection_Matrix_Dimensionality * **Matrix Dimensionality** -- this must match the dimensionality of the MappingProjection's `sender ` and `receiver `. For a standard 2d "weight" matrix (i.e., @@ -204,7 +202,7 @@ `receiver `'s `variable ` (equal to the dimensionality of the matrix minus its sender dimensionality). -.. _Mapping_Matrix_ParameterPort: +.. _MappingProjection_Matrix_ParameterPort: * *MATRIX* `ParameterPort` - this receives any `LearningProjections ` that are assigned to the MappingProjection (see `MappingProjection_Learning_Specification` above), and updates the current value of the @@ -286,6 +284,7 @@ import copy import numpy as np +from typing import Union from psyneulink.core.components.component import parameter_keywords from psyneulink.core.components.functions.stateful.integratorfunctions import AccumulatorIntegrator @@ -304,7 +303,7 @@ from psyneulink.core.globals.preferences.preferenceset import PreferenceLevel __all__ = [ - 'MappingError', 'MappingProjection', + 'MappingError', 'MappingProjection' ] parameter_keywords.update({MAPPING_PROJECTION}) @@ -355,10 +354,11 @@ class MappingProjection(PathwayProjection_Base): the context in which the Projection is used, or its initialization will be `deferred `. - matrix : list, np.ndarray, np.matrix, function or keyword : default DEFAULT_MATRIX + matrix : list, np.ndarray, np.matrix, function, `RandomMatrix` or keyword : default DEFAULT_MATRIX specifies the matrix used by `function ` (default: `LinearCombination`) to transform the `value ` of the `sender ` into a form suitable - for the `variable ` of its `receiver ` `InputPort`. + for the `variable ` of its `receiver ` `InputPort` + (see `MappingProjection_Matrix_Specification` for additional details). Attributes ---------- diff --git a/psyneulink/core/components/projections/projection.py b/psyneulink/core/components/projections/projection.py index d0f8c4c39b2..796cd14c281 100644 --- a/psyneulink/core/components/projections/projection.py +++ b/psyneulink/core/components/projections/projection.py @@ -106,13 +106,13 @@ * **Keyword** -- creates a default instance of the specified type, which can be any of the following: * *MAPPING_PROJECTION* -- if the `sender ` and/or its `receiver - ` cannot be inferred from the context in which this specification occurs, then its - `initialization is deferred ` until both of those have been - determined (e.g., it is used in the specification of a `pathway ` for a `Process`). For - MappingProjections, a `matrix specification ` can also be used to - specify the projection (see **value** below). 
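[Editorial note -- not part of the patch]  Usage sketch for the `RandomMatrix` specification described in the
mappingprojection.py hunk above (hypothetical; the **center**/**range** keywords follow the doc text, and a
RandomMatrix instance is called with (rows, cols) as in the port.py hunk above)::

    import psyneulink as pnl

    sender = pnl.TransferMechanism(size=3)
    receiver = pnl.TransferMechanism(size=2)

    # values drawn from a uniform distribution around `center` with spread `range`
    # (exact convention per the RandomMatrix docs), shaped (3, 2) for this pair
    proj = pnl.MappingProjection(sender=sender,
                                 receiver=receiver,
                                 matrix=pnl.RandomMatrix(center=0.0, range=1.0))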
- COMMENT: + ` cannot be inferred from the context in which this specification occurs, then + its `initialization is deferred ` until both of those have been + determined (e.g., it is used in the specification of a `Pathway` for a `Composition`). For MappingProjections, + a `matrix specification ` can also be used to specify the Projection + (see **value** below). + COMMENT: * *LEARNING_PROJECTION* (or *LEARNING*) -- this can only be used in the specification of a `MappingProjection` (see `tuple ` format). If the `receiver ` of the MappingProjection projects to a `LearningMechanism` or a `ComparatorMechanism` that projects to one, @@ -122,7 +122,9 @@ `. See `LearningMechanism_Learning_Configurations` for additional details. COMMENT + COMMENT: # FIX 5/8/20 [JDC] ELIMINATE SYSTEM: IS IT TRUE THAT CONTROL SIGNALS ARE AUTOMATICALLY CREATED BY COMPOSITIONS? + COMMENT * *CONTROL_PROJECTION* (or *CONTROL*) -- this can be used when specifying a parameter using the `tuple format `, to create a default `ControlProjection` to the `ParameterPort` for that parameter. If the `Component ` to which the parameter belongs is part of a `Composition`, then a @@ -422,7 +424,8 @@ from psyneulink.core.globals.preferences.preferenceset import PreferenceLevel from psyneulink.core.globals.registry import register_category, remove_instance_from_registry from psyneulink.core.globals.socket import ConnectionInfo -from psyneulink.core.globals.utilities import ContentAddressableList, is_matrix, is_numeric, parse_valid_identifier +from psyneulink.core.globals.utilities import \ + ContentAddressableList, is_matrix, is_numeric, parse_valid_identifier __all__ = [ 'Projection_Base', 'projection_keywords', 'PROJECTION_SPEC_KEYWORDS', @@ -1110,7 +1113,7 @@ def as_mdf_model(self, simple_edge_format=True): edge_function = edge_node.function edge_node = edge_node.as_mdf_model() - func_model = [f for f in edge_node.functions if f.id == parse_valid_identifier(edge_function.name)][0] + func_model = [f for f in edge_node.functions if f.id == parse_valid_identifier(f'{edge_node.id}_{edge_function.name}')][0] var_name = _get_variable_parameter_name(edge_function) # 2d variable on LinearMatrix will be incorrect on import back to psyneulink diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index df4aba619eb..a83be7b2531 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -74,10 +74,6 @@ Overview -------- - .. warning:: - As of PsyNeuLink 0.7.5, the API for using Compositions for Learning has been slightly changed! - Please see `this link ` for more details. - Composition is the base class for objects that combine PsyNeuLink `Components ` into an executable model. It defines a common set of attributes possessed, and methods used by all Composition objects. @@ -99,10 +95,10 @@ A Composition can be created by calling the constructor and specifying `Components ` to be added, using either arguments of the constructor and/or methods that allow Components to be added once it has been constructed. -.. hint:: - Although Components (Nodes and Projections) can be added individually to a Composition, it is often easier to use - `Pathways ` to construct a Composition, which in many cases can automaticially construct the - Projections needed without having to specify those explicitly. + .. 
hint:: + Although Components (Nodes and Projections) can be added individually to a Composition, it is often easier + to use `Pathways ` to construct a Composition, which in many cases can automatically + construct the Projections needed without having to specify those explicitly. .. _Composition_Constructor: @@ -228,9 +224,9 @@ ` in the specified pathway; returns the `learning Pathway ` added to the Composition. -.. note:: - Only Mechanisms and Projections added to a Composition using the methods above belong to a Composition, even if - other Mechanism and/or Projections are constructed in the same Python script. + .. note:: + Only Mechanisms and Projections added to a Composition using the methods above belong to a Composition, even if + other Mechanisms and/or Projections are constructed in the same Python script. A `Node ` can be removed from a Composition using the `remove_node ` method. @@ -824,21 +820,22 @@ .. .. _OBJECTIVE_MECHANISM: * *OBJECTIVE_MECHANISM* -- usually a `ComparatorMechanism`, used to `calculate an error signal - ` for the sequence by comparing the value received by the ComparatorMechanism's - *SAMPLE* `InputPort ` (from the `output ` of - the last Processing Mechanism in the `learning Pathway `) with the value received - in the *OBJECTIVE_MECHANISM*'s *TARGET* `InputPort ` (from the *TARGET_MECHANISM* - generated by the method -- see below); this is assigned the `NodeRole` `LEARNING` in the Composition. + ` (i.e., loss) for the sequence by comparing the value received by + the ComparatorMechanism's *SAMPLE* `InputPort ` (from the `output + ` of the last Processing Mechanism in the `learning Pathway + `) with the value received in the *OBJECTIVE_MECHANISM*'s *TARGET* + `InputPort ` (from the *TARGET_MECHANISM* generated by the method + -- see below); this is assigned the `NodeRole` `LEARNING` in the Composition. .. .. _LEARNING_MECHANISMS: * *LEARNING_MECHANISMS* -- a `LearningMechanism` for each MappingProjection in the sequence, each of which calculates the `learning_signal ` used to modify the `matrix - ` parameter for the coresponding MappingProjection, along with a `LearningSignal` and - `LearningProjection` that convey the `learning_signal ` to the + ` parameter for the corresponding MappingProjection, along with a `LearningSignal` + and `LearningProjection` that convey the `learning_signal ` to the MappingProjection's *MATRIX* `ParameterPort`; depending on learning method, additional MappingProjections may be created to and/or from the LearningMechanism -- see - `LearningMechanism_Learning_Configurations` for details); these are assigned the `NodeRole` `LEARNING` in the - Composition. + `LearningMechanism_Learning_Configurations` for details); these are assigned the `NodeRole` `LEARNING` in + the Composition. .. .. _LEARNING_FUNCTION: * *LEARNING_FUNCTION* -- the `LearningFunction` used by each of the `LEARNING_MECHANISMS` in the learning pathway. @@ -951,12 +948,12 @@ Animation of XOR Composition in example above when it is executed by calling its `learn ` method with the argument ``animate={'show_learning':True}``. -.. note:: - Since the `learning components ` are not executed until after the - processing components, the change to the weights of the MappingProjections in a learning pathway are not - made until after it has executed. Thus, as with `execution of a Projection `, those - changes will not be observed in the values of their `matrix ` parameters until after - they are next executed (see `Lazy Evaluation ` for an explanation of "lazy" updating).
+ .. note:: + Since the `learning components ` are not executed until after the + processing components, the changes to the weights of the MappingProjections in a learning pathway are not + made until after it has executed. Thus, as with `execution of a Projection `, those + changes will not be observed in the values of their `matrix ` parameters until after + they are next executed (see `Lazy Evaluation ` for an explanation of "lazy" updating). .. _Composition_Learning_AutodiffComposition: @@ -967,32 +964,50 @@ Change reference to example below to point to Rumelhart Semantic Network Model Script once implemented COMMENT -`AutodiffCompositions ` provide the ability to execute a composition using `PyTorch -`_ (see `example ` in `BasicsAndPrimer`). The -AutodiffComposition constructor provides arguments for configuring the PyTorch implementation in various ways; the -Composition is then built using the same methods (e.g., `add_node`, `add_projection`, `add_linear_processing_pathway`, -etc.) as any other Composition. Note that there is no need to use any `learning methods ` -— AutodiffCompositions automatically creates backpropagation learning pathways ` between -all input - output `Node ` paths. It can be run just as a standard Composition would - using `learn -` for learning mode, and `run ` for test mode. - -The advantage of this approach is that it allows the Composition to be implemented in PsyNeuLink, while exploiting -the efficiency of execution in PyTorch (which can yield as much as three orders of magnitude improvement). However, -a disadvantage is that there are restrictions on the kinds of Compositions that be implemented in this way. -First, because it relies on PyTorch, it is best suited for use with `supervised -learning `, although it can be used for some forms of `unsupervised learning -` that are supported in PyTorch (e.g., `self-organized maps -`_). Second, all of the Components in the Composition are be subject to and must -be with compatible with learning. This means that it cannot be used with a Composition that contains any -`modulatory components ` or that are subject to modulation, whether by -ControlMechanisms within or outside the Composition; this includes a `controller ` -or any LearningMechanisms. An AutodiffComposition can be `nested in a Composition ` -that has such other Components. During learning, none of the internal Components of the AutodiffComposition (e.g., -intermediate layers of a neural network model) are accessible to the other Components of the outer Composition, -(e.g., as sources of information, or for modulation). However, when learning turned off, then the AutodiffComposition -functions like any other, and all of its internal Components accessible to other Components of the outer Composition. -Thus, as long as access to its internal Components is not needed during learning, an `AutodiffComposition` can be -trained, and then used to execute the trained Composition like any other. +`AutodiffCompositions ` provide the ability to execute backpropagation learning much more +efficiently than using a standard Composition. An AutodiffComposition is constructed in the same way, but there +is no need to specify any `learning components ` or to use any `learning methods +` -- in fact, they should *not* be specified (see `warning +`) -- an AutodiffComposition automatically creates backpropagation +`learning pathways ` from all input to all output `Nodes `.
+While learning in an AutodiffComposition is restricted to the `BackPropagation` learning algorithm, its loss +function can be specified (using the **loss_spec** parameter of its constructor), which implements different kinds of +`supervised learning ` (for example, `Loss.MSE` can be used for regression, +or `Loss.CROSS_ENTROPY` for classification). + +The advantage of using an AutodiffComposition is that it allows a model to be implemented in PsyNeuLink, and then +to exploit the acceleration of optimized implementations of learning. This can be achieved by executing the `learn +` method in one of two modes (specified using its **execution_mode** argument): using direct +compilation (**execution_mode** = `ExecutionMode.LLVMRun`); or by automatically translating the model to `PyTorch +`_ for training (**execution_mode** = `ExecutionMode.PyTorch`). The advantage of these modes is +that they can provide up to three orders of magnitude speed-up in training a model. However, there are restrictions +on the kinds of Compositions that can be implemented in this way. The features of the different ways to implement and +execute learning are outlined in the following table, and described in more detail in `AutodiffComposition`. + +.. _Composition_Compilation_Table: + +.. table:: + :align: left + + +-----------------+------------------------+------------------------------------------------+ + | | **Composition** | **AutodiffComposition** | + +-----------------+------------------------+-----------------------+------------------------+ + | | *Python* | *Direct Compilation* | *PyTorch* | + +=================+========================+=======================+========================+ + | execution_mode =| `ExecutionMode.Python` |`ExecutionMode.LLVMRun`|`ExecutionMode.PyTorch` | + +-----------------+------------------------+-----------------------+------------------------+ + | *learn()* | Python interpreted | LLVM compiled | PyTorch compiled | + | | | | | + | *run()* | Python interpreted | LLVM compiled | Python interpreted | + +-----------------+------------------------+-----------------------+------------------------+ + | *Speed:* | slow | fastest | fast | + +-----------------+------------------------+-----------------------+------------------------+ + | |* Backpropagation | * Backpropagation |* Backpropagation | + | |* Reinforcement learning| |* RNN, including LSTM | + | *Supports:* |* Unsupervised learning | |* Unsupervised learning | + | |* modulation, inspection| | | + +-----------------+------------------------+-----------------------+------------------------+ + .. _Composition_Learning_UDF: @@ -1045,15 +1060,15 @@ ` method for each `TRIAL `. The `execute ` method can also be called directly, but this is useful mostly for debugging. -.. hint:: - Once a Composition has been constructed, it can be called directly. If it is called with no arguments, and - has executed previously, the `result ` of the last `TRIAL ` - of execution is returned; otherwise None is returned. If it is called with arguments, then either `run - ` or `learn ` is called, based on the arguments provided: If the - Composition has any `learning_pathways `, and the relevant `TARGET_MECHANISM - `\\s are specified in the `inputs argument `, - then `learn ` is called; otherwise, `run ` is called. In either case, - the return value of the corresponding method is returned. + .. hint:: + Once a Composition has been constructed, it can be called directly. 
If it is called with no arguments, and + has executed previously, the `result ` of the last `TRIAL ` + of execution is returned; otherwise None is returned. If it is called with arguments, then either `run + ` or `learn ` is called, based on the arguments provided: If the + Composition has any `learning_pathways `, and the relevant `TARGET_MECHANISM + `\\s are specified in the `inputs argument `, + then `learn ` is called; otherwise, `run ` is called. In either case, + the return value of the corresponding method is returned. .. _Composition_Execution_Num_Trials: @@ -1137,17 +1152,18 @@ .. _Composition_Input_Internal_Only: -.. note:: - Most Mechanisms have only a single `InputPort`, and thus require only a single input to be specified for - them for each `TRIAL `. However some Mechanisms have more than one InputPort (for example, - a `ComparatorMechanism`), in which case inputs can be specified for some or all of them (see `below - `). Conversely, some Mechanisms have InputPorts that are designated - as `internal_only ` (for example, the `input_port ` for a - `RecurrentTransferMechanism`, if its `has_recurrent_input_port ` - attribute is True), in which case no input should be specified for those input_ports. Similar considerations - extend to the `external_input_ports_of_all_input_nodes ` of a - `nested Composition `, based on the Mechanisms (and/or additionally nested Compositions) that - comprise its set of `INPUT` `Nodes `. + .. note:: + Most Mechanisms have only a single `InputPort`, and thus require only a single input to be specified for + them for each `TRIAL `. However, some Mechanisms have more than one InputPort (for example, + a `ComparatorMechanism`), in which case inputs can be specified for some or all of them (see `below + `). Conversely, some Mechanisms have InputPorts that + are designated as `internal_only ` (for example, the `input_port + ` for a `RecurrentTransferMechanism`, if its `has_recurrent_input_port + ` attribute is True), in which case no input should be + specified for those input_ports. Similar considerations extend to the `external_input_ports_of_all_input_nodes + ` of a `nested Composition `, + based on the Mechanisms (and/or additionally nested Compositions) that comprise its set of `INPUT` `Nodes + `. The factors above determine the format of each entry in an `inputs dictionary `, or the return value of the function or generator used for `programmatic specification ` of @@ -1333,10 +1349,10 @@ the function must return the input values for each `INPUT` `Node ` for a single `TRIAL `. -.. note:: - Default behavior when passing a function as input to a Composition is to execute for only one `TRIAL - `. Remember to set the num_trials argument of Composition.run if you intend to cycle through - multiple `TRIAL `\\s. + .. note:: + Default behavior when passing a function as input to a Composition is to execute for only one `TRIAL + `. Remember to set the num_trials argument of Composition.run if you intend to cycle through + multiple `TRIAL `\\s. Complete input specification: @@ -1370,10 +1386,10 @@ standard input specification. The only difference is that on each execution, the generator must yield the input values for each `INPUT` `Node ` for a single `TRIAL `. -.. note:: - Default behavior when passing a generator is to execute until the generator is exhausted. If the num_trials - argument of Composition.run is set, the Composition will execute EITHER until exhaustion, or until num_trials has - been reached - whichever comes first. + .. 
note:: + Default behavior when passing a generator is to execute until the generator is exhausted. If the num_trials + argument of Composition.run is set, the Composition will execute EITHER until exhaustion, or until num_trials + has been reached - whichever comes first. Complete input specification: @@ -1566,10 +1582,10 @@ def input_function(env, result): within another is added to the one in which it is nested, and all are treated as part of the same cycle. All Nodes within a cycle are assigned the `NodeRole` `CYCLE`. -.. note:: - A `RecurrentTransferMechanism` (and its subclaseses) are treated as single-Node cylces, formed by their - `AutoAssociativeProjection` (since the latter is subclass of MappingProjection and thus not designated as feedback - (see `below `). + .. note:: + A `RecurrentTransferMechanism` (and its subclasses) are treated as single-Node cycles, formed by their + `AutoAssociativeProjection` (since the latter is a subclass of MappingProjection and thus not designated as + feedback (see `below `). .. _Composition_Cycle_Synchronous_Execution: COMMENT: FIGURE HERE COMMENT -.. note:: - Although all the Nodes in a cycle receive either the initial value or previous value of other Nodes in the cycle, - they receive the *current* value of any Nodes that project to them from *outisde* the cycle, and pass their current - value (i.e., the ones computed in the current execution of the cycle) to any Nodes to which they project outside of - the cycle. The former means that any Nodes within the cycle that receive such input are "a step ahead" of those - within the cycle and also, unless the use a `StatefulFunction`, others within the cycle will not see the effects of - that input within or across `TRIALS `. + .. note:: + Although all the Nodes in a cycle receive either the initial value or previous value of other Nodes in the cycle, + they receive the *current* value of any Nodes that project to them from *outside* the cycle, and pass their + current value (i.e., the ones computed in the current execution of the cycle) to any Nodes to which they project + outside of the cycle. The former means that any Nodes within the cycle that receive such input are "a step + ahead" of those within the cycle and also, unless they use a `StatefulFunction`, others within the cycle will + not see the effects of that input within or across `TRIALS `. .. _Composition_Cycle_Initialization: @@ -1607,18 +1623,18 @@ def input_function(env, result): cycle in that run, whereas any Nodes not specified will retain the last `value ` they were assigned in the uprevious call to `run ` or `learn `. -Nodes in a cycle can also be initialized outside of a call to `run ` or `learn ` using -the `initialize ` method. +Nodes in a cycle can also be initialized outside of a call to `run ` or `learn ` +using the `initialize ` method. -.. note:: - If a `Mechanism` belonging to a cycle in a Composition is first executed on its own (i.e., using its own `execute - ` method), the value it is assigned will be used as its initial value when it is executed - within the Composition, unless an `execution_id ` is assigned to the **context** argument - of the Mechanism's `execute ` method when it is called. This is because the first time - a Mechanism is executed in a Composition, its initial value is copied from the `value ` - last assigned in the None context. 
As described aove, this can be overridden by specifying an initial value for - the Mechanism in the **initialize_cycle_values** argument of the call to the Composition's `run ` - or `learn ` methods. + .. note:: + If a `Mechanism` belonging to a cycle in a Composition is first executed on its own (i.e., using its own `execute + ` method), the value it is assigned will be used as its initial value when it is executed + within the Composition, unless an `execution_id ` is assigned to the **context** argument + of the Mechanism's `execute ` method when it is called. This is because the first time + a Mechanism is executed in a Composition, its initial value is copied from the `value ` + last assigned in the None context. As described above, this can be overridden by specifying an initial value for + the Mechanism in the **initialize_cycle_values** argument of the call to the Composition's `run ` + or `learn ` methods. .. _Composition_Feedback: @@ -1880,41 +1896,60 @@ def input_function(env, result): specified and fails, an error is generated indicating the unsupported feature that failed. The compiled modes, in order of their power, are: -.. _Composition_Compilation_LLVM: +.. _Composition_Compilation_Modes: * *True* -- try to use the one that yields the greatesst improvement, progressively reverting to less powerful but more forgiving modes, in the order listed below, for each that fails; - * *LLVMRun* -- compile and run multiple `TRIAL `\\s; if successful, the compiled binary is - semantically equivalent to the execution of the `run ` method using the Python interpreter; + * `ExecutionMode.LLVMRun` -- compile and run multiple `TRIAL `\\s; if successful, + the compiled binary is semantically equivalent to the execution of the `run ` method + using the Python interpreter; + + * `ExecutionMode.LLVMExec` -- compile and run each `TRIAL `, using the Python interpreter + to iterate over them; if successful, the compiled binary for each `TRIAL ` is semantically + equivalent to the execution of the `execute ` method using the Python interpreter; + + * `ExecutionMode.LLVM` -- compile and run each `Node ` of the `Composition` and their `Projections + `, using the Python interpreter to call the Composition's `scheduler `, + execute each Node and iterate over `TRIAL `\\s; note that, in this mode, scheduling + `Conditions ` that rely on Node `Parameters` is not supported; + + * `ExecutionMode.Python` (same as *False*; the default) -- use the Python interpreter to execute the `Composition`. + + * `ExecutionMode.PyTorch` -- used only for `AutodiffComposition`: executes `learn ` + using `PyTorch` and `run ` using the Python interpreter (see `below + ` for additional details). - * *LLVMExec* -- compile and run each `TRIAL `, using the Python interpreter to iterate over them; - if successful, the compiled binary for each `TRIAL ` is semantically equivalent the execution - of the `execute ` method using the Python interpreter; + .. warning:: + For clarity, `ExecutionMode.PyTorch` should only be used when executing an `AutodiffComposition`; + using it with a standard `Composition` is possible, but it will **not** have the expected effect of + executing its `learn ` method using PyTorch. 
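(A sketch of training an `AutodiffComposition` with `ExecutionMode.PyTorch` as described above; assumes PyTorch is installed, and uses the input format documented for `AutodiffComposition.learn`)::

    import psyneulink as pnl

    inp = pnl.TransferMechanism(size=2, name='in')
    hid = pnl.TransferMechanism(size=4, function=pnl.Logistic, name='hid')
    out = pnl.TransferMechanism(size=1, function=pnl.Logistic, name='out')

    adc = pnl.AutodiffComposition(pathways=[[inp, hid, out]], loss_spec=pnl.Loss.MSE)
    adc.learn(inputs={'inputs': {inp: [[0, 0], [0, 1], [1, 0], [1, 1]]},
                      'targets': {out: [[0], [1], [1], [0]]},
                      'epochs': 100},
              execution_mode=pnl.ExecutionMode.PyTorch)
    adc.run(inputs={inp: [[0, 1]]})  # run() executes using the Python interpreter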
- * *LLVM* -- compile and run `Node ` of the `Composition` and their `Projections `, - using the Python interpreter to call the Composition's `scheduler `, execute each Node - and iterate over `TRIAL `\\s; note that, in this mode, scheduling `Conditions ` - that rely on Node `Parameters` is not supported; + * `ExecutionMode.PTXRun` -- compile multiple `TRIAL `\\s for execution on GPU + (see `below ` for additional details). - * *Python* (same as *False*; the default) -- use the Python interpreter to execute the `Composition`. + * `ExecutionMode.PTXExec` -- compile individual `TRIAL `\\s for execution on GPU + (see `below ` for additional details). + +.. _Composition_Compilation_PyTorch: + +*PyTorch support.* When using an `AutodiffComposition`, `ExecutionMode.PyTorch` can be used to execute its +`learn ` method using PyTorch; however, its `run ` method +will execute using the Python interpreter. See `Composition_Learning_AutodiffComposition` for additional details. .. _Composition_Compilation_PTX: *GPU support.* In addition to compilation for CPUs, support is being developed for `CUDA `_ capable `Invidia GPUs -`_. This can be invoked by specifying one -of the following modes in the **execution_mode** argument of a `Composition execution method -`: - - * *PTXExec|PTXRun* -- equivalent to the LLVM counterparts but run in a single thread of a CUDA capable GPU. - -This requires that a working `pycuda package `_ is -`installed `_, and that CUDA execution is explicitly enabled by setting -the ``PNL_LLVM_DEBUG`` environment variable to ``cuda``. At present compilation using these modes runs on a single -GPU thread, and therefore does not produce any performance benefits over running in compiled mode on a CPU; (see -`this `_ for progress extending support of parallization -in compiled modes). +`_. This can be invoked by +specifying either `ExecutionMode.PTXRun` or `ExecutionMode.PTXExec` in the **execution_mode** argument +of a `Composition execution method `, which are equivalent to the LLVM +counterparts but run in a single thread of a CUDA capable GPU. This requires that a working `pycuda package +`_ is `installed `_, and that +CUDA execution is explicitly enabled by setting the ``PNL_LLVM_DEBUG`` environment variable to ``cuda``. At present +compilation using these modes runs on a single GPU thread, and therefore does not produce any performance benefits +over running in compiled mode on a CPU (see `this `_ +for progress extending support of parallelization in compiled modes). .. _Composition_Execution_Results_and_Reporting: @@ -2153,29 +2188,32 @@ def input_function(env, result): AS SOME InputPorts CAN HAVE FUNCTIONS THAT CHANGE THE SHAPE OF variable->value (e.g., Reduce) # Furthermore, Mechanisms can also have InputPorts with a `function ` that changes # the size of its input when generatings its `value `, in which case its `e -.. note:: - A `Node's ` `external_input_values` attribute is always a 2d list in which the index i - element is the variable of the i'th element of the Node's `external_input_ports` attribute. For Mechanisms, - the `external_input_values ` is often the same as its `variable - `. However, some Mechanisms may have InputPorts marked as `internal_only - ` which are excluded from its `external_input_ports ` - and therefore its `external_input_values `, and so should not receive an - input value. 
The same considerations extend to the `external_input_ports ` - and `external_input_values ` of a Composition, based on the Mechanisms and/or - `nested Compositions ` that comprise its `INPUT` Nodes. + .. note:: + A `Node's ` `external_input_values` attribute is always a 2d list in which the index i + element is the variable of the i'th element of the Node's `external_input_ports` attribute. For Mechanisms, + the `external_input_values ` is often the same as its `variable + `. However, some Mechanisms may have InputPorts marked as `internal_only + ` which are excluded from its `external_input_ports + ` and therefore its `external_input_values + `, and so should not receive an + input value. The same considerations extend to the `external_input_ports + ` and `external_input_values ` + of a Composition, based on the Mechanisms and/or `nested Compositions ` + that comprise its `INPUT` Nodes. MODIFIED 2/4/22 NEW: COMMENT -.. note:: - A `Node's ` `external_input_variables` attribute is always a 2d list in which the index i - element is the variable of the i'th element of the Node's `external_input_ports` attribute. For Mechanisms, - the `external_input_variables ` is often the same as its `variable - `. However, some Mechanisms may have InputPorts marked as `internal_only - ` which are excluded from its `external_input_ports ` - and therefore its `external_input_variables `, and so should not receive - an input value. The same considerations extend to the `external_input_ports_of_all_input_nodes - ` and `external_input_variables - ` of a Composition, based on the Mechanisms and/or `nested Compositions - ` that comprise its `INPUT` Nodes. + .. note:: + A `Node's ` `external_input_variables` attribute is always a 2d list in which the index i + element is the variable of the i'th element of the Node's `external_input_ports` attribute. For Mechanisms, + the `external_input_variables ` is often the same as its `variable + `. However, some Mechanisms may have InputPorts marked as `internal_only + ` which are excluded from its `external_input_ports + ` and therefore its `external_input_variables + `, and so should not receive + an input value. The same considerations extend to the `external_input_ports_of_all_input_nodes + ` and `external_input_variables + ` of a Composition, based on the Mechanisms and/or + `nested Compositions ` that comprise its `INPUT` Nodes. If num_trials is not in use, the number of inputs provided determines the number of `TRIAL `\\s in the run. For example, if five inputs are provided for each `INPUT` `Node `, and num_trials is not @@ -2520,9 +2558,9 @@ def input_function(env, result): as indicated by the results of S.run(), the original parameter values were used on trials 0 and 1, the runtime intercept was used on trials 2, 3, and 4, and the runtime slope was used on trial 3. -.. note:: - Runtime parameter values are subject to the same type, value, and shape requirements as the original parameter - value. + .. note:: + Runtime parameter values are subject to the same type, value, and shape requirements as the original parameter + value. .. 
_Composition_Examples_Execution_Context: @@ -2734,12 +2772,12 @@ def input_function(env, result): from psyneulink.core import llvm as pnlvm from psyneulink.core.components.component import Component, ComponentsMeta from psyneulink.core.components.functions.fitfunctions import make_likelihood_function -from psyneulink.core.components.functions.function import is_function_type -from psyneulink.core.components.functions.nonstateful.combinationfunctions import LinearCombination, \ - PredictionErrorDeltaFunction +from psyneulink.core.components.functions.function import is_function_type, RandomMatrix +from psyneulink.core.components.functions.nonstateful.combinationfunctions import \ + LinearCombination, PredictionErrorDeltaFunction from psyneulink.core.components.functions.nonstateful.learningfunctions import \ LearningFunction, Reinforcement, BackPropagation, TDLearning -from psyneulink.core.components.functions.nonstateful.transferfunctions import Identity +from psyneulink.core.components.functions.nonstateful.transferfunctions import Identity, Logistic, SoftMax from psyneulink.core.components.mechanisms.mechanism import Mechanism_Base, MechanismError, MechanismList from psyneulink.core.components.mechanisms.modulatory.control.controlmechanism import ControlMechanism from psyneulink.core.components.mechanisms.modulatory.control.optimizationcontrolmechanism import AGENT_REP, \ @@ -2766,20 +2804,20 @@ def input_function(env, result): from psyneulink.core.components.shellclasses import Mechanism, Projection from psyneulink.core.compositions.report import Report, \ ReportOutput, ReportParams, ReportProgress, ReportSimulations, ReportDevices, \ - EXECUTE_REPORT, CONTROLLER_REPORT, RUN_REPORT, PROGRESS_REPORT + EXECUTE_REPORT, CONTROLLER_REPORT, RUN_REPORT, COMPILED_REPORT, PROGRESS_REPORT from psyneulink.core.compositions.showgraph import ShowGraph, INITIAL_FRAME, SHOW_CIM, EXECUTION_SET, SHOW_CONTROLLER from psyneulink.core.globals.context import Context, ContextFlags, handle_external_context from psyneulink.core.globals.keywords import \ - AFTER, ALL, ALLOW_PROBES, ANY, BEFORE, COMPONENT, COMPOSITION, CONTROL, CONTROL_SIGNAL, CONTROLLER, DEFAULT, \ - DICT, FEEDBACK, FULL, FUNCTION, HARD_CLAMP, IDENTITY_MATRIX, INPUT, INPUT_PORTS, INPUTS, INPUT_CIM_NAME, \ - LEARNED_PROJECTIONS, LEARNING_FUNCTION, LEARNING_MECHANISM, LEARNING_MECHANISMS, LEARNING_PATHWAY, \ - MATRIX, MATRIX_KEYWORD_VALUES, MAYBE, \ - MODEL_SPEC_ID_METADATA, \ + AFTER, ALL, ALLOW_PROBES, ANY, BEFORE, COMPONENT, COMPOSITION, CONTROL, CONTROL_SIGNAL, CONTROLLER, CROSS_ENTROPY, \ + DEFAULT, DICT, FEEDBACK, FULL, FUNCTION, HARD_CLAMP, IDENTITY_MATRIX, \ + INPUT, INPUT_PORTS, INPUTS, INPUT_CIM_NAME, \ + LEARNED_PROJECTIONS, LEARNING_FUNCTION, LEARNING_MECHANISM, LEARNING_MECHANISMS, LEARNING_PATHWAY, Loss, \ + MATRIX, MATRIX_KEYWORD_VALUES, MAYBE, MODEL_SPEC_ID_METADATA, \ MONITOR, MONITOR_FOR_CONTROL, NAME, NESTED, NO_CLAMP, NODE, OBJECTIVE_MECHANISM, ONLINE, OUTCOME, \ OUTPUT, OUTPUT_CIM_NAME, OUTPUT_MECHANISM, OUTPUT_PORTS, OWNER_VALUE, \ PARAMETER, PARAMETER_CIM_NAME, PORT, \ PROCESSING_PATHWAY, PROJECTION, PROJECTION_TYPE, PROJECTION_PARAMS, PULSE_CLAMP, RECEIVER, \ - SAMPLE, SENDER, SHADOW_INPUTS, SOFT_CLAMP, SSE, \ + SAMPLE, SENDER, SHADOW_INPUTS, SOFT_CLAMP, SUM, \ TARGET, TARGET_MECHANISM, TEXT, VARIABLE, WEIGHT, OWNER_MECH from psyneulink.core.globals.log import CompositionLog, LogCondition from psyneulink.core.globals.parameters import Parameter, ParametersBase, check_user_specified @@ -2793,7 +2831,8 @@ def 
input_function(env, result): from psyneulink.core.scheduling.time import Time, TimeScale from psyneulink.library.components.mechanisms.modulatory.learning.autoassociativelearningmechanism import \ AutoAssociativeLearningMechanism -from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import ComparatorMechanism, MSE +from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import \ + ComparatorMechanism, OUTCOME, MSE, SSE, L0, L1, CROSS_ENTROPY from psyneulink.library.components.mechanisms.processing.objective.predictionerrormechanism import \ PredictionErrorMechanism from psyneulink.library.components.mechanisms.processing.transfer.recurrenttransfermechanism import \ @@ -3761,6 +3800,7 @@ def __init__( show_graph_attributes=None, name=None, prefs=None, + termination_processing=None, **param_defaults ): @@ -3812,6 +3852,7 @@ def __init__( self._partially_added_nodes = [] self.disable_learning = disable_learning + self._runtime_learning_rate = None # graph and scheduler status attributes self.graph_consistent = True # Tracks if Composition is in runnable state (no dangling projections (what else?) @@ -3891,6 +3932,9 @@ def __init__( show_graph_attributes = show_graph_attributes or {} self._show_graph = ShowGraph(self, **show_graph_attributes) + if termination_processing is not None: + self.termination_processing = termination_processing + @property def graph_processing(self): """ @@ -3915,10 +3959,12 @@ def scheduler(self): old_scheduler = self._scheduler if old_scheduler is not None: orig_conds = old_scheduler._user_specified_conds + orig_term_conds = old_scheduler._user_specified_termination_conds else: orig_conds = None + orig_term_conds = None - self._scheduler = Scheduler(composition=self, conditions=orig_conds) + self._scheduler = Scheduler(composition=self, conditions=orig_conds, termination_conds=orig_term_conds) self.needs_update_scheduler = False return self._scheduler @@ -5591,6 +5637,7 @@ def add_projection(self, projection=None, sender=None, receiver=None, + default_matrix=None, feedback=False, learning_projection=False, name=None, @@ -5599,7 +5646,9 @@ def add_projection(self, ): """Add **projection** to the Composition. - If **projection** is not specified, create a default `MappingProjection` using **sender** and **receiver**. + If **projection** is not specified, and one does not already exist between **sender** and **receiver**, + create a default `MappingProjection` between them, using **default_matrix** if specified + (otherwise the default for MappingProjection is used). If **projection** is specified: @@ -5648,15 +5697,23 @@ def add_projection(self, Arguments --------- + projection : Projection, list, array, matrix, RandomMatrix, MATRIX_KEYWORD + the projection to add. + sender : Mechanism, Composition, or OutputPort the sender of **projection**. - projection : Projection, matrix - the projection to add. - receiver : Mechanism, Composition, or InputPort the receiver of **projection**. + default_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use in creating the default Projection if none is specified in **projection** + and one does not already exist between **sender** and **receiver**; overrides the default for + `MappingProjection` (see `MappingProjection_Matrix_Specification` for details of specification). 
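(A minimal sketch of the default-matrix behavior documented above; the argument name follows the **default_matrix** parameter in the signature added by this hunk, and is illustrative only)::

    import numpy as np
    import psyneulink as pnl

    A = pnl.TransferMechanism(size=2, name='A')
    B = pnl.TransferMechanism(size=2, name='B')
    comp = pnl.Composition()
    comp.add_node(A)
    comp.add_node(B)
    # no projection is specified, so a default MappingProjection is created
    # between A and B using the matrix given in default_matrix
    comp.add_projection(sender=A, receiver=B, default_matrix=np.full((2, 2), 0.1))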
+ feedback : bool or FEEDBACK : False if False, the Projection is *never* designated as a `feedback Projection `, even if that may have been the default behavior (e.g., @@ -5733,6 +5790,7 @@ def add_projection(self, return self.add_projection(proj_spec, sender=projection.sender, receiver=projection.receiver) # Create Projection if it doesn't exist + projection = projection or default_matrix try: # Note: this does NOT initialize the Projection if it is in deferred_init projection = self._instantiate_projection_from_spec(projection, name) @@ -5918,7 +5976,7 @@ def _instantiate_projection_from_spec(self, projection, sender=None, receiver=No proj_type = projection.pop(PROJECTION_TYPE, None) or MappingProjection params = projection.pop(PROJECTION_PARAMS, None) projection = MappingProjection(params=params) - elif isinstance(projection, (np.ndarray, np.matrix, list)): + elif isinstance(projection, (np.ndarray, np.matrix, list, RandomMatrix)): return MappingProjection(matrix=projection, sender=sender, receiver=receiver, name=name) elif isinstance(projection, str): if projection in MATRIX_KEYWORD_VALUES: @@ -5930,8 +5988,8 @@ def _instantiate_projection_from_spec(self, projection, sender=None, receiver=No elif projection is None: return MappingProjection(sender=sender, receiver=receiver, name=name) elif not isinstance(projection, Projection): - raise CompositionError("Invalid projection ({}) specified for {}. Must be a Projection." - .format(projection, self.name)) + raise CompositionError(f"Invalid projection ({projection}) specified for {self.name}. " + f"Must be a Projection.") return projection def _parse_sender_spec(self, projection, sender): @@ -6186,7 +6244,7 @@ def _check_for_projection_assignments(self, context=None): projections.append(node) continue - if context.source != ContextFlags.INITIALIZING and context.string != 'IGNORE_NO_AFFERENTS_WARNING': + if context.flags & ContextFlags.PREPARING and context.string != 'IGNORE_NO_AFFERENTS_WARNING': for input_port in node.input_ports: if input_port.require_projection_in_composition \ and not input_port.path_afferents and not input_port.default_input: @@ -6384,7 +6442,15 @@ def _parse_pathway(self, pathway, name, pathway_arg_str): if isinstance(pathway, Pathway): # Give precedence to name specified in call to add_linear_processing_pathway pathway_name = name or pathway.name + # MODIFIED 11/3/22 OLD: pathway = pathway.pathway + # # MODIFIED 11/3/22 NEW: + # # If Pathway has default_projection_matrix, use tuple_spec to specify for handling below + # if pathway.default_projection_matrix: + # pathway = (pathway.pathway, pathway. 
default_projection_matrix) + # else: + # pathway = pathway.pathway + # MODIFIED 11/3/22 END else: pathway_name = name @@ -6557,20 +6623,47 @@ def identify_pway_type_and_parse_tuple_prn(pway, tuple_or_dict_str): pway_type = PROCESSING_PATHWAY if isinstance(pway, set): pway = [pway] - return pway_type, pway, None + return pway_type, pway, None, None elif isinstance(pway, tuple): + # FIX: ADD SUPPORT FOR 3-ITEM TUPLE AND SPECIFICATION OF DEFAULT MATRIX HERE 10/29/22 + # # MODIFIED 10/29/22 OLD: + # pway_type = LEARNING_PATHWAY + # if len(pway)!=2: + # raise CompositionError(f"A tuple specified in the {pathways_arg_str}" + # f" has more than two items: {pway}") + # pway, learning_function = pway + # if not (_is_node_spec(pway) or isinstance(pway, (list, Pathway))): + # raise CompositionError(f"The 1st item in {tuple_or_dict_str} specified in the " + # f" {pathways_arg_str} must be a node or a list: {pway}") + # if not (isinstance(learning_function, type) and issubclass(learning_function, LearningFunction)): + # raise CompositionError(f"The 2nd item in {tuple_or_dict_str} specified in the " + # f"{pathways_arg_str} must be a LearningFunction: {learning_function}") + # return pway_type, pway, learning_function + # MODIFIED 10/29/22 NEW: + if len(pway) not in {2,3}: raise CompositionError(f"A tuple specified in the {pathways_arg_str}" - f" has more than two items: {pway}") - pway, learning_function = pway - if not (_is_node_spec(pway) or isinstance(pway, (list, Pathway))): - raise CompositionError(f"The 1st item in {tuple_or_dict_str} specified in the " - f" {pathways_arg_str} must be a node or a list: {pway}") - if not (isinstance(learning_function, type) and issubclass(learning_function, LearningFunction)): - raise CompositionError(f"The 2nd item in {tuple_or_dict_str} specified in the " - f"{pathways_arg_str} must be a LearningFunction: {learning_function}") - return pway_type, pway, learning_function + f" must have either two or three items: {pway}") + pway_type = PROCESSING_PATHWAY + matrix_item = None + learning_function_item = None + for i, item in enumerate(pway): + # Ensure that first item is a Pathway spec + if i==0: + if not (_is_node_spec(item) or isinstance(item, (list, Pathway))): + raise CompositionError(f"The 1st item in {tuple_or_dict_str} specified in the " + f" {pathways_arg_str} must be a node or a list: {pway}") + pathway_item = item + elif (isinstance(item, type) and issubclass(item, LearningFunction)): + pway_type = LEARNING_PATHWAY + learning_function_item = item + elif is_matrix(item): + matrix_item = item + else: + raise CompositionError(f"Bad spec for one of the items in {tuple_or_dict_str} " + f"specified for the {pathways_arg_str}: {item}; " + f"its item(s) must be a matrix specification and/or a LearningFunction") + return pway_type, pathway_item, matrix_item, learning_function_item + # MODIFIED 10/29/22 END else: assert False, f"PROGRAM ERROR: arg to identify_pway_type_and_parse_tuple_prn in {self.name}" \ f"is not a Node, list or tuple: {pway}" @@ -6583,13 +6676,22 @@ def identify_pway_type_and_parse_tuple_prn(pway, tuple_or_dict_str): pway_name = None if isinstance(pathway, Pathway): pway_name = pathway.name + # MODIFIED 11/3/22 OLD: pathway = pathway.pathway + # # MODIFIED 11/3/22 NEW: + # # If Pathway has default_projection_matrix, use tuple_spec to specify for later handling + # if pathway.default_projection_matrix: + # pathway = (pathway.pathway, pathway.default_projection_matrix) + # else: + # pathway = 
pathway.pathway + # MODIFIED 11/3/22 END if _is_node_spec(pathway) or isinstance(pathway, (list, set, tuple)): if isinstance(pathway, set): bad_entries = [repr(entry) for entry in pathway if not _is_node_spec(entry)] if bad_entries: raise CompositionError(f"{bad_entry_error_msg}{','.join(bad_entries)}") - pway_type, pway, pway_learning_fct = identify_pway_type_and_parse_tuple_prn(pathway, f"a tuple") + pway_type, pway, matrix, pway_learning_fct = identify_pway_type_and_parse_tuple_prn(pathway, + f"the tuple") elif isinstance(pathway, dict): if len(pathway)!=1: raise CompositionError(f"A dict specified in the {pathways_arg_str} " @@ -6599,8 +6701,8 @@ def identify_pway_type_and_parse_tuple_prn(pway, tuple_or_dict_str): raise CompositionError(f"The key in a dict specified in the {pathways_arg_str} must be a str " f"(to be used as its name): {pway_name}.") if _is_node_spec(pway) or isinstance(pway, (list, tuple, Pathway)): - pway_type, pway, pway_learning_fct = identify_pway_type_and_parse_tuple_prn(pway, - f"the value of a dict") + pway_type, pway, matrix, pway_learning_fct = identify_pway_type_and_parse_tuple_prn(pway, + f"the value of a dict") else: raise CompositionError(f"The value in a dict specified in the {pathways_arg_str} must be " f"a pathway specification (Node, list or tuple): {pway}.") @@ -6610,11 +6712,13 @@ def identify_pway_type_and_parse_tuple_prn(pway, tuple_or_dict_str): context.source = ContextFlags.METHOD if pway_type == PROCESSING_PATHWAY: new_pathway = self.add_linear_processing_pathway(pathway=pway, + default_projection_matrix=matrix, name=pway_name, context=context) elif pway_type == LEARNING_PATHWAY: new_pathway = self.add_linear_learning_pathway(pathway=pway, learning_function=pway_learning_fct, + default_projection_matrix=matrix, name=pway_name, context=context) else: @@ -6625,7 +6729,7 @@ def identify_pway_type_and_parse_tuple_prn(pway, tuple_or_dict_str): return added_pathways @handle_external_context() - def add_linear_processing_pathway(self, pathway, name:str=None, context=None, *args): + def add_linear_processing_pathway(self, pathway, default_projection_matrix=None, name:str=None, context=None, *args): """Add sequence of `Nodes ` with optionally intercolated `Projections `. .. _Composition_Add_Linear_Processing_Pathway: @@ -6654,6 +6758,11 @@ def add_linear_processing_pathway(self, pathway, name:str=None, context=None, *a learning-related specifications are ignored, as are its `name ` if the **name** argument of add_linear_processing_pathway is specified. + default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`) + if a default projection is not otherwise specified (see `Pathway_Specification_Projections`; + see `MappingProjection_Matrix_Specification` for details of specification) + name : str species the name used for `Pathway`; supercedes `name ` of `Pathway` object if it is has one. 
@@ -6764,13 +6873,16 @@ def _get_node_specs_for_entry(entry, include_roles=None, exclude_roles=None): if all(_is_node_spec(entry) for entry in current_entry): receivers = _get_node_specs_for_entry(current_entry, NodeRole.INPUT, NodeRole.TARGET) # The preceding entry is a Node or set of them: - # - if it is a set, list or array, leave as is, else place in set for consistnecy of processin below + # - if it is a set, list or array, leave as is, else place in set for consistency of processing below preceding_entry = (pathway[c - 1] if isinstance(pathway[c - 1], (set, list, np.ndarray)) else {pathway[c - 1]}) if all(_is_node_spec(sender) for sender in preceding_entry): senders = _get_node_specs_for_entry(preceding_entry, NodeRole.OUTPUT) - projs = {self.add_projection(sender=s, receiver=r, allow_duplicates=False) + projs = {self.add_projection(sender=s, receiver=r, + default_matrix=default_projection_matrix, + allow_duplicates=False) for r in receivers for s in senders} + # MODIFIED 11/2/22 END if all(projs): projs = projs.pop() if len(projs) == 1 else projs projections.append(projs) @@ -6835,8 +6947,10 @@ def _get_node_specs_for_entry(entry, include_roles=None, exclude_roles=None): # Unpack if tuple spec, and assign feedback (with False as default) default_proj_spec, feedback = (spec if isinstance(spec, tuple) else (spec, False)) # Get all specs other than default_proj_spec - # proj_specs = [proj_spec for proj_spec in all_proj_specs if proj_spec not in possible_default_proj_spec] proj_specs = [proj_spec for proj_spec in all_proj_specs if proj_spec is not spec] + # If default matrix is not specified within the pathway, use default_projection_matrix if specified + if default_proj_spec is None: + default_proj_spec = default_projection_matrix # Collect all Projection specifications (to add to Composition at end) proj_set = [] @@ -7040,6 +7154,7 @@ def handle_duplicates(sender, receiver): pathway = Pathway(pathway=explicit_pathway, composition=self, + # default_projection_matrix=default_projection_matrix, name=pathway_name, context=context) self.pathways.append(pathway) @@ -7060,6 +7175,7 @@ def add_linear_learning_pathway(self, learning_rate:tc.any(int,float)=0.05, error_function=LinearCombination, learning_update:tc.any(bool, tc.enum(ONLINE, AFTER))=AFTER, + default_projection_matrix=None, name:str=None, context=None): """Implement learning pathway (including necessary `learning components `. @@ -7106,7 +7222,7 @@ def add_linear_learning_pathway(self, specifies the type of `LearningFunction` to use for the `LearningMechanism` constructued for each `MappingProjection` in the **pathway**. - loss_function : MSE or SSE : default None + loss_function : Loss : default Loss.MSE specifies the loss function used if `BackPropagation` is specified as the **learning_function** (see `add_backpropagation_learning_pathway `). @@ -7130,6 +7246,11 @@ def add_linear_learning_pathway(self, ` in the pathway, and its `LearningProjection` (see `learning_enabled ` for meaning of values). + default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`) + if a default projection is not otherwise specified (see `Pathway_Specification_Projections`; + see `MappingProjection_Matrix_Specification` for details of specification) + name : str : species the name used for `Pathway`; supercedes `name ` of `Pathway` object if it is has one. 
@@ -7167,7 +7288,7 @@ def add_linear_learning_pathway(self, # Preserve existing NodeRole.OUTPUT status for any non-learning-related nodes for node in self.get_nodes_by_role(NodeRole.OUTPUT): - if not any(node for node in [pathway for pathway in self.pathways + if not any(n for n in [pathway for pathway in self.pathways if PathwayRole.LEARNING in pathway.roles]): self._add_required_node_role(node, NodeRole.OUTPUT, context) @@ -7179,6 +7300,7 @@ def add_linear_learning_pathway(self, loss_function, learning_update, name=pathway_name, + default_projection_matrix=default_projection_matrix, context=context) # If BackPropagation is not specified, then the learning pathway is "one-layered" @@ -7187,7 +7309,7 @@ def add_linear_learning_pathway(self, # Processing Components try: input_source, output_source, learned_projection = \ - self._unpack_processing_components_of_learning_pathway(pathway) + self._unpack_processing_components_of_learning_pathway(pathway, default_projection_matrix) except CompositionError as e: raise CompositionError(e.error_value.replace('this method', f'{learning_function.__name__} {LearningFunction.__name__}')) @@ -7196,6 +7318,7 @@ def add_linear_learning_pathway(self, self._add_required_node_role(output_source, NodeRole.OUTPUT, context) learning_pathway = self.add_linear_processing_pathway(pathway=[input_source, learned_projection, output_source], + default_projection_matrix=default_projection_matrix, name=pathway_name, # context=context) context=context) @@ -7251,6 +7374,7 @@ def add_reinforcement_learning_pathway(self, learning_rate=0.05, error_function=None, learning_update:tc.any(bool, tc.enum(ONLINE, AFTER))=ONLINE, + default_projection_matrix=None, name:str=None): """Convenience method that calls `add_linear_learning_pathway` with **learning_function**=`Reinforcement` @@ -7262,6 +7386,11 @@ def add_reinforcement_learning_pathway(self, specified, that projection is the learned projection. Otherwise, a default MappingProjection is automatically generated for the learned projection. + default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`) + if a default projection is not otherwise specified (see `Pathway_Specification_Projections`; + see `MappingProjection_Matrix_Specification` for details of specification) + learning_rate : float : default 0.05 specifies the `learning_rate ` used for the `ReinforcementLearning` function of the `LearningMechanism` in the **pathway**. @@ -7292,6 +7421,7 @@ def add_reinforcement_learning_pathway(self, learning_function=Reinforcement, error_function=error_function, learning_update=learning_update, + default_projection_matrix=default_projection_matrix, name=name) def add_td_learning_pathway(self, @@ -7299,6 +7429,7 @@ def add_td_learning_pathway(self, learning_rate=0.05, error_function=None, learning_update:tc.any(bool, tc.enum(ONLINE, AFTER))=ONLINE, + default_projection_matrix=None, name:str=None): """Convenience method that calls `add_linear_learning_pathway` with **learning_function**=`TDLearning` @@ -7325,6 +7456,11 @@ def add_td_learning_pathway(self, ` in the pathway, and its `LearningProjection` (see `learning_enabled ` for meaning of values). 
+ default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`) + if a default projection is not otherwise specified (see `Pathway_Specification_Projections`; + see `MappingProjection_Matrix_Specification` for details of specification) + name : str : species the name used for `Pathway`; supercedes `name ` of `Pathway` object if it is has one. @@ -7339,14 +7475,16 @@ def add_td_learning_pathway(self, learning_rate=learning_rate, learning_function=TDLearning, learning_update=learning_update, + default_projection_matrix=default_projection_matrix, name=name) def add_backpropagation_learning_pathway(self, pathway, learning_rate=0.05, error_function=None, - loss_function:tc.enum(MSE,SSE)=MSE, + loss_function:tc.enum(Loss)=Loss.MSE, learning_update:tc.optional(tc.any(bool, tc.enum(ONLINE, AFTER)))=AFTER, + default_projection_matrix=None, name:str=None): """Convenience method that calls `add_linear_learning_pathway` with **learning_function**=`Backpropagation` @@ -7365,9 +7503,8 @@ def add_backpropagation_learning_pathway(self, specifies the function assigned to `ComparatorMechanism` used to compute the error from the target and the output (`value `) of the `TARGET` (last) Mechanism in the **pathway**). - loss_function : MSE or SSE : default MSE - specifies the loss function used in computing the error term; - MSE = mean squared error, and SSE = sum squared error. + loss_function : Loss : default Loss.MSE + specifies the loss function used in computing the error term; see `Loss` for values. learning_update : Optional[bool|ONLINE|AFTER] : default AFTER specifies when the `matrix ` parameters of the `learned_projections` are updated @@ -7376,6 +7513,11 @@ def add_backpropagation_learning_pathway(self, ` in the pathway, and their `LearningProjections ` (see `learning_enabled ` for meaning of values). + default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None + specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`) + if a default projection is not otherwise specified (see `Pathway_Specification_Projections`; + see `MappingProjection_Matrix_Specification` for details of specification) + name : str : species the name used for `Pathway`; supercedes `name ` of `Pathway` object if it is has one. 
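(A sketch of a backpropagation learning pathway using the `Loss` enum and the **default_projection_matrix** argument added by this changeset; illustrative only)::

    import psyneulink as pnl

    A = pnl.TransferMechanism(size=2, name='A')
    B = pnl.TransferMechanism(size=2, function=pnl.Logistic, name='B')
    comp = pnl.Composition()
    comp.add_backpropagation_learning_pathway(
        pathway=[A, B],
        loss_function=pnl.Loss.SSE,  # see Loss for other values (e.g., Loss.CROSS_ENTROPY)
        default_projection_matrix=pnl.RANDOM_CONNECTIVITY_MATRIX)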
@@ -7392,6 +7534,7 @@ def add_backpropagation_learning_pathway(self, loss_function=loss_function, error_function=error_function, learning_update=learning_update, + default_projection_matrix=default_projection_matrix, name=name) # NOTES: @@ -7411,13 +7554,15 @@ def add_backpropagation_learning_pathway(self, # Move creation of LearningProjections and learning-related projections (MappingProjections) here # ?Do add_nodes and add_projections here or in Learning-type-specific creation methods - def _unpack_processing_components_of_learning_pathway(self, processing_pathway): + def _unpack_processing_components_of_learning_pathway(self, processing_pathway, default_projection_matrix=None): # unpack processing components and add to composition if len(processing_pathway) == 3 and isinstance(processing_pathway[1], MappingProjection): input_source, learned_projection, output_source = processing_pathway elif len(processing_pathway) == 2: input_source, output_source = processing_pathway - learned_projection = MappingProjection(sender=input_source, receiver=output_source) + learned_projection = MappingProjection(sender=input_source, + receiver=output_source, + matrix=default_projection_matrix) else: raise CompositionError(f"Too many Nodes in learning pathway: {processing_pathway}. " f"Only single-layer learning is supported by this method. " @@ -7616,8 +7761,9 @@ def _create_backpropagation_learning_pathway(self, pathway, learning_rate=0.05, error_function=None, - loss_function=MSE, + loss_function=Loss.MSE, learning_update=AFTER, + default_projection_matrix=None, name=None, context=None): @@ -7625,13 +7771,16 @@ def _create_backpropagation_learning_pathway(self, if not error_function: error_function = LinearCombination() if not loss_function: - loss_function = MSE + loss_function = Loss.MSE # Add pathway to graph and get its full specification (includes all ProcessingMechanisms and MappingProjections) # Pass ContextFlags.INITIALIZING so that it can be passed on to _analyze_graph() and then # _check_for_projection_assignments() in order to ignore checks for require_projection_in_composition context.string = f"'pathway' arg for add_backpropagation_learning_pathway method of {self.name}" - learning_pathway = self.add_linear_processing_pathway(pathway, name, context) + learning_pathway = self.add_linear_processing_pathway(pathway=pathway, + name=name, + default_projection_matrix=default_projection_matrix, + context=context) processing_pathway = learning_pathway.pathway path_length = len(processing_pathway) @@ -7827,7 +7976,8 @@ def bfs(start): pathways = [p for n in self.get_nodes_by_role(NodeRole.INPUT) if NodeRole.TARGET not in self.get_roles_by_node(n) for p in bfs(n)] for pathway in pathways: - self.add_backpropagation_learning_pathway(pathway=pathway) + self.add_backpropagation_learning_pathway(pathway=pathway, + loss_function=self.loss_spec) def _create_terminal_backprop_learning_components(self, input_source, @@ -7851,17 +8001,47 @@ def _create_terminal_backprop_learning_components(self, # Otherwise, create new ones except KeyError: + # # MODIFIED 11/12/22 OLD: + # target_mechanism = ProcessingMechanism(name='Target', + # default_variable=output_source.output_ports[0].value) + # objective_mechanism = ComparatorMechanism(name='Comparator', + # target={NAME: TARGET, + # VARIABLE: target_mechanism.output_ports[0].value}, + # sample={NAME: SAMPLE, + # VARIABLE: output_source.output_ports[0].value, + # WEIGHT: -1}, + # function=error_function, + # output_ports=[OUTCOME, Loss.MSE], + # ) + # # MODIFIED 
11/12/22 NEW:
            target_mechanism = ProcessingMechanism(name='Target',
                                                   default_variable=output_source.output_ports[0].value)
+            # Base for objective_mechanism output_ports:
+            sample={NAME: SAMPLE,
+                    VARIABLE: output_source.output_ports[0].value}
+            target={NAME: TARGET,
+                    VARIABLE: target_mechanism.output_ports[0].value}
+            if loss_function == Loss.CROSS_ENTROPY:
+                # error function: use LinearCombination to implement cross_entropy: (SoftMax(sample), SoftMax(target))
+                sample.update({FUNCTION: SoftMax(output=ALL)})
+                target.update({FUNCTION: SoftMax(output=ALL)})
+                error_function = LinearCombination(operation=CROSS_ENTROPY)
+                output_ports = [OUTCOME, SUM]
+            else:
+                # error_function: use default for Comparator (LinearCombination) => target - sample
+                sample.update({WEIGHT: -1})
+                if loss_function == Loss.L0:
+                    output_ports = [OUTCOME, SUM]
+                elif loss_function == Loss.SSE:
+                    output_ports = [OUTCOME, SSE]
+                else:
+                    output_ports = [OUTCOME, MSE]
            objective_mechanism = ComparatorMechanism(name='Comparator',
-                                                      target={NAME: TARGET,
-                                                              VARIABLE: target_mechanism.output_ports[0].value},
-                                                      sample={NAME: SAMPLE,
-                                                              VARIABLE: output_source.output_ports[0].value,
-                                                              WEIGHT: -1},
+                                                      sample=sample,
+                                                      target=target,
                                                      function=error_function,
-                                                      output_ports=[OUTCOME, MSE],
-                                                      )
+                                                      output_ports=output_ports)
+            # MODIFIED 11/12/22 END

        learning_function = BackPropagation(default_variable=[input_source.output_ports[0].value,
                                                              output_source.output_ports[0].value,

@@ -7934,6 +8114,7 @@ def _create_non_terminal_backprop_learning_components(self,
        learning_function = BackPropagation(default_variable=[input_source.output_ports[0].value,
                                                              output_source.output_ports[0].value,
                                                              error_signal_template[0]],
+                                            loss_function=None,
                                            activation_derivative_fct=output_source.function.derivative,
                                            learning_rate=learning_rate)

@@ -8649,9 +8830,6 @@ def evaluate(
            buffer_animate_state = self._animate

        # Run Composition in "SIMULATION" context
-        # # MODIFIED 3/28/22 NEW:
-        # context.source = ContextFlags.COMPOSITION
-        # MODIFIED 3/28/22 END
        context.add_flag(ContextFlags.SIMULATION_MODE)
        context.remove_flag(ContextFlags.CONTROL)

@@ -9191,9 +9369,9 @@ def _instantiate_input_dict(self, inputs):
                # Get number of trials of input specified for Port
                num_trials = len(port_input)
                if max_num_trials != 1 and num_trials not in {1, max_num_trials}:
-                    raise CompositionError(f"Number of trials of input specified for {port.full_name} of {node.name} "
-                                           f"({num_trials}) is different from the number ({max_num_trials}) "
-                                           f"specified for one or more others.")
+                    raise CompositionError(f"Number of trials of input specified for {port.full_name} of "
+                                           f"{INPUT_Node.name} ({num_trials}) is different from the "
+                                           f"number ({max_num_trials}) specified for one or more others.")
                max_num_trials = max(num_trials, max_num_trials)

        # Construct node_input_shape based on max_num_trials across all input_ports for mech

@@ -9583,12 +9761,9 @@ def run(
            specifies function to call after each `TRIAL ` is executed.

        termination_processing : Condition : default None
-            specifies
-            `termination Conditions `
-            to be used for the current `RUN `. To change
-            these conditions for all future runs, use
-            `Composition.termination_processing` (or
-            `Scheduler.termination_conds`)
+            specifies `termination Conditions `
+            to be used for the current `RUN `. To change these conditions for all future runs,
+            use `Composition.termination_processing` (or `Scheduler.termination_conds`)

        skip_analyze_graph : bool : default False
            setting to True suppresses call to _analyze_graph()

@@ -9619,14 +9794,15 @@ def run(
            details and `ReportDevices` for options.

        animate : dict or bool : default False
-            specifies use of the `show_graph`show_graph ` method to generate
-            a gif movie showing the sequence of Components executed in a run (see `example
-            `). A dict can be specified containing
-            options to pass to the `show_graph ` method; each key must be a legal
-            argument for the `show_graph ` method, and its value a specification for that
-            argument. The entries listed below can also be included in the dict to specify parameters of the
-            animation. If the **animate** argument is specified simply as `True`, defaults are used for all
-            arguments of `show_graph ` and the options below:
+            specifies use of the `show_graph ` method to generate a gif movie showing the
+            sequence of Components executed in a run (see `example `).
+            A dict can be specified containing options to pass to the `show_graph ` method in
+            order to customize the display of the graph in the animation. Each key of the dict must be a legal argument
+            for the `show_graph ` method, and its value a specification for that argument.
+            The entries listed below can also be included in the dict to specify parameters of the animation.
+            If the **animate** argument is specified simply as `True`, defaults are used for all arguments
+            of `show_graph ` and the options below. See `Animation `
+            for additional information.

            * *UNIT*: *EXECUTION_SET* or *COMPONENT* (default=\\ *EXECUTION_SET*\\ ) -- specifies which Components
              to treat as active in each call to `show_graph() `. *COMPONENT* generates an

@@ -9651,7 +9827,7 @@ def run(
            * *MOVIE_NAME*: str (default=\\ `name ` + 'movie') -- specifies the name to be used
              for the movie file; it is automatically appended with '.gif'.
-
+
            * *SAVE_IMAGES*: bool (default=\\ `False`\\ ) -- specifies whether to save each of the images used to
              construct the animation in separate gif files, in addition to the file containing the animation.

@@ -9663,7 +9839,7 @@ def run(
            `projection ` in the Composition, if it is not already set.

          .. note::
-             as when setting the `log_condition ` directly, a value of `True` will
+             As when setting the `log_condition ` directly, a value of `True` will
             correspond to the `EXECUTION` `LogCondition `.

        scheduler : Scheduler : default None

@@ -9675,17 +9851,16 @@ def run(
            for the current and all future runs of the Composition. See `Scheduler_Execution`

-        execution_mode : enum.Enum[Auto|LLVM|LLVMexec|LLVMRun|Python|PTXExec|PTXRun] : default Python
+        execution_mode : bool or ExecutionMode : default ExecutionMode.Python
            specifies whether to run using the Python interpreter or a `compiled mode `.
-            False is the same as ``Python``; True tries LLVM compilation modes, in order of power, progressively
+            False uses the Python interpreter; True tries LLVM compilation modes, in order of power, progressively
            reverting to less powerful modes (in the order of the options listed), and to Python if no compilation
-            mode succeeds (see `Composition_Compilation` for explanation of modes). PTX modes are used for
-            CUDA compilation.
+            mode succeeds; see `ExecutionMode` for other options, and `Compilation Modes
+            ` for a more detailed explanation of their operation.
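A hedged sketch of the revised **execution_mode** usage, reusing the hypothetical ``comp`` and ``A`` from the earlier sketch (the inputs are arbitrary)::

    import psyneulink as pnl

    # passing an explicit ExecutionMode member is clearer than True, which
    # falls back through the compiled modes in order of power
    results = comp.run(inputs={A: [[1.0, 0.0]]},
                       execution_mode=pnl.ExecutionMode.LLVMRun)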
        default_absolute_time_unit : ``pint.Quantity`` : ``1ms``
-            if not otherwise determined by any absolute **conditions**,
-            specifies the absolute duration of a `TIME_STEP`. See
-            `Scheduler.default_absolute_time_unit`
+            if not otherwise determined by any absolute **conditions**, specifies the absolute duration
+            of a `TIME_STEP`. See `Scheduler.default_absolute_time_unit`

        context : `execution_id ` : default `default_execution_id`
            context in which the `Composition` will be executed; set to self.default_execution_id if unspecified.

@@ -9722,7 +9897,7 @@ def run(
        2d list of values of OUTPUT Nodes at end of last trial : list[list]
            each item in the list is the `output_values ` for an `OUTPUT` `Node
            ` of the Composition, listed in the order listed in `get_nodes_by_role
-            `\(`NodeRole.OUTPUT `).
+            `\\ (`NodeRole.OUTPUT `).

        .. note::
            The `results ` attribute of the Composition contains a list of the outputs for all

@@ -9735,6 +9910,12 @@ def run(
        execution_phase = context.execution_phase
        context.execution_phase = ContextFlags.PREPARING

+        # IMPLEMENTATION NOTE: Restore if ExecutionMode.PyTorch can be distinguished from ExecutionMode.Python
+        # from psyneulink.library.compositions.autodiffcomposition import AutodiffComposition
+        # if execution_mode is pnlvm.ExecutionMode.PyTorch and not isinstance(self, AutodiffComposition):
+        #     warnings.warn(f"{pnlvm.ExecutionMode.PyTorch.name} is being used to execute {self.name} "
+        #                   f"but it is not an AutodiffComposition, therefore PyTorch will not be used.")
+
        for node in self.nodes:
            num_execs = node.parameters.num_executions._get(context)
            if num_execs is None:

@@ -9784,6 +9965,8 @@ def run(
        # Set animation attributes
        if animate is True:
            animate = {}
+        if animate is None:
+            animate = False
        self._animate = animate
        if self._animate is not False:
            self._set_up_animation(context)

@@ -9854,8 +10037,8 @@ def run(
                if not valid_reset_type:
                    raise CompositionError(
                        f"{reset_stateful_functions_when} is not a valid specification for reset_integrator_nodes_when "
-                        f"of {self.name}. reset_integrator_nodes_when must be a Condition or a dict comprised of "
-                        f" {Node: Condition} pairs.")
+                        f"of {self.name}. reset_integrator_nodes_when must be a Condition or a dict comprised of "
+                        + "{Node: Condition} pairs.")

                self._reset_stateful_functions_when_cache = {}

@@ -9880,33 +10063,60 @@ def run(
                # There's no mode to run simulations.
                # Simulations are run as part of the controller node wrapper.
                assert not is_simulation
-                try:
-                    comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset()
-                    _comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags)
-                    if execution_mode & pnlvm.ExecutionMode.LLVM:
-                        results += _comp_ex.run(inputs, num_trials, num_inputs_sets)
-                    elif execution_mode & pnlvm.ExecutionMode.PTX:
-                        results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets)
-                    else:
-                        assert False, "Unknown execution mode: {}".format(execution_mode)
-                    # Update the parameter for results
-                    self.parameters.results._set(results, context)
+                with Report(self,
+                            report_output=report_output,
+                            report_params=report_params,
+                            report_progress=report_progress,
+                            report_simulations=report_simulations,
+                            report_to_devices=report_to_devices,
+                            context=context) as report:

-                    if self._is_learning(context):
-                        # copies back matrix to pnl from param struct (after learning)
-                        _comp_ex._copy_params_to_pnl(context=context)
+                    report_num = report.start_report(self, num_trials, context)

-                    self._propagate_most_recent_context(context)
-                    # KAM added the [-1] index after changing Composition run()
-                    # behavior to return only last trial of run (11/7/18)
-                    return results[-1]
+                    report(self,
+                           [COMPILED_REPORT, PROGRESS_REPORT],
+                           report_num=report_num,
+                           scheduler=scheduler,
+                           content='run_start',
+                           context=context)

-                except Exception as e:
-                    if not execution_mode & pnlvm.ExecutionMode._Fallback:
-                        raise e from None
+                    try:
+                        comp_ex_tags = frozenset({"learning"}) if self._is_learning(context) else frozenset()
+                        _comp_ex = pnlvm.CompExecution.get(self, context, additional_tags=comp_ex_tags)
+                        if execution_mode & pnlvm.ExecutionMode.LLVM:
+                            results += _comp_ex.run(inputs, num_trials, num_inputs_sets)
+                        elif execution_mode & pnlvm.ExecutionMode.PTX:
+                            results += _comp_ex.cuda_run(inputs, num_trials, num_inputs_sets)
+                        else:
+                            assert False, "Unknown execution mode: {}".format(execution_mode)

-                    warnings.warn("Failed to run `{}': {}".format(self.name, str(e)))
+                        # Update the parameter for results
+                        self.parameters.results._set(results, context)
+
+                        if self._is_learning(context):
+                            # copies back matrix to pnl from param struct (after learning)
+                            _comp_ex._copy_params_to_pnl(context=context)
+
+                        self._propagate_most_recent_context(context)
+
+                        report(self,
+                               [COMPILED_REPORT, PROGRESS_REPORT],
+                               report_num=report_num,
+                               scheduler=scheduler,
+                               content='run_end',
+                               context=context,
+                               node=self)
+
+                        # KAM added the [-1] index after changing Composition run()
+                        # behavior to return only last trial of run (11/7/18)
+                        return results[-1]
+
+                    except Exception as e:
+                        if not execution_mode & pnlvm.ExecutionMode._Fallback:
+                            raise e from None
+
+                        warnings.warn("Failed to run `{}': {}".format(self.name, str(e)))

        # Reset gym forager environment for the current trial
        if self.env:

@@ -10071,6 +10281,7 @@ def learn(
            targets: tc.optional(dict) = None,
            num_trials: tc.optional(int) = None,
            epochs: int = 1,
+            learning_rate = None,
            minibatch_size: int = 1,
            patience: tc.optional(int) = None,
            min_delta: int = 0,

@@ -10119,8 +10330,14 @@ def learn(
        epochs : int (default=1)
            specifies the number of training epochs (that is, repetitions of the batched input set) to run with

+        learning_rate : float : default None
+            specifies the learning_rate used by all `learning pathways `
+            when the Composition's learn method is called. This overrides the `learning_rate` specified
+            for any individual Pathways at construction, but only applies for the current execution of
+            the learn method.
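A sketch of the per-call override described above, again using the hypothetical ``comp``, ``A``, and ``B`` (input and target values are arbitrary)::

    # learning_rate applies only to this call to learn(), overriding any
    # rate specified for individual Pathways at construction
    comp.learn(inputs={A: [[0.0, 1.0], [1.0, 0.0]]},
               targets={B: [[1.0], [1.0]]},
               epochs=10,
               learning_rate=0.1)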
+ minibatch_size : int (default=1) - specifies the size of the minibatches to use. The input trials will be batched and ran, after which + specifies the size of the minibatches to use. The input trials will be batched and run, after which learning mechanisms with learning mode TRIAL will update weights randomize_minibatch: bool (default=False) @@ -10166,7 +10383,7 @@ def learn( `; see `Report_Simulations` for additional details. report_to_devices : list(ReportDevices) : default ReportDevices.CONSOLE - specifies where output and progress should be reported; see `Report_To_Devices` for additional + specifies where output and progress should be reported; see `Report_To_Device` for additional details and `ReportDevices` for options. Returns @@ -10175,8 +10392,14 @@ def learn( the results of the final epoch of training : list """ from psyneulink.library.compositions import CompositionRunner + from psyneulink.library.compositions import AutodiffComposition runner = CompositionRunner(self) + if ((execution_mode is not pnlvm.ExecutionMode.Python) + and not isinstance(self, AutodiffComposition)): + raise CompositionError(f"ExecutionMode.{execution_mode.name} cannot be used in the learn() method of " + f"'{self.name}' because it is not an {AutodiffComposition.componentCategory}") + context.add_flag(ContextFlags.LEARNING_MODE) # # MODIFIED 3/28/22 NEW: # context.source = ContextFlags.COMPOSITION @@ -10208,6 +10431,7 @@ def learn( targets=targets, num_trials=num_trials, epochs=epochs, + learning_rate=learning_rate, minibatch_size=minibatch_size, patience=patience, min_delta=min_delta, @@ -10253,11 +10477,14 @@ def _execute_controller(self, context=context, node=self.controller) - if self.controller and not execution_mode: + if self.controller and not execution_mode & pnlvm.ExecutionMode.COMPILED: context.execution_phase = ContextFlags.PROCESSING self.controller.execute(context=context) - if execution_mode: + else: + assert (execution_mode == pnlvm.ExecutionMode.LLVM + or execution_mode & pnlvm.ExecutionMode._Fallback),\ + f"PROGRAM ERROR: Unrecognized compiled execution_mode: '{execution_mode}'." 
_comp_ex.execute_node(self.controller, context=context) context.remove_flag(ContextFlags.PROCESSING) @@ -10481,7 +10708,7 @@ def execute( # Run compiled execution (if compiled execution was requested # NOTE: This should be as high up as possible, # but still after the context has been initialized - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: is_simulation = (context is not None and ContextFlags.SIMULATION_MODE in context.runmode) # Try running in Exec mode first @@ -10595,7 +10822,7 @@ def execute( inputs = self._validate_execution_inputs(inputs) build_CIM_input = self._build_variable_for_input_CIM(inputs) - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: _comp_ex.execute_node(self.input_CIM, inputs, context) # FIXME: parameter_CIM should be executed here as well, # but node execution of nested compositions with @@ -10800,7 +11027,7 @@ def execute( # This ensures that the order in which nodes execute does not affect the results of this timestep frozen_values = {} new_values = {} - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: _comp_ex.freeze_values() # PURGE LEARNING IF NOT ENABLED ---------------------------------------------------------------- @@ -10882,7 +11109,7 @@ def execute( context.replace_flag(ContextFlags.PROCESSING, ContextFlags.LEARNING) # Execute Mechanism - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: _comp_ex.execute_node(node, context=context) else: if node is not self.controller: @@ -10905,7 +11132,7 @@ def execute( elif isinstance(node, Composition): - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: # Invoking nested composition passes data via Python # structures. Make sure all sources get their latest values srcs = (proj.sender.owner for proj in node.input_CIM.afferents) @@ -10944,7 +11171,7 @@ def execute( execution_mode=nested_execution_mode) # Get output info from nested execution - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: # Update result in binary data structure _comp_ex.insert_node_output(node, ret) @@ -11086,7 +11313,7 @@ def execute( context=context) # Extract result here - if execution_mode: + if execution_mode & pnlvm.ExecutionMode.COMPILED: _comp_ex.freeze_values() _comp_ex.execute_node(self.output_CIM, context=context) report(self, @@ -11103,7 +11330,7 @@ def execute( return self.get_output_values(context) def __call__(self, *args, **kwargs): - """Execute Composition of any args are provided; else simply return results of last execution. + """Execute Composition if any args are provided; else simply return results of last execution. This allows Composition, after it has been constructed, to be run simply by calling it directly. """ if not args and not kwargs: diff --git a/psyneulink/core/compositions/pathway.py b/psyneulink/core/compositions/pathway.py index da18203bc84..978b2fc00bb 100644 --- a/psyneulink/core/compositions/pathway.py +++ b/psyneulink/core/compositions/pathway.py @@ -115,20 +115,22 @@ *Pathway Projection Specifications* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Where no Projections are specified between entries in the list, default Projections (using a `FULL_CONNECTIVITY_MATRIX`; -see `MappingProjection_Matrix_Specification`) are created from each Node in the first entry, as the sender(s), -to each Node in the second, as receiver(s) (described further `below `). 
Projections between
-Nodes in the two entries can also be specified explicitly, by intercolating a Projection or set of Projections between
-the two entries in the list. If the sender and receiver are both a single Mechanism, then a single `MappingProjection`
-can be `specified` between them. The same applies if the sender is a `Composition` with
-a single `OUTPUT ` Node and/or the receiver is a `Composition` with a single `INPUT `
-Node. If either is a set of Nodes, or is a `nested Composition ` with more than one `INPUT
-` or `OUTPUT ` Node, respectively, then a collection of Projections can be specified
-between any or all pairs of the Nodes in the set(s) and/or nested Composition(s), using either a set or list of
-Projections (order of specification does not matter whether a set or a list is used). The collection can contain
-`MappingProjections ` between a specific pairs of Nodes and/or a single default specification
-(either a `matrix ` specification or a MappingProjection without any `sender
-` or `receiver ` specified).
+Where no Projections are specified between entries in the list, default Projections are created (using a
+`FULL_CONNECTIVITY_MATRIX`, or the Pathway's `default_projection ` if specified)
+from each Node in the first entry, as the sender(s), to each Node in the second, as receiver(s) (described further
+`below `). Projections between Nodes in the two entries can also be specified explicitly, by
+intercalating a Projection or set of Projections between the two entries in the list. If the sender and receiver are
+both a single Mechanism, then a single `MappingProjection` can be `specified` between
+them. The same applies if the sender is a `Composition` with a single `OUTPUT ` Node and/or the
+receiver is a `Composition` with a single `INPUT ` Node. If either is a set of Nodes, or is a
+`nested Composition ` with more than one `INPUT ` or `OUTPUT `
+Node, respectively, then a collection of Projections can be specified between any or all pairs of the Nodes in the
+set(s) and/or nested Composition(s), using either a set or list of Projections (order of specification does not matter
+whether a set or a list is used). The collection can contain `MappingProjections ` between specific
+pairs of Nodes and/or a single default specification (either a `matrix ` specification or a
+MappingProjection without any `sender ` or `receiver `
+specified; see `MappingProjection_Matrix_Specification`).

.. _Pathway_Projection_Matrix_Note:

@@ -231,9 +233,14 @@
  `. Sets can also be used in a list specification (see above; and see
  `add_linear_processing_pathway ` for additional details).
..
-  * **2-item tuple**: (Pathway, `LearningFunction`) -- used to specify a `learning Pathway
-    `; the 1st item must be one of the forms of Pathway specification
-    described above, and the 2nd item must be a subclass of `LearningFunction`.
+  .. _Pathway_Specification_Tuple:
+
+  * **2 or 3-item tuple**: (Pathway, `LearningFunction` and/or matrix specification) --
+    used to specify a `learning Pathway ` and/or a matrix to use for any unspecified
+    Projections (overrides default matrix for `MappingProjection`) if a default projection is not otherwise specified
+    (see `Pathway_Specification_Projections`). The 1st item of the tuple must be one of the forms of Pathway
+    specification described above. The other items must be a subclass of `LearningFunction` and/or a `matrix
+    specification `.
..
@@ -245,7 +252,7 @@
  ` method), they can be specified in a list, in which each item of the list can be any of the forms
  above, or one of the following:

-  * **Pathway** object or constructor: Pathway(pathway=\ `Pathway specification `,...).
+  * **Pathway** object or constructor: Pathway(pathway=\\ `Pathway specification `,...).
  ..
  .. _Pathway_Specification_Dictionary:
  * **dict**: {name : Pathway} -- in which **name** is a str and **Pathway** is a Pathway object or constructor,

@@ -293,6 +300,9 @@
  those `NodeRoles ` is assigned to a corresponding attribute on the Pathway. If the Pathway does not belong
  to a Composition (i.e., it is a `template `), then these attributes return None.

+* `default_projection_matrix ` - matrix used as default for Projections that are
+  not explicitly specified and for which no default is otherwise specified (see `Pathway_Specification_Projections`).
+
* `learning_function ` - the LearningFunction assigned to the Pathway if it is a
  `learning Pathway ` that belongs to a Composition; otherwise it is None.

@@ -322,6 +332,7 @@
 from psyneulink.core.globals.context import ContextFlags, handle_external_context
 from psyneulink.core.globals.keywords import \
     ANY, CONTEXT, FEEDBACK, MAYBE, NODE, LEARNING_FUNCTION, OBJECTIVE_MECHANISM, PROJECTION, TARGET_MECHANISM
+from psyneulink.core.globals.utilities import is_matrix
 from psyneulink.core.globals.registry import register_category

 __all__ = [

@@ -416,10 +427,17 @@ class PathwayRole(Enum):
 class Pathway(object):
     """
-    Pathway(      \
-        pathway,  \
-        name=None \
     )
+    Pathway(      \
+        pathway,  \
+        name=None \
     )
+    COMMENT:
+    Pathway(                       \
+        pathway,                   \
+        default_projection_matrix, \
+        name=None                  \
+    )
+    COMMENT

    A sequence of `Nodes ` and `Projections ` in a `Composition`, or a template
    for one that can be assigned to one or more Compositions.

@@ -431,8 +449,15 @@ class Pathway(object):
        specifies list of `Nodes ` and intercalated `Projections ` to be created for the Pathway.

+    COMMENT:
+    default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None
+        specifies matrix to use for any unspecified Projections (overrides default matrix for `MappingProjection`)
+        if a default projection is not otherwise specified (see `Pathway_Specification_Projections`;
+        see `MappingProjection_Matrix_Specification` for details of specification)
+    COMMENT
+
    name : str : default see `name `
-        specifies the name of the Pathway; see `name ` for additional information.
+        specifies the name of the Pathway (see `name ` for additional information).

    Attributes
    ----------

@@ -452,6 +477,13 @@ class Pathway(object):
        Returns an empty list if it belongs to a Composition but no `PathwayRoles ` have been assigned,
        and None if the Pathway is a `template ` (i.e., not assigned to a Composition).

+    default_projection_matrix : list, array, function, `RandomMatrix` or MATRIX_KEYWORD : default None
+        matrix used for any unspecified Projections (overrides default matrix for `MappingProjection`)
+        if a default projection is not otherwise specified (see `Pathway_Specification_Projections`;
+        see `MappingProjection_Matrix_Specification` for details of specification). A default_projection_matrix
+        is specified by including it in a tuple specification in the **pathways** argument of the Pathway's
+        constructor (see `2 or 3-item tuple `).
+
    learning_function : `LearningFunction` or None
        `LearningFunction` used by `LearningMechanism(s) ` associated with Pathway if
        it is a `learning pathway `.
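To illustrate the tuple form documented above, a hedged sketch of constructing a Composition with a 3-item pathway tuple (the Mechanism names are hypothetical; ``BackPropagation`` serves as the LearningFunction and ``RANDOM_CONNECTIVITY_MATRIX`` as the default matrix specification)::

    import psyneulink as pnl

    A = pnl.ProcessingMechanism(name='A')
    B = pnl.ProcessingMechanism(name='B')
    C = pnl.ProcessingMechanism(name='C')

    # ([nodes...], LearningFunction, matrix spec) -- per the description
    # above, the 2nd and 3rd items may be given singly or together
    comp = pnl.Composition(
        pathways=[([A, B, C], pnl.BackPropagation, pnl.RANDOM_CONNECTIVITY_MATRIX)])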
@@ -500,6 +532,7 @@ class Pathway(object): def __init__( self, pathway:list, + # default_projection_matrix=None, name=None, **kwargs ): @@ -547,6 +580,16 @@ def __init__( self.learning_components = None self.roles = None + # Assign default_projection_matrix attribute + # self.default_projection_matrix = default_projection_matrix + # Parse from tuple spec in **pathway** arg: + self.default_projection_matrix = None + if isinstance(self.pathway, tuple): + for item in self.pathway: + if is_matrix(item): + self.default_projection_matrix = item + assert True + def _assign_roles(self, composition): """Assign `PathwayRoles ` to Pathway based `NodeRoles ` assigned to its `Nodes ` by the **composition** to which it belongs. diff --git a/psyneulink/core/compositions/report.py b/psyneulink/core/compositions/report.py index c4d42de68c2..7f4476e8709 100644 --- a/psyneulink/core/compositions/report.py +++ b/psyneulink/core/compositions/report.py @@ -79,7 +79,6 @@ .. _Report_To_Device: - Devices ------- @@ -168,7 +167,7 @@ 'ReportLearning', 'CONSOLE', 'CONTROLLED', 'LOGGED', 'MODULATED', 'MONITORED', 'RECORD', 'DIVERT', 'PNL_VIEW', ] -# Used to specify self._run_mode +# Used to specify self._run_mode (specified as roots, that are conjugated in messages) DEFAULT = 'Execut' LEARNING = 'Train' SIMULATION = 'Simulat' @@ -182,6 +181,7 @@ CONTROLLER_REPORT = 'controller_report' LEARN_REPORT = 'learn_report' RUN_REPORT = 'run_report' +COMPILED_REPORT = 'compiled_report' PROGRESS_REPORT = 'progress_report' trial_sep_str = f'====================' @@ -925,7 +925,8 @@ def __call__(self, simulation_mode = context.runmode & ContextFlags.SIMULATION_MODE # Call report_output - if any(r in {EXECUTE_REPORT, MECHANISM_REPORT, CONTROLLER_REPORT, LEARN_REPORT, RUN_REPORT} for r in reports): + if any(r in {EXECUTE_REPORT, MECHANISM_REPORT, CONTROLLER_REPORT, + LEARN_REPORT, RUN_REPORT, COMPILED_REPORT} for r in reports): if content in {'run_start', 'execute_start'}: if simulation_mode: @@ -950,9 +951,7 @@ def __call__(self, if PROGRESS_REPORT in reports: # Just pass args relevant to report_progress() progress_args = {k:v for k,v in kwargs.items() if k in {'caller', 'report_num', 'content', 'context'}} - self.report_progress(caller, **progress_args) - - assert True + self.report_progress(caller, reports, **progress_args) def report_output(self, caller, @@ -1690,6 +1689,7 @@ def is_logged(node, name): def report_progress(self, caller, + reports, report_num:int, content:str, context:Context) -> None: @@ -1737,6 +1737,29 @@ def report_progress(self, # Update progress report if self._use_rich: + + # MODIFIED 11/12/22 NEW: + if content == 'run_start' and COMPILED_REPORT in reports and self._run_mode == LEARNING: + composition_type_name = list(self.output_reports.keys())[0].componentCategory + composition_name = list(self.output_reports.keys())[0].name + message = f"{composition_type_name} '{composition_name}' training " \ + f"{output_report.num_trials} trials using PyTorch..." 
+            # Method 1: (direct print)
+            # self._rich_progress.console.print(message)
+            output_report.run_report = message
+            # Method 2: (ensure it is also recorded if that is enabled)
+            if self._record_reports:
+                with self._recording_console.capture() as capture:
+                    self._recording_console.print(output_report.run_report)
+                self._recorded_reports += capture.get()
+            # Method 3: (shadow standard processing)
+            # self._rich_progress.update(output_report.rich_task_id,
+            #                            total=1,
+            #                            description=message,
+            #                            advance=1,
+            #                            refresh=True)
+            # MODIFIED 11/12/22 END
+
        if content == 'run_end':
            # If it is the end of a run, and num_trials was not known (and so rich progress was "indeterminate"),
            # close out progress bar

@@ -1776,12 +1799,18 @@ def report_progress(self,
                                      advance=1,
                                      refresh=True)

-        # FIX: NEED COMMENT ON WHY THIS IS NEEDED:
-        #      WITHOUT THIS, WHEN RECORD_DEVICES IS ACTIVE,
-        #      EITHER PROGRESS REPORT IS MISSING OR IT IS DUPLICATED ABOVE THE OUTPUT REPORT
+        # This is needed so that, when _record_reports is active, progress report is generated and not duplicated
        if self._report_output is ReportOutput.OFF or self._report_progress is ReportProgress.OFF:
-            self._print_and_record_reports(PROGRESS_REPORT)
-            assert True
+            # # MODIFIED 11/12/22 OLD:
+            # self._print_and_record_reports(PROGRESS_REPORT)
+            # # MODIFIED 11/12/22 NEW: FIX: MAY BE A PROBLEM IF RUN_REPORT IS ALSO EVER PASSED IN HERE
+            # self._print_and_record_reports(reports)
+            # # MODIFIED 11/12/22 NEWER
+            if COMPILED_REPORT in reports:
+                self._print_and_record_reports(COMPILED_REPORT, output_report)
+            else:
+                self._print_and_record_reports(PROGRESS_REPORT)
+            # MODIFIED 11/12/22 END

    def _print_and_record_reports(self, report_type:str, output_report:OutputReport=None) -> None:
        """

@@ -1800,14 +1829,18 @@ def _print_and_record_reports(self, report_type:str, output_report:OutputReport=
            OutputReport for caller[_run_mode] in self.output_reports to use for reporting.
""" - # Print and record output report as they are created (progress reports are printed by _rich_progress.console) - if report_type in {EXECUTE_REPORT, RUN_REPORT}: + # Print and record output reports as they are created (progress reports are printed by _rich_progress.console) + # MODIFIED 11/12/22 OLD: + if report_type in {EXECUTE_REPORT, RUN_REPORT, COMPILED_REPORT}: + # # MODIFIED 11/12/22 NEW: + # if any(report in {EXECUTE_REPORT, RUN_REPORT, COMPILED_REPORT} for report in report_type): + # MODIFIED 11/12/22 END # Print output reports as they are created if self._rich_console or self._rich_divert: - if output_report.trial_report and report_type is EXECUTE_REPORT: + if output_report.trial_report and report_type == EXECUTE_REPORT: self._rich_progress.console.print(output_report.trial_report) self._rich_progress.console.print('') - elif output_report.run_report and report_type is RUN_REPORT: + elif output_report.run_report and report_type in {RUN_REPORT, COMPILED_REPORT}: self._rich_progress.console.print(output_report.run_report) self._rich_progress.console.print('') # Record output reports as they are created @@ -1818,14 +1851,13 @@ def _print_and_record_reports(self, report_type:str, output_report:OutputReport= with self._recording_console.capture() as capture: if report_type == EXECUTE_REPORT: self._recording_console.print(output_report.trial_report) - elif report_type == RUN_REPORT: + elif report_type in {RUN_REPORT or COMPILED_REPORT}: self._recording_console.print(output_report.run_report) self._recorded_reports += capture.get() # Record progress after execution of outer-most Composition if (self._report_output is not ReportOutput.OFF or (len(self._execution_stack)<=1 and not self._simulating)): - if report_type is PROGRESS_REPORT: # add progress report to any already recorded for output progress_reports = '\n'.join([t.description for t in self._rich_progress.tasks]) diff --git a/psyneulink/core/compositions/showgraph.py b/psyneulink/core/compositions/showgraph.py index a4fbe76eb7c..b2d54019bc9 100644 --- a/psyneulink/core/compositions/showgraph.py +++ b/psyneulink/core/compositions/showgraph.py @@ -107,6 +107,22 @@ - `CONTROLLER` : purple - `LEARNING` : orange +.. _ShowGraph_Animation: + +*Animation* +----------- + +An animation can be generated of the execution of a Composition by using the **animate** argument of the Composition's +`run ` method. The animation show a graphical display of the Composition, with each of its +the Components highlighted in the sequence that they are executed. The **animate** can be passed a dict containing +any of the options described above to customize the display, as well as several others used to customize the animation +(see **animate** argument under `run `). + + .. note:: + At present, animation of the Components within a `nested Composition ` is not supported; + the box surrounding the nested Composition is highlighted when it is executed, followed by the next Component(s) + to execute. + .. 
_ShowGraph_Examples_Visualization: *Examples* @@ -828,7 +844,8 @@ def show_graph(self, show_dimensions, show_projection_labels, show_projections_not_in_composition, - nested_args) + nested_args, + context) # Add cim Components to graph if show_cim if show_cim: @@ -907,7 +924,8 @@ def _assign_processing_components(self, show_dimensions, show_projection_labels, show_projections_not_in_composition, - nested_args): + nested_args, + context): """Assign nodes to graph""" from psyneulink.core.compositions.composition import Composition, NodeRole @@ -922,23 +940,40 @@ def _assign_processing_components(self, COMP_HIERARCHY:comp_hierarchy, # 'composition': rcvr, ENCLOSING_COMP:composition, - NESTING_LEVEL:nesting_level + 1}) + NESTING_LEVEL:nesting_level + 1, + }) # Get subgraph for nested Composition + # # MODIFIED 10/29/22 NEW: FIX: HACK SO NESTED COMPOSITIONS DON'T CRASH ANIMATION (THOUGH STILL NOT SHOWN) + if hasattr(composition, '_animate') and composition._animate is not False: + rcvr._animate = composition._animate + rcvr._set_up_animation(context) + rcvr._animate_num_trials = composition._animate_num_trials + 1 + # MODIFIED 10/29/22 END nested_comp_graph = rcvr._show_graph.show_graph(**nested_args) nested_comp_graph.name = "cluster_" + rcvr.name rcvr_label = rcvr.name + + # Assign color to nested_comp, including highlighting if it is the active_item # if rcvr in composition.get_nodes_by_role(NodeRole.FEEDBACK_SENDER): # nested_comp_graph.attr(color=feedback_color) + # nested_comp_attributes = {"label":rcvr_label} + nested_comp_attributes = {} if rcvr in composition.get_nodes_by_role(NodeRole.INPUT) and \ rcvr in composition.get_nodes_by_role(NodeRole.OUTPUT): - nested_comp_graph.attr(color=self.input_and_output_color) + nested_comp_attributes.update({"color": self.input_and_output_color}) elif rcvr in composition.get_nodes_by_role(NodeRole.INPUT): - nested_comp_graph.attr(color=self.input_color) + nested_comp_attributes.update({"color": self.input_color}) elif rcvr in composition.get_nodes_by_role(NodeRole.PROBE): - nested_comp_graph.attr(color=self.probe_color) + nested_comp_attributes.update({"color": self.probe_color}) elif rcvr in composition.get_nodes_by_role(NodeRole.OUTPUT): - nested_comp_graph.attr(color=self.output_color) + nested_comp_attributes.update({"color": self.output_color}) + if rcvr in active_items: + if self.active_color != BOLD: + nested_comp_attributes.update({"color": self.active_color}) + nested_comp_attributes.update({"penwidth": str(self.default_width + self.active_thicker_by)}) + composition.active_item_rendered = True + nested_comp_graph.attr(**nested_comp_attributes) nested_comp_graph.attr(label=rcvr_label) g.subgraph(nested_comp_graph) @@ -2722,6 +2757,7 @@ def _set_up_animation(self, context): if not isinstance(composition._show_animation, bool): raise ShowGraphError(f"{repr(SHOW)} entry of {repr('animate')} argument for {repr('run')} " f"method of {composition.name} ({composition._show_animation}) must be a boolean.") + elif composition._animate: # composition._animate should now be False or a dict raise ShowGraphError("{} argument for {} method of {} ({}) must be a boolean or " @@ -2737,10 +2773,10 @@ def _animate_execution(self, active_items, context): else: composition._component_animation_execution_count += 1 composition.show_graph(active_items=active_items, - **composition._animate, - output_fmt='gif', - context=context, - ) + **composition._animate, + output_fmt='gif', + context=context, + ) def _generate_gifs(self, G, active_items, context): diff --git 
a/psyneulink/core/globals/keywords.py b/psyneulink/core/globals/keywords.py index 713e9e16dfb..267e0eac33a 100644 --- a/psyneulink/core/globals/keywords.py +++ b/psyneulink/core/globals/keywords.py @@ -37,8 +37,8 @@ 'ContentAddressableMemory_FUNCTION', 'CONTEXT', 'CONTROL', 'CONTROL_MECHANISM', 'CONTROL_PATHWAY', 'CONTROL_PROJECTION', 'CONTROL_PROJECTION_PARAMS', 'CONTROL_PROJECTIONS', 'CONTROL_SIGNAL', 'CONTROL_SIGNAL_SPECS', 'CONTROL_SIGNALS', 'CONTROLLED_PARAMS', - 'CONTROLLER', 'CONTROLLER_OBJECTIVE', 'CORRELATION', 'COSINE', 'COST_FUNCTION', 'COUNT', 'CROSS_ENTROPY', - 'CURRENT_EXECUTION_TIME', 'CUSTOM_FUNCTION', 'CYCLE', + 'CONTROLLER', 'CONTROLLER_OBJECTIVE', 'CORRELATION', 'COSINE', 'COSINE_SIMILARITY', + 'COST_FUNCTION', 'COUNT', 'CROSS_ENTROPY', 'CURRENT_EXECUTION_TIME', 'CUSTOM_FUNCTION', 'CYCLE', 'DDM_MECHANISM', 'DECAY', 'DEFAULT', 'DEFAULT_CONTROL_MECHANISM', 'DEFAULT_INPUT', 'DEFAULT_MATRIX', 'DEFAULT_PREFERENCE_SET_OWNER', 'DEFAULT_PROCESSING_MECHANISM', 'DEFAULT_VARIABLE', 'DEFERRED_ASSIGNMENT', 'DEFERRED_DEFAULT_NAME', 'DEFERRED_INITIALIZATION', 'DICT', 'DictionaryMemory_FUNCTION', @@ -46,9 +46,10 @@ 'DIST_SHAPE', 'DISTANCE_FUNCTION', 'DISTANCE_METRICS', 'DISTRIBUTION_FUNCTION_TYPE', 'DIVISION', 'DRIFT_DIFFUSION_INTEGRATOR_FUNCTION', 'DRIFT_ON_A_SPHERE_INTEGRATOR_FUNCTION', 'DUAL_ADAPTIVE_INTEGRATOR_FUNCTION', 'EFFERENTS', 'EID_SIMULATION', 'EID_FROZEN', 'EITHER', 'ENABLE_CONTROLLER', 'ENABLED', 'ENERGY', 'ENTROPY', - 'EPISODIC_MEMORY_MECHANISM', 'EQUAL', 'ERROR_DERIVATIVE_FUNCTION', 'EUCLIDEAN', 'EVC_MECHANISM', 'EVC_SIMULATION', - 'EXAMPLE_FUNCTION_TYPE', 'EXECUTE_UNTIL_FINISHED', 'EXECUTING', 'EXECUTION', 'EXECUTION_COUNT', 'EXECUTION_ID', - 'EXECUTION_PHASE', 'EXPONENTIAL', 'EXPONENT', 'EXPONENTIAL_DIST_FUNCTION', 'EXPONENTIAL_FUNCTION', 'EXPONENTS', + 'EPISODIC_MEMORY_MECHANISM', 'EPOCHS', 'EQUAL', 'ERROR_DERIVATIVE_FUNCTION', 'EUCLIDEAN', + 'EVC_MECHANISM', 'EVC_SIMULATION', 'EXAMPLE_FUNCTION_TYPE', + 'EXECUTE_UNTIL_FINISHED', 'EXECUTING', 'EXECUTION', 'EXECUTION_COUNT', 'EXECUTION_ID', 'EXECUTION_PHASE', + 'EXPONENTIAL', 'EXPONENT', 'EXPONENTIAL_DIST_FUNCTION', 'EXPONENTIAL_FUNCTION', 'EXPONENTS', 'FEEDBACK', 'FITZHUGHNAGUMO_INTEGRATOR_FUNCTION', 'FINAL', 'FLAGS', 'FULL', 'FULL_CONNECTIVITY_MATRIX', 'FUNCTION', 'FUNCTIONS', 'FUNCTION_COMPONENT_CATEGORY','FUNCTION_CHECK_ARGS', 'FUNCTION_OUTPUT_TYPE', 'FUNCTION_OUTPUT_TYPE_CONVERSION', 'FUNCTION_PARAMS', @@ -69,7 +70,7 @@ 'LEARNING_OBJECTIVE', 'LEARNING_MECHANISM', 'LEARNING_MECHANISMS', 'LEARNING_PATHWAY', 'LEARNING_PROJECTION', 'LEARNING_PROJECTION_PARAMS', 'LEARNING_RATE', 'LEARNING_SIGNAL', 'LEARNING_SIGNAL_SPECS', 'LEARNING_SIGNALS', 'LESS_THAN', 'LESS_THAN_OR_EQUAL', 'LINEAR', 'LINEAR_COMBINATION_FUNCTION', 'LINEAR_FUNCTION', - 'LINEAR_MATRIX_FUNCTION', 'LOG_ENTRIES', 'LOGISTIC_FUNCTION', 'LOW', 'LVOC_CONTROL_MECHANISM', 'L0', 'L1', + 'LINEAR_MATRIX_FUNCTION', 'LOG_ENTRIES', 'LOGISTIC_FUNCTION', 'Loss', 'LOW', 'LVOC_CONTROL_MECHANISM', 'MAPPING_PROJECTION', 'MAPPING_PROJECTION_PARAMS', 'MASKED_MAPPING_PROJECTION', 'MATRIX', 'MATRIX_KEYWORD_NAMES', 'MATRIX_KEYWORD_SET', 'MATRIX_KEYWORD_VALUES', 'MATRIX_KEYWORDS','MatrixKeywords', 'MAX_ABS_DIFF', 'MAX_ABS_INDICATOR', 'MAX_ONE_HOT', 'MAX_ABS_ONE_HOT', 'MAX_ABS_VAL', @@ -84,7 +85,7 @@ 'MODEL_SPEC_ID_RECEIVER_MECH', 'MODEL_SPEC_ID_RECEIVER_PORT', 'MODEL_SPEC_ID_PARAMETER_INITIAL_VALUE', 'MODEL_SPEC_ID_PARAMETER_SOURCE', 'MODEL_SPEC_ID_PARAMETER_VALUE', 'MODEL_SPEC_ID_TYPE', - 'MSE', 'MULTIPLICATIVE', 'MULTIPLICATIVE_PARAM', 'MUTUAL_ENTROPY', + 'MULTIPLICATIVE', 
'MULTIPLICATIVE_PARAM', 'MUTUAL_ENTROPY', 'NAME', 'NESTED', 'NEWEST', 'NODE', 'NOISE', 'NORMAL_DIST_FUNCTION', 'NORMED_L0_SIMILARITY', 'NOT_EQUAL', 'NUM_EXECUTIONS_BEFORE_FINISHED', 'OBJECTIVE_FUNCTION_TYPE', 'OBJECTIVE_MECHANISM', 'OBJECTIVE_MECHANISM_OBJECT', 'OFF', 'OFFSET', 'OLDEST', 'ON', @@ -109,11 +110,11 @@ 'RELU_FUNCTION', 'REST', 'RESULT', 'RESULT', 'ROLES', 'RL_FUNCTION', 'RUN', 'SAMPLE', 'SAVE_ALL_VALUES_AND_POLICIES', 'SCALAR', 'SCALE', 'SCHEDULER', 'SELF', 'SENDER', 'SEPARATE', 'SEPARATOR_BAR', 'SHADOW_INPUT_NAME', 'SHADOW_INPUTS', 'SIMPLE', 'SIMPLE_INTEGRATOR_FUNCTION', 'SIMULATIONS', - 'SINGLETON', 'SIZE', 'SLOPE', 'SOFT_CLAMP', 'SOFTMAX_FUNCTION', 'SOURCE', 'SSE', 'STABILITY_FUNCTION', + 'SINGLETON', 'SIZE', 'SLOPE', 'SOFT_CLAMP', 'SOFTMAX_FUNCTION', 'SOURCE', 'STABILITY_FUNCTION', 'STANDARD_ARGS', 'STANDARD_DEVIATION', 'STANDARD_OUTPUT_PORTS', 'SUBTRACTION', 'SUM', - 'TARGET', 'TARGET_MECHANISM', 'TARGET_LABELS_DICT', 'TERMINAL', 'TERMINATION_MEASURE', 'TERMINATION_THRESHOLD', - 'TERMINATION_COMPARISION_OP', 'TERSE', 'TEXT', 'THRESHOLD', 'TIME', 'TIME_STEP_SIZE', 'TIME_STEPS_DIM', - 'TRAINING_SET', + 'TARGET', 'TARGET_MECHANISM', 'TARGET_LABELS_DICT', 'TERMINAL', 'TARGETS', + 'TERMINATION_MEASURE', 'TERMINATION_THRESHOLD', 'TERMINATION_COMPARISION_OP', 'TERSE', 'TEXT', 'THRESHOLD', + 'TIME', 'TIME_STEP_SIZE', 'TIME_STEPS_DIM', 'TRAINING_SET', 'TRANSFER_FUNCTION_TYPE', 'TRANSFER_MECHANISM', 'TRANSFER_WITH_COSTS_FUNCTION', 'TRIAL', 'TRIALS_DIM', 'UNCHANGED', 'UNIFORM_DIST_FUNCTION', 'USER_DEFINED_FUNCTION', 'USER_DEFINED_FUNCTION_TYPE', @@ -125,6 +126,7 @@ # ****************************************** KEYWORD CLASSES ********************************************************** # ********************************************************************************************************************** import operator +from enum import Enum, auto class MatrixKeywords: """ @@ -240,6 +242,7 @@ def __init__(self): self.CORRELATION = CORRELATION # self.PEARSON = PEARSON self.COSINE = COSINE + self.COSINE_SIMILARITY = COSINE self.ENTROPY = CROSS_ENTROPY self.CROSS_ENTROPY = CROSS_ENTROPY self.ENERGY = ENERGY @@ -270,6 +273,7 @@ def _is_metric(metric): ANGLE = 'angle' CORRELATION = 'correlation' COSINE = 'cosine' +COSINE_SIMILARITY = 'cosine' PEARSON = 'Pearson' ENTROPY = 'cross-entropy' CROSS_ENTROPY = 'cross-entropy' @@ -285,6 +289,58 @@ def _is_metric(metric): CONVERGENCE = 'CONVERGENCE' +class Loss(Enum): + """Loss function used for `learning `. + + Each keyword specifies a loss function used for learning in a `Composition` or `AutodiffComposition`, + and the comparable loss functions used by `PyTorch` when an AutodiffComposition is executed in + `ExecutionMode.PyTorch` mode. 
+    COMMENT:
+        Get latex for remaining equations from https://blmoistawinde.github.io/ml_equations_latex/#cross-entropy
+    COMMENT
+
+    Attributes
+    ----------
+
+    L0
+        sum of errors: :math:`\\sum\\limits^{len}_i|target_i - output_i|`
+
+    COMMENT:
+    L1
+        mean
+    COMMENT
+
+    SSE
+        sum of squared errors: :math:`\\sum\\limits^{len}_i(target_i - output_i)^2`
+
+    MSE
+        mean of squared errors: :math:`\\frac{\\sum\\limits^{len}_i(target_i - output_i)^2}{len}`
+
+    CROSS_ENTROPY
+        cross entropy: :math:`\\sum\\limits^{len}_i output_i\\log(target_i)`
+
+    KL_DIV
+        `Kullback-Leibler (KL) divergence
+        `_:
+        :math:`\\sum\\limits^{len}_i target_i\\log{(\\frac{target_i}{output_i})}`
+
+    NLL
+        `Negative log likelihood loss `_:
+        :math:`-{\\log(target_i)}`
+
+    POISSON_NLL
+        `Poisson negative log likelihood loss `_
+    """
+    L0 = auto()
+    L1 = auto()
+    SSE = auto()
+    MSE = auto()
+    CROSS_ENTROPY = auto()
+    KL_DIV = auto()
+    NLL = auto()
+    POISSON_NLL = auto()
+
+
# **********************************************************************************************************************
# ******************************************    CONSTANTS  *************************************************************
# **********************************************************************************************************************

@@ -411,6 +467,8 @@ def _is_metric(metric):
 LEARNING_PATHWAY = "learning_pathway"
 NODE = 'NODE'
 INPUTS = 'inputs'
+TARGETS = 'targets'
+EPOCHS = 'epochs'

 # Used in show_graph for show_nested
 NESTED = 'nested'

@@ -652,9 +710,10 @@ def _is_metric(metric):
 #endregion

-#region ---------------------------------------    AUTODIFF COMPOSITION   ----------------------------------------------
+#region ------------------------------------------    AUTODIFF COMPOSITION   ----------------------------------------------

 TRAINING_SET = 'training set'
+LEARNING_RATE = "learning_rate"

 #endregion

@@ -668,7 +727,6 @@ def _is_metric(metric):
 HARD_CLAMP = "hard_clamp"
 PULSE_CLAMP = "pulse_clamp"
 NO_CLAMP = "no_clamp"
-LEARNING_RATE = "learning_rate"
 # CONTROL = 'CONTROL'
 PROCESS_DEFAULT_PROJECTION_FUNCTION = "Default Projection Function"
 PROCESS_EXECUTE = "ProcessExecute"

@@ -975,8 +1033,6 @@ def _is_metric(metric):

 GAMMA = 'gamma'

-MSE = 'MSE'
-SSE = 'SSE'
 #endregion

 # model spec keywords

diff --git a/psyneulink/core/globals/parameters.py b/psyneulink/core/globals/parameters.py
index 819db375349..dfe7b589824 100644
--- a/psyneulink/core/globals/parameters.py
+++ b/psyneulink/core/globals/parameters.py
@@ -113,8 +113,11 @@ def __init__(p=None, q=1.0):
 `Parameter attributes `

 - default values for the parameters can be specified in the Parameters class body, or in the arguments for
   *B*.__init__. If both are specified and the values differ, an exception will be raised
If both are specified and the values differ, an exception will be raised -- if you want assignments to parameter *p* to be validated, add a method _validate_p(value), +- if you want assignments to parameter *p* to be validated, add a method _validate_p(value), \ that returns None if value is a valid assignment, or an error string if value is not a valid assignment + - NOTE: a validation method for *p* may reference other parameters \ + only if they are listed in *p*'s \ + `dependencies ` - if you want all values set to *p* to be parsed beforehand, add a method _parse_p(value) that returns the parsed value - for example, convert to a numpy array or float @@ -123,6 +126,8 @@ def __init__(p=None, q=1.0): def _parse_p(value): return np.asarray(value) + - NOTE: parsers may not reference other parameters + - setters and getters (used for more advanced behavior than parsing) should both return the final value to return (getter) or set (setter) For example, `costs ` of `ControlMechanism ` has a special @@ -607,13 +612,15 @@ def _owner(self, value): except TypeError: self._owner_ref = value - @property - def _in_dependency_order(self): + def _dependency_order_key(self, names=False): """ - Returns: - list[Parameter] - a list of Parameters such that any - Parameter is placed before all of its - `dependencies ` + Args: + names (bool, optional): Whether sorting key is based on + Parameter names or Parameter objects. Defaults to False. + + Returns: + types.FunctionType: a function that may be passed in as sort + key so that any Parameter is placed before its dependencies """ parameter_function_ordering = list(toposort.toposort({ p.name: p.dependencies for p in self if p.dependencies is not None @@ -622,13 +629,30 @@ def _in_dependency_order(self): itertools.chain.from_iterable(parameter_function_ordering) ) - def ordering(p): - try: - return parameter_function_ordering.index(p.name) - except ValueError: - return -1 + if names: + def ordering(p): + try: + return parameter_function_ordering.index(p) + except ValueError: + return -1 + else: + def ordering(p): + try: + return parameter_function_ordering.index(p.name) + except ValueError: + return -1 + + return ordering - return sorted(self, key=ordering) + @property + def _in_dependency_order(self): + """ + Returns: + list[Parameter] - a list of Parameters such that any + Parameter is placed before all of its + `dependencies ` + """ + return sorted(self, key=self._dependency_order_key()) class Defaults(ParametersTemplate): diff --git a/psyneulink/core/globals/registry.py b/psyneulink/core/globals/registry.py index 15c585d1c7f..71d2a34be7c 100644 --- a/psyneulink/core/globals/registry.py +++ b/psyneulink/core/globals/registry.py @@ -217,6 +217,8 @@ def register_category(entry, raise RegistryError("Requested entry {0} not of type {1}".format(entry, base_class)) +_register_auto_name_prefix = "" + def register_instance(entry, name, base_class, registry, sub_dict): renamed_instance_counts = registry[sub_dict].renamed_instance_counts @@ -224,11 +226,14 @@ def register_instance(entry, name, base_class, registry, sub_dict): # If entry (instance) name is None, set entry's name to sub_dict-n where n is the next available numeric suffix # (starting at 0) based on the number of unnamed/renamed sub_dict objects that have already been assigned names if name is None: - entry.name = '{0}-{1}'.format(sub_dict, renamed_instance_counts[sub_dict]) + entry.name = '{0}{1}-{2}'.format(_register_auto_name_prefix, sub_dict, renamed_instance_counts[sub_dict]) renamed = True else: 
        entry.name = name

+    assert not entry.name.startswith("__pnl_") or entry.name.startswith(_register_auto_name_prefix), \
+        "Using reserved name: {}".format(entry.name)
+
    while entry.name in registry[sub_dict].instanceDict:
        # if the decided name (provided or determined) is already assigned to an object, get the non-suffixed name,
        # and append the proper new suffix according to the number of objects that have been assigned that name

diff --git a/psyneulink/core/globals/utilities.py b/psyneulink/core/globals/utilities.py
index 84fd6a73f93..9d43215225b 100644
--- a/psyneulink/core/globals/utilities.py
+++ b/psyneulink/core/globals/utilities.py
@@ -359,7 +359,11 @@ def is_matrix(m):
        try:
            return is_matrix(m())
        except:
-            return False
+            try:
+                # random_matrix and RandomMatrix are allowable functions, but require num_rows and num_cols parameters
+                return is_matrix(m(1, 2))
+            except:
+                return False
    return False

@@ -434,16 +438,22 @@ def iscompatible(candidate, reference=None, **kargs):
    # If the two are equal, can settle it right here
    # IMPLEMENTATION NOTE: remove the duck typing when numpy supports a direct comparison of iterables
    try:
-        with warnings.catch_warnings():
-            warnings.simplefilter(action='ignore', category=FutureWarning)
-            if reference is not None and (candidate == reference):
-                return True
+        if (reference is not None and np.array(candidate, dtype=object).size > 0
+                and safe_equals(candidate, reference)):
+            return True
+
    except ValueError:
        # raise UtilitiesError("Could not compare {0} and {1}".format(candidate, reference))
        # IMPLEMENTATION NOTE: np.array generates the following error:
        # ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
        pass

+    # If the two are the same thing, can settle it right here
+    # This is a common pattern for tests that use the same structure
+    # as default variable and variable
+    if reference is not None and candidate is reference:
+        return True
+
    # If args not provided, assign to default values
    # if not specified in args, use these:
    #     args[kwCompatibilityType] = list

@@ -498,13 +508,11 @@ def iscompatible(candidate, reference=None, **kargs):
        if is_matrix_spec(reference):
            return is_matrix(candidate)

-    # MODIFIED 10/29/17 NEW:
    # IMPLEMENTATION NOTE: This allows a number in an ndarray to match a float or int
    # If both the candidate and reference are either a number or an ndarray of dim 0, consider it a match
    if ((is_number(candidate) or (isinstance(candidate, np.ndarray) and candidate.ndim == 0)) or
            (is_number(reference) or (isinstance(reference, np.ndarray) and reference.ndim == 0))):
        return True
-    # MODIFIED 10/29/17 END

    # IMPLEMENTATION NOTE:
    #   modified to allow numeric type mismatches (e.g., int and float;

@@ -780,7 +788,14 @@ def __deepcopy__(self, memo):
        result = cls.__new__(cls)
        memo[id(self)] = result

-        for k, v in self.__dict__.items():
+        try:
+            # follow dependency order for Parameters to allow validation involving other parameters
+            ordered_dict_keys = sorted(self.__dict__, key=self._dependency_order_key(names=True))
+        except AttributeError:
+            ordered_dict_keys = self.__dict__
+
+        for k in ordered_dict_keys:
+            v = self.__dict__[k]
            if k in shared_keys or isinstance(v, shared_types):
                res_val = v
            else:

@@ -1012,7 +1027,7 @@ def type_match(value, value_type):
        return value
    if value_type in {int, np.integer, np.int64, np.int32}:
        return int(value)
-    if value_type in {float, np.float, np.float64, np.float32}:
+    if value_type in {float, np.float64, np.float32}:
        return float(value)
    if value_type is np.ndarray:
        return
np.array(value)

@@ -1030,30 +1045,42 @@ def get_value_from_array(array):
    """
    :param array:
    :return:
    """

-def random_matrix(sender, receiver, clip=1, offset=0):
+def random_matrix(num_rows, num_cols, offset=0.0, scale=1.0):
    """Generate a random matrix

-    Calls np.random.rand to generate a 2d np.array with random values.
+    Calls np.random.rand to generate a 2d np.array with random values and shape (num_rows, num_cols):
+
+    :math:`matrix = (random[0.0:1.0] + offset) * scale`
+
+    With the default values of **offset** and **scale**, values of matrix are floats between 0 and 1.
+    However, **offset** can be used to center the range on other values (e.g., **offset**=-0.5 centers values on 0),
+    and **scale** can be used to narrow or widen the range. As a convenience, the keyword 'ZERO_CENTER' can be used
+    in place of -0.5.

    Arguments
    ----------
-    sender : int
+    num_rows : int
        specifies number of rows.

-    receiver : int
-        spcifies number of columns.
+    num_cols : int
+        specifies number of columns.

-    range : int
-        specifies upper limit (lower limit = 0).
+    offset : float or 'zero_center'
+        specifies amount added to each entry of the matrix before it is scaled.

-    offset : int
-        specifies amount added to each entry of the matrix.
+    scale : float
+        specifies amount by which random value + **offset** is multiplicatively scaled.

    Returns
    -------
    2d np.array
    """
-    return (clip * np.random.rand(sender, receiver)) + offset
+    if isinstance(offset,str):
+        if offset.upper() == 'ZERO_CENTER':
+            offset = -0.5
+        else:
+            raise UtilitiesError(f"'offset' arg of random_matrix must be a number or 'ZERO_CENTER'")
+    return (np.random.rand(num_rows, num_cols) + offset) * scale

def underscore_to_camelCase(item):
    item = item[1:]

@@ -1230,17 +1257,14 @@ def __copy__(self):

    def __getitem__(self, key):
        if key is None:
-            raise KeyError("None is not a legal key for {}".format(self.name))
+            raise KeyError(f"None is not a legal key for '{self.name}'.")
        try:
            return self.data[key]
        except TypeError:
            key_num = self._get_key_for_item(key)
            if key_num is None:
-                # raise TypeError("\'{}\' is not a key in the {} being addressed".
-                #                 format(key, self.__class__.__name__))
-                # raise KeyError("\'{}\' is not a key in {}".
-                raise TypeError("\'{}\' is not a key in {}".
-                                format(key, self.name))
+                # raise TypeError(f"'{key}' is not a key in {self.name}.")
+                raise TypeError(f"'{key}' is not in {self.name}.")
            return self.data[key_num]

    def __setitem__(self, key, value):

@@ -1618,7 +1642,6 @@ def safe_len(arr, fallback=1):
    except TypeError:
        return fallback

-
def safe_equals(x, y):
    """
    An == comparison that handles numpy's new behavior of returning

@@ -1635,14 +1658,25 @@ def safe_equals(x, y):
    except (ValueError, DeprecationWarning, FutureWarning):
        try:
            return np.array_equal(x, y)
-        except DeprecationWarning:
+        except (DeprecationWarning, FutureWarning):
+            # both should have len because non-len objects would not
+            # have triggered the warnings on == or array_equal
            len_x = len(x)
-            return (
-                len_x == len(y)
-                and all([
-                    safe_equals(x[i], y[i]) for i in range(len_x)
-                ])
-            )
+            if len_x != len(y):
+                return False
+
+            if hasattr(x, 'keys') and hasattr(y, 'keys'):
+                # dictionary-like
+                if x.keys() != y.keys():
+                    return False
+                subelements = x.keys()
+            elif hasattr(x, 'keys') or hasattr(y, 'keys'):
+                return False
+            else:
+                # list-like
+                subelements = range(len_x)
+
+            return all([safe_equals(x[i], y[i]) for i in subelements])

@tc.typecheck

diff --git a/psyneulink/core/llvm/__init__.py b/psyneulink/core/llvm/__init__.py
index a62f8c875e3..651225c2a1f 100644
--- a/psyneulink/core/llvm/__init__.py
+++ b/psyneulink/core/llvm/__init__.py
@@ -29,7 +29,51 @@
 __all__ = ['LLVMBuilderContext', 'ExecutionMode']

 class ExecutionMode(enum.Flag):
+    """Specify execution of a `Composition` in interpreted or one of the compiled modes.
+    These are used to specify the **execution_mode** argument of a Composition's `execute `,
+    `run `, and `learn ` methods. See `Compiled Modes
+    ` under `Compilation ` for additional details concerning
+    use of each mode by a Composition.
+
+    Attributes
+    ----------
+
+    Python
+      Execute using the Python interpreter; this is the default mode.
+
+    LLVM
+      compile and run Composition `Nodes ` and `Projections ` individually.
+
+    LLVMExec
+      compile and run each `TRIAL ` individually.
+
+    LLVMRun
+      compile and run multiple `TRIAL `\\s.
+
+    Auto
+      progressively attempts LLVMRun, LLVMExec, LLVM and then Python.
+
+    PyTorch
+      execute the `AutodiffComposition` `learn ` method using PyTorch, and its
+      `run ` method using the Python interpreter.
+
+      .. warning::
+         For clarity, this mode should only be used when executing an `AutodiffComposition`; using it
+         with a standard `Composition` is possible, but it will **not** have the expected effect of executing
+         its `learn ` method using PyTorch.
+
+    PTX
+      compile and run Composition `Nodes ` and `Projections ` using CUDA for GPU.
+
+    PTXExec
+      compile and run each `TRIAL ` using CUDA for GPU.
+
+    PTXRun
+      compile and run multiple `TRIAL `\\s using CUDA for GPU.
+ """ + Python = 0 + PyTorch = enum.auto() LLVM = enum.auto() PTX = enum.auto() _Run = enum.auto() @@ -41,6 +85,7 @@ class ExecutionMode(enum.Flag): LLVMExec = LLVM | _Exec PTXRun = PTX | _Run PTXExec = PTX | _Exec + COMPILED = ~ (Python | PyTorch) _binary_generation = 0 diff --git a/psyneulink/core/llvm/builder_context.py b/psyneulink/core/llvm/builder_context.py index 8695d1e5347..3398a2d974a 100644 --- a/psyneulink/core/llvm/builder_context.py +++ b/psyneulink/core/llvm/builder_context.py @@ -56,6 +56,7 @@ def module_count(): _BUILTIN_PREFIX = "__pnl_builtin_" _builtin_intrinsics = frozenset(('pow', 'log', 'exp', 'tanh', 'coth', 'csch', + 'sin', 'cos', 'is_close_float', 'is_close_double', 'mt_rand_init', 'philox_rand_init')) diff --git a/psyneulink/core/llvm/builtins.py b/psyneulink/core/llvm/builtins.py index 30973992713..59b2dae5a9b 100644 --- a/psyneulink/core/llvm/builtins.py +++ b/psyneulink/core/llvm/builtins.py @@ -448,7 +448,7 @@ def setup_coth(ctx): exp_f = ctx.get_builtin("exp", [x.type]) # (e**2x + 1)/(e**2x - 1) is faster but doesn't handle large inputs (exp -> Inf) well (Inf/Inf = NaN) # (1 + (2/(exp(2*x) - 1))) is a bit slower but handles large inputs better - # (e**2x + 1)/(e**2x - 1) + _2x = builder.fmul(x.type(2), x) e2x = builder.call(exp_f, [_2x]) den = builder.fsub(e2x, e2x.type(1)) @@ -463,6 +463,8 @@ def setup_pnl_intrinsics(ctx): double_intr_ty = ir.FunctionType(ctx.float_ty, (ctx.float_ty, ctx.float_ty)) # Create function declarations + ir.Function(ctx.module, single_intr_ty, name=_BUILTIN_PREFIX + "cos") + ir.Function(ctx.module, single_intr_ty, name=_BUILTIN_PREFIX + "sin") ir.Function(ctx.module, single_intr_ty, name=_BUILTIN_PREFIX + "exp") ir.Function(ctx.module, single_intr_ty, name=_BUILTIN_PREFIX + "log") ir.Function(ctx.module, double_intr_ty, name=_BUILTIN_PREFIX + "pow") @@ -483,7 +485,7 @@ def _generate_intrinsic_wrapper(module, name, ret, args): def _generate_cpu_builtins_module(_float_ty): """Generate function wrappers for log, exp, and pow intrinsics.""" module = ir.Module(name="cpu_builtins") - for intrinsic in ('exp', 'log'): + for intrinsic in ('sin', 'cos', 'exp', 'log'): _generate_intrinsic_wrapper(module, intrinsic, _float_ty, [_float_ty]) _generate_intrinsic_wrapper(module, "pow", _float_ty, [_float_ty, _float_ty]) @@ -866,6 +868,7 @@ def setup_mersenne_twister(ctx): gen_int = _setup_mt_rand_integer(ctx, state_ty) gen_float = _setup_mt_rand_float(ctx, state_ty, gen_int) _setup_mt_rand_normal(ctx, state_ty, gen_float) + _setup_rand_binomial(ctx, state_ty, gen_float, prefix="mt") _PHILOX_DEFAULT_ROUNDS = 10 @@ -1875,8 +1878,9 @@ def _setup_philox_rand_normal(ctx, state_ty, gen_float, gen_int, wi_data, ki_dat fptype = gen_float.args[1].type.pointee itype = gen_int.args[1].type.pointee if fptype != ctx.float_ty: - # We don't have numeric halpers available for the desired type + # We don't have numeric helpers available for the desired type return + builder = _setup_builtin_func_builder(ctx, "philox_rand_normal", (state_ty.as_pointer(), fptype.as_pointer())) state, out = builder.function.args @@ -1986,6 +1990,50 @@ def _setup_philox_rand_normal(ctx, state_ty, gen_float, gen_int, wi_data, ki_dat builder.branch(loop_block) +def _setup_rand_binomial(ctx, state_ty, gen_float, prefix): + fptype = gen_float.args[1].type.pointee + if fptype != ctx.float_ty: + # We don't have numeric helpers available for the desired type + return + + args = [state_ty.as_pointer(), # state + ctx.int32_ty.as_pointer(), # N - total number of draws + fptype.as_pointer(), # 
p - prob of success
+            ctx.int32_ty.as_pointer()]  # output
+
+    builder = _setup_builtin_func_builder(ctx, prefix + "_rand_binomial", args)
+    state, n_ptr, p_ptr, out_ptr = builder.function.args
+
+    n = builder.load(n_ptr)
+    p = builder.load(p_ptr)
+    q = builder.fsub(p.type(1), p)
+
+    success = out_ptr.type.pointee(1)
+    failure = out_ptr.type.pointee(0)
+
+    # Only N == 1 is supported; store 0 and bail out for any other N
+    is_large_n = builder.icmp_unsigned("!=", n, n.type(1))
+    with builder.if_then(is_large_n):
+        builder.store(out_ptr.type.pointee(0), out_ptr)
+        builder.ret_void()
+
+    uniform_draw_ptr = builder.alloca(fptype, name="tmp_fp")
+    builder.call(gen_float, [state, uniform_draw_ptr])
+    draw = builder.load(uniform_draw_ptr)
+
+    # If 'p' is large enough, success == draw < p
+    is_less_than_p = builder.fcmp_ordered("<", draw, p)
+    large_p_result = builder.select(is_less_than_p, success, failure)
+
+    # The draw check is inverted for small p, comparing the draw against q = 1 - p
+    is_less_than_q = builder.fcmp_ordered("<", draw, q)
+    small_p_result = builder.select(is_less_than_q, failure, success)
+
+    is_small_p = builder.fcmp_ordered("<=", p, p.type(0.5))
+    result = builder.select(is_small_p, small_p_result, large_p_result)
+    builder.store(result, out_ptr)
+    builder.ret_void()

 def get_philox_state_struct(ctx):
     int64_ty = ir.IntType(64)
@@ -2008,7 +2056,9 @@ def setup_philox(ctx):
     gen_int64 = _setup_philox_rand_int64(ctx, state_ty)
     gen_double = _setup_philox_rand_double(ctx, state_ty, gen_int64)
     _setup_philox_rand_normal(ctx, state_ty, gen_double, gen_int64, _wi_double_data, _ki_i64_data, _fi_double_data)
+    _setup_rand_binomial(ctx, state_ty, gen_double, prefix="philox")

     gen_int32 = _setup_philox_rand_int32(ctx, state_ty, gen_int64)
     gen_float = _setup_philox_rand_float(ctx, state_ty, gen_int32)
     _setup_philox_rand_normal(ctx, state_ty, gen_float, gen_int32, _wi_float_data, _ki_i32_data, _fi_float_data)
+    _setup_rand_binomial(ctx, state_ty, gen_float, prefix="philox")
diff --git a/psyneulink/core/llvm/codegen.py b/psyneulink/core/llvm/codegen.py
index 76f29f8bbfb..9d192f4e744 100644
--- a/psyneulink/core/llvm/codegen.py
+++ b/psyneulink/core/llvm/codegen.py
@@ -68,11 +68,6 @@ def np_cmp(builder, x, y):
             if v is np:
                 self.register[k] = numpy_handlers

-        name_constants = {
-            True: ctx.bool_ty(1),
-            False: ctx.bool_ty(0),
-        }
-        self.name_constants = name_constants
         super().__init__()

     def _update_debug_metadata(self, builder: ir.IRBuilder, node:ast.AST):
@@ -239,9 +234,6 @@ def _convert(builder, x):

         return val[node.attr]

-    def visit_Num(self, node):
-        return self.ctx.float_ty(node.n)
-
     def visit_Assign(self, node):
         value = self.visit(node.value)

@@ -259,10 +251,24 @@ def visit_Assign(self, node):
             assert self.is_lval(target)
             self.builder.store(value, target)

+    # visit_Constant is supported in Python3.8+
+    def visit_Constant(self, node):
+        # Only True/False are currently supported as named constants
+        # Call deprecated visit_* methods to maintain coverage
+        if node.value is True or node.value is False:
+            return self.visit_NameConstant(node)
+
+        return self.visit_Num(node)
+
+    # deprecated in Python3.8+
     def visit_NameConstant(self, node):
-        val = self.name_constants[node.value]
-        assert val, f"Failed to convert NameConstant {node.value}"
-        return val
+        # Only True and False are supported atm
+        assert node.value is True or node.value is False
+        return self.ctx.bool_ty(node.value)
+
+    # deprecated in Python3.8+
+    def visit_Num(self, node):
+        return self.ctx.float_ty(node.n)

     def visit_Tuple(self, node:ast.AST):
         elements = (self.visit(element) for element in node.elts)
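For reference, the single-draw (N == 1) logic that the new builtin implements can be sketched in NumPy; the
helper below is illustrative only and not part of the patch:

    import numpy as np

    def binomial_n1(p, rng=None):
        # single Bernoulli trial, mirroring the compiled builtin's branches
        rng = rng or np.random.default_rng()
        draw = rng.random()
        if p <= 0.5:
            # small p: the check is inverted, comparing the draw against q = 1 - p
            return 0 if draw < (1.0 - p) else 1
        # large p: success iff draw < p
        return 1 if draw < p else 0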
diff --git a/psyneulink/core/llvm/execution.py b/psyneulink/core/llvm/execution.py index 2500d160997..95fc1e04f04 100644 --- a/psyneulink/core/llvm/execution.py +++ b/psyneulink/core/llvm/execution.py @@ -683,7 +683,8 @@ def _prepare_evaluate(self, inputs, num_input_sets, num_evaluations): ocm = self._composition.controller assert len(self._execution_contexts) == 1 - bin_func = pnlvm.LLVMBinaryFunction.from_obj(ocm, tags=frozenset({"evaluate", "alloc_range"})) + tags = {"evaluate", "alloc_range", "evaluate_type_objective"} + bin_func = pnlvm.LLVMBinaryFunction.from_obj(ocm, tags=frozenset(tags)) self.__bin_func = bin_func # There are 7 arguments to evaluate_alloc_range: diff --git a/psyneulink/core/llvm/jit_engine.py b/psyneulink/core/llvm/jit_engine.py index 23815b9aa45..73fbf36683b 100644 --- a/psyneulink/core/llvm/jit_engine.py +++ b/psyneulink/core/llvm/jit_engine.py @@ -128,16 +128,18 @@ def _ptx_jit_constructor(): def _try_parse_module(module): + module_text_ir = str(module) + if "dump-llvm-gen" in debug_env: with open(module.name + '.generated.ll', 'w') as dump_file: - dump_file.write(str(module)) + dump_file.write(module_text_ir) # IR module is not the same as binding module. # "assembly" in this case is LLVM IR assembly. # This is intentional design decision to ease # compatibility between LLVM versions. try: - mod = binding.parse_assembly(str(module)) + mod = binding.parse_assembly(module_text_ir) mod.verify() except Exception as e: print("ERROR: llvm parsing failed: {}".format(e)) @@ -279,6 +281,8 @@ def _init(self): _ptx_builtin_source = """ +__device__ {type} __pnl_builtin_sin({type} a) {{ return sin(a); }} +__device__ {type} __pnl_builtin_cos({type} a) {{ return cos(a); }} __device__ {type} __pnl_builtin_log({type} a) {{ return log(a); }} __device__ {type} __pnl_builtin_exp({type} a) {{ return exp(a); }} __device__ {type} __pnl_builtin_pow({type} a, {type} b) {{ return pow(a, b); }} diff --git a/psyneulink/core/scheduling/scheduler.py b/psyneulink/core/scheduling/scheduler.py index 3db80e0551a..3eb3c4c272f 100644 --- a/psyneulink/core/scheduling/scheduler.py +++ b/psyneulink/core/scheduling/scheduler.py @@ -53,6 +53,7 @@ def __init__( # TODO: consider integrating something like this into graph-scheduler? 
self._user_specified_conds = copy.copy(conditions) if conditions is not None else {} + self._user_specified_termination_conds = copy.copy(termination_conds) if termination_conds is not None else {} super().__init__( graph=graph, @@ -118,6 +119,13 @@ def _add_condition_set(self, conditions): } super().add_condition_set(conditions) + @graph_scheduler.Scheduler.termination_conds.setter + def termination_conds(self, termination_conds): + if termination_conds is not None: + self._user_specified_termination_conds.update(termination_conds) + + graph_scheduler.Scheduler.termination_conds.fset(self, termination_conds) + @handle_external_context(fallback_default=True) def run( self, diff --git a/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py b/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py index 92c245d3275..dba0645d984 100644 --- a/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py @@ -174,13 +174,11 @@ from psyneulink.core.globals.preferences.preferenceset import PreferenceLevel __all__ = [ - 'AGTControlMechanism', 'AGTControlMechanismError', 'ControlMechanismRegistry', 'MONITORED_OUTPUT_PORT_NAME_SUFFIX' + 'AGTControlMechanism', 'AGTControlMechanismError', 'MONITORED_OUTPUT_PORT_NAME_SUFFIX' ] MONITORED_OUTPUT_PORT_NAME_SUFFIX = '_Monitor' -ControlMechanismRegistry = {} - class AGTControlMechanismError(Exception): def __init__(self, error_value): self.error_value = error_value diff --git a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py index f715396b12f..b709fd861f2 100644 --- a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py @@ -313,15 +313,13 @@ from psyneulink.core.globals.utilities import is_iterable, convert_to_list __all__ = [ - 'CONTROL_SIGNAL_NAME', 'ControlMechanismRegistry', 'LCControlMechanism', 'LCControlMechanismError', + 'CONTROL_SIGNAL_NAME', 'LCControlMechanism', 'LCControlMechanismError', 'MODULATED_MECHANISMS', ] MODULATED_MECHANISMS = 'modulated_mechanisms' CONTROL_SIGNAL_NAME = 'LCControlMechanism_ControlSignal' -ControlMechanismRegistry = {} - class LCControlMechanismError(Exception): def __init__(self, error_value): self.error_value = error_value diff --git a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py index 5f790fc9200..608b46827d3 100644 --- a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py +++ b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py @@ -1227,14 +1227,14 @@ def is_finished(self, context=None): return True return False - def _gen_llvm_is_finished_cond(self, ctx, builder, params, state): + def _gen_llvm_is_finished_cond(self, ctx, builder, m_base_params, m_state, m_in): # Setup pointers to internal function - func_state_ptr = pnlvm.helpers.get_state_ptr(builder, self, state, "function") - func_param_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, "function") + f_state = pnlvm.helpers.get_state_ptr(builder, self, m_state, "function") - # Find the single numeric entry in previous_value + # Find the single numeric entry in previous_value. 
+        # This exists only if the 'function' is 'integrator'
         try:
-            prev_val_ptr = pnlvm.helpers.get_state_ptr(builder, self.function, func_state_ptr, "previous_value")
+            prev_val_ptr = pnlvm.helpers.get_state_ptr(builder, self.function, f_state, "previous_value")
         except ValueError:
             return ctx.bool_ty(1)
@@ -1246,11 +1246,15 @@ def _gen_llvm_is_finished_cond(self, ctx, builder, params, state):
             llvm_fabs = ctx.get_builtin("fabs", [ctx.float_ty])
             prev_val = builder.call(llvm_fabs, [prev_val])

+        # Get the function's params and apply modulation
+        f_base_params = pnlvm.helpers.get_param_ptr(builder, self, m_base_params, "function")
+        f_params, builder = self._gen_llvm_param_ports_for_obj(
+                self.function, f_base_params, ctx, builder, m_base_params, m_state, m_in)

-        # obtain threshold value
+        # Get threshold value
         threshold_ptr = pnlvm.helpers.get_param_ptr(builder, self.function,
-                                                    func_param_ptr,
+                                                    f_params,
                                                     "threshold")
         threshold_ptr = builder.gep(threshold_ptr, [ctx.int32_ty(0),
                                                     ctx.int32_ty(0)])
diff --git a/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py b/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py
index 3286eff87c8..7cd65dc3f84 100644
--- a/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py
+++ b/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py
@@ -365,7 +365,7 @@
 as `input_ports `, named correspondingly ``RETRIEVED_FIELD_n``::

     >>> my_em.output_ports.names
-    ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1']
+    ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1']

 These are assigned the values of the fields of the entry retrieved from `memory `.
@@ -427,7 +427,7 @@
 VALUE_OUTPUT = 'VALUE_OUTPUT'
 DEFAULT_INPUT_PORT_NAME_PREFIX = 'FIELD_'
 DEFAULT_INPUT_PORT_NAME_SUFFIX = '_INPUT'
-DEFAULT_OUTPUT_PORT_PREFIX = 'RETREIVED_'
+DEFAULT_OUTPUT_PORT_PREFIX = 'RETRIEVED_'

 class EpisodicMemoryMechanismError(Exception):
diff --git a/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py b/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py
index 3a8e169b380..600a421e2ad 100644
--- a/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py
+++ b/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py
@@ -152,7 +152,8 @@
 from psyneulink.core.components.ports.outputport import OutputPort
 from psyneulink.core.components.ports.port import _parse_port_spec
 from psyneulink.core.globals.keywords import \
-    COMPARATOR_MECHANISM, FUNCTION, INPUT_PORTS, NAME, OUTCOME, SAMPLE, TARGET, VARIABLE, PREFERENCE_SET_NAME, MSE, SSE
+    COMPARATOR_MECHANISM, FUNCTION, INPUT_PORTS, NAME, OUTCOME, SAMPLE, TARGET, \
+    VARIABLE, PREFERENCE_SET_NAME, Loss, SUM
 from psyneulink.core.globals.parameters import Parameter, check_user_specified
 from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set, REPORT_OUTPUT_PREF
 from psyneulink.core.globals.preferences.preferenceset import PreferenceEntry, PreferenceLevel
@@ -160,10 +161,13 @@
     is_numeric, is_value_spec, iscompatible, kwCompatibilityLength, kwCompatibilityNumeric, recursive_update
 from psyneulink.core.globals.utilities import safe_len

-__all__ = [
-    'ComparatorMechanism', 'ComparatorMechanismError'
-]
+__all__ = ['ComparatorMechanism', 'ComparatorMechanismError', 'MSE', 'SSE', 'L0', 'L1', 'CROSS_ENTROPY']

+MSE = Loss.MSE.name
+SSE = Loss.SSE.name
+L0 = Loss.L0.name
+L1 = Loss.L1.name
+CROSS_ENTROPY = Loss.CROSS_ENTROPY.name
 class ComparatorMechanismError(Exception):
     def __init__(self, error_value):
@@ -245,13 +249,19 @@ class ComparatorMechanism(ObjectiveMechanism):

     .. _COMPARATOR_MECHANISM_SSE

+    *SUM*
+        the sum of the terms in the array returned by the Mechanism's function.
+
     *SSE*
-        the value of the sum squared error of the Mechanism's function
+        the sum of squares of the terms in the array returned by the Mechanism's function.

     .. _COMPARATOR_MECHANISM_MSE

     *MSE*
-        the value of the mean squared error of the Mechanism's function
+        the mean of the squares of the terms returned by the Mechanism's function.

     """

     componentType = COMPARATOR_MECHANISM
@@ -316,12 +326,15 @@ class Parameters(ObjectiveMechanism.Parameters):
     #                    ComparatorMechanism parameter and control signal assignments):
     standard_output_ports = ObjectiveMechanism.standard_output_ports.copy()
-    standard_output_ports.extend([{NAME: SSE,
+    standard_output_ports.extend([{NAME: SUM,
+                                   FUNCTION: lambda x: np.sum(x)},
+                                  {NAME: SSE,
                                    FUNCTION: lambda x: np.sum(x * x)},
                                   {NAME: MSE,
-                                   FUNCTION: lambda x: np.sum(x * x) / safe_len(x)}])
+                                   FUNCTION: lambda x: np.sum(x * x) / safe_len(x)}]
+                                 )
     standard_output_port_names = ObjectiveMechanism.standard_output_port_names.copy()
-    standard_output_port_names.extend([SSE, MSE])
+    standard_output_port_names.extend([SUM, Loss.SSE.name, Loss.MSE.name])

 @check_user_specified
 @tc.typecheck
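The three standard output computations, written out in NumPy for reference (the error vector here is hypothetical):

    >>> import numpy as np
    >>> err = np.array([0.5, -1.0, 0.25])            # sample - target
    >>> float(np.sum(err))                           # SUM
    -0.25
    >>> float(np.sum(err * err))                     # SSE
    1.3125
    >>> float(np.sum(err * err) / len(err))          # MSE
    0.4375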
diff --git a/psyneulink/library/components/projections/pathway/autoassociativeprojection.py b/psyneulink/library/components/projections/pathway/autoassociativeprojection.py
index 98c9948ca5d..1938d5e4bb8 100644
--- a/psyneulink/library/components/projections/pathway/autoassociativeprojection.py
+++ b/psyneulink/library/components/projections/pathway/autoassociativeprojection.py
@@ -382,11 +382,11 @@ def get_hetero_matrix(raw_hetero, size):
 # similar to get_hetero_matrix() above
 def get_auto_matrix(raw_auto, size):
     if isinstance(raw_auto, numbers.Number):
-        return np.diag(np.full(size, raw_auto, dtype=np.float))
+        return np.diag(np.full(size, raw_auto, dtype=float))
     elif ((isinstance(raw_auto, np.ndarray) and raw_auto.ndim == 1) or
           (isinstance(raw_auto, list) and np.array(raw_auto).ndim == 1)):
         if len(raw_auto) == 1:
-            return np.diag(np.full(size, raw_auto[0], dtype=np.float))
+            return np.diag(np.full(size, raw_auto[0], dtype=float))
         else:
             if len(raw_auto) != size:
                 return None
diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py
index 28b4e6cf81d..36ed8c4eeae 100644
--- a/psyneulink/library/compositions/autodiffcomposition.py
+++ b/psyneulink/library/compositions/autodiffcomposition.py
@@ -16,8 +16,11 @@
   * `AutodiffComposition_Overview`
   * `AutodiffComposition_Creation`
   * `AutodiffComposition_Execution`
+      - `AutodiffComposition_LLVM`
+      - `AutodiffComposition_PyTorch`
+      - `AutodiffComposition_Nested_Modulation`
       - `AutodiffComposition_Logging`
-      - `AutodiffComposition_Nested_Execution`
+  * `AutodiffComposition_Examples`
   * `AutodiffComposition_Class_Reference`

@@ -26,32 +29,64 @@
 Overview
 --------

-.. warning:: As of PsyNeuLink 0.7.5, the API for using AutodiffCompositions has been slightly changed! Please see `this link ` for more details!
+AutodiffComposition is a subclass of `Composition` for constructing and training feedforward neural networks,
+using either direct compilation (to LLVM) or automatic conversion to `PyTorch `_,
+both of which considerably accelerate training (by as much as three orders of magnitude) compared to the
+`standard implementation of learning ` in a Composition.  Although an
+AutodiffComposition is constructed and executed in much the same way as a standard Composition, it is largely
+restricted to feedforward neural networks using `supervised learning `, and in
+particular the `backpropagation learning algorithm `_, although it can be used for
+some forms of `unsupervised learning ` that are supported in PyTorch (e.g.,
+`self-organized maps `_).

-AutodiffComposition is a subclass of `Composition` used to train feedforward neural network models through integration
-with `PyTorch `_, a popular machine learning library, which executes considerably more quickly
-than using the `standard implementation of learning ` in a Composition, using its
-`learning methods `. An AutodiffComposition is configured and run similarly to a standard
-Composition, with some exceptions that are described below.
-COMMENT:
-FIX: UPDATE runtimes WITH COMPILED VERSION
-COMMENT

 .. _AutodiffComposition_Creation:

 Creating an AutodiffComposition
 -------------------------------

-An AutodiffComposition can be created by calling its constructor, and then adding `Components ` using the
-standard `Composition methods ` for doing so. The constructor also includes an number of
-parameters that are specific to the AutodiffComposition. See the for a list of these parameters.
+An AutodiffComposition can be created by calling its constructor, and then adding `Components ` using
+the standard `Composition methods ` for doing so (e.g., `add_node `,
+`add_projection `, `add_linear_processing_pathway
+`, etc.).  The constructor also includes a number of parameters that are
+specific to the AutodiffComposition (see `AutodiffComposition_Class_Reference` for a list of these parameters,
+and `examples ` below).  Note that all of the Components in an AutodiffComposition
+must be able to be subject to `learning `, but cannot include any `learning components
+` themselves.  Specifically, it cannot include any `ModulatoryMechanisms
+`, `LearningProjections `, or the `ObjectiveMechanism `
+used to compute the loss for learning.
+
+    .. _Autodiff_Learning_Components_Warning:
+    .. warning::
+        When an AutodiffComposition is constructed, it creates all of the learning Components
+        that are needed, and thus **cannot include** any that are prespecified.
+
+COMMENT:
+FIX: IS THIS STILL TRUE? SEEMS TO CONTRADICT STATEMENT BELOW:
+This means that it cannot be used with a Composition that contains any `modulatory components
+` or ones that are subject to modulation, whether by ModulatoryMechanisms within or
+outside the Composition;
+?MAYBE THE FOLLOWING IS BETTER:
+COMMENT
+This means that an AutodiffComposition also cannot itself include a `controller ` or any
+`ControlMechanisms `.  However, it can include Mechanisms that are subject to modulatory control
+(see `Figure `, and `modulation `) by ControlMechanisms
+*outside* the Composition, including the controller of a Composition within which the AutodiffComposition is nested.
+That is, an AutodiffComposition can be `nested in a Composition ` that has such other Components
+(see `AutodiffComposition_Nested_Modulation` below).

-.. warning:: Mechanisms or Projections should not be added to or deleted from an AutodiffComposition after it has
-    been run for the first time. Unlike an ordinary Composition, AutodiffComposition does not support this
-    functionality.

-.. warning:: When comparing models built in PyTorch to those using AutodiffComposition,
-    the `bias ` parameter of PyTorch modules should be set to `False`, as AutodiffComposition does not currently support trainable biases.
+A few other restrictions apply to the construction and modification of AutodiffCompositions:

+    .. hint:: AutodiffComposition does not (currently) support the *automatic* construction of separate bias
+       parameters.  Thus, when comparing a model constructed using an AutodiffComposition to a corresponding model
+       in PyTorch, the `bias ` parameter of PyTorch
+       modules should be set to `False`.  Trainable biases *can* be specified explicitly in an AutodiffComposition
+       by including a TransferMechanism that projects to the relevant Mechanism (i.e., implementing that layer of
+       the network to receive the biases) using a `MappingProjection` with a `matrix ` parameter
+       that implements a diagonal matrix with values corresponding to the initial value of the biases (see the
+       sketch below).
+
+    .. warning:: Mechanisms or Projections should not be added to or deleted from an AutodiffComposition after it
+       has been executed. Unlike an ordinary Composition, AutodiffComposition does not support this functionality.
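A minimal sketch of the bias workaround described in the hint above (all names and values are illustrative):

    >>> import numpy as np
    >>> import psyneulink as pnl
    >>> hidden = pnl.TransferMechanism(name='hidden', default_variable=np.zeros(3))
    >>> bias = pnl.TransferMechanism(name='bias', default_variable=np.ones(3))
    >>> bias_proj = pnl.MappingProjection(sender=bias, receiver=hidden,
    ...                                   matrix=np.diag([0.1, 0.1, 0.1]))   # initial bias values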
This can be achieved by executing the `learn +` method in one of two modes (specified using its **execution_mode** argument): using direct +compilation (**execution_mode** = `ExecutionMode.LLVMRun`); or by automatically translating the model to `PyTorch +`_ for training (**execution_mode** = `ExecutionMode.PyTorch`). The advantage of these modes is +that they can provide up to three orders of magnitude speed-up in training a model. However, there are restrictions +on the kinds of Compositions that be implemented in this way. The features of the different ways to implement and +execute learning are outlined in the following table, and described in more detail in `AutodiffComposition`. + TABLE: + * AutodiffComposition: + * Execute_mode.Python: + - execution: + - executes `learn ` using PyTorch + - executes `run ` using Python + - advantage: - fast (but slightly slower than direct compilation) + - disadvantage :broader support (RNN including LSTM, convnet, ?transformer?) + * Execute_mode.LLVNRun: + - execution: executes `learn ` *and* `run ` in compiled mode + - advantage: fastest (direct compilation of PNL code) + - disadvantage: but (currently) more limited; not suppored: + * RNN (including LSTM) + * convnet (though "in the wings") + * transformer + * ?external memory + * Composition: + - execution: executes `learn ` *and* `run ` in Python mode + - disadvantage: learning is extremely slow + - advantage: + - broadest support (including RL, TDLearning, Hebbian, Kohonen / SOM) + - can be used to implement effects of modulation and control during learning + - useful for examination of individual operations (e.g., for teaching purposes) +COMMENT + +.. _AutodiffComposition_PyTorch: + +*PyTorch mode* +~~~~~~~~~~~~~~ + +This is specified by setting **execution_mode = `ExecutionMode.PyTorch` in the `learn ` method of +an AutodiffCompositon (see `example ` in `BasicsAndPrimer`). This automatically +translates the AutodiffComposition to a `PyTorch `_ model and uses that for execution. This is +almost as fast as `LLVM compilation <_AutodiffComposition_LLVM>`, but provides greater flexiblity. Although it too is +best suited for use with `supervised learning `, it can also be used for some forms +of `unsupervised learning ` that are supported in PyTorch (e.g., `self-organized +maps `_). + + .. _AutodiffComposition_PyTorch_Note: + + .. note:: + While specifying `ExecutionMode.PyTorch` in the `learn ` method of an AutodiffComposition + causes it to use PyTorch for training, specifying this in the `run ` method causes it to be + executing using the *Python* interpreter (and not PyTorch); this is so that any modulation can take effect + during execution (see `AutodiffComposition_Nested_Modulation` below), which is not supported by PyTorch. + +.. technical_note:: + `ExecutionMode.PyTorch` is a synonym for `ExecutionMode.Python`, that is provided for clarity of the user interface: + the default for an AutodiffComposition (i.e., if **execution_mode** is not specified, or it is set to + `ExecutionMode.Python`) is to use PyTorch translation in `learn ` but the Python interpreter + for `run `. The use of `ExecutionMode.PyTorch` is simply to make it clear that, during learning, + it will use PyTorch. This contrasts with the use of `ExecutionMode.LLVMrun`, in which case both the `learn + ` and `run ` methods use LLVM compilation. + + +.. 
+.. _AutodiffComposition_Nested_Modulation:
+
+*Nested Execution and Modulation*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Like any other `Composition`, an AutodiffComposition may be `nested ` inside another
+(see `example ` below).  However, during learning, none of the internal
+Components of the AutodiffComposition (e.g., intermediate layers of a neural network model) are accessible to the
+other Components of the outer Composition (e.g., as sources of information, or for `modulation
+`).  However, when
+COMMENT:
+learning turned off,
+COMMENT
+it is executed using its `run ` method, the AutodiffComposition functions like any other,
+and all of its internal Components are accessible to other Components of the outer Composition.  Thus, as long as
+access to its internal Components is not needed during learning, an `AutodiffComposition` can be trained, and then
+used to execute the trained Composition like any other.
+
+
+.. _AutodiffComposition_Logging:
+
+*Logging*
+~~~~~~~~~
+
+Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `.
+However, since an AutodiffComposition internally converts all of its Mechanisms either to LLVM
+or to an equivalent PyTorch model, its inner components are not actually executed. This means that there is
+limited support for logging parameters of components inside an AutodiffComposition; currently, the only supported
+parameters are:
+
+1) the `matrix` parameter of Projections
+
+2) the `value` parameter of its inner components
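For instance (sketch; assumes the XOR model from the examples below, with ``hid_map`` as one of its Projections):

    >>> hid_map.set_log_conditions('matrix')        # enable logging of the Projection's matrix
    >>> my_autodiff.learn(inputs=input_dict)
    >>> log = hid_map.log.nparray_dictionary()      # retrieve the logged values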
+.. _AutodiffComposition_Examples:
+
+Examples
+--------
+
+.. _AutodiffComposition_Creation_Example:
+
+The following is an example showing how to create a simple AutodiffComposition, specify its inputs and targets,
+and run it with learning enabled and disabled:

 >>> import psyneulink as pnl
 >>> # Set up PsyNeuLink Components
@@ -85,25 +259,8 @@
 >>> # Run Composition in test mode
 >>> my_autodiff.run(inputs = input_dict['inputs'])

-.. _AutodiffComposition_Logging:
-
-Logging
-~~~~~~~
-
-Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `. However, since an AutodiffComposition internally converts all of its mechanisms to an equivalent PyTorch model,
-then its inner components are not actually executed. This means that there is limited support for logging parameters of components inside an AutodiffComposition;
-Currently, the only supported parameters are:
-
-1) the `matrix` parameter of Projections
-
-2) the `value` parameter of its inner components
-
-.. _AutodiffComposition_Nested_Execution:
-Nested Execution
-~~~~~~~~~~~~~~~~
-
-Like any other `Composition`, an AutodiffComposition may be `nested inside another `.
+.. _AutodiffComposition_Nested_Example:

 The following shows how the AutodiffComposition created in the previous example can be nested and run inside another
 Composition::

@@ -128,8 +285,10 @@
 """
 import logging
-
+import os
+import warnings
 import numpy as np
+from pathlib import Path, PosixPath

 try:
     import torch
@@ -142,19 +301,23 @@
 from psyneulink.library.compositions.pytorchmodelcreator import PytorchModelCreator
 from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import ComparatorMechanism
+from psyneulink.core.components.mechanisms.processing.compositioninterfacemechanism import CompositionInterfaceMechanism
+from psyneulink.core.components.mechanisms.modulatory.modulatorymechanism import ModulatoryMechanism_Base
+from psyneulink.core.components.projections.modulatory.modulatoryprojection import ModulatoryProjection_Base
 from psyneulink.core.compositions.composition import Composition, NodeRole
 from psyneulink.core.compositions.composition import CompositionError
 from psyneulink.core.compositions.report \
     import ReportOutput, ReportParams, ReportProgress, ReportSimulations, ReportDevices, \
     LEARN_REPORT, EXECUTE_REPORT, PROGRESS_REPORT
 from psyneulink.core.globals.context import Context, ContextFlags, handle_external_context
-from psyneulink.core.globals.keywords import AUTODIFF_COMPOSITION, SOFT_CLAMP
+from psyneulink.core.globals.keywords import AUTODIFF_COMPOSITION, SOFT_CLAMP, Loss
 from psyneulink.core.scheduling.scheduler import Scheduler
 from psyneulink.core.globals.parameters import Parameter, check_user_specified
 from psyneulink.core.scheduling.time import TimeScale
 from psyneulink.core import llvm as pnlvm
+

 logger = logging.getLogger(__name__)

@@ -174,14 +337,14 @@ def __str__(self):

 class AutodiffComposition(Composition):
     """
-    Subclass of `Composition` that trains models using `PyTorch `_.
-    See `Composition ` for additional arguments and attributes.
+    Subclass of `Composition` that trains models using either LLVM compilation or `PyTorch `_;
+    see `Composition ` for additional arguments and attributes.

     Arguments
     ---------

     learning_rate : float : default 0.001
-        the learning rate, which is passed to the optimizer.
+        the learning rate passed to the optimizer if none is specified in the learn method of the AutodiffComposition.

     disable_learning : bool: default False
         specifies whether the AutodiffComposition should disable learning when run in `learning mode
@@ -193,10 +356,11 @@ class AutodiffComposition(Composition):
     weight_decay : float : default 0
         specifies the L2 penalty (which discourages large weights) used by the optimizer.

-    loss_spec : str or PyTorch loss function : default 'mse'
-        specifies the loss function for training. The current string options are 'mse' (the default), 'crossentropy',
-        'l1', 'nll', 'poissonnll', and 'kldiv'. Any PyTorch loss function can work here, such as ones from
-        https://pytorch.org/docs/stable/nn.html#loss-functions
+    loss_spec : Loss or PyTorch loss function : default Loss.MSE
+        specifies the loss function for training; see `Loss` for arguments.
+
+    Attributes
+    ----------

     losses : list of floats
         tracks the average for each weight update (i.e.
each minibatch) @@ -212,7 +376,6 @@ class AutodiffComposition(Composition): componentCategory = AUTODIFF_COMPOSITION class Parameters(Composition.Parameters): - """""" optimizer = None learning_rate = Parameter(.001, fallback_default=True) losses = Parameter([]) @@ -224,16 +387,16 @@ class Parameters(Composition.Parameters): # TODO (CW 9/28/18): add compositions to registry so default arg for name is no longer needed @check_user_specified def __init__(self, + pathways=None, learning_rate=None, optimizer_type='sgd', weight_decay=0, - loss_spec='mse', + loss_spec=Loss.MSE, disable_learning=False, refresh_losses=False, disable_cuda=True, cuda_index=None, force_no_retain_graph=False, - pathways=None, name="autodiff_composition"): if not torch_available: @@ -255,6 +418,7 @@ def __init__(self, self.force_no_retain_graph = force_no_retain_graph self.loss = None self.disable_learning = disable_learning + self._runtime_learning_rate = None # keeps track of average loss per epoch self.losses = [] @@ -272,10 +436,10 @@ def __init__(self, # CLEANUP: move some of what's done in the methods below to a "validate_params" type of method @handle_external_context() - def _build_pytorch_representation(self, context=None): + def _build_pytorch_representation(self, context=None, refresh=False): if self.scheduler is None: self.scheduler = Scheduler(graph=self.graph_processing) - if self.parameters.pytorch_representation._get(context=context) is None: + if self.parameters.pytorch_representation._get(context=context) is None or refresh: model = PytorchModelCreator(composition=self, device=self.device, context=context) @@ -284,8 +448,9 @@ def _build_pytorch_representation(self, context=None): # Set up optimizer function old_opt = self.parameters.optimizer._get(context) - if old_opt is None: - opt = self._make_optimizer(self.optimizer_type, self.learning_rate, self.weight_decay, context) + learning_rate = self._runtime_learning_rate or self.learning_rate + if old_opt is None or refresh: + opt = self._make_optimizer(self.optimizer_type, learning_rate, self.weight_decay, context) self.parameters.optimizer._set(opt, context, skip_history=True, skip_log=True) # Set up loss function @@ -313,13 +478,13 @@ def _make_optimizer(self, optimizer_type, learning_rate, weight_decay, context): return optim.Adam(params, lr=learning_rate, weight_decay=weight_decay) def _get_loss(self, loss_spec): - if not isinstance(self.loss_spec, str): + if not isinstance(self.loss_spec, (str, Loss)): return self.loss_spec - elif loss_spec == 'mse': + elif loss_spec == Loss.MSE: return nn.MSELoss(reduction='mean') - elif loss_spec == 'sse': + elif loss_spec == Loss.SSE: return nn.MSELoss(reduction='sum') - elif loss_spec == 'crossentropy': + elif loss_spec == Loss.CROSS_ENTROPY: # Cross entropy loss is used for multiclass categorization and needs inputs in shape # ((# minibatch_size, C), targets) where C is a 1-d vector of probabilities for each potential category # and where target is a 1d vector of type long specifying the index to the target category. 
This
@@ -330,20 +495,20 @@ def _get_loss(self, loss_spec):
                 x.unsqueeze(0),
                 y.type(torch.LongTensor)
             )
-        elif loss_spec == 'l1':
+        elif loss_spec == Loss.L1:
             return nn.L1Loss(reduction='sum')
-        elif loss_spec == 'nll':
+        elif loss_spec == Loss.NLL:
             return nn.NLLLoss(reduction='sum')
-        elif loss_spec == 'poissonnll':
+        elif loss_spec == Loss.POISSON_NLL:
             return nn.PoissonNLLLoss(reduction='sum')
-        elif loss_spec == 'kldiv':
+        elif loss_spec == Loss.KL_DIV:
             return nn.KLDivLoss(reduction='sum')
         else:
-            raise AutodiffCompositionError("Loss type {} not recognized. Loss argument must be a string or function. "
-                                           "Currently, the recognized loss types are Mean Squared Error, Cross Entropy,"
-                                           " L1 loss, Negative Log Likelihood loss, Poisson Negative Log Likelihood, "
-                                           "and KL Divergence. These are specified as 'mse', 'crossentropy', 'l1', "
-                                           "'nll', 'poissonnll', and 'kldiv' respectively.".format(loss_spec))
+            raise AutodiffCompositionError(f"Loss type {loss_spec} not recognized. Loss argument must be a "
+                                           f"Loss enum or function. Currently, the recognized loss types are: "
+                                           f"MSE (mean squared error), SSE (sum squared error), CROSS_ENTROPY, "
+                                           f"L1, NLL (negative log likelihood), POISSON_NLL (Poisson negative "
+                                           f"log likelihood), and KL_DIV (KL divergence).")
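For example, an enum-based specification now reads (sketch; assumes ``Loss`` is imported from
psyneulink.core.globals.keywords):

    >>> my_autodiff = AutodiffComposition(loss_spec=Loss.CROSS_ENTROPY, learning_rate=0.01)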
""" optimizer = self.parameters.optimizer._get(context=context) optimizer.zero_grad() @@ -459,6 +629,18 @@ def learn(self, *args, **kwargs): if self._built_pathways is False: self.infer_backpropagation_learning_pathways() self._built_pathways = True + + if 'execution_mode' in kwargs: + execution_mode = kwargs['execution_mode'] + if execution_mode == pnlvm.ExecutionMode.Python: + raise AutodiffCompositionError(f"{self.name} is an AutodiffComposition so its learn() " + f"cannot be called with execution_mode = ExecutionMode.Python; " + f"use ExecutionMode.PyTorch or ExecutionMode.LLVMRun.") + # OK, now that the user has been advised to use ExecutionMode.PyTorch and warned *not* to ExecutionMdoe.Python, + # convert ExecutionMode.PyTorch specification to ExecutionMode.Python for internal use (nice, eh?) + if execution_mode == pnlvm.ExecutionMode.PyTorch: + kwargs['execution_mode'] = pnlvm.ExecutionMode.Python + return super().learn(*args, **kwargs) @handle_external_context() @@ -481,7 +663,7 @@ def execute(self, clamp_input=SOFT_CLAMP, targets=None, runtime_params=None, - execution_mode:pnlvm.ExecutionMode = pnlvm.ExecutionMode.Python, + execution_mode:pnlvm.ExecutionMode = pnlvm.ExecutionMode.PyTorch, skip_initialization=False, report_output:ReportOutput=ReportOutput.OFF, report_params:ReportOutput=ReportParams.OFF, @@ -559,6 +741,117 @@ def execute(self, report_num=report_num ) + @handle_external_context(fallback_most_recent=True) + def save(self, path:PosixPath=None, directory:str=None, filename:str=None, context=None): + """Saves all weight matrices for all MappingProjections in the AutodiffComposition + + Arguments + --------- + path: Path, PosixPath or str : default None + path specification; must be a legal path specification in the filesystem. + directory: str : default ``current working directory`` + directory where `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + filename: str : default ``_matrix_wts.pnl`` + filename in which `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + .. note:: + Matrices are saved in + `PyTorch state_dict `_ format. 
+
+        Return
+        ------
+        Path
+
+        """
+        if path:
+            try:
+                path = Path(path)
+            except (TypeError, ValueError):
+                raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of '{self.name}') "
+                                               f"is not a legal path.")
+        else:
+            try:
+                if directory:
+                    path = Path(directory)
+                else:
+                    path = Path(os.getcwd())
+                if filename:
+                    path = Path(os.path.join(path, filename))
+                else:
+                    path = Path(os.path.join(path, f'{self.name}_matrix_wts.pnl'))
+            except IsADirectoryError:
+                raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of '{self.name}') "
+                                               f"is not a legal path.")
+        proj_state = {
+            # p.name: p.parameters.matrix.get(context=context)
+            p.name: p.matrix.base
+            for p in self.projections
+            if not (isinstance(p, ModulatoryProjection_Base)
+                    or isinstance(p.sender.owner, CompositionInterfaceMechanism)
+                    or isinstance(p.receiver.owner, CompositionInterfaceMechanism)
+                    or isinstance(p.sender.owner, ModulatoryMechanism_Base)
+                    or isinstance(p.receiver.owner, ModulatoryMechanism_Base)
+                    or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING)
+                    or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING)
+                    )}
+        torch.save(proj_state, path)
+        return path
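An illustrative round trip between save() and load() (sketch; ``my_autodiff`` is hypothetical):

    >>> path = my_autodiff.save()       # writes ``_matrix_wts.pnl`` to the current working directory
    >>> my_autodiff.load(path)          # restores the saved weight matrices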
+ """ + if path: + if not isinstance(path,Path): + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + else: + try: + if directory: + path = Path(directory) + else: + path = Path(os.getcwd()) + if filename: + path = Path(os.path.join(path, filename)) + else: + path = Path(os.path.join(path , f'{self.name}_matrix_wts.pnl')) + except IsADirectoryError: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + state = torch.load(path) + for projection in [p for p in self.projections + if not (isinstance(p, ModulatoryProjection_Base) + or isinstance(p.sender.owner, CompositionInterfaceMechanism) + or isinstance(p.receiver.owner, CompositionInterfaceMechanism) + or isinstance(p.sender.owner, ModulatoryMechanism_Base) + or isinstance(p.receiver.owner, ModulatoryMechanism_Base) + or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING) + or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING) + )]: + matrix = state[projection.name] + if np.array(matrix).shape != projection.matrix.base.shape: + raise AutodiffCompositionError(f"Shape of matrix loaded for '{projection.name}' " + f"({np.array(matrix).shape}) " + f"does not match its shape ({projection.matrix.base.shape})") + projection.matrix.base = matrix + projection.parameters.matrix.set(matrix, context=context, override=True) + projection.parameter_ports['matrix'].parameters.value.set(matrix, context=context, override=True) + self._build_pytorch_representation(context=context, refresh=True) + # MODIFIED 11/8/22 END + def _get_state_ids(self): return super()._get_state_ids() + ["optimizer"] diff --git a/psyneulink/library/compositions/compiledloss.py b/psyneulink/library/compositions/compiledloss.py index b82fd64cd49..53d0027ad40 100644 --- a/psyneulink/library/compositions/compiledloss.py +++ b/psyneulink/library/compositions/compiledloss.py @@ -1,7 +1,7 @@ from psyneulink.core import llvm as pnlvm from psyneulink.library.compositions.pytorchllvmhelper import * -__all__ = ['MSELoss'] +__all__ = ['MSELoss', "CROSS_ENTROPYLoss"] class Loss(): @@ -33,8 +33,8 @@ def _gen_loss_function(self, ctx): # args: # 1) pointer to network output - # 2) pointer to target - # 3) dimensionality + # 2) dimensionality + # 3) pointer to target args = [ctx.float_ty.as_pointer(), ctx.int32_ty, ctx.float_ty.as_pointer()] builder = ctx.create_llvm_function(args, self, name, return_type=ctx.float_ty) value, dim, target = builder.function.args @@ -79,3 +79,67 @@ def _gen_inject_loss_differential(self, ctx, builder, value, target, output=None tmp = gen_inject_vec_sub(ctx, builder, value, target) gen_inject_vec_add(ctx, builder, output, tmp, output) return output + + +class CROSS_ENTROPYLoss(Loss): + """Implements compiled CROSS_ENTROPY Loss""" + def __init__(self, reduction='cross_entropy'): + if reduction not in ['cross_entropy']: + raise Exception("Unsupported compiled reduction type " + reduction) + + super().__init__() + self.reduction = reduction + + def _gen_loss_function(self, ctx): + name = "LEARNING_CROSS_ENTROPY_CALL" + + # args: + # 1) pointer to network output + # 2) dimensionality + # 3) pointer to target + args = [ctx.float_ty.as_pointer(), ctx.int32_ty, ctx.float_ty.as_pointer()] + builder = ctx.create_llvm_function(args, self, name, return_type=ctx.float_ty) + value, dim, target = builder.function.args + + sum = builder.alloca(ctx.float_ty) + builder.store(ctx.float_ty(-0.0), sum) + + with 
pnlvm.helpers.for_loop_zero_inc(builder, dim, "cross_entropy_sum_loop") as (b1, index): + value_ptr = b1.gep(value,[index]) + target_ptr = b1.gep(target,[index]) + target_val = b1.load(target_ptr) + log_f = ctx.get_builtin("log", [ctx.float_ty]) + log = b1.call(log_f, [target_val]) + diff = b1.fmul(b1.load(value_ptr), log) + b1.store(b1.fadd(b1.load(sum),diff),sum) + + builder.ret(builder.load(sum)) + + return builder.function + + # inserts the computation for dC/da + def _gen_inject_loss_differential(self, ctx, builder, value, target, output=None, sum_loss=False): + + # FIX: FROM MSE_LOSS -- HERE JUST AS FILLER TO GET PAST THIS METHOD DURING DEBUGGING; + # NEEDS TO BE PROPERLY IMPLEMENTED + dim = len(value.type.pointee) + assert len(target.type.pointee) == dim + if output is None: + output = builder.alloca(pnlvm.ir.types.ArrayType(ctx.float_ty, dim)) + # zero output vector + builder.store(output.type.pointee(None), output) + assert len(output.type.pointee) == dim + + if sum_loss is False: + # we take mean + gen_inject_vec_sub(ctx, builder, value, target, output) + # multiply each element i by 2/n to get dC/da_i + scalar_mult = builder.fdiv(ctx.float_ty(2), ctx.float_ty(dim)) + with pnlvm.helpers.for_loop_zero_inc(builder, ctx.int32_ty(dim), "mse_mean_mult_loop") as (b1, index): + element_ptr = b1.gep(output, [ctx.int32_ty(0), index]) + b1.store(b1.fmul(b1.load(element_ptr),scalar_mult),element_ptr) + else: + # in this case, we add the loss + tmp = gen_inject_vec_sub(ctx, builder, value, target) + gen_inject_vec_add(ctx, builder, output, tmp, output) + return output diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index d7039a1902e..601bb6b6484 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -129,6 +129,7 @@ def run_learning(self, targets: dict = None, num_trials: int = None, epochs: int = 1, + learning_rate = None, minibatch_size: int = 1, patience: int = None, min_delta: int = 0, @@ -139,17 +140,21 @@ def run_learning(self, execution_mode:pnlvm.ExecutionMode = pnlvm.ExecutionMode.Python, **kwargs): """ - Runs the composition repeatedly with the specified parameters + Runs the composition repeatedly with the specified parameters. Returns --------- Outputs from the final execution """ - if not execution_mode: + + if not (execution_mode & pnlvm.ExecutionMode.COMPILED): self._is_llvm_mode = False else: self._is_llvm_mode = True + # This is used by local learning-related methods to override the default learning_rate set at construction. 
+        self._composition._runtime_learning_rate = learning_rate
+
         # Handle function and generator inputs
         if isgeneratorfunction(inputs):
             inputs = inputs()
@@ -191,7 +196,7 @@ def run_learning(self,
             raise Exception("The minibatch size cannot be greater than the number of trials.")

         early_stopper = None
-        if patience is not None and not execution_mode:
+        if patience is not None and not self._is_llvm_mode:
             early_stopper = EarlyStopping(min_delta=min_delta, patience=patience)

         if callable(stim_input) and not isgeneratorfunction(stim_input):
diff --git a/psyneulink/library/compositions/pytorchcomponents.py b/psyneulink/library/compositions/pytorchcomponents.py
index 43122730437..e106272d91a 100644
--- a/psyneulink/library/compositions/pytorchcomponents.py
+++ b/psyneulink/library/compositions/pytorchcomponents.py
@@ -1,4 +1,4 @@
-from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU
+from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU, SoftMax
 from psyneulink.library.compositions.pytorchllvmhelper import *
 from psyneulink.core.globals.log import LogCondition
 from psyneulink.core import llvm as pnlvm
@@ -10,7 +10,8 @@ def pytorch_function_creator(function, device, context=None):
     """
     Converts a PsyNeuLink function into an equivalent PyTorch lambda function.
-    NOTE: This is needed due to PyTorch limitations (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990)
+    NOTE: This is needed due to PyTorch limitations
+    (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990)
     """
     def get_fct_param_value(param_name):
         val = function._get_current_parameter_value(
@@ -38,6 +39,10 @@ def get_fct_param_value(param_name):
         return lambda x: (torch.max(input=(x - bias), other=torch.tensor([0], device=device).double()) * gain +
                           torch.min(input=(x - bias), other=torch.tensor([0], device=device).double()) * leak)

+    elif isinstance(function, SoftMax):
+        gain = get_fct_param_value('gain')
+        # softmax over the (1-d) input, scaled by gain; torch.softmax takes (input, dim)
+        return lambda x: torch.softmax(gain * x, 0)
+
     else:
         raise Exception(f"Function {function} is not currently supported in AutodiffCompositions!")

diff --git a/psyneulink/library/compositions/pytorchmodelcreator.py b/psyneulink/library/compositions/pytorchmodelcreator.py
index af809613bf4..45dc323a792 100644
--- a/psyneulink/library/compositions/pytorchmodelcreator.py
+++ b/psyneulink/library/compositions/pytorchmodelcreator.py
@@ -5,9 +5,9 @@
 from psyneulink.core.globals.context import Context, ContextFlags, handle_external_context
 from psyneulink.core import llvm as pnlvm
 from psyneulink.library.compositions.compiledoptimizer import AdamOptimizer, SGDOptimizer
-from psyneulink.library.compositions.compiledloss import MSELoss
+from psyneulink.library.compositions.compiledloss import MSELoss, CROSS_ENTROPYLoss
 from psyneulink.library.compositions.pytorchllvmhelper import *
-from psyneulink.core.globals.keywords import TARGET_MECHANISM
+from psyneulink.core.globals.keywords import TARGET_MECHANISM, Loss
 from psyneulink.core.globals.utilities import get_deepcopy_with_shared
 from .pytorchcomponents import *

@@ -60,7 +60,8 @@ def __init__(self, composition, device, context=None):
             proj_recv.add_afferent(new_proj)
             self.projection_map[projection] = new_proj
             self.projections.append(new_proj)
-            self.params.append(new_proj.matrix)
+
+        self._regenerate_paramlist()

         c = Context()
         try:
@@ -81,6 +82,11 @@ def __init__(self, composition, device,
context=None): __deepcopy__ = get_deepcopy_with_shared(shared_types=(Component, ComponentsMeta)) + def _regenerate_paramlist(self): + self.params = nn.ParameterList() + for proj in self.projections: + self.params.append(proj.matrix) + # generates llvm function for self.forward def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): args = [ctx.get_state_struct_type(self._composition).as_pointer(), @@ -268,8 +274,10 @@ def _gen_llvm_training_function_body(self, ctx, builder, state, params, data): optimizer = self._get_compiled_optimizer() # setup loss loss_type = self._composition.loss_spec - if loss_type == 'mse': + if loss_type == Loss.MSE: loss = MSELoss() + elif loss_type == Loss.CROSS_ENTROPY: + loss = CROSS_ENTROPYLoss() else: raise Exception("LOSS TYPE", loss_type, "NOT SUPPORTED") diff --git a/requirements.txt b/requirements.txt index c113611feea..41d0a3d96b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -autograd<1.5 +autograd<1.6 graph-scheduler>=0.2.0, <1.1.2 dill<=0.32 elfi<0.8.5 @@ -6,16 +6,16 @@ graphviz<0.21.0 grpcio<1.43.0 grpcio-tools<1.43.0 llvmlite<0.40 -matplotlib<3.5.4 -modeci_mdf<0.5, >=0.3.4 -networkx<2.9 -numpy<1.21.7, >=1.17.0 -pillow<9.3.0 -pint<0.20.0 -toposort<1.8 -torch>=1.8.0, <1.12.0; (platform_machine == 'AMD64' or platform_machine == 'x86_64') and platform_python_implementation == 'CPython' and implementation_name == 'cpython' +matplotlib<3.6.4 +modeci_mdf<0.5, >=0.3.4; (platform_machine == 'AMD64' or platform_machine == 'x86_64') and platform_python_implementation == 'CPython' and implementation_name == 'cpython' +networkx<3.1 +numpy<1.22.5, >=1.17.0 +pillow<9.5.0 +pint<0.21.0 +toposort<1.10 +torch>=1.8.0, <1.14.0; (platform_machine == 'AMD64' or platform_machine == 'x86_64') and platform_python_implementation == 'CPython' and implementation_name == 'cpython' typecheck-decorator<=1.2 leabra-psyneulink<=0.3.2 rich>=10.1, <10.13 -pandas<1.4.4 -fastkde==1.0.19 +pandas<1.5.3 +fastkde>=1.0.19, <1.0.21 diff --git a/setup.cfg b/setup.cfg index fbd3d06c3bb..12d3191ded7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,6 +67,7 @@ xfail_strict = True filterwarnings = error:Creating an ndarray from ragged nested sequences \(which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes\) is deprecated.*:numpy.VisibleDeprecationWarning + error:Invalid escape sequence ignore:Multiple ParameterPorts:UserWarning [pycodestyle] diff --git a/tests/composition/autodiff_composition_matrix_wts.pnl b/tests/composition/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/tests/composition/autodiff_composition_matrix_wts.pnl differ diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 2bc81653862..a6bbdd6ca26 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -1,6 +1,6 @@ import logging import timeit as timeit - +import os import numpy as np import pytest @@ -11,10 +11,10 @@ from psyneulink.core.components.functions.nonstateful.learningfunctions import BackPropagation from psyneulink.core.compositions.composition import Composition from psyneulink.core.globals import Context -from psyneulink.core.globals.keywords import TRAINING_SET +from psyneulink.core.globals.keywords import TRAINING_SET, Loss from psyneulink.core.components.mechanisms.processing.transfermechanism import TransferMechanism from 
psyneulink.core.components.projections.pathway.mappingprojection import MappingProjection -from psyneulink.library.compositions.autodiffcomposition import AutodiffComposition +from psyneulink.library.compositions.autodiffcomposition import AutodiffComposition, AutodiffCompositionError from psyneulink.core.compositions.report import ReportOutput logger = logging.getLogger(__name__) @@ -26,36 +26,6 @@ # Unit tests for functions of AutodiffComposition class that are new (not in Composition) # or override functions in Composition -@pytest.mark.pytorch -def test_autodiff_forward(autodiff_mode): - # create xor model mechanisms and projections - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) - - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) - - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) - - hid_map = MappingProjection(matrix=np.random.rand(2,10)) - out_map = MappingProjection(matrix=np.random.rand(10,1)) - - # put the mechanisms and projections together in an autodiff composition (AC) - xor = AutodiffComposition() - - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) - - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - - outputs = xor.run(inputs=[0,0], execution_mode=autodiff_mode) - assert np.allclose(outputs, [[0.9479085241082691]]) - @pytest.mark.pytorch @pytest.mark.acconstructor class TestACConstructor: @@ -92,49 +62,51 @@ def test_report_prefs(self): # comp = AutodiffComposition() # assert comp.patience == 10 + @pytest.mark.pytorch -@pytest.mark.acmisc -class TestMiscTrainingFunctionality: +def test_autodiff_forward(autodiff_mode): + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - # test whether pytorch parameters are initialized to be identical to the Autodiff Composition's - def test_weight_initialization(self): + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - # create xor model mechanisms and projections - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + hid_map = MappingProjection(matrix=np.random.rand(2,10)) + out_map = MappingProjection(matrix=np.random.rand(10,1)) - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition() - hid_map = MappingProjection(matrix=np.random.rand(2,10)) - out_map = MappingProjection(matrix=np.random.rand(10,1)) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - # put the mechanisms and projections together in an autodiff composition (AC) - xor = AutodiffComposition() + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) + outputs = xor.run(inputs=[0,0], execution_mode=autodiff_mode) + assert np.allclose(outputs, [[0.9479085241082691]]) - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - 
xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - # mini version of xor.execute just to build up pytorch representation - xor._analyze_graph() - xor._build_pytorch_representation(context=xor.default_execution_id) - # check whether pytorch parameters are identical to projections - assert np.allclose(hid_map.parameters.matrix.get(None), - xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy()) - assert np.allclose(out_map.parameters.matrix.get(None), - xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy()) +@pytest.mark.pytorch +@pytest.mark.accorrectness +class TestTrainingCorrectness: - # test whether processing doesn't interfere with pytorch parameters after training - def test_training_then_processing(self, autodiff_mode): + # test whether xor model created as autodiff composition learns properly + @pytest.mark.benchmark(group="XOR") + @pytest.mark.parametrize( + 'eps, calls, opt, expected', [ + (100, 'single', 'adam', [[[0.09823965]], [[0.81092879]], [[0.78179557]], [[0.25593583]]]), + (50, 'multiple', 'adam', [[[0.31200036]], [[0.59406178]], [[0.60417587]], [[0.52347365]]]), + ] + ) + def test_xor_training_correctness(self, eps, calls, opt, autodiff_mode, benchmark, expected): xor_in = TransferMechanism(name='xor_in', default_variable=np.zeros(2)) @@ -146,10 +118,11 @@ def test_training_then_processing(self, autodiff_mode): default_variable=np.zeros(1), function=Logistic()) - hid_map = MappingProjection() - out_map = MappingProjection() + hid_map = MappingProjection(matrix=np.random.rand(2, 10)) + out_map = MappingProjection(matrix=np.random.rand(10, 1)) - xor = AutodiffComposition() + xor = AutodiffComposition(optimizer_type=opt, + learning_rate=0.1) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -159,495 +132,200 @@ def test_training_then_processing(self, autodiff_mode): xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + [[0, 0], [0, 1], [1, 0], [1, 1]]) xor_targets = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + [[0], [1], [1], [0]]) - # train model for a few epochs - # results_before_proc = xor.run(inputs={xor_in:xor_inputs}, - # targets={xor_out:xor_targets}, - # epochs=10) - results_before_proc = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, - execution_mode=autodiff_mode) + if calls == 'single': + results = benchmark(xor.learn, inputs={"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": eps}, execution_mode=autodiff_mode) - # get weight parameters from pytorch - pt_weights_hid_bp = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() - pt_weights_out_bp = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() + else: + input_dict = {"inputs": {xor_in: xor_inputs}, + "targets": {xor_out: xor_targets}, + "epochs": 1} + for i in range(eps - 1): + xor.learn(inputs=input_dict, execution_mode=autodiff_mode) + results = benchmark(xor.learn, inputs=input_dict, execution_mode=autodiff_mode) + + assert len(results) == len(expected) + for r, t in zip(results, expected): + assert np.allclose(r[0], t) - #KAM temporarily removed -- will reimplement when pytorch weights can be used in pure PNL execution - # do processing on a few inputs - # results_proc = xor.run(inputs={xor_in:xor_inputs}) - # results_proc = 
xor.run(inputs={"inputs": {xor_in:xor_inputs}}) - # - # # get weight parameters from pytorch - # pt_weights_hid_ap = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() - # pt_weights_out_ap = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() - # - # # check that weight parameters before and after processing are the same - # assert np.allclose(pt_weights_hid_bp, pt_weights_hid_ap) - # assert np.allclose(pt_weights_out_bp, pt_weights_out_ap) + # tests whether semantic network created as autodiff composition learns properly + @pytest.mark.benchmark(group="Semantic net") @pytest.mark.parametrize( - 'loss', ['l1', 'poissonnll'] + 'eps, opt', [ + (50, 'adam'), + ] ) - def test_various_loss_specs(self, loss, autodiff_mode): - if autodiff_mode is not pnl.ExecutionMode.Python: - pytest.skip("Loss spec not yet implemented!") + def test_semantic_net_training_correctness(self, eps, opt, autodiff_mode, benchmark): - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + # MECHANISMS FOR SEMANTIC NET: - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + nouns_in = TransferMechanism(name="nouns_input", + default_variable=np.zeros(8)) - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + rels_in = TransferMechanism(name="rels_input", + default_variable=np.zeros(3)) - hid_map = MappingProjection() - out_map = MappingProjection() + h1 = TransferMechanism(name="hidden_nouns", + default_variable=np.zeros(8), + function=Logistic()) - xor = AutodiffComposition(loss_spec=loss) + h2 = TransferMechanism(name="hidden_mixed", + default_variable=np.zeros(15), + function=Logistic()) - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) + out_sig_I = TransferMechanism(name="sig_outs_I", + default_variable=np.zeros(8), + function=Logistic()) - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + out_sig_is = TransferMechanism(name="sig_outs_is", + default_variable=np.zeros(12), + function=Logistic()) - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + out_sig_has = TransferMechanism(name="sig_outs_has", + default_variable=np.zeros(9), + function=Logistic()) - xor_targets = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + out_sig_can = TransferMechanism(name="sig_outs_can", + default_variable=np.zeros(9), + function=Logistic()) - xor.learn(inputs = {"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) + # SET UP PROJECTIONS FOR SEMANTIC NET - def test_pytorch_loss_spec(self, autodiff_mode): - if autodiff_mode is not pnl.ExecutionMode.Python: - pytest.skip("Loss spec not yet implemented!") + map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), + name="map_nouns_h1", + sender=nouns_in, + receiver=h1) - import torch - ls = torch.nn.SoftMarginLoss(reduction='sum') + map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), + name="map_relh2", + sender=rels_in, + receiver=h2) - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + map_h1_h2 = MappingProjection(matrix=np.random.rand(8,15), + name="map_h1_h2", + sender=h1, + receiver=h2) - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + map_h2_I = 
MappingProjection(matrix=np.random.rand(15,8), + name="map_h2_I", + sender=h2, + receiver=out_sig_I) - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + map_h2_is = MappingProjection(matrix=np.random.rand(15,12), + name="map_h2_is", + sender=h2, + receiver=out_sig_is) - hid_map = MappingProjection() - out_map = MappingProjection() + map_h2_has = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_has", + sender=h2, + receiver=out_sig_has) - xor = AutodiffComposition(loss_spec=ls) + map_h2_can = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_can", + sender=h2, + receiver=out_sig_can) - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) - - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], [0, 1], [1, 0], [1, 1]]) - - xor_targets = np.array( # the outputs we wish to see from the model - [[0], [1], [1], [0]]) - - xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) - xor.learn(inputs={"inputs": {xor_in: xor_inputs}, - "targets": {xor_out: xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) - - - @pytest.mark.benchmark(group="Optimizer specs") - @pytest.mark.parametrize( - 'learning_rate, weight_decay, optimizer_type, expected', [ - (10, 0, 'sgd', [[[0.9863038667851067]], [[0.9944287263151904]], [[0.9934801466163382]], [[0.9979153035411085]]]), - (1.5, 1, 'sgd', [[[0.33226742]], [[0.4492334]], [[0.75459534]], [[0.44477028]]]), - (1.5, 1, 'adam', [[[0.43109927]], [[0.33088828]], [[0.40094236]], [[0.57104689]]]), - ] - ) - def test_optimizer_specs(self, learning_rate, weight_decay, optimizer_type, expected, autodiff_mode, benchmark): - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) - - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) - - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) - - hid_map = MappingProjection() - out_map = MappingProjection() - - xor = AutodiffComposition(learning_rate=learning_rate, - optimizer_type=optimizer_type, - weight_decay=weight_decay) - - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) - - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], [0, 1], [1, 0], [1, 1]]) - - xor_targets = np.array( # the outputs we wish to see from the model - [[0], [1], [1], [0]]) - - # train model for a few epochs - # results_before_proc = xor.run(inputs={xor_in:xor_inputs}, - # targets={xor_out:xor_targets}, - # epochs=10) - results_before_proc = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) - - # fp32 results are different due to rounding - if pytest.helpers.llvm_current_fp_precision() == 'fp32' and \ - autodiff_mode != pnl.ExecutionMode.Python and \ - optimizer_type == 'sgd' and \ - learning_rate == 10: - expected = [[[0.9918830394744873]], [[0.9982172846794128]], [[0.9978305697441101]], [[0.9994590878486633]]] - # FIXME: LLVM version is broken with learning rate == 1.5 - if learning_rate != 1.5 or autodiff_mode == 
pnl.ExecutionMode.Python: - assert np.allclose(results_before_proc, expected) - - if benchmark.enabled: - benchmark(xor.learn, inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) - - - # test whether pytorch parameters and projections are kept separate (at diff. places in memory) - def test_params_stay_separate(self, autodiff_mode): - if autodiff_mode is not pnl.ExecutionMode.Python: - pytest.skip("Compiled weights are always copied back!") - - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) - - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) - - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) - - hid_m = np.random.rand(2,10) - out_m = np.random.rand(10,1) - - hid_map = MappingProjection(name='hid_map', - matrix=hid_m.copy(), - sender=xor_in, - receiver=xor_hid) - - out_map = MappingProjection(name='out_map', - matrix=out_m.copy(), - sender=xor_hid, - receiver=xor_out) - - xor = AutodiffComposition(learning_rate=10.0, - optimizer_type="sgd") - - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) - - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], [0, 1], [1, 0], [1, 1]]) - - xor_targets = np.array( # the outputs we wish to see from the model - [[0], [1], [1], [0]]) - - # train the model for a few epochs - result = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, execution_mode=autodiff_mode) + # COMPOSITION FOR SEMANTIC NET + sem_net = AutodiffComposition(optimizer_type=opt, learning_rate=.001) - # get weight parameters from pytorch - pt_weights_hid = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() - pt_weights_out = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() + sem_net.add_node(nouns_in) + sem_net.add_node(rels_in) + sem_net.add_node(h1) + sem_net.add_node(h2) + sem_net.add_node(out_sig_I) + sem_net.add_node(out_sig_is) + sem_net.add_node(out_sig_has) + sem_net.add_node(out_sig_can) - # assert that projections are still what they were initialized as - assert np.allclose(hid_map.parameters.matrix.get(None), hid_m) - assert np.allclose(out_map.parameters.matrix.get(None), out_m) + sem_net.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) + sem_net.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) + sem_net.add_projection(sender=h1, projection=map_h1_h2, receiver=h2) + sem_net.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) + sem_net.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) + sem_net.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) + sem_net.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) - # assert that projections didn't change during training with the pytorch - # parameters (they should now be different) - assert not np.allclose(pt_weights_hid, hid_map.parameters.matrix.get(None)) - assert not np.allclose(pt_weights_out, out_map.parameters.matrix.get(None)) + # INPUTS & OUTPUTS FOR SEMANTIC NET: -@pytest.mark.pytorch -@pytest.mark.accorrectness -class TestTrainingCorrectness: + nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 
'sunfish'] + relations = ['is', 'has', 'can'] + is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', + 'yellow'] + has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] + can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] - # test whether xor model created as autodiff composition learns properly - @pytest.mark.benchmark(group="XOR") - @pytest.mark.parametrize( - 'eps, calls, opt, expected', [ - (100, 'single', 'adam', [[[0.09823965]], [[0.81092879]], [[0.78179557]], [[0.25593583]]]), - (50, 'multiple', 'adam', [[[0.31200036]], [[0.59406178]], [[0.60417587]], [[0.52347365]]]), - ] - ) - def test_xor_training_correctness(self, eps, calls, opt, autodiff_mode, benchmark, expected): - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + nouns_input = np.identity(len(nouns)) - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + rels_input = np.identity(len(relations)) - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + truth_nouns = np.identity(len(nouns)) - hid_map = MappingProjection(matrix=np.random.rand(2, 10)) - out_map = MappingProjection(matrix=np.random.rand(10, 1)) + truth_is = np.zeros((len(nouns), len(is_list))) - xor = AutodiffComposition(optimizer_type=opt, - learning_rate=0.1) + truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0] + truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0] + truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0] + truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1] + truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] + truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0] + truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0] + truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0] - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) + truth_has = np.zeros((len(nouns), len(has_list))) - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] + truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] + truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] + truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] + truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] + truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] + truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] + truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], [0, 1], [1, 0], [1, 1]]) + truth_can = np.zeros((len(nouns), len(can_list))) - xor_targets = np.array( # the outputs we wish to see from the model - [[0], [1], [1], [0]]) + truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] + truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] + truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] + truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - if calls == 'single': - results = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": eps}, execution_mode=autodiff_mode) + # SETTING UP DICTIONARY OF INPUTS/OUTPUTS FOR SEMANTIC NET - else: - input_dict = {"inputs": {xor_in: xor_inputs}, - "targets": {xor_out: 
xor_targets}, - "epochs": 1} - for i in range(eps): - results = xor.learn(inputs=input_dict, execution_mode=autodiff_mode) + inputs_dict = {} + inputs_dict[nouns_in] = [] + inputs_dict[rels_in] = [] - assert len(results) == len(expected) - for r, t in zip(results, expected): - assert np.allclose(r[0], t) + targets_dict = {} + targets_dict[out_sig_I] = [] + targets_dict[out_sig_is] = [] + targets_dict[out_sig_has] = [] + targets_dict[out_sig_can] = [] - if benchmark.enabled: - benchmark(xor.learn, inputs={"inputs": {xor_in: xor_inputs}, - "targets": {xor_out: xor_targets}, - "epochs": eps}, execution_mode=autodiff_mode) + for i in range(len(nouns)): + for j in range(len(relations)): + inputs_dict[nouns_in].append(nouns_input[i]) + inputs_dict[rels_in].append(rels_input[j]) + targets_dict[out_sig_I].append(truth_nouns[i]) + targets_dict[out_sig_is].append(truth_is[i]) + targets_dict[out_sig_has].append(truth_has[i]) + targets_dict[out_sig_can].append(truth_can[i]) - - # tests whether semantic network created as autodiff composition learns properly - @pytest.mark.benchmark(group="Semantic net") - @pytest.mark.parametrize( - 'eps, opt', [ - (50, 'adam'), - ] - ) - def test_semantic_net_training_correctness(self, eps, opt, autodiff_mode, benchmark): - - # MECHANISMS FOR SEMANTIC NET: - - nouns_in = TransferMechanism(name="nouns_input", - default_variable=np.zeros(8)) - - rels_in = TransferMechanism(name="rels_input", - default_variable=np.zeros(3)) - - h1 = TransferMechanism(name="hidden_nouns", - default_variable=np.zeros(8), - function=Logistic()) - - h2 = TransferMechanism(name="hidden_mixed", - default_variable=np.zeros(15), - function=Logistic()) - - out_sig_I = TransferMechanism(name="sig_outs_I", - default_variable=np.zeros(8), - function=Logistic()) - - out_sig_is = TransferMechanism(name="sig_outs_is", - default_variable=np.zeros(12), - function=Logistic()) - - out_sig_has = TransferMechanism(name="sig_outs_has", - default_variable=np.zeros(9), - function=Logistic()) - - out_sig_can = TransferMechanism(name="sig_outs_can", - default_variable=np.zeros(9), - function=Logistic()) - - # SET UP PROJECTIONS FOR SEMANTIC NET - - map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), - name="map_nouns_h1", - sender=nouns_in, - receiver=h1) - - map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), - name="map_relh2", - sender=rels_in, - receiver=h2) - - map_h1_h2 = MappingProjection(matrix=np.random.rand(8,15), - name="map_h1_h2", - sender=h1, - receiver=h2) - - map_h2_I = MappingProjection(matrix=np.random.rand(15,8), - name="map_h2_I", - sender=h2, - receiver=out_sig_I) - - map_h2_is = MappingProjection(matrix=np.random.rand(15,12), - name="map_h2_is", - sender=h2, - receiver=out_sig_is) - - map_h2_has = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_has", - sender=h2, - receiver=out_sig_has) - - map_h2_can = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_can", - sender=h2, - receiver=out_sig_can) - - # COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(optimizer_type=opt, learning_rate=.001) - - sem_net.add_node(nouns_in) - sem_net.add_node(rels_in) - sem_net.add_node(h1) - sem_net.add_node(h2) - sem_net.add_node(out_sig_I) - sem_net.add_node(out_sig_is) - sem_net.add_node(out_sig_has) - sem_net.add_node(out_sig_can) - - sem_net.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) - sem_net.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) - sem_net.add_projection(sender=h1, projection=map_h1_h2, 
receiver=h2) - sem_net.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) - sem_net.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) - sem_net.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) - sem_net.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) - - # INPUTS & OUTPUTS FOR SEMANTIC NET: - - nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 'sunfish'] - relations = ['is', 'has', 'can'] - is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', - 'yellow'] - has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] - can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] - - nouns_input = np.identity(len(nouns)) - - rels_input = np.identity(len(relations)) - - truth_nouns = np.identity(len(nouns)) - - truth_is = np.zeros((len(nouns), len(is_list))) - - truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0] - truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0] - truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0] - truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1] - truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] - truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0] - truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0] - truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0] - - truth_has = np.zeros((len(nouns), len(has_list))) - - truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - - truth_can = np.zeros((len(nouns), len(can_list))) - - truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - - # SETTING UP DICTIONARY OF INPUTS/OUTPUTS FOR SEMANTIC NET - - inputs_dict = {} - inputs_dict[nouns_in] = [] - inputs_dict[rels_in] = [] - - targets_dict = {} - targets_dict[out_sig_I] = [] - targets_dict[out_sig_is] = [] - targets_dict[out_sig_has] = [] - targets_dict[out_sig_can] = [] - - for i in range(len(nouns)): - for j in range(len(relations)): - inputs_dict[nouns_in].append(nouns_input[i]) - inputs_dict[rels_in].append(rels_input[j]) - targets_dict[out_sig_I].append(truth_nouns[i]) - targets_dict[out_sig_is].append(truth_is[i]) - targets_dict[out_sig_has].append(truth_has[i]) - targets_dict[out_sig_can].append(truth_can[i]) - - # TRAIN THE MODEL - results = sem_net.learn(inputs={'inputs': inputs_dict, - 'targets': targets_dict, - 'epochs': eps}, execution_mode=autodiff_mode) + # TRAIN THE MODEL + results = benchmark(sem_net.learn, inputs={'inputs': inputs_dict, + 'targets': targets_dict, + 'epochs': eps}, execution_mode=autodiff_mode) # CHECK CORRECTNESS expected = [[[0.13455769, 0.12924714, 0.13288172, 0.1404659 , 0.14305814, @@ -775,10 +453,6 @@ def test_semantic_net_training_correctness(self, eps, opt, autodiff_mode, benchm for res, exp in zip(results, expected): for r, 
e in zip(res, exp): assert np.allclose(r, e) - if benchmark.enabled: - benchmark(sem_net.learn, inputs={'inputs': inputs_dict, - 'targets': targets_dict, - 'epochs': eps}, execution_mode=autodiff_mode) def test_pytorch_equivalence_with_autodiff_composition(self, autodiff_mode): iSs = np.array( @@ -1101,212 +775,729 @@ def test_pytorch_equivalence_with_autodiff_training_disabled_on_proj(self): D_h = nh D_o = nf * nd - np.random.seed(0) + np.random.seed(0) + + wih = np.random.rand(D_i, D_h) * 0.02 - 0.01 + wch = np.random.rand(D_c, D_h) * 0.02 - 0.01 + wco = np.random.rand(D_c, D_o) * 0.02 - 0.01 + who = np.random.rand(D_h, D_o) * 0.02 - 0.01 + + patience = 10 + min_delt = 0.00001 + learning_rate = 100 + + il = TransferMechanism(size=D_i, name='input') + cl = TransferMechanism(size=D_c, name='control') + hl = TransferMechanism(size=D_h, name='hidden', + function=Logistic(bias=-2)) + ol = TransferMechanism(size=D_o, name='output', + function=Logistic(bias=-2)) + + input_set = { + 'inputs': { + il: iSs, + cl: cSs + }, + 'targets': { + ol: oSs + } + } + + pih = MappingProjection(matrix=wih) + pch = MappingProjection(matrix=wch) + pco = MappingProjection(matrix=wco) + pho = MappingProjection(matrix=who, learnable=False) + + mnet = AutodiffComposition(learning_rate=learning_rate) + + mnet.add_node(il) + mnet.add_node(cl) + mnet.add_node(hl) + mnet.add_node(ol) + mnet.add_projection(projection=pih, sender=il, receiver=hl) + mnet.add_projection(projection=pch, sender=cl, receiver=hl) + mnet.add_projection(projection=pco, sender=cl, receiver=ol) + mnet.add_projection(projection=pho, sender=hl, receiver=ol) + + + mnet.learn( + inputs=input_set, + minibatch_size=1, + patience=patience, + min_delta=min_delt + ) + + print(mnet.parameters.results.get(mnet)) + mnet.run( + inputs=input_set['inputs'], + ) + + output = np.array(mnet.parameters.results.get(mnet)[-15:]).reshape(225) + + comparator = np.array([0.10284232, 0.31514028, 0.10299414, 0.10164745, 0.10363132, + 0.10164711, 0.10305342, 0.10162935, 0.10363974, 0.10175142, + 0.10256631, 0.10194203, 0.10386363, 0.10445295, 0.10228054, + 0.31140432, 0.10257346, 0.10279541, 0.1015088, 0.10408029, + 0.10167408, 0.10260046, 0.10208146, 0.10258093, 0.10188455, + 0.10239721, 0.10162553, 0.10376681, 0.10523887, 0.10231788, + 0.08327345, 0.08337342, 0.0835557, 0.0828431, 0.08364569, + 0.08285296, 0.21721269, 0.15223454, 0.12355195, 0.08328209, + 0.08321026, 0.08318614, 0.08401372, 0.08443127, 0.08355132, + 0.10225081, 0.10250866, 0.1032809, 0.10216374, 0.3212671, + 0.10171002, 0.10223842, 0.10279202, 0.10348979, 0.102771, + 0.10200755, 0.10137874, 0.10408875, 0.10449553, 0.10241774, + 0.10293344, 0.10201894, 0.10300561, 0.10239243, 0.10291971, + 0.10242151, 0.10280451, 0.10199619, 0.10344362, 0.10265052, + 0.1030072, 0.31077573, 0.10299222, 0.10510338, 0.10226066, + 0.08338644, 0.08334018, 0.08376527, 0.08334996, 0.08397464, + 0.08293792, 0.08313457, 0.08310839, 0.08409815, 0.08289795, + 0.08348748, 0.08323742, 0.35242194, 0.22024544, 0.08337309, + 0.09164643, 0.09135997, 0.09195332, 0.09117354, 0.15678808, + 0.25366357, 0.09192788, 0.09090009, 0.09173747, 0.09161069, + 0.09198699, 0.09058125, 0.09191367, 0.09321109, 0.09121469, + 0.09163069, 0.09134816, 0.09194396, 0.09114014, 0.15678652, + 0.2536617, 0.09192093, 0.09089337, 0.09171399, 0.09160125, + 0.09198645, 0.09058312, 0.09191372, 0.09321296, 0.09118975, + 0.10222919, 0.1017347, 0.10354281, 0.10158797, 0.1038858, + 0.10181702, 0.10269418, 0.10235615, 0.10275149, 0.31305784, + 0.1030191, 0.10225646, 
0.10283817, 0.10411466, 0.10244074, + 0.10203665, 0.10201294, 0.10314981, 0.10192659, 0.10328009, + 0.10265024, 0.1021864, 0.10181551, 0.1026119, 0.10268809, + 0.10219657, 0.10172481, 0.32032955, 0.104648, 0.10248389, + 0.08325538, 0.08334755, 0.08355319, 0.08281158, 0.08365688, + 0.08285309, 0.21719442, 0.15221967, 0.12351983, 0.08326486, + 0.08321615, 0.08318119, 0.08400558, 0.0844217, 0.08352901, + 0.08326998, 0.08336743, 0.08356269, 0.08283862, 0.08365061, + 0.08286179, 0.21723635, 0.15221801, 0.12355236, 0.08327687, + 0.08322325, 0.08318282, 0.08401041, 0.08442231, 0.0835505, + 0.0833958, 0.08335006, 0.08376891, 0.08336972, 0.08397432, + 0.08294199, 0.08314709, 0.08311359, 0.0841146, 0.08291036, + 0.08349533, 0.08323479, 0.35241473, 0.22023965, 0.08338647, + 0.10243648, 0.10270733, 0.10287204, 0.10181676, 0.10309494, + 0.10208003, 0.10258352, 0.10279328, 0.10355093, 0.10241994, + 0.31674582, 0.10140157, 0.10286999, 0.10426361, 0.1018871, + 0.08337424, 0.08333415, 0.08376191, 0.08333433, 0.08398008, + 0.08293781, 0.08313539, 0.08310112, 0.08409653, 0.08289441, + 0.08348761, 0.08323367, 0.35237628, 0.22024095, 0.08336799]) + + assert np.allclose(output, comparator) + + +@pytest.mark.pytorch +@pytest.mark.acidenticalness +class TestTrainingIdenticalness(): + + @pytest.mark.parametrize( + 'eps, opt', [ + # (1, 'sgd'), + (10, 'sgd'), + # (40, 'sgd') + ] + ) + def test_semantic_net_training_identicalness(self, eps, opt): + # SET UP MECHANISMS FOR SEMANTIC NET: + + nouns_in = TransferMechanism(name="nouns_input", + default_variable=np.zeros(8)) + + rels_in = TransferMechanism(name="rels_input", + default_variable=np.zeros(3)) + + h1 = TransferMechanism(name="hidden_nouns", + default_variable=np.zeros(8), + function=Logistic()) + + h2 = TransferMechanism(name="hidden_mixed", + default_variable=np.zeros(15), + function=Logistic()) + + out_sig_I = TransferMechanism(name="sig_outs_I", + default_variable=np.zeros(8), + function=Logistic()) + + out_sig_is = TransferMechanism(name="sig_outs_is", + default_variable=np.zeros(12), + function=Logistic()) + + out_sig_has = TransferMechanism(name="sig_outs_has", + default_variable=np.zeros(9), + function=Logistic()) + + out_sig_can = TransferMechanism(name="sig_outs_can", + default_variable=np.zeros(9), + function=Logistic()) + + # SET UP MECHANISMS FOR Composition + + nouns_in_comp = TransferMechanism(name="nouns_input_comp", + default_variable=np.zeros(8)) + + rels_in_comp = TransferMechanism(name="rels_input_comp", + default_variable=np.zeros(3)) + + h1_comp = TransferMechanism(name="hidden_nouns_comp", + default_variable=np.zeros(8), + function=Logistic()) + + h2_comp = TransferMechanism(name="hidden_mixed_comp", + default_variable=np.zeros(15), + function=Logistic()) + + out_sig_I_comp = TransferMechanism(name="sig_outs_I_comp", + default_variable=np.zeros(8), + function=Logistic()) + + out_sig_is_comp = TransferMechanism(name="sig_outs_is_comp", + default_variable=np.zeros(12), + function=Logistic()) + + out_sig_has_comp = TransferMechanism(name="sig_outs_has_comp", + default_variable=np.zeros(9), + function=Logistic()) + + out_sig_can_comp = TransferMechanism(name="sig_outs_can_comp", + default_variable=np.zeros(9), + function=Logistic()) + + # SET UP PROJECTIONS FOR SEMANTIC NET + + map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), + name="map_nouns_h1", + sender=nouns_in, + receiver=h1) + + map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), + name="map_relh2", + sender=rels_in, + receiver=h2) + + map_h1_h2 = 
MappingProjection(matrix=np.random.rand(8,15), + name="map_h1_h2", + sender=h1, + receiver=h2) + + map_h2_I = MappingProjection(matrix=np.random.rand(15,8), + name="map_h2_I", + sender=h2, + receiver=out_sig_I) + + map_h2_is = MappingProjection(matrix=np.random.rand(15,12), + name="map_h2_is", + sender=h2, + receiver=out_sig_is) + + map_h2_has = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_has", + sender=h2, + receiver=out_sig_has) + + map_h2_can = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_can", + sender=h2, + receiver=out_sig_can) + + # SET UP PROJECTIONS FOR COMPOSITION + + map_nouns_h1_comp = MappingProjection(matrix=map_nouns_h1.matrix.base.copy(), + name="map_nouns_h1_comp", + sender=nouns_in_comp, + receiver=h1_comp) + + map_rels_h2_comp = MappingProjection(matrix=map_rels_h2.matrix.base.copy(), + name="map_relh2_comp", + sender=rels_in_comp, + receiver=h2_comp) + + map_h1_h2_comp = MappingProjection(matrix=map_h1_h2.matrix.base.copy(), + name="map_h1_h2_comp", + sender=h1_comp, + receiver=h2_comp) + + map_h2_I_comp = MappingProjection(matrix=map_h2_I.matrix.base.copy(), + name="map_h2_I_comp", + sender=h2_comp, + receiver=out_sig_I_comp) + + map_h2_is_comp = MappingProjection(matrix=map_h2_is.matrix.base.copy(), + name="map_h2_is_comp", + sender=h2_comp, + receiver=out_sig_is_comp) + + map_h2_has_comp = MappingProjection(matrix=map_h2_has.matrix.base.copy(), + name="map_h2_has_comp", + sender=h2_comp, + receiver=out_sig_has_comp) + + map_h2_can_comp = MappingProjection(matrix=map_h2_can.matrix.base.copy(), + name="map_h2_can_comp", + sender=h2_comp, + receiver=out_sig_can_comp) + + # SET UP AUTODIFFCOMPOSITION FOR SEMANTIC NET + sem_net_autodiff = AutodiffComposition(learning_rate=0.5, + optimizer_type=opt, + ) + + sem_net_autodiff.add_node(nouns_in) + sem_net_autodiff.add_node(rels_in) + sem_net_autodiff.add_node(h1) + sem_net_autodiff.add_node(h2) + sem_net_autodiff.add_node(out_sig_I) + sem_net_autodiff.add_node(out_sig_is) + sem_net_autodiff.add_node(out_sig_has) + sem_net_autodiff.add_node(out_sig_can) + + sem_net_autodiff.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) + sem_net_autodiff.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) + sem_net_autodiff.add_projection(sender=h1, projection=map_h1_h2, receiver=h2) + sem_net_autodiff.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) + sem_net_autodiff.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) + sem_net_autodiff.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) + sem_net_autodiff.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) + # INPUTS & OUTPUTS FOR SEMANTIC NET: + + nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 'sunfish'] + relations = ['is', 'has', 'can'] + is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', + 'yellow'] + has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] + can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] + + nouns_input = np.identity(len(nouns)) + + rels_input = np.identity(len(relations)) + + truth_nouns = np.identity(len(nouns)) + + truth_is = np.zeros((len(nouns), len(is_list))) + + truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] + truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] + truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 
0]
+        truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
+        truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1]
+        truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1]
+        truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0]
+        truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0]
+
+        truth_has = np.zeros((len(nouns), len(has_list)))
+
+        truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0]
+        truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0]
+        truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0]
+        truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0]
+        truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0]
+        truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0]
+        truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1]
+        truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1]
+
+        truth_can = np.zeros((len(nouns), len(can_list)))
+
+        truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1]
+        truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1]
+        truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1]
+        truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1]
+        truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0]
+        truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0]
+        truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0]
+        truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0]
+
+        # SETTING UP DICTIONARY OF INPUTS/OUTPUTS FOR SEMANTIC NET
+
+        inputs_dict = {}
+        inputs_dict[nouns_in] = []
+        inputs_dict[rels_in] = []
+
+        targets_dict = {}
+        targets_dict[out_sig_I] = []
+        targets_dict[out_sig_is] = []
+        targets_dict[out_sig_has] = []
+        targets_dict[out_sig_can] = []
+
+        for i in range(len(nouns)):
+            for j in range(len(relations)):
+                inputs_dict[nouns_in].append(nouns_input[i])
+                inputs_dict[rels_in].append(rels_input[j])
+                targets_dict[out_sig_I].append(truth_nouns[i])
+                targets_dict[out_sig_is].append(truth_is[i])
+                targets_dict[out_sig_has].append(truth_has[i])
+                targets_dict[out_sig_can].append(truth_can[i])
+
+        inputs_dict_comp = {}
+        inputs_dict_comp[nouns_in_comp] = inputs_dict[nouns_in]
+        inputs_dict_comp[rels_in_comp] = inputs_dict[rels_in]
+
+        sem_net_autodiff.run(inputs=inputs_dict)
+
+        # TRAIN AUTODIFFCOMPOSITION
+        def g_f():
+            yield {"inputs": inputs_dict,
+                   "targets": targets_dict,
+                   "epochs": eps}
+        sem_net_autodiff.learn(inputs=g_f)
+
+        # SET UP COMPOSITION
+        sem_net_comp = Composition()
+
+        backprop_pathway = sem_net_comp.add_backpropagation_learning_pathway(
+            pathway=[
+                nouns_in_comp,
+                map_nouns_h1_comp,
+                h1_comp,
+                map_h1_h2_comp,
+                h2_comp,
+                map_h2_I_comp,
+                out_sig_I_comp
+            ],
+            learning_rate=0.5
+        )
+        inputs_dict_comp[backprop_pathway.target] = targets_dict[out_sig_I]
+
+        backprop_pathway = sem_net_comp.add_backpropagation_learning_pathway(
+            pathway=[
+                rels_in_comp,
+                map_rels_h2_comp,
+                h2_comp,
+                map_h2_is_comp,
+                out_sig_is_comp
+            ],
+            learning_rate=0.5
+        )
+        inputs_dict_comp[backprop_pathway.target] = targets_dict[out_sig_is]
+
+        backprop_pathway = sem_net_comp.add_backpropagation_learning_pathway(
+            pathway=[
+                h2_comp,
+                map_h2_has_comp,
+                out_sig_has_comp
+            ],
+            learning_rate=0.5
+        )
+        inputs_dict_comp[backprop_pathway.target] = targets_dict[out_sig_has]
+
+        backprop_pathway = sem_net_comp.add_backpropagation_learning_pathway(
+            pathway=[
+                h2_comp,
+                map_h2_can_comp,
+                out_sig_can_comp
+            ],
+            learning_rate=0.5
+        )
+        inputs_dict_comp[backprop_pathway.target] = targets_dict[out_sig_can]
+
+        # TRAIN COMPOSITION
+        sem_net_comp.learn(inputs=inputs_dict_comp,
+                           num_trials=(len(inputs_dict_comp[nouns_in_comp]) * eps))
+
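Aside on `sem_net_autodiff.learn(inputs=g_f)` above: `learn` accepts the generator *function* itself and calls it internally (per the `isgeneratorfunction` handling added to autodiffcomposition.py earlier in this patch), so no generator has to be instantiated first. A minimal sketch of the interchangeable input styles, using hypothetical names `net`, `in_mech`, and `out_mech`:

    # hedged sketch -- `net`, `in_mech`, `out_mech` are hypothetical stand-ins
    input_spec = {"inputs": {in_mech: [[0, 0], [0, 1]]},
                  "targets": {out_mech: [[0], [1]]},
                  "epochs": 1}

    net.learn(inputs=input_spec)    # plain dict

    def gen_fn():
        yield input_spec            # generator function: learn() calls it itself
    net.learn(inputs=gen_fn)

    net.learn(inputs=gen_fn())      # an already-created generator also works

(`test_identicalness_of_input_types` later in this file exercises these styles and asserts that the results match.)

+        # CHECK THAT PARAMETERS FOR AUTODIFFCOMPOSITION, COMPOSITION ARE SAME
+
+        assert np.allclose(map_nouns_h1.parameters.matrix.get(sem_net_autodiff),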
map_nouns_h1_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_rels_h2.parameters.matrix.get(sem_net_autodiff), + map_rels_h2_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_h1_h2.parameters.matrix.get(sem_net_autodiff), + map_h1_h2_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_h2_I.parameters.matrix.get(sem_net_autodiff), + map_h2_I_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_h2_is.parameters.matrix.get(sem_net_autodiff), + map_h2_is_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_h2_has.parameters.matrix.get(sem_net_autodiff), + map_h2_has_comp.get_mod_matrix(sem_net_comp)) + assert np.allclose(map_h2_can.parameters.matrix.get(sem_net_autodiff), + map_h2_can_comp.get_mod_matrix(sem_net_comp)) + + def test_identicalness_of_input_types(self): + # SET UP MECHANISMS FOR COMPOSITION + from copy import copy + hid_map_mat = np.random.rand(2, 10) + out_map_mat = np.random.rand(10, 1) + xor_in_dict = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) + + xor_hid_dict = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out_dict = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + # SET UP PROJECTIONS FOR COMPOSITION + + hid_map_dict = MappingProjection(name='hid_map', + matrix=copy(hid_map_mat), + sender=xor_in_dict, + receiver=xor_hid_dict) + + out_map_dict = MappingProjection(name='out_map', + matrix=copy(out_map_mat), + sender=xor_hid_dict, + receiver=xor_out_dict) + + # SET UP COMPOSITION + + xor_dict = AutodiffComposition() + + xor_dict.add_node(xor_in_dict) + xor_dict.add_node(xor_hid_dict) + xor_dict.add_node(xor_out_dict) + + xor_dict.add_projection(sender=xor_in_dict, projection=hid_map_dict, receiver=xor_hid_dict) + xor_dict.add_projection(sender=xor_hid_dict, projection=out_map_dict, receiver=xor_out_dict) + # SET UP INPUTS AND TARGETS + + xor_inputs_dict = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + xor_targets_dict = np.array( # the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) + + input_dict = { + "inputs": { + xor_in_dict: xor_inputs_dict + }, + "targets": { + xor_out_dict: xor_targets_dict + } + } + + result_dict = xor_dict.learn(inputs=input_dict) + + # SET UP MECHANISMS FOR COMPOSITION + xor_in_func = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) + + xor_hid_func = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out_func = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + # SET UP PROJECTIONS FOR COMPOSITION + + hid_map_func = MappingProjection(name='hid_map', + matrix=copy(hid_map_mat), + sender=xor_in_func, + receiver=xor_hid_func) + + out_map_func = MappingProjection(name='out_map', + matrix=copy(out_map_mat), + sender=xor_hid_func, + receiver=xor_out_func) + + # SET UP COMPOSITION + + xor_func = AutodiffComposition() + + xor_func.add_node(xor_in_func) + xor_func.add_node(xor_hid_func) + xor_func.add_node(xor_out_func) + + xor_func.add_projection(sender=xor_in_func, projection=hid_map_func, receiver=xor_hid_func) + xor_func.add_projection(sender=xor_hid_func, projection=out_map_func, receiver=xor_out_func) - wih = np.random.rand(D_i, D_h) * 0.02 - 0.01 - wch = np.random.rand(D_c, D_h) * 0.02 - 0.01 - wco = np.random.rand(D_c, D_o) * 0.02 - 0.01 - who = np.random.rand(D_h, D_o) * 0.02 - 0.01 + # SET UP INPUTS AND TARGETS - 
patience = 10 - min_delt = 0.00001 - learning_rate = 100 + xor_inputs_func = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) - il = TransferMechanism(size=D_i, name='input') - cl = TransferMechanism(size=D_c, name='control') - hl = TransferMechanism(size=D_h, name='hidden', - function=Logistic(bias=-2)) - ol = TransferMechanism(size=D_o, name='output', - function=Logistic(bias=-2)) + xor_targets_func = np.array( # the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) - input_set = { - 'inputs': { - il: iSs, - cl: cSs - }, - 'targets': { - ol: oSs + def get_inputs(idx): + return { + "inputs": { + xor_in_func: xor_inputs_func[idx] + }, + "targets": { + xor_out_func: xor_targets_func[idx] + } } - } - pih = MappingProjection(matrix=wih) - pch = MappingProjection(matrix=wch) - pco = MappingProjection(matrix=wco) - pho = MappingProjection(matrix=who, learnable=False) + result_func = xor_func.learn(inputs=get_inputs) - mnet = AutodiffComposition(learning_rate=learning_rate) + # SET UP MECHANISMS FOR COMPOSITION + xor_in_gen = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - mnet.add_node(il) - mnet.add_node(cl) - mnet.add_node(hl) - mnet.add_node(ol) - mnet.add_projection(projection=pih, sender=il, receiver=hl) - mnet.add_projection(projection=pch, sender=cl, receiver=hl) - mnet.add_projection(projection=pco, sender=cl, receiver=ol) - mnet.add_projection(projection=pho, sender=hl, receiver=ol) + xor_hid_gen = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + xor_out_gen = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - mnet.learn( - inputs=input_set, - minibatch_size=1, - patience=patience, - min_delta=min_delt - ) + # SET UP PROJECTIONS FOR COMPOSITION - print(mnet.parameters.results.get(mnet)) - mnet.run( - inputs=input_set['inputs'], - ) + hid_map_gen = MappingProjection(name='hid_map', + matrix=copy(hid_map_mat), + sender=xor_in_gen, + receiver=xor_hid_gen) - output = np.array(mnet.parameters.results.get(mnet)[-15:]).reshape(225) + out_map_gen = MappingProjection(name='out_map', + matrix=copy(out_map_mat), + sender=xor_hid_gen, + receiver=xor_out_gen) - comparator = np.array([0.10284232, 0.31514028, 0.10299414, 0.10164745, 0.10363132, - 0.10164711, 0.10305342, 0.10162935, 0.10363974, 0.10175142, - 0.10256631, 0.10194203, 0.10386363, 0.10445295, 0.10228054, - 0.31140432, 0.10257346, 0.10279541, 0.1015088, 0.10408029, - 0.10167408, 0.10260046, 0.10208146, 0.10258093, 0.10188455, - 0.10239721, 0.10162553, 0.10376681, 0.10523887, 0.10231788, - 0.08327345, 0.08337342, 0.0835557, 0.0828431, 0.08364569, - 0.08285296, 0.21721269, 0.15223454, 0.12355195, 0.08328209, - 0.08321026, 0.08318614, 0.08401372, 0.08443127, 0.08355132, - 0.10225081, 0.10250866, 0.1032809, 0.10216374, 0.3212671, - 0.10171002, 0.10223842, 0.10279202, 0.10348979, 0.102771, - 0.10200755, 0.10137874, 0.10408875, 0.10449553, 0.10241774, - 0.10293344, 0.10201894, 0.10300561, 0.10239243, 0.10291971, - 0.10242151, 0.10280451, 0.10199619, 0.10344362, 0.10265052, - 0.1030072, 0.31077573, 0.10299222, 0.10510338, 0.10226066, - 0.08338644, 0.08334018, 0.08376527, 0.08334996, 0.08397464, - 0.08293792, 0.08313457, 0.08310839, 0.08409815, 0.08289795, - 0.08348748, 0.08323742, 0.35242194, 0.22024544, 0.08337309, - 0.09164643, 0.09135997, 0.09195332, 0.09117354, 0.15678808, - 0.25366357, 0.09192788, 0.09090009, 0.09173747, 0.09161069, - 0.09198699, 0.09058125, 0.09191367, 
0.09321109, 0.09121469, - 0.09163069, 0.09134816, 0.09194396, 0.09114014, 0.15678652, - 0.2536617, 0.09192093, 0.09089337, 0.09171399, 0.09160125, - 0.09198645, 0.09058312, 0.09191372, 0.09321296, 0.09118975, - 0.10222919, 0.1017347, 0.10354281, 0.10158797, 0.1038858, - 0.10181702, 0.10269418, 0.10235615, 0.10275149, 0.31305784, - 0.1030191, 0.10225646, 0.10283817, 0.10411466, 0.10244074, - 0.10203665, 0.10201294, 0.10314981, 0.10192659, 0.10328009, - 0.10265024, 0.1021864, 0.10181551, 0.1026119, 0.10268809, - 0.10219657, 0.10172481, 0.32032955, 0.104648, 0.10248389, - 0.08325538, 0.08334755, 0.08355319, 0.08281158, 0.08365688, - 0.08285309, 0.21719442, 0.15221967, 0.12351983, 0.08326486, - 0.08321615, 0.08318119, 0.08400558, 0.0844217, 0.08352901, - 0.08326998, 0.08336743, 0.08356269, 0.08283862, 0.08365061, - 0.08286179, 0.21723635, 0.15221801, 0.12355236, 0.08327687, - 0.08322325, 0.08318282, 0.08401041, 0.08442231, 0.0835505, - 0.0833958, 0.08335006, 0.08376891, 0.08336972, 0.08397432, - 0.08294199, 0.08314709, 0.08311359, 0.0841146, 0.08291036, - 0.08349533, 0.08323479, 0.35241473, 0.22023965, 0.08338647, - 0.10243648, 0.10270733, 0.10287204, 0.10181676, 0.10309494, - 0.10208003, 0.10258352, 0.10279328, 0.10355093, 0.10241994, - 0.31674582, 0.10140157, 0.10286999, 0.10426361, 0.1018871, - 0.08337424, 0.08333415, 0.08376191, 0.08333433, 0.08398008, - 0.08293781, 0.08313539, 0.08310112, 0.08409653, 0.08289441, - 0.08348761, 0.08323367, 0.35237628, 0.22024095, 0.08336799]) + # SET UP COMPOSITION - assert np.allclose(output, comparator) + xor_gen = AutodiffComposition() + xor_gen.add_node(xor_in_gen) + xor_gen.add_node(xor_hid_gen) + xor_gen.add_node(xor_out_gen) -@pytest.mark.pytorch -@pytest.mark.actime -class TestTrainingTime: + xor_gen.add_projection(sender=xor_in_gen, projection=hid_map_gen, receiver=xor_hid_gen) + xor_gen.add_projection(sender=xor_hid_gen, projection=out_map_gen, receiver=xor_out_gen) + # SET UP INPUTS AND TARGETS - @pytest.mark.skip - @pytest.mark.parametrize( - 'eps, opt', [ - (1, 'sgd'), - (10, 'sgd'), - (100, 'sgd') - ] - ) - def test_and_training_time(self, eps, opt,autodiff_mode): + xor_inputs_gen = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) - # SET UP MECHANISMS FOR COMPOSITION + xor_targets_gen = np.array( # the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) - and_in = TransferMechanism(name='and_in', - default_variable=np.zeros(2)) + def get_inputs_gen(): + yield { + "inputs": { + xor_in_gen: xor_inputs_gen + }, + "targets": { + xor_out_gen: xor_targets_gen + } + } - and_out = TransferMechanism(name='and_out', - default_variable=np.zeros(1), - function=Logistic()) + g = get_inputs_gen() + result_gen = xor_gen.learn(inputs=g) - # SET UP MECHANISMS FOR SYSTEM + # SET UP MECHANISMS FOR COMPOSITION + xor_in_gen_func = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - and_in_sys = TransferMechanism(name='and_in_sys', - default_variable=np.zeros(2)) + xor_hid_gen_func = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - and_out_sys = TransferMechanism(name='and_out_sys', - default_variable=np.zeros(1), - function=Logistic()) + xor_out_gen_func = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) # SET UP PROJECTIONS FOR COMPOSITION - and_map = MappingProjection(name='and_map', - matrix=np.random.rand(2, 1), - sender=and_in, - receiver=and_out) - - # SET UP PROJECTIONS FOR SYSTEM + hid_map_gen_func 
= MappingProjection(name='hid_map', + matrix=copy(hid_map_mat), + sender=xor_in_gen_func, + receiver=xor_hid_gen_func) - and_map_sys = MappingProjection(name='and_map_sys', - matrix=and_map.matrix.base.copy(), - sender=and_in_sys, - receiver=and_out_sys) + out_map_gen_func = MappingProjection(name='out_map', + matrix=copy(out_map_mat), + sender=xor_hid_gen_func, + receiver=xor_out_gen_func) # SET UP COMPOSITION - and_net = AutodiffComposition() + xor_gen_func = AutodiffComposition() - and_net.add_node(and_in) - and_net.add_node(and_out) + xor_gen_func.add_node(xor_in_gen_func) + xor_gen_func.add_node(xor_hid_gen_func) + xor_gen_func.add_node(xor_out_gen_func) + + xor_gen_func.add_projection(sender=xor_in_gen_func, projection=hid_map_gen_func, receiver=xor_hid_gen_func) + xor_gen_func.add_projection(sender=xor_hid_gen_func, projection=out_map_gen_func, receiver=xor_out_gen_func) + # SET UP INPUTS AND TARGETS + + xor_inputs_gen_func = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) + + xor_targets_gen_func = np.array( # the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) + + def get_inputs_gen_func(): + yield { + "inputs": { + xor_in_gen_func: xor_inputs_gen_func + }, + "targets": { + xor_out_gen_func: xor_targets_gen_func + } + } + + result_gen_func = xor_gen_func.learn(inputs=get_inputs_gen_func) + + assert result_dict == result_func == result_gen == result_gen_func + + +@pytest.mark.pytorch +@pytest.mark.acmisc +class TestMiscTrainingFunctionality: - and_net.add_projection(sender=and_in, projection=and_map, receiver=and_out) + # test whether pytorch parameters are initialized to be identical to the Autodiff Composition's + def test_weight_initialization(self): - # SET UP INPUTS AND TARGETS + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - and_inputs = np.zeros((4,2)) - and_inputs[0] = [0, 0] - and_inputs[1] = [0, 1] - and_inputs[2] = [1, 0] - and_inputs[3] = [1, 1] + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - and_targets = np.zeros((4,1)) - and_targets[0] = [0] - and_targets[1] = [1] - and_targets[2] = [1] - and_targets[3] = [0] + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - # TIME TRAINING FOR COMPOSITION + hid_map = MappingProjection(matrix=np.random.rand(2,10)) + out_map = MappingProjection(matrix=np.random.rand(10,1)) - start = timeit.default_timer() - result = and_net.run(inputs={and_in:and_inputs}, - targets={and_out:and_targets}, - epochs=eps, - learning_rate=0.1, - controller=opt, - execution_mode=autodiff_mode) - end = timeit.default_timer() - comp_time = end - start + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition() - msg = 'Training XOR model as AutodiffComposition for {0} epochs took {1} seconds'.format(eps, comp_time) - print(msg) - print("\n") - logger.info(msg) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - @pytest.mark.skip - @pytest.mark.parametrize( - 'eps, opt', [ - (1, 'sgd'), - (10, 'sgd'), - (100, 'sgd') - ] - ) - def test_xor_training_time(self, eps, opt, autodiff_mode): + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - # SET UP MECHANISMS FOR COMPOSITION + # mini version of xor.execute just to build up pytorch representation + 
xor._analyze_graph() + xor._build_pytorch_representation(context=xor.default_execution_id) + # check whether pytorch parameters are identical to projections + assert np.allclose(hid_map.parameters.matrix.get(None), + xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy()) + assert np.allclose(out_map.parameters.matrix.get(None), + xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy()) + # test whether processing doesn't interfere with pytorch parameters after training + def test_training_then_processing(self, autodiff_mode): xor_in = TransferMechanism(name='xor_in', default_variable=np.zeros(2)) @@ -1318,46 +1509,78 @@ def test_xor_training_time(self, eps, opt, autodiff_mode): default_variable=np.zeros(1), function=Logistic()) - # SET UP MECHANISMS FOR SYSTEM + hid_map = MappingProjection() + out_map = MappingProjection() - xor_in_sys = TransferMechanism(name='xor_in_sys', - default_variable=np.zeros(2)) + xor = AutodiffComposition() - xor_hid_sys = TransferMechanism(name='xor_hid_sys', - default_variable=np.zeros(10), - function=Logistic()) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - xor_out_sys = TransferMechanism(name='xor_out_sys', - default_variable=np.zeros(1), - function=Logistic()) + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - # SET UP PROJECTIONS FOR COMPOSITION + xor_inputs = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) - hid_map = MappingProjection(name='hid_map', - matrix=np.random.rand(2,10), - sender=xor_in, - receiver=xor_hid) + xor_targets = np.array( # the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) - out_map = MappingProjection(name='out_map', - matrix=np.random.rand(10,1), - sender=xor_hid, - receiver=xor_out) + # train model for a few epochs + # results_before_proc = xor.run(inputs={xor_in:xor_inputs}, + # targets={xor_out:xor_targets}, + # epochs=10) + results_before_proc = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": 10}, + execution_mode=autodiff_mode) - # SET UP PROJECTIONS FOR SYSTEM + # get weight parameters from pytorch + pt_weights_hid_bp = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() + pt_weights_out_bp = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() - hid_map_sys = MappingProjection(name='hid_map_sys', - matrix=hid_map.matrix.base.copy(), - sender=xor_in_sys, - receiver=xor_hid_sys) + #KAM temporarily removed -- will reimplement when pytorch weights can be used in pure PNL execution + # do processing on a few inputs + # results_proc = xor.run(inputs={xor_in:xor_inputs}) + # results_proc = xor.run(inputs={"inputs": {xor_in:xor_inputs}}) + # + # # get weight parameters from pytorch + # pt_weights_hid_ap = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() + # pt_weights_out_ap = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() + # + # # check that weight parameters before and after processing are the same + # assert np.allclose(pt_weights_hid_bp, pt_weights_hid_ap) + # assert np.allclose(pt_weights_out_bp, pt_weights_out_ap) - out_map_sys = MappingProjection(name='out_map_sys', - matrix=out_map.matrix.base.copy(), - sender=xor_hid_sys, - receiver=xor_out_sys) + @pytest.mark.parametrize( + 'loss', [Loss.L1, Loss.POISSON_NLL] + ) + def 
test_various_loss_specs(self, loss, autodiff_mode): + if autodiff_mode is not pnl.ExecutionMode.Python: + pytest.skip("Loss spec not yet implemented!") - # SET UP COMPOSITION + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - xor = AutodiffComposition(execution_mode=autodiff_mode) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + hid_map = MappingProjection() + out_map = MappingProjection() + + xor = AutodiffComposition(loss_spec=loss) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -1366,8 +1589,6 @@ def test_xor_training_time(self, eps, opt, autodiff_mode): xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - # SET UP INPUTS AND TARGETS - xor_inputs = np.array( # the inputs we will provide to the model [[0, 0], [0, 1], @@ -1380,867 +1601,712 @@ def test_xor_training_time(self, eps, opt, autodiff_mode): [1], [0]]) - # TIME TRAINING FOR COMPOSITION - - start = timeit.default_timer() - result = xor.run(inputs={xor_in:xor_inputs}, - targets={xor_out:xor_targets}, - epochs=eps, - learning_rate=0.1, - controller=opt, - execution_mode=autodiff_mode) - end = timeit.default_timer() - comp_time = end - start + xor.learn(inputs = {"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": 10}, execution_mode=autodiff_mode) - # SET UP SYSTEM + def test_pytorch_loss_spec(self, autodiff_mode): + if autodiff_mode is not pnl.ExecutionMode.Python: + pytest.skip("Loss spec not yet implemented!") - # xor_process = Process(pathway=[xor_in_sys, - # hid_map_sys, - # xor_hid_sys, - # out_map_sys, - # xor_out_sys], - # learning=pnl.LEARNING) + import torch + ls = torch.nn.SoftMarginLoss(reduction='sum') - xor_process = Composition(pathways=([xor_in_sys, - hid_map_sys, - xor_hid_sys, - out_map_sys, - xor_out_sys], BackPropagation)) + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - msg = 'Training XOR model as AutodiffComposition for {eps} epochs took {comp_time} seconds.' 
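The two loss tests in this region feed an AutodiffComposition either a Loss enum member (Loss.L1, Loss.POISSON_NLL) or a ready-made torch loss module (torch.nn.SoftMarginLoss). As a rough standalone illustration of what such specs compute — assuming torch is installed, and assuming (not verified here) that the enums resolve to the correspondingly named torch modules with summed reduction — consider:

import torch

pred = torch.tensor([[0.2], [0.8], [0.7], [0.1]])
target = torch.tensor([[0.0], [1.0], [1.0], [0.0]])

l1 = torch.nn.L1Loss(reduction='sum')(pred, target)                  # cf. Loss.L1
pnll = torch.nn.PoissonNLLLoss(reduction='sum')(pred, target)        # cf. Loss.POISSON_NLL
sm = torch.nn.SoftMarginLoss(reduction='sum')(pred, 2 * target - 1)  # soft margin expects targets in {-1, +1}
print(l1.item(), pnll.item(), sm.item())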
- print(msg) - print("\n") - logger.info(msg) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - @pytest.mark.skip - @pytest.mark.parametrize( - 'eps, opt', [ - (1, 'sgd'), - (10, 'sgd'), - (100, 'sgd') - ] - ) - def test_semantic_net_training_time(self, eps, opt): + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - # SET UP MECHANISMS FOR COMPOSITION: + hid_map = MappingProjection() + out_map = MappingProjection() - nouns_in = TransferMechanism(name="nouns_input", - default_variable=np.zeros(8)) + xor = AutodiffComposition(loss_spec=ls) - rels_in = TransferMechanism(name="rels_input", - default_variable=np.zeros(3)) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - h1 = TransferMechanism(name="hidden_nouns", - default_variable=np.zeros(8), - function=Logistic()) + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + xor_inputs = np.array( # the inputs we will provide to the model + [[0, 0], [0, 1], [1, 0], [1, 1]]) - h2 = TransferMechanism(name="hidden_mixed", - default_variable=np.zeros(15), - function=Logistic()) + xor_targets = np.array( # the outputs we wish to see from the model + [[0], [1], [1], [0]]) - out_sig_I = TransferMechanism(name="sig_outs_I", - default_variable=np.zeros(8), - function=Logistic()) + xor.learn(inputs={"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": 10}, execution_mode=autodiff_mode) + xor.learn(inputs={"inputs": {xor_in: xor_inputs}, + "targets": {xor_out: xor_targets}, + "epochs": 10}, execution_mode=autodiff_mode) - out_sig_is = TransferMechanism(name="sig_outs_is", - default_variable=np.zeros(12), - function=Logistic()) - out_sig_has = TransferMechanism(name="sig_outs_has", - default_variable=np.zeros(9), - function=Logistic()) + @pytest.mark.benchmark(group="Optimizer specs") + @pytest.mark.parametrize( + 'learning_rate, weight_decay, optimizer_type, expected', [ + (10, 0, 'sgd', [[[0.9863038667851067]], [[0.9944287263151904]], [[0.9934801466163382]], [[0.9979153035411085]]]), + (1.5, 1, 'sgd', [[[0.33226742]], [[0.4492334]], [[0.75459534]], [[0.44477028]]]), + (1.5, 1, 'adam', [[[0.43109927]], [[0.33088828]], [[0.40094236]], [[0.57104689]]]), + ] + ) + def test_optimizer_specs(self, learning_rate, weight_decay, optimizer_type, expected, autodiff_mode, benchmark): + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - out_sig_can = TransferMechanism(name="sig_outs_can", - default_variable=np.zeros(9), - function=Logistic()) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - # SET UP MECHANISMS FOR SYSTEM + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - nouns_in_sys = TransferMechanism(name="nouns_input_sys", - default_variable=np.zeros(8)) + hid_map = MappingProjection() + out_map = MappingProjection() - rels_in_sys = TransferMechanism(name="rels_input_sys", - default_variable=np.zeros(3)) + xor = AutodiffComposition(learning_rate=learning_rate, + optimizer_type=optimizer_type, + weight_decay=weight_decay) - h1_sys = TransferMechanism(name="hidden_nouns_sys", - default_variable=np.zeros(8), - function=Logistic()) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - h2_sys = TransferMechanism(name="hidden_mixed_sys", - default_variable=np.zeros(15), - 
function=Logistic()) + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - out_sig_I_sys = TransferMechanism(name="sig_outs_I_sys", - default_variable=np.zeros(8), - function=Logistic()) + xor_inputs = np.array( # the inputs we will provide to the model + [[0, 0], [0, 1], [1, 0], [1, 1]]) - out_sig_is_sys = TransferMechanism(name="sig_outs_is_sys", - default_variable=np.zeros(12), - function=Logistic()) + xor_targets = np.array( # the outputs we wish to see from the model + [[0], [1], [1], [0]]) - out_sig_has_sys = TransferMechanism(name="sig_outs_has_sys", - default_variable=np.zeros(9), - function=Logistic()) + # train model for a few epochs + # results_before_proc = xor.run(inputs={xor_in:xor_inputs}, + # targets={xor_out:xor_targets}, + # epochs=10) + results_before_proc = benchmark(xor.learn, inputs={"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": 10}, execution_mode=autodiff_mode) - out_sig_can_sys = TransferMechanism(name="sig_outs_can_sys", - default_variable=np.zeros(9), - function=Logistic()) + # fp32 results are different due to rounding + if pytest.helpers.llvm_current_fp_precision() == 'fp32' and \ + autodiff_mode != pnl.ExecutionMode.Python and \ + optimizer_type == 'sgd' and \ + learning_rate == 10: + expected = [[[0.9918830394744873]], [[0.9982172846794128]], [[0.9978305697441101]], [[0.9994590878486633]]] + # FIXME: LLVM version is broken with learning rate == 1.5 + if learning_rate != 1.5 or autodiff_mode == pnl.ExecutionMode.Python: + assert np.allclose(results_before_proc, expected) - # SET UP PROJECTIONS FOR COMPOSITION - map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), - name="map_nouns_h1", - sender=nouns_in, - receiver=h1) + # test whether pytorch parameters and projections are kept separate (at diff. 
places in memory) + def test_params_stay_separate(self, autodiff_mode): + if autodiff_mode is not pnl.ExecutionMode.Python: + pytest.skip("Compiled weights are always copied back!") - map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), - name="map_rel_h2", - sender=rels_in, - receiver=h2) + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - map_h1_h2 = MappingProjection(matrix=np.random.rand(8,15), - name="map_h1_h2", - sender=h1, - receiver=h2) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - map_h2_I = MappingProjection(matrix=np.random.rand(15,8), - name="map_h2_I", - sender=h2, - receiver=out_sig_I) + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - map_h2_is = MappingProjection(matrix=np.random.rand(15,12), - name="map_h2_is", - sender=h2, - receiver=out_sig_is) + hid_m = np.random.rand(2,10) + out_m = np.random.rand(10,1) - map_h2_has = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_has", - sender=h2, - receiver=out_sig_has) + hid_map = MappingProjection(name='hid_map', + matrix=hid_m.copy(), + sender=xor_in, + receiver=xor_hid) - map_h2_can = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_can", - sender=h2, - receiver=out_sig_can) + out_map = MappingProjection(name='out_map', + matrix=out_m.copy(), + sender=xor_hid, + receiver=xor_out) - # SET UP PROJECTIONS FOR SYSTEM + xor = AutodiffComposition(learning_rate=10.0, + optimizer_type="sgd") - map_nouns_h1_sys = MappingProjection(matrix=map_nouns_h1.matrix.base.copy(), - name="map_nouns_h1_sys", - sender=nouns_in_sys, - receiver=h1_sys) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - map_rels_h2_sys = MappingProjection(matrix=map_rels_h2.matrix.base.copy(), - name="map_relh2_sys", - sender=rels_in_sys, - receiver=h2_sys) + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - map_h1_h2_sys = MappingProjection(matrix=map_h1_h2.matrix.base.copy(), - name="map_h1_h2_sys", - sender=h1_sys, - receiver=h2_sys) + xor_inputs = np.array( # the inputs we will provide to the model + [[0, 0], [0, 1], [1, 0], [1, 1]]) - map_h2_I_sys = MappingProjection(matrix=map_h2_I.matrix.base.copy(), - name="map_h2_I_sys", - sender=h2_sys, - receiver=out_sig_I_sys) + xor_targets = np.array( # the outputs we wish to see from the model + [[0], [1], [1], [0]]) - map_h2_is_sys = MappingProjection(matrix=map_h2_is.matrix.base.copy(), - name="map_h2_is_sys", - sender=h2_sys, - receiver=out_sig_is_sys) + # train the model for a few epochs + result = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, + "targets": {xor_out:xor_targets}, + "epochs": 10}, execution_mode=autodiff_mode) - map_h2_has_sys = MappingProjection(matrix=map_h2_has.matrix.base.copy(), - name="map_h2_has_sys", - sender=h2_sys, - receiver=out_sig_has_sys) + # get weight parameters from pytorch + pt_weights_hid = xor.parameters.pytorch_representation.get(xor).params[0].detach().numpy().copy() + pt_weights_out = xor.parameters.pytorch_representation.get(xor).params[1].detach().numpy().copy() - map_h2_can_sys = MappingProjection(matrix=map_h2_can.matrix.base.copy(), - name="map_h2_can_sys", - sender=h2_sys, - receiver=out_sig_can_sys) + # assert that projections are still what they were initialized as + assert np.allclose(hid_map.parameters.matrix.get(None), hid_m) + assert 
np.allclose(out_map.parameters.matrix.get(None), out_m) - # COMPOSITION FOR SEMANTIC NET + # assert that the projection matrices did not change during training, while + # the pytorch parameters did (so the two should now differ) + assert not np.allclose(pt_weights_hid, hid_map.parameters.matrix.get(None)) + assert not np.allclose(pt_weights_out, out_map.parameters.matrix.get(None)) - sem_net = AutodiffComposition() + def test_execution_mode_python_error(self): + A = TransferMechanism(name="learning-process-mech-A") + B = TransferMechanism(name="learning-process-mech-B") + adc = AutodiffComposition(name='AUTODIFFCOMP') + pway = adc.add_backpropagation_learning_pathway(pathway=[A,B]) + # Call learn with default_variable specified for target (for comparison with missing target) + with pytest.raises(AutodiffCompositionError) as error: + adc.learn(inputs={A: 1.0, + pway.target: 0.0}, + execution_mode=pnl.ExecutionMode.Python, + num_trials=2) + assert error.value.error_value == 'AUTODIFFCOMP is an AutodiffComposition so its learn() ' \ + 'cannot be called with execution_mode = ExecutionMode.Python; ' \ + 'use ExecutionMode.PyTorch or ExecutionMode.LLVMRun.' - sem_net.add_node(nouns_in) - sem_net.add_node(rels_in) - sem_net.add_node(h1) - sem_net.add_node(h2) - sem_net.add_node(out_sig_I) - sem_net.add_node(out_sig_is) - sem_net.add_node(out_sig_has) - sem_net.add_node(out_sig_can) +@pytest.mark.pytorch +@pytest.mark.actime +class TestTrainingTime: - sem_net.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) - sem_net.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) - sem_net.add_projection(sender=h1, projection=map_h1_h2, receiver=h2) - sem_net.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) - sem_net.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) - sem_net.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) - sem_net.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) + @pytest.mark.skip + @pytest.mark.parametrize( + 'eps, opt', [ + (1, 'sgd'), + (10, 'sgd'), + (100, 'sgd') + ] + ) + def test_and_training_time(self, eps, opt, autodiff_mode): - # INPUTS & OUTPUTS FOR SEMANTIC NET: + # SET UP MECHANISMS FOR COMPOSITION - nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 'sunfish'] - relations = ['is', 'has', 'can'] - is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', - 'yellow'] - has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] - can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] + and_in = TransferMechanism(name='and_in', + default_variable=np.zeros(2)) - nouns_input = np.identity(len(nouns)) + and_out = TransferMechanism(name='and_out', + default_variable=np.zeros(1), + function=Logistic()) - rels_input = np.identity(len(relations)) + # SET UP MECHANISMS FOR SYSTEM - truth_nouns = np.identity(len(nouns)) + and_in_sys = TransferMechanism(name='and_in_sys', + default_variable=np.zeros(2)) - truth_is = np.zeros((len(nouns), len(is_list))) + and_out_sys = TransferMechanism(name='and_out_sys', + default_variable=np.zeros(1), + function=Logistic()) - truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] - truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] - truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] - truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] - truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1,
0, 0, 0, 0, 1] - truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] - truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0] - truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0] + # SET UP PROJECTIONS FOR COMPOSITION - truth_has = np.zeros((len(nouns), len(has_list))) + and_map = MappingProjection(name='and_map', + matrix=np.random.rand(2, 1), + sender=and_in, + receiver=and_out) - truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] + # SET UP PROJECTIONS FOR SYSTEM - truth_can = np.zeros((len(nouns), len(can_list))) + and_map_sys = MappingProjection(name='and_map_sys', + matrix=and_map.matrix.base.copy(), + sender=and_in_sys, + receiver=and_out_sys) - truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] + # SET UP COMPOSITION - # SETTING UP DICTIONARIES OF INPUTS/OUTPUTS FOR SEMANTIC NET + and_net = AutodiffComposition() - inputs_dict = {} - inputs_dict[nouns_in] = [] - inputs_dict[rels_in] = [] + and_net.add_node(and_in) + and_net.add_node(and_out) - targets_dict = {} - targets_dict[out_sig_I] = [] - targets_dict[out_sig_is] = [] - targets_dict[out_sig_has] = [] - targets_dict[out_sig_can] = [] + and_net.add_projection(sender=and_in, projection=and_map, receiver=and_out) - for i in range(len(nouns)): - for j in range(len(relations)): - inputs_dict[nouns_in].append(nouns_input[i]) - inputs_dict[rels_in].append(rels_input[j]) - targets_dict[out_sig_I].append(truth_nouns[i]) - targets_dict[out_sig_is].append(truth_is[i]) - targets_dict[out_sig_has].append(truth_has[i]) - targets_dict[out_sig_can].append(truth_can[i]) + # SET UP INPUTS AND TARGETS - inputs_dict_sys = {} - inputs_dict_sys[nouns_in_sys] = inputs_dict[nouns_in] - inputs_dict_sys[rels_in_sys] = inputs_dict[rels_in] + and_inputs = np.zeros((4,2)) + and_inputs[0] = [0, 0] + and_inputs[1] = [0, 1] + and_inputs[2] = [1, 0] + and_inputs[3] = [1, 1] - targets_dict_sys = {} - targets_dict_sys[out_sig_I_sys] = targets_dict[out_sig_I] - targets_dict_sys[out_sig_is_sys] = targets_dict[out_sig_is] - targets_dict_sys[out_sig_has_sys] = targets_dict[out_sig_has] - targets_dict_sys[out_sig_can_sys] = targets_dict[out_sig_can] + and_targets = np.zeros((4,1)) + and_targets[0] = [0] + and_targets[1] = [1] + and_targets[2] = [1] + and_targets[3] = [0] # TIME TRAINING FOR COMPOSITION start = timeit.default_timer() - result = sem_net.run(inputs=inputs_dict, - targets=targets_dict, + result = and_net.run(inputs={and_in:and_inputs}, + targets={and_out:and_targets}, epochs=eps, learning_rate=0.1, - controller=opt) + controller=opt, + execution_mode=autodiff_mode) end = timeit.default_timer() comp_time = end - start - msg = 'Training Semantic net as AutodiffComposition for {0} epochs took {1} seconds'.format(eps, comp_time) + msg = 'Training AND model as AutodiffComposition for {0} epochs took {1} seconds'.format(eps, comp_time) print(msg) print("\n") logger.info(msg) - -@pytest.mark.pytorch
-@pytest.mark.acidenticalness -class TestTrainingIdenticalness(): - + @pytest.mark.skip @pytest.mark.parametrize( 'eps, opt', [ - # (1, 'sgd'), + (1, 'sgd'), (10, 'sgd'), - # (40, 'sgd') + (100, 'sgd') ] ) - def test_semantic_net_training_identicalness(self, eps, opt): - # SET UP MECHANISMS FOR SEMANTIC NET: + def test_xor_training_time(self, eps, opt, autodiff_mode): - nouns_in = TransferMechanism(name="nouns_input", - default_variable=np.zeros(8)) + # SET UP MECHANISMS FOR COMPOSITION - rels_in = TransferMechanism(name="rels_input", - default_variable=np.zeros(3)) + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - h1 = TransferMechanism(name="hidden_nouns", - default_variable=np.zeros(8), - function=Logistic()) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - h2 = TransferMechanism(name="hidden_mixed", - default_variable=np.zeros(15), - function=Logistic()) + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - out_sig_I = TransferMechanism(name="sig_outs_I", - default_variable=np.zeros(8), - function=Logistic()) + # SET UP MECHANISMS FOR SYSTEM - out_sig_is = TransferMechanism(name="sig_outs_is", - default_variable=np.zeros(12), - function=Logistic()) + xor_in_sys = TransferMechanism(name='xor_in_sys', + default_variable=np.zeros(2)) - out_sig_has = TransferMechanism(name="sig_outs_has", - default_variable=np.zeros(9), + xor_hid_sys = TransferMechanism(name='xor_hid_sys', + default_variable=np.zeros(10), function=Logistic()) - out_sig_can = TransferMechanism(name="sig_outs_can", - default_variable=np.zeros(9), + xor_out_sys = TransferMechanism(name='xor_out_sys', + default_variable=np.zeros(1), function=Logistic()) - # SET UP MECHANISMS FOR SYSTEM - - nouns_in_sys = TransferMechanism(name="nouns_input_sys", - default_variable=np.zeros(8)) - - rels_in_sys = TransferMechanism(name="rels_input_sys", - default_variable=np.zeros(3)) - - h1_sys = TransferMechanism(name="hidden_nouns_sys", - default_variable=np.zeros(8), - function=Logistic()) - - h2_sys = TransferMechanism(name="hidden_mixed_sys", - default_variable=np.zeros(15), - function=Logistic()) - - out_sig_I_sys = TransferMechanism(name="sig_outs_I_sys", - default_variable=np.zeros(8), - function=Logistic()) - - out_sig_is_sys = TransferMechanism(name="sig_outs_is_sys", - default_variable=np.zeros(12), - function=Logistic()) - - out_sig_has_sys = TransferMechanism(name="sig_outs_has_sys", - default_variable=np.zeros(9), - function=Logistic()) - - out_sig_can_sys = TransferMechanism(name="sig_outs_can_sys", - default_variable=np.zeros(9), - function=Logistic()) - - # SET UP PROJECTIONS FOR SEMANTIC NET - - map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), - name="map_nouns_h1", - sender=nouns_in, - receiver=h1) - - map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), - name="map_relh2", - sender=rels_in, - receiver=h2) - - map_h1_h2 = MappingProjection(matrix=np.random.rand(8,15), - name="map_h1_h2", - sender=h1, - receiver=h2) - - map_h2_I = MappingProjection(matrix=np.random.rand(15,8), - name="map_h2_I", - sender=h2, - receiver=out_sig_I) - - map_h2_is = MappingProjection(matrix=np.random.rand(15,12), - name="map_h2_is", - sender=h2, - receiver=out_sig_is) + # SET UP PROJECTIONS FOR COMPOSITION - map_h2_has = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_has", - sender=h2, - receiver=out_sig_has) + hid_map = MappingProjection(name='hid_map', + 
matrix=np.random.rand(2,10), + sender=xor_in, + receiver=xor_hid) - map_h2_can = MappingProjection(matrix=np.random.rand(15,9), - name="map_h2_can", - sender=h2, - receiver=out_sig_can) + out_map = MappingProjection(name='out_map', + matrix=np.random.rand(10,1), + sender=xor_hid, + receiver=xor_out) # SET UP PROJECTIONS FOR SYSTEM - map_nouns_h1_sys = MappingProjection(matrix=map_nouns_h1.matrix.base.copy(), - name="map_nouns_h1_sys", - sender=nouns_in_sys, - receiver=h1_sys) - - map_rels_h2_sys = MappingProjection(matrix=map_rels_h2.matrix.base.copy(), - name="map_relh2_sys", - sender=rels_in_sys, - receiver=h2_sys) - - map_h1_h2_sys = MappingProjection(matrix=map_h1_h2.matrix.base.copy(), - name="map_h1_h2_sys", - sender=h1_sys, - receiver=h2_sys) - - map_h2_I_sys = MappingProjection(matrix=map_h2_I.matrix.base.copy(), - name="map_h2_I_sys", - sender=h2_sys, - receiver=out_sig_I_sys) - - map_h2_is_sys = MappingProjection(matrix=map_h2_is.matrix.base.copy(), - name="map_h2_is_sys", - sender=h2_sys, - receiver=out_sig_is_sys) + hid_map_sys = MappingProjection(name='hid_map_sys', + matrix=hid_map.matrix.base.copy(), + sender=xor_in_sys, + receiver=xor_hid_sys) - map_h2_has_sys = MappingProjection(matrix=map_h2_has.matrix.base.copy(), - name="map_h2_has_sys", - sender=h2_sys, - receiver=out_sig_has_sys) + out_map_sys = MappingProjection(name='out_map_sys', + matrix=out_map.matrix.base.copy(), + sender=xor_hid_sys, + receiver=xor_out_sys) - map_h2_can_sys = MappingProjection(matrix=map_h2_can.matrix.base.copy(), - name="map_h2_can_sys", - sender=h2_sys, - receiver=out_sig_can_sys) + # SET UP COMPOSITION - # SET UP COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(learning_rate=0.5, - optimizer_type=opt, - ) + xor = AutodiffComposition(execution_mode=autodiff_mode) - sem_net.add_node(nouns_in) - sem_net.add_node(rels_in) - sem_net.add_node(h1) - sem_net.add_node(h2) - sem_net.add_node(out_sig_I) - sem_net.add_node(out_sig_is) - sem_net.add_node(out_sig_has) - sem_net.add_node(out_sig_can) + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - sem_net.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) - sem_net.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) - sem_net.add_projection(sender=h1, projection=map_h1_h2, receiver=h2) - sem_net.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) - sem_net.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) - sem_net.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) - sem_net.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) - # INPUTS & OUTPUTS FOR SEMANTIC NET: + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 'sunfish'] - relations = ['is', 'has', 'can'] - is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', - 'yellow'] - has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] - can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] + # SET UP INPUTS AND TARGETS - nouns_input = np.identity(len(nouns)) + xor_inputs = np.array( # the inputs we will provide to the model + [[0, 0], + [0, 1], + [1, 0], + [1, 1]]) - rels_input = np.identity(len(relations)) + xor_targets = np.array( # 
the outputs we wish to see from the model + [[0], + [1], + [1], + [0]]) - truth_nouns = np.identity(len(nouns)) + # TIME TRAINING FOR COMPOSITION - truth_is = np.zeros((len(nouns), len(is_list))) + start = timeit.default_timer() + result = xor.run(inputs={xor_in:xor_inputs}, + targets={xor_out:xor_targets}, + epochs=eps, + learning_rate=0.1, + controller=opt, + execution_mode=autodiff_mode) + end = timeit.default_timer() + comp_time = end - start - truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] - truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] - truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] - truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] - truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] - truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] - truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0] - truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0] + # SET UP SYSTEM - truth_has = np.zeros((len(nouns), len(has_list))) + # xor_process = Process(pathway=[xor_in_sys, + # hid_map_sys, + # xor_hid_sys, + # out_map_sys, + # xor_out_sys], + # learning=pnl.LEARNING) - truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] - truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] - truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] - truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] + xor_process = Composition(pathways=([xor_in_sys, + hid_map_sys, + xor_hid_sys, + out_map_sys, + xor_out_sys], BackPropagation)) - truth_can = np.zeros((len(nouns), len(can_list))) + msg = f'Training XOR model as AutodiffComposition for {eps} epochs took {comp_time} seconds.' + print(msg) + print("\n") + logger.info(msg) - truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] - truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] - truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] + @pytest.mark.skip + @pytest.mark.parametrize( + 'eps, opt', [ + (1, 'sgd'), + (10, 'sgd'), + (100, 'sgd') + ] + ) + def test_semantic_net_training_time(self, eps, opt): - # SETTING UP DICTIONARY OF INPUTS/OUTPUTS FOR SEMANTIC NET + # SET UP MECHANISMS FOR COMPOSITION: - inputs_dict = {} - inputs_dict[nouns_in] = [] - inputs_dict[rels_in] = [] + nouns_in = TransferMechanism(name="nouns_input", + default_variable=np.zeros(8)) - targets_dict = {} - targets_dict[out_sig_I] = [] - targets_dict[out_sig_is] = [] - targets_dict[out_sig_has] = [] - targets_dict[out_sig_can] = [] + rels_in = TransferMechanism(name="rels_input", + default_variable=np.zeros(3)) - for i in range(len(nouns)): - for j in range(len(relations)): - inputs_dict[nouns_in].append(nouns_input[i]) - inputs_dict[rels_in].append(rels_input[j]) - targets_dict[out_sig_I].append(truth_nouns[i]) - targets_dict[out_sig_is].append(truth_is[i]) - targets_dict[out_sig_has].append(truth_has[i]) - targets_dict[out_sig_can].append(truth_can[i]) + h1 = TransferMechanism(name="hidden_nouns", + default_variable=np.zeros(8), + function=Logistic()) - inputs_dict_sys = {} - inputs_dict_sys[nouns_in_sys] = inputs_dict[nouns_in] - inputs_dict_sys[rels_in_sys] = inputs_dict[rels_in] + h2 = TransferMechanism(name="hidden_mixed", + default_variable=np.zeros(15), + function=Logistic()) -
result = sem_net.run(inputs=inputs_dict) + out_sig_I = TransferMechanism(name="sig_outs_I", + default_variable=np.zeros(8), + function=Logistic()) - # TRAIN COMPOSITION - def g_f(): - yield {"inputs": inputs_dict, - "targets": targets_dict, - "epochs": eps} - g = g_f() - result = sem_net.learn(inputs=g_f) + out_sig_is = TransferMechanism(name="sig_outs_is", + default_variable=np.zeros(12), + function=Logistic()) - # SET UP SYSTEM - sem_net_sys = Composition() + out_sig_has = TransferMechanism(name="sig_outs_has", + default_variable=np.zeros(9), + function=Logistic()) - backprop_pathway = sem_net_sys.add_backpropagation_learning_pathway( - pathway=[ - nouns_in_sys, - map_nouns_h1_sys, - h1_sys, - map_h1_h2_sys, - h2_sys, - map_h2_I_sys, - out_sig_I_sys - ], - learning_rate=0.5 - ) - inputs_dict_sys[backprop_pathway.target] = targets_dict[out_sig_I] + out_sig_can = TransferMechanism(name="sig_outs_can", + default_variable=np.zeros(9), + function=Logistic()) - backprop_pathway = sem_net_sys.add_backpropagation_learning_pathway( - pathway=[ - rels_in_sys, - map_rels_h2_sys, - h2_sys, - map_h2_is_sys, - out_sig_is_sys - ], - learning_rate=0.5 - ) - inputs_dict_sys[backprop_pathway.target] = targets_dict[out_sig_is] + # SET UP MECHANISMS FOR SYSTEM - backprop_pathway = sem_net_sys.add_backpropagation_learning_pathway( - pathway=[ - h2_sys, - map_h2_has_sys, - out_sig_has_sys - ], - learning_rate=0.5 - ) - inputs_dict_sys[backprop_pathway.target] = targets_dict[out_sig_has] + nouns_in_sys = TransferMechanism(name="nouns_input_sys", + default_variable=np.zeros(8)) - backprop_pathway = sem_net_sys.add_backpropagation_learning_pathway( - pathway=[ - h2_sys, - map_h2_can_sys, - out_sig_can_sys - ], - learning_rate=0.5 - ) - inputs_dict_sys[backprop_pathway.target] = targets_dict[out_sig_can] + rels_in_sys = TransferMechanism(name="rels_input_sys", + default_variable=np.zeros(3)) - # TRAIN SYSTEM - results = sem_net_sys.learn(inputs=inputs_dict_sys, - num_trials=(len(inputs_dict_sys[nouns_in_sys]) * eps)) + h1_sys = TransferMechanism(name="hidden_nouns_sys", + default_variable=np.zeros(8), + function=Logistic()) - # CHECK THAT PARAMETERS FOR COMPOSITION, SYSTEM ARE SAME + h2_sys = TransferMechanism(name="hidden_mixed_sys", + default_variable=np.zeros(15), + function=Logistic()) - assert np.allclose(map_nouns_h1.parameters.matrix.get(sem_net), map_nouns_h1_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_rels_h2.parameters.matrix.get(sem_net), map_rels_h2_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_h1_h2.parameters.matrix.get(sem_net), map_h1_h2_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_h2_I.parameters.matrix.get(sem_net), map_h2_I_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_h2_is.parameters.matrix.get(sem_net), map_h2_is_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_h2_has.parameters.matrix.get(sem_net), map_h2_has_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(map_h2_can.parameters.matrix.get(sem_net), map_h2_can_sys.get_mod_matrix(sem_net_sys)) + out_sig_I_sys = TransferMechanism(name="sig_outs_I_sys", + default_variable=np.zeros(8), + function=Logistic()) - def test_identicalness_of_input_types(self): - # SET UP MECHANISMS FOR COMPOSITION - from copy import copy - hid_map_mat = np.random.rand(2, 10) - out_map_mat = np.random.rand(10, 1) - xor_in_dict = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + out_sig_is_sys = TransferMechanism(name="sig_outs_is_sys", + default_variable=np.zeros(12), + 
function=Logistic()) - xor_hid_dict = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + out_sig_has_sys = TransferMechanism(name="sig_outs_has_sys", + default_variable=np.zeros(9), + function=Logistic()) - xor_out_dict = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + out_sig_can_sys = TransferMechanism(name="sig_outs_can_sys", + default_variable=np.zeros(9), + function=Logistic()) # SET UP PROJECTIONS FOR COMPOSITION - hid_map_dict = MappingProjection(name='hid_map', - matrix=copy(hid_map_mat), - sender=xor_in_dict, - receiver=xor_hid_dict) - - out_map_dict = MappingProjection(name='out_map', - matrix=copy(out_map_mat), - sender=xor_hid_dict, - receiver=xor_out_dict) + map_nouns_h1 = MappingProjection(matrix=np.random.rand(8,8), + name="map_nouns_h1", + sender=nouns_in, + receiver=h1) - # SET UP COMPOSITION + map_rels_h2 = MappingProjection(matrix=np.random.rand(3,15), + name="map_rel_h2", + sender=rels_in, + receiver=h2) - xor_dict = AutodiffComposition() + map_h1_h2 = MappingProjection(matrix=np.random.rand(8,15), + name="map_h1_h2", + sender=h1, + receiver=h2) - xor_dict.add_node(xor_in_dict) - xor_dict.add_node(xor_hid_dict) - xor_dict.add_node(xor_out_dict) + map_h2_I = MappingProjection(matrix=np.random.rand(15,8), + name="map_h2_I", + sender=h2, + receiver=out_sig_I) - xor_dict.add_projection(sender=xor_in_dict, projection=hid_map_dict, receiver=xor_hid_dict) - xor_dict.add_projection(sender=xor_hid_dict, projection=out_map_dict, receiver=xor_out_dict) - # SET UP INPUTS AND TARGETS + map_h2_is = MappingProjection(matrix=np.random.rand(15,12), + name="map_h2_is", + sender=h2, + receiver=out_sig_is) - xor_inputs_dict = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + map_h2_has = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_has", + sender=h2, + receiver=out_sig_has) - xor_targets_dict = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + map_h2_can = MappingProjection(matrix=np.random.rand(15,9), + name="map_h2_can", + sender=h2, + receiver=out_sig_can) - input_dict = { - "inputs": { - xor_in_dict: xor_inputs_dict - }, - "targets": { - xor_out_dict: xor_targets_dict - } - } + # SET UP PROJECTIONS FOR SYSTEM - result_dict = xor_dict.learn(inputs=input_dict) + map_nouns_h1_sys = MappingProjection(matrix=map_nouns_h1.matrix.base.copy(), + name="map_nouns_h1_sys", + sender=nouns_in_sys, + receiver=h1_sys) - # SET UP MECHANISMS FOR COMPOSITION - xor_in_func = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + map_rels_h2_sys = MappingProjection(matrix=map_rels_h2.matrix.base.copy(), + name="map_relh2_sys", + sender=rels_in_sys, + receiver=h2_sys) - xor_hid_func = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + map_h1_h2_sys = MappingProjection(matrix=map_h1_h2.matrix.base.copy(), + name="map_h1_h2_sys", + sender=h1_sys, + receiver=h2_sys) - xor_out_func = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + map_h2_I_sys = MappingProjection(matrix=map_h2_I.matrix.base.copy(), + name="map_h2_I_sys", + sender=h2_sys, + receiver=out_sig_I_sys) - # SET UP PROJECTIONS FOR COMPOSITION + map_h2_is_sys = MappingProjection(matrix=map_h2_is.matrix.base.copy(), + name="map_h2_is_sys", + sender=h2_sys, + receiver=out_sig_is_sys) - hid_map_func = MappingProjection(name='hid_map', - matrix=copy(hid_map_mat), - 
sender=xor_in_func, - receiver=xor_hid_func) + map_h2_has_sys = MappingProjection(matrix=map_h2_has.matrix.base.copy(), + name="map_h2_has_sys", + sender=h2_sys, + receiver=out_sig_has_sys) - out_map_func = MappingProjection(name='out_map', - matrix=copy(out_map_mat), - sender=xor_hid_func, - receiver=xor_out_func) + map_h2_can_sys = MappingProjection(matrix=map_h2_can.matrix.base.copy(), + name="map_h2_can_sys", + sender=h2_sys, + receiver=out_sig_can_sys) - # SET UP COMPOSITION + # COMPOSITION FOR SEMANTIC NET - xor_func = AutodiffComposition() + sem_net = AutodiffComposition() - xor_func.add_node(xor_in_func) - xor_func.add_node(xor_hid_func) - xor_func.add_node(xor_out_func) + sem_net.add_node(nouns_in) + sem_net.add_node(rels_in) + sem_net.add_node(h1) + sem_net.add_node(h2) + sem_net.add_node(out_sig_I) + sem_net.add_node(out_sig_is) + sem_net.add_node(out_sig_has) + sem_net.add_node(out_sig_can) - xor_func.add_projection(sender=xor_in_func, projection=hid_map_func, receiver=xor_hid_func) - xor_func.add_projection(sender=xor_hid_func, projection=out_map_func, receiver=xor_out_func) + sem_net.add_projection(sender=nouns_in, projection=map_nouns_h1, receiver=h1) + sem_net.add_projection(sender=rels_in, projection=map_rels_h2, receiver=h2) + sem_net.add_projection(sender=h1, projection=map_h1_h2, receiver=h2) + sem_net.add_projection(sender=h2, projection=map_h2_I, receiver=out_sig_I) + sem_net.add_projection(sender=h2, projection=map_h2_is, receiver=out_sig_is) + sem_net.add_projection(sender=h2, projection=map_h2_has, receiver=out_sig_has) + sem_net.add_projection(sender=h2, projection=map_h2_can, receiver=out_sig_can) - # SET UP INPUTS AND TARGETS + # INPUTS & OUTPUTS FOR SEMANTIC NET: - xor_inputs_func = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + nouns = ['oak', 'pine', 'rose', 'daisy', 'canary', 'robin', 'salmon', 'sunfish'] + relations = ['is', 'has', 'can'] + is_list = ['living', 'living thing', 'plant', 'animal', 'tree', 'flower', 'bird', 'fish', 'big', 'green', 'red', + 'yellow'] + has_list = ['roots', 'leaves', 'bark', 'branches', 'skin', 'feathers', 'wings', 'gills', 'scales'] + can_list = ['grow', 'move', 'swim', 'fly', 'breathe', 'breathe underwater', 'breathe air', 'walk', 'photosynthesize'] - xor_targets_func = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + nouns_input = np.identity(len(nouns)) - def get_inputs(idx): - return { - "inputs": { - xor_in_func: xor_inputs_func[idx] - }, - "targets": { - xor_out_func: xor_targets_func[idx] - } - } + rels_input = np.identity(len(relations)) - result_func = xor_func.learn(inputs=get_inputs) + truth_nouns = np.identity(len(nouns)) - # SET UP MECHANISMS FOR COMPOSITION - xor_in_gen = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + truth_is = np.zeros((len(nouns), len(is_list))) - xor_hid_gen = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) + truth_is[0, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] + truth_is[1, :] = [1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0] + truth_is[2, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] + truth_is[3, :] = [1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0] + truth_is[4, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] + truth_is[5, :] = [1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1] + truth_is[6, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0] + truth_is[7, :] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0] - xor_out_gen = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - 
function=Logistic()) + truth_has = np.zeros((len(nouns), len(has_list))) - # SET UP PROJECTIONS FOR COMPOSITION + truth_has[0, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] + truth_has[1, :] = [1, 1, 1, 1, 0, 0, 0, 0, 0] + truth_has[2, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] + truth_has[3, :] = [1, 1, 0, 0, 0, 0, 0, 0, 0] + truth_has[4, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] + truth_has[5, :] = [0, 0, 0, 0, 1, 1, 1, 0, 0] + truth_has[6, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] + truth_has[7, :] = [0, 0, 0, 0, 0, 0, 0, 1, 1] - hid_map_gen = MappingProjection(name='hid_map', - matrix=copy(hid_map_mat), - sender=xor_in_gen, - receiver=xor_hid_gen) + truth_can = np.zeros((len(nouns), len(can_list))) - out_map_gen = MappingProjection(name='out_map', - matrix=copy(out_map_mat), - sender=xor_hid_gen, - receiver=xor_out_gen) + truth_can[0, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[1, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[2, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[3, :] = [1, 0, 0, 0, 0, 0, 0, 0, 1] + truth_can[4, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] + truth_can[5, :] = [1, 1, 0, 1, 1, 0, 1, 1, 0] + truth_can[6, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] + truth_can[7, :] = [1, 1, 1, 0, 1, 1, 0, 0, 0] - # SET UP COMPOSITION + # SETTING UP DICTIONARIES OF INPUTS/OUTPUTS FOR SEMANTIC NET - xor_gen = AutodiffComposition() + inputs_dict = {} + inputs_dict[nouns_in] = [] + inputs_dict[rels_in] = [] - xor_gen.add_node(xor_in_gen) - xor_gen.add_node(xor_hid_gen) - xor_gen.add_node(xor_out_gen) + targets_dict = {} + targets_dict[out_sig_I] = [] + targets_dict[out_sig_is] = [] + targets_dict[out_sig_has] = [] + targets_dict[out_sig_can] = [] - xor_gen.add_projection(sender=xor_in_gen, projection=hid_map_gen, receiver=xor_hid_gen) - xor_gen.add_projection(sender=xor_hid_gen, projection=out_map_gen, receiver=xor_out_gen) - # SET UP INPUTS AND TARGETS + for i in range(len(nouns)): + for j in range(len(relations)): + inputs_dict[nouns_in].append(nouns_input[i]) + inputs_dict[rels_in].append(rels_input[j]) + targets_dict[out_sig_I].append(truth_nouns[i]) + targets_dict[out_sig_is].append(truth_is[i]) + targets_dict[out_sig_has].append(truth_has[i]) + targets_dict[out_sig_can].append(truth_can[i]) - xor_inputs_gen = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + inputs_dict_sys = {} + inputs_dict_sys[nouns_in_sys] = inputs_dict[nouns_in] + inputs_dict_sys[rels_in_sys] = inputs_dict[rels_in] - xor_targets_gen = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + targets_dict_sys = {} + targets_dict_sys[out_sig_I_sys] = targets_dict[out_sig_I] + targets_dict_sys[out_sig_is_sys] = targets_dict[out_sig_is] + targets_dict_sys[out_sig_has_sys] = targets_dict[out_sig_has] + targets_dict_sys[out_sig_can_sys] = targets_dict[out_sig_can] - def get_inputs_gen(): - yield { - "inputs": { - xor_in_gen: xor_inputs_gen - }, - "targets": { - xor_out_gen: xor_targets_gen - } - } + # TIME TRAINING FOR COMPOSITION - g = get_inputs_gen() + start = timeit.default_timer() + result = sem_net.run(inputs=inputs_dict, + targets=targets_dict, + epochs=eps, + learning_rate=0.1, + controller=opt) + end = timeit.default_timer() + comp_time = end - start - result_gen = xor_gen.learn(inputs=g) + msg = 'Training Semantic net as AutodiffComposition for {0} epochs took {1} seconds'.format(eps, comp_time) + print(msg) + print("\n") + logger.info(msg) - # SET UP MECHANISMS FOR COMPOSITION - xor_in_gen_func = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) - xor_hid_gen_func = 
TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) +@pytest.mark.pytorch +def test_autodiff_saveload(tmp_path): + def create_xor(): + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) - xor_out_gen_func = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) - # SET UP PROJECTIONS FOR COMPOSITION + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) - hid_map_gen_func = MappingProjection(name='hid_map', - matrix=copy(hid_map_mat), - sender=xor_in_gen_func, - receiver=xor_hid_gen_func) + hid_map = MappingProjection(matrix=np.random.rand(2,10), name='hid_map') + out_map = MappingProjection(matrix=np.random.rand(10,1), name='out_map') - out_map_gen_func = MappingProjection(name='out_map', - matrix=copy(out_map_mat), - sender=xor_hid_gen_func, - receiver=xor_out_gen_func) + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition() - # SET UP COMPOSITION + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) - xor_gen_func = AutodiffComposition() + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + return xor - xor_gen_func.add_node(xor_in_gen_func) - xor_gen_func.add_node(xor_hid_gen_func) - xor_gen_func.add_node(xor_out_gen_func) + np.random.seed(0) + xor1 = create_xor() + xor1_outputs = xor1.run(inputs=[0,0]) - xor_gen_func.add_projection(sender=xor_in_gen_func, projection=hid_map_gen_func, receiver=xor_hid_gen_func) - xor_gen_func.add_projection(sender=xor_hid_gen_func, projection=out_map_gen_func, receiver=xor_out_gen_func) - # SET UP INPUTS AND TARGETS + # save + # path = xor1.save() + path = xor1.save(os.path.join(tmp_path, 'xor_1.pnl')) - xor_inputs_gen_func = np.array( # the inputs we will provide to the model - [[0, 0], - [0, 1], - [1, 0], - [1, 1]]) + # del xor1 + pnl.clear_registry() - xor_targets_gen_func = np.array( # the outputs we wish to see from the model - [[0], - [1], - [1], - [0]]) + # load + np.random.seed(1) + xor2 = create_xor() + xor2_outputs_pre = xor2.run(inputs=[0,0]) + # xor2.load(os.path.join(tmp_path, 'xor_1.pnl')) + xor2.load(path) + xor2_outputs_post = xor2.run(inputs=[0,0]) - def get_inputs_gen_func(): - yield { - "inputs": { - xor_in_gen_func: xor_inputs_gen_func - }, - "targets": { - xor_out_gen_func: xor_targets_gen_func - } - } - result_gen_func = xor_gen_func.learn(inputs=get_inputs_gen_func) + # sanity check - make sure xor2 weights differ + assert not np.allclose(xor2_outputs_pre, xor2_outputs_post, atol=1e-9) - assert result_dict == result_func == result_gen == result_gen_func + # make sure loaded model is identical, and used during run + assert np.allclose(xor1_outputs, xor2_outputs_post, atol=1e-9) @pytest.mark.pytorch @@ -2383,6 +2449,7 @@ def test_autodiff_loss_tracking(self): xor.clear_losses(context=xor) assert len(xor.losses) == 0 + @pytest.mark.pytorch @pytest.mark.acnested class TestNested: @@ -2857,6 +2924,7 @@ def test_semantic_net_nested(self, eps, opt, autodiff_mode): parentComposition.run(inputs=no_training_input) + @pytest.mark.pytorch class TestBatching: def test_call_before_minibatch(self): @@ -3113,7 +3181,7 @@ def test_cross_entropy_loss(self): m1 = pnl.TransferMechanism() p = 
pnl.MappingProjection() m2 = pnl.TransferMechanism() - adc = pnl.AutodiffComposition(loss_spec='crossentropy') + adc = pnl.AutodiffComposition(loss_spec=Loss.CROSS_ENTROPY) adc.add_linear_processing_pathway([m1, p, m2]) adc._build_pytorch_representation() diff --git a/tests/composition/test_composition.py b/tests/composition/test_composition.py index 01dd5053c73..49e60e00f21 100644 --- a/tests/composition/test_composition.py +++ b/tests/composition/test_composition.py @@ -8,7 +8,8 @@ import psyneulink as pnl from psyneulink.core.components.functions.nonstateful.combinationfunctions import LinearCombination -from psyneulink.core.components.functions.nonstateful.learningfunctions import Reinforcement, BackPropagation +from psyneulink.core.components.functions.nonstateful.learningfunctions import \ + LearningFunction, Reinforcement, BackPropagation, TDLearning from psyneulink.core.components.functions.nonstateful.optimizationfunctions import GridSearch from psyneulink.core.components.functions.nonstateful.transferfunctions import \ Linear, Logistic, INTENSITY_COST_FCT_MULTIPLICATIVE_PARAM @@ -39,9 +40,12 @@ from psyneulink.core.scheduling.condition import EveryNCalls from psyneulink.core.scheduling.scheduler import Scheduler, SchedulingMode from psyneulink.core.scheduling.time import TimeScale +from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import ComparatorMechanism from psyneulink.library.components.mechanisms.modulatory.control.agt.lccontrolmechanism import LCControlMechanism from psyneulink.library.components.mechanisms.processing.transfer.recurrenttransfermechanism import \ RecurrentTransferMechanism +from psyneulink.library.components.mechanisms.processing.integrator.episodicmemorymechanism import \ + EpisodicMemoryMechanism logger = logging.getLogger(__name__) @@ -1012,6 +1016,75 @@ def test_various_pathway_configurations_in_constructor(self, config): assert all(node in comp.get_nodes_by_role(NodeRole.INPUT) for node in {A,C}) assert all(node in comp.get_nodes_by_role(NodeRole.OUTPUT) for node in {B,D}) + config = [ + ('([{A,B,C},D,E],Proj)', 'a'), + ('([{A,B,C},Proj_1,D,E],Proj_2)', 'b'), + ('([{A,B,C},D,Proj_1,E],Proj_2)', 'c'), + ('Pathway(default_matrix)', 'd'), + ('([A,B,C],BackProp,Proj)', 'e'), + ('([A,B,C],Proj,BackProp)', 'f'), + ('([A,B],RL,Proj)', 'g'), + ('([A,B],TD,Proj)', 'h'), + # FIX: Set specification not yet supported for learning pathway: + # ('([{A,B,C},D,Proj_1,E],Proj_2,learning_fct)', 'i'), # set spec for Projections + # ('([{A,B,C},D,Proj_1,E],learning_fct,Proj_2)', 'j'), # not yet supported for learning Pathways + ] + @pytest.mark.parametrize('config', config, ids=[x[0] for x in config]) + def test_pathway_tuple_specs(self, config): + A = ProcessingMechanism(name='A') + B = ProcessingMechanism(name='B') + C = ProcessingMechanism(name='C') + # if config[1] not in {'g','h'}: + D = ProcessingMechanism(name='D') + E = ProcessingMechanism(name='E') + F = ProcessingMechanism(name='F') + if config[1]=='a': + comp = Composition(([{A,B,C},D,E],[2.9])) + assert all([p.matrix.base==2.9 for p in D.path_afferents]) + assert E.path_afferents[0].matrix.base==2.9 + if config[1]=='b': + comp = Composition(([{A,B,C},[1.6],D,E],[2.9])) + assert all([p.matrix.base==1.6 for p in D.path_afferents]) + assert E.path_afferents[0].matrix.base==2.9 + if config[1]=='c': + comp = Composition(([{A,B,C},D,[1.6],E],[2.9])) + assert all([p.matrix.base==2.9 for p in D.path_afferents]) + assert E.path_afferents[0].matrix.base==1.6 + if config[1]=='d': + 
pway=Pathway(([{A,B,C},[1.6],D,E], [2.9])) + comp = Composition(pway) + assert all([p.matrix.base==1.6 for p in D.path_afferents]) + assert E.path_afferents[0].matrix.base==2.9 + if config[1]=='e': + comp = Composition(([A,B,C],BackPropagation,[2.9])) + assert B.path_afferents[0].matrix.base==2.9 + assert C.path_afferents[0].matrix.base==2.9 + assert comp.pathways[0].learning_function == BackPropagation + if config[1]=='f': + comp = Composition(([A,B,C],[2.9],BackPropagation)) + assert B.path_afferents[0].matrix.base==2.9 + assert C.path_afferents[0].matrix.base==2.9 + assert comp.pathways[0].learning_function == BackPropagation + if config[1]=='g': + comp = Composition(([A,B],Reinforcement,[2.9])) + assert B.path_afferents[0].matrix.base==2.9 + assert comp.pathways[0].learning_function == Reinforcement + if config[1]=='h': + comp = Composition(([A,B],[2.9],TDLearning)) + assert B.path_afferents[0].matrix.base==2.9 + assert comp.pathways[0].learning_function == TDLearning + # FIX: Set specification not yet supported for learning pathway: + # if config[1]=='i': + # comp = Composition(([{A,B,C},D,[1.6],E],BackPropagation,[2.9])) + # assert all([p.matrix.base==2.9 for p in D.path_afferents]) + # assert E.path_afferents[0].matrix.base==1.6 + # assert comp.pathways[0].learning_function == BackPropagation + # if config[1]=='j': + # comp = Composition(([{A,B,C},D,[1.6],E],[2.9],BackPropagation)) + # assert all([p.matrix.base==2.9 for p in D.path_afferents]) + # assert E.path_afferents[0].matrix.base==1.6 + # assert comp.pathways[0].learning_function == BackPropagation + def test_add_pathways_bad_arg_error(self): I = InputPort(name='I') c = Composition() @@ -1607,7 +1680,9 @@ def test_composition_learning_pathway_dict_with_no_learning_fct_in_tuple_error(s C = ProcessingMechanism(name='C') with pytest.raises(pnl.CompositionError) as error_text: c = Composition(pathways=[{'P1': ([A,B],C)}]) - assert ("The 2nd item" in str(error_text.value) and "must be a LearningFunction" in str(error_text.value)) + assert ("Bad spec for one of the items in the value of a dict specified for the \'pathways\' arg " + "of the constructor for Composition-0: (ProcessingMechanism C); " + "its item(s) must be a matrix specification and/or a LearningFunction" in str(error_text.value)) class TestProperties: @@ -2530,12 +2605,9 @@ def test_3_mechanisms_frozen_values(self, benchmark, comp_mode): inputs_dict = {A: [4.0]} sched = Scheduler(composition=comp) - output = comp.run(inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) + output = benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) assert np.allclose(output, 320) - if benchmark.enabled: - benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) - @pytest.mark.control @pytest.mark.composition @pytest.mark.benchmark(group="Control composition scalar") @@ -2570,12 +2642,9 @@ def test_3_mechanisms_2_origins_1_multi_control_1_terminal(self, benchmark, comp inputs_dict = {B: [4.0]} - output = comp.run(inputs=inputs_dict, execution_mode=comp_mode) + output = benchmark(comp.run, inputs=inputs_dict, execution_mode=comp_mode) assert np.allclose(output, 354.19328716) - if benchmark.enabled: - benchmark(comp.run, inputs=inputs_dict, execution_mode=comp_mode) - @pytest.mark.control @pytest.mark.composition @pytest.mark.benchmark(group="Control composition scalar") @@ -2610,12 +2679,9 @@ def test_3_mechanisms_2_origins_1_additive_control_1_terminal(self, benchmark, c inputs_dict = {B: [4.0]} sched = 
Scheduler(composition=comp) - output = comp.run(inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) + output = benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) assert np.allclose(output, 650.83865743) - if benchmark.enabled: - benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) - @pytest.mark.control @pytest.mark.composition @pytest.mark.benchmark(group="Control composition scalar") @@ -2650,10 +2716,8 @@ def test_3_mechanisms_2_origins_1_override_control_1_terminal(self, benchmark, c inputs_dict = {B: [4.0]} - output = comp.run(inputs=inputs_dict, execution_mode=comp_mode) + output = benchmark(comp.run, inputs=inputs_dict, execution_mode=comp_mode) assert np.allclose(output, 150.83865743) - if benchmark.enabled: - benchmark(comp.run, inputs=inputs_dict, execution_mode=comp_mode) @pytest.mark.control @pytest.mark.composition @@ -2690,12 +2754,9 @@ def test_3_mechanisms_2_origins_1_disable_control_1_terminal(self, benchmark, co inputs_dict = {B: [4.0]} sched = Scheduler(composition=comp) - output = comp.run(inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) + output = benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) assert np.allclose(output, 600) - if benchmark.enabled: - benchmark(comp.run, inputs=inputs_dict, scheduler=sched, execution_mode=comp_mode) - @pytest.mark.composition @pytest.mark.benchmark(group="Transfer") def xtest_transfer_mechanism(self, benchmark, comp_mode): @@ -3843,22 +3904,15 @@ def test_run_recurrent_transfer_mechanism(self, benchmark, comp_mode): sched = Scheduler(composition=comp) output1 = comp.run(inputs={A: [[1.0, 2.0, 3.0]]}, scheduler=sched, execution_mode=comp_mode) assert np.allclose([5.0, 10.0, 15.0], output1) - output2 = comp.run(inputs={A: [[1.0, 2.0, 3.0]]}, scheduler=sched, execution_mode=comp_mode) + output2 = benchmark(comp.run, inputs={A: [[1.0, 2.0, 3.0]]}, scheduler=sched, execution_mode=comp_mode) # Using the hollow matrix: (10 + 15 + 1) * 5 = 130, # ( 5 + 15 + 2) * 5 = 110, # ( 5 + 10 + 3) * 5 = 90 assert np.allclose([130.0, 110.0, 90.0], output2) - if benchmark.enabled: - benchmark(comp.run, inputs={A: [[1.0, 2.0, 3.0]]}, scheduler=sched, execution_mode=comp_mode) @pytest.mark.composition @pytest.mark.benchmark(group="Recurrent") - @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.PTXExec, marks=[pytest.mark.llvm, pytest.mark.cuda]), - ]) - def test_run_recurrent_transfer_mechanism_hetero(self, benchmark, mode): + def test_run_recurrent_transfer_mechanism_hetero(self, benchmark, comp_mode): comp = Composition() R = RecurrentTransferMechanism(size=1, function=Logistic(), @@ -3867,28 +3921,18 @@ def test_run_recurrent_transfer_mechanism_hetero(self, benchmark, mode): comp.add_node(R) comp._analyze_graph() sched = Scheduler(composition=comp) - val = comp.execute(inputs={R: [[3.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[3.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.95257413]]) - val = comp.execute(inputs={R: [[4.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[4.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.98201379]]) - # execute 10 times - for i in range(10): - val = comp.execute(inputs={R: [[5.0]]}, execution_mode=mode) - + # execute 10 trials + val = benchmark(comp.run, 
inputs={R: [[5.0]]}, num_trials=10, execution_mode=comp_mode) assert np.allclose(val, [[0.99330715]]) - if benchmark.enabled: - benchmark(comp.execute, inputs={R: [[1.0]]}, execution_mode=mode) - @pytest.mark.composition @pytest.mark.benchmark(group="Recurrent") - @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.PTXExec, marks=[pytest.mark.llvm, pytest.mark.cuda]), - ]) - def test_run_recurrent_transfer_mechanism_integrator(self, benchmark, mode): + def test_run_recurrent_transfer_mechanism_integrator(self, benchmark, comp_mode): comp = Composition() R = RecurrentTransferMechanism(size=1, function=Logistic(), @@ -3899,55 +3943,36 @@ def test_run_recurrent_transfer_mechanism_integrator(self, benchmark, mode): comp.add_node(R) comp._analyze_graph() sched = Scheduler(composition=comp) - val = comp.execute(inputs={R: [[3.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[3.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.50749944]]) - val = comp.execute(inputs={R: [[4.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[4.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.51741795]]) - # execute 10 times - for i in range(10): - val = comp.execute(inputs={R: [[5.0]]}, execution_mode=mode) - + # execute 10 trials + val = benchmark(comp.run, inputs={R: [[5.0]]}, num_trials=10, execution_mode=comp_mode) assert np.allclose(val, [[0.6320741]]) - if benchmark.enabled: - benchmark(comp.execute, inputs={R: [[1.0]]}, execution_mode=mode) - @pytest.mark.composition @pytest.mark.benchmark(group="Recurrent") - @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.PTXExec, marks=[pytest.mark.llvm, pytest.mark.cuda]), - ]) - def test_run_recurrent_transfer_mechanism_vector_2(self, benchmark, mode): + def test_run_recurrent_transfer_mechanism_vector_2(self, benchmark, comp_mode): comp = Composition() R = RecurrentTransferMechanism(size=2, function=Logistic()) comp.add_node(R) comp._analyze_graph() sched = Scheduler(composition=comp) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.81757448, 0.92414182]]) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.87259959, 0.94361816]]) - # execute 10 times - for i in range(10): - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + # execute 10 trials + val = benchmark(comp.run, inputs={R: [[1.0, 2.0]]}, num_trials=10, execution_mode=comp_mode) assert np.allclose(val, [[0.87507549, 0.94660049]]) - if benchmark.enabled: - benchmark(comp.execute, inputs={R: [[1.0, 2.0]]}, execution_mode=mode) - @pytest.mark.composition @pytest.mark.benchmark(group="Recurrent") - @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.PTXExec, marks=[pytest.mark.llvm, pytest.mark.cuda]), - ]) - def test_run_recurrent_transfer_mechanism_hetero_2(self, 
benchmark, mode): + def test_run_recurrent_transfer_mechanism_hetero_2(self, benchmark, comp_mode): comp = Composition() R = RecurrentTransferMechanism(size=2, function=Logistic(), @@ -3956,28 +3981,18 @@ def test_run_recurrent_transfer_mechanism_hetero_2(self, benchmark, mode): comp.add_node(R) comp._analyze_graph() sched = Scheduler(composition=comp) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.5, 0.73105858]]) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.3864837, 0.73105858]]) - # execute 10 times - for i in range(10): - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) - + # execute 10 trials + val = benchmark(comp.run, inputs={R: [[1.0, 2.0]]}, num_trials=10, execution_mode=comp_mode) assert np.allclose(val, [[0.36286875, 0.78146724]]) - if benchmark.enabled: - benchmark(comp.execute, inputs={R: [[1.0, 2.0]]}, execution_mode=mode) - @pytest.mark.composition @pytest.mark.benchmark(group="Recurrent") - @pytest.mark.parametrize("mode", [pnl.ExecutionMode.Python, - pytest.param(pnl.ExecutionMode.LLVM, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.LLVMExec, marks=pytest.mark.llvm), - pytest.param(pnl.ExecutionMode.PTXExec, marks=[pytest.mark.llvm, pytest.mark.cuda]), - ]) - def test_run_recurrent_transfer_mechanism_integrator_2(self, benchmark, mode): + def test_run_recurrent_transfer_mechanism_integrator_2(self, benchmark, comp_mode): comp = Composition() R = RecurrentTransferMechanism(size=2, function=Logistic(), @@ -3988,20 +4003,16 @@ def test_run_recurrent_transfer_mechanism_integrator_2(self, benchmark, mode): comp.add_node(R) comp._analyze_graph() sched = Scheduler(composition=comp) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.5, 0.50249998]]) - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + val = comp.run(inputs={R: [[1.0, 2.0]]}, num_trials=1, execution_mode=comp_mode) assert np.allclose(val, [[0.4999875, 0.50497484]]) - # execute 10 times - for i in range(10): - val = comp.execute(inputs={R: [[1.0, 2.0]]}, execution_mode=mode) + # execute 10 trials + val = benchmark(comp.run, inputs={R: [[1.0, 2.0]]}, num_trials=10, execution_mode=comp_mode) assert np.allclose(val, [[0.49922843, 0.52838607]]) - if benchmark.enabled: - benchmark(comp.execute, inputs={R: [[1.0, 2.0]]}, execution_mode=mode) - def test_run_termination_condition_custom_context(self): D = pnl.DDM(function=pnl.DriftDiffusionIntegrator, execute_until_finished=False) comp = pnl.Composition() @@ -4029,6 +4040,30 @@ def test_manual_context(self): comp.run({t: [1]}, context=context) assert comp.results == [[[2]]] + def test_missing_afferent_at_run_time(self): + A = ProcessingMechanism() + B = ProcessingMechanism(input_ports=['OCCUPIED', 'UNOCCUPIED']) + comp = Composition([A,B]) + warning_type = UserWarning + warning_msg = '"InputPort (\'UNOCCUPIED\') of \'ProcessingMechanism-1\' ' \ + 'doesn\'t have any afferent Projections."' + with pytest.raises(TypeError): # Caused by error on B at construction (with only one InputPort "occupied") + with pytest.warns(warning_type) as warning: + comp.run() + assert repr(warning[0].message.args[0]) == warning_msg + + def 
test_missing_efferent_at_run_time(self): + A = ProcessingMechanism() + B = ProcessingMechanism(output_ports=['OCCUPIED','UNOCCUPIED']) # 'UNOCCUPIED' is deliberately left without an efferent Projection + C = ProcessingMechanism(name='C') + comp = Composition([A,B,C]) + warning_type = UserWarning + warning_msg = '"OutputPort (\'UNOCCUPIED\') of \'ProcessingMechanism-1\' ' \ + 'doesn\'t have any efferent Projections in \'Composition-0\'."' + with pytest.warns(warning_type) as warning: + comp.run() + assert repr(warning[0].message.args[0]) == warning_msg + class TestCallBeforeAfterTimescale: diff --git a/tests/composition/test_control.py b/tests/composition/test_control.py index 706dc08ef19..b6b89cb28ce 100644 --- a/tests/composition/test_control.py +++ b/tests/composition/test_control.py @@ -1914,11 +1914,8 @@ def test_multilevel_ocm_gridsearch_conflicting_directions(self, mode, benchmark) intensity_cost_function=pnl.Linear(slope=0.0), allocation_samples=pnl.SampleSpec(start=1.0, stop=5.0, num=5))]) ) - results = ocomp.run([5], execution_mode=mode) - assert np.allclose(results, [[50]]) - - if benchmark.enabled: - benchmark(ocomp.run, [5], execution_mode=mode) + result = benchmark(ocomp.run, [5], execution_mode=mode) + assert np.allclose(result, [[50]]) @pytest.mark.control @pytest.mark.composition @@ -1981,11 +1978,8 @@ def test_multilevel_ocm_gridsearch_maximize(self, mode, benchmark): stop=5.0, num=5))]) ) - results = ocomp.run([5], execution_mode=mode) - assert np.allclose(results, [[70]]) - - if benchmark.enabled: - benchmark(ocomp.run, [5], execution_mode=mode) + result = benchmark(ocomp.run, [5], execution_mode=mode) + assert np.allclose(result, [[70]]) @pytest.mark.control @pytest.mark.composition @@ -2048,11 +2042,8 @@ def test_multilevel_ocm_gridsearch_minimize(self, mode, benchmark): stop=5.0, num=5))]) ) - results = ocomp.run([5], execution_mode=mode) - assert np.allclose(results, [[5]]) - - if benchmark.enabled: - benchmark(ocomp.run, [5], execution_mode=mode) + result = benchmark(ocomp.run, [5], execution_mode=mode) + assert np.allclose(result, [[5]])
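The three hunks above follow the refactor applied throughout this diff: pytest-benchmark's `benchmark` fixture invokes its target and returns the target's return value (it still calls the function once when benchmarking is disabled), so a single benchmarked call replaces the old run-then-`if benchmark.enabled: benchmark(...)` sequence. A minimal sketch of the idiom, assuming only the pytest-benchmark plugin; the test and helper names here are hypothetical:

import pytest

@pytest.mark.benchmark(group="example")
def test_benchmark_idiom(benchmark):
    def double(x):
        return 2 * x
    # benchmark() times double(5) and returns its result, so the assertion
    # consumes the benchmarked call directly instead of re-running the target.
    result = benchmark(double, 5)
    assert result == 10

def test_two_tier_ocm(self): integrationConstant = 0.8 # Time Constant @@ -2275,12 +2266,9 @@ def test_multilevel_control(self, comp_mode, benchmark): iComp.add_controller(iController) assert iComp.controller == iController assert oComp.controller == oController - res = oComp.run(inputs=[5], execution_mode=comp_mode) + res = benchmark(oComp.run, inputs=[5], execution_mode=comp_mode) assert np.allclose(res, [40]) - if benchmark.enabled: - benchmark(oComp.run, [5], execution_mode=comp_mode) - @pytest.mark.control @pytest.mark.composition def test_recurrent_control(self, comp_mode): @@ -2600,6 +2588,7 @@ def test_modulation_of_random_state(self, comp_mode, num_generators): assert np.allclose(best_second, comp.results[1]) +@pytest.mark.composition @pytest.mark.control class TestModelBasedOptimizationControlMechanisms_Execution: def test_ocm_default_function(self): @@ -2950,8 +2939,6 @@ def test_evc_gratton(self): # Note: Skip decision variable OutputPort evc_gratton.simulation_results[simulation][1:]) - @pytest.mark.control - @pytest.mark.composition def test_laming_validation_specify_control_signals(self): # Mechanisms Input = pnl.TransferMechanism(name='Input') @@ -3072,8 +3059,6 @@ def test_laming_validation_specify_control_signals(self): err_msg='Failed on expected_output[{0}]'.format(trial) ) - @pytest.mark.control - @pytest.mark.composition def test_stateful_mechanism_in_simulation(self): # Mechanisms Input = pnl.TransferMechanism(name='Input', 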
integrator_mode=True) @@ -3211,8 +3196,6 @@ def test_stateful_mechanism_in_simulation(self): err_msg='Failed on expected_output[{0}]'.format(trial) ) - @pytest.mark.control - @pytest.mark.composition @pytest.mark.benchmark(group="Model Based OCM") @pytest.mark.parametrize("mode", pytest.helpers.get_comp_execution_modes() + [pytest.helpers.cuda_param('Python-PTX'), @@ -3262,8 +3245,6 @@ def test_model_based_ocm_after(self, benchmark, mode): if benchmark.enabled: benchmark(comp.run, inputs, execution_mode=mode) - @pytest.mark.control - @pytest.mark.composition @pytest.mark.benchmark(group="Model Based OCM") @pytest.mark.parametrize("mode", pytest.helpers.get_comp_execution_modes() + [pytest.helpers.cuda_param('Python-PTX'), @@ -3585,7 +3566,7 @@ def test_model_based_num_estimates(self, num_estimates, rand_var): warning_msg = f'"\'OptimizationControlMechanism-0\' has \'num_estimates = {num_estimates}\' specified, ' \ f'but its \'agent_rep\' (\'comp\') has no random variables: ' \ f'\'RANDOMIZATION_CONTROL_SIGNAL\' will not be created, and num_estimates set to None."' - with pytest.warns(warning_type) as warning: + with pytest.warns(warning_type) as warnings: ocm = pnl.OptimizationControlMechanism(agent_rep=comp, state_features=[A.input_port], objective_mechanism=objective_mech, @@ -3593,7 +3574,7 @@ def test_model_based_num_estimates(self, num_estimates, rand_var): num_estimates=num_estimates, control_signals=[control_signal]) if warning_type: - assert repr(warning[5].message.args[0]) == warning_msg + assert any(warning_msg == repr(w.message.args[0]) for w in warnings) comp.add_controller(ocm) inputs = {A: [[[1.0]]]} @@ -3652,8 +3633,6 @@ def test_model_based_ocm_no_simulations(self): # initial 1 + each allocation sample (1, 2, 3) integrated assert B.parameters.value.get(comp) == 7 - @pytest.mark.control - @pytest.mark.composition @pytest.mark.benchmark(group="Multilevel") def test_grid_search_random_selection(self, comp_mode, benchmark): A = pnl.ProcessingMechanism(name='A') @@ -3700,8 +3679,7 @@ def test_grid_search_random_selection(self, comp_mode, benchmark): benchmark(comp.run, inputs=inputs, num_trials=10, context='bench_outer_comp', execution_mode=comp_mode) assert len(A.log.get_logged_entries()) == 0 - @pytest.mark.control - @pytest.mark.composition + def test_input_CIM_assignment(self, comp_mode): input_a = pnl.ProcessingMechanism(name='oa', function=pnl.Linear(slope=1)) input_b = pnl.ProcessingMechanism(name='ob', function=pnl.Linear(slope=1)) @@ -3863,6 +3841,7 @@ def test_list(self): assert sample_iterator.num == len(sample_list) +@pytest.mark.composition @pytest.mark.control class TestControlTimeScales: diff --git a/tests/composition/test_gating.py b/tests/composition/test_gating.py index 486f1c04fbb..e27cda9f4b7 100644 --- a/tests/composition/test_gating.py +++ b/tests/composition/test_gating.py @@ -40,7 +40,7 @@ def test_gating(benchmark, comp_mode): comp.add_linear_processing_pathway(p_pathway) comp.add_node(Gating_Mechanism) - comp.run(num_trials=4, inputs=stim_list, execution_mode=comp_mode) + benchmark(comp.run, num_trials=4, inputs=stim_list, execution_mode=comp_mode) expected_results = [ [np.array([0., 0., 0.])], @@ -49,9 +49,7 @@ def test_gating(benchmark, comp_mode): [np.array([2.53788284, 2.53788284, 2.53788284])] ] - np.testing.assert_allclose(comp.results, expected_results) - if benchmark.enabled: - benchmark(comp.run, num_trials=4, inputs=stim_list, execution_mode=comp_mode) + np.testing.assert_allclose(comp.results[:4], expected_results) # DEPRECATED FUNCTIONALITY 
9/26/19 # @pytest.mark.composition diff --git a/tests/composition/test_learning.py b/tests/composition/test_learning.py index cbba3e2d0c8..bfc001bab4b 100644 --- a/tests/composition/test_learning.py +++ b/tests/composition/test_learning.py @@ -8,7 +8,9 @@ from psyneulink.core.compositions.composition import Composition, CompositionError, RunError from psyneulink.core.components.mechanisms.processing.transfermechanism import TransferMechanism from psyneulink.core.components.functions.nonstateful.learningfunctions import BackPropagation - +import psyneulink.core.llvm as pnlvm +from psyneulink.core.globals.keywords import Loss +from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import SSE, MSE, L0 class TestTargetSpecs: @@ -406,6 +408,22 @@ def test_indepedence_of_learning_pathways_using_same_mechs_in_different_comps(se num_trials=2) assert np.allclose(comp2.results, comp1.results) + @pytest.mark.parametrize('execution_mode', + [pnlvm.ExecutionMode.LLVM, pnlvm.ExecutionMode.PyTorch]) + def test_execution_mode_pytorch_and_LLVM_errors(self, execution_mode): + A = TransferMechanism(name="learning-process-mech-A") + B = TransferMechanism(name="learning-process-mech-B") + comp = Composition() + pway = comp.add_backpropagation_learning_pathway(pathway=[A,B]) + # Call learn with default_variable specified for target (for comparison with missing target) + with pytest.raises(CompositionError) as error: + comp.learn(inputs={A: 1.0, + pway.target: 0.0}, + execution_mode=execution_mode, + num_trials=2) + assert error.value.error_value == f"ExecutionMode.{execution_mode.name} cannot be used in the learn() " \ + f"method of \'Composition-0\' because it is not an AutodiffComposition" + class TestNoLearning: @@ -1708,7 +1726,8 @@ def test_stranded_nested_target_mech_error(self): f'as the target attribute of the relevant pathway in {inner_comp.name}.pathways. 
' ) -class TestBackProp: + +class TestBackPropLearning: def test_matrix_spec_and_learning_rate(self): T1 = pnl.TransferMechanism(size = 2, @@ -1773,7 +1792,121 @@ def test_back_prop(self): # else: # print(node.name, " EMPTY LOG!") - def test_multilayer(self): + expected_quantities = [ + ( + Loss.L0, + pnl.SUM, + # output_layer output values + [np.array([0.22686074, 0.25270212, 0.91542149])], + # objective_mechanism.output_port[] value + [np.array(-0.39498435)], + # Input Weights + [[ 0.09900247, 0.19839653, 0.29785764, 0.39739191, 0.49700232], + [ 0.59629092, 0.69403786, 0.79203411, 0.89030237, 0.98885379]], + # Middle Weights + [[ 0.09490249, 0.10488719, 0.12074013, 0.1428774 ], + [ 0.29677354, 0.30507726, 0.31949676, 0.3404652 ], + [ 0.49857336, 0.50526254, 0.51830509, 0.53815062], + [ 0.70029406, 0.70544225, 0.71717037, 0.73594383], + [ 0.90192903, 0.90561554, 0.91609668, 0.93385292]], + # Output Weights + [[-0.74447522, -0.71016859, 0.31575293], + [-0.50885177, -0.47444784, 0.56676582], + [-0.27333719, -0.23912033, 0.8178167 ], + [-0.03767547, -0.00389039, 1.06888608]], + # Results + [[np.array([0.8344837 , 0.87072018, 0.89997433])], + [np.array([0.77970193, 0.83263138, 0.90159627])], + [np.array([0.70218502, 0.7773823 , 0.90307765])], + [np.array([0.60279149, 0.69958079, 0.90453143])], + [np.array([0.4967927 , 0.60030321, 0.90610082])], + [np.array([0.4056202 , 0.49472391, 0.90786617])], + [np.array([0.33763025, 0.40397637, 0.90977675])], + [np.array([0.28892812, 0.33633532, 0.9117193 ])], + [np.array([0.25348771, 0.28791896, 0.9136125 ])], + [np.array([0.22686074, 0.25270212, 0.91542149])]] + ), + ( + Loss.SSE, + SSE, + # output_layer output values + [np.array([0.12306101, 0.12855051, 0.92795179])], + # objective_mechanism.output_port[] value + [np.array(0.03686019)], + # Input Weights + [[0.09944189, 0.19971589, 0.29997209, 0.40020673, 0.50041673], + [0.5979248, 0.69894361, 0.79989623, 0.90076867, 1.0015495]], + # Middle Weights + [[0.11871093, 0.12080358, 0.12913871, 0.14437706], + [0.32158068, 0.32166374, 0.32825218, 0.34203389], + [0.52434054, 0.52249285, 0.52740295, 0.53978486], + [0.72697833, 0.72328725, 0.72659469, 0.73763981], + [0.92948392, 0.92404372, 0.92583026, 0.93560663]], + # Output Weights + [[-0.93832915, -0.92583809, 0.36458405], + [-0.70446298, -0.69176289, 0.61576631], + [-0.47104248, -0.45856457, 0.86712447], + [-0.23778995, -0.22590794, 1.11863746]], + # Results + [[np.array([0.8344837, 0.87072018, 0.89997433])], + [np.array([0.71351724, 0.78641358, 0.90315634])], + [np.array([0.50994992, 0.62475304, 0.90595494])], + [np.array([0.32856147, 0.41172748, 0.90933295])], + [np.array([0.24083869, 0.2789737 , 0.91321678])], + [np.array([0.19538549, 0.21621273, 0.91684295])], + [np.array([0.16740723, 0.1806998 , 0.92008144])], + [np.array([0.14819045, 0.15753784, 0.92297786])], + [np.array([0.13402466, 0.14102997, 0.92558631])], + [np.array([0.12306101, 0.12855051, 0.92795179])]] + ), + ( + Loss.MSE, + MSE, + # output_layer output values + [np.array([0.34065762, 0.40283722, 0.90991679])], + # objective_mechanism.output_port[] value + np.array([0.09548014]), + # Input Weights + [[0.09878461, 0.19766035, 0.29665234, 0.39577252, 0.49502758], + [0.59548084, 0.69130054, 0.78755247, 0.88428106, 0.98151113]], + # Middle Weights + [[0.07706183, 0.09444972, 0.11723154, 0.14557542], + [0.27818676, 0.29420326, 0.3158414, 0.34327603], + [0.4792692, 0.49396883, 0.51450859, 0.54106987], + [0.68030443, 0.69374747, 0.71323898, 0.73896663], + [0.88128847, 0.89353987, 0.91203717, 
0.93697403]], + # Output Weights + [[-0.59467351, -0.52912455, 0.29597305], + [-0.35770705, -0.29192171, 0.54683712], + [-0.12052892, -0.05468307, 0.79769116], + [ 0.11707288, 0.18282992, 1.04852107]], + # Results + [[np.array([0.8344837, 0.87072018, 0.89997433])], + [np.array([0.79924855, 0.84620706, 0.90106255])], + [np.array([0.75417448, 0.81457342, 0.90208226])], + [np.array([0.69827147, 0.77394099, 0.90306295])], + [np.array([0.63285507, 0.72284124, 0.90404476])], + [np.array([0.5625646 , 0.66140581, 0.90507175])], + [np.array([0.49415513, 0.59273088, 0.90617688])], + [np.array([0.4332465 , 0.52285839, 0.90736771])], + [np.array([0.38219876, 0.45825994, 0.90862524])], + [np.array([0.34065762, 0.40283722, 0.90991679])]] + ), + ] + # Indices into expected_quantities + @pytest.mark.parametrize("expected_quantities", expected_quantities, + # Rename L0 for test output as keyword actually = 'difference' + ids=['L0' if x[0] == Loss.L0 else x[0].name for x in expected_quantities]) + def test_multilayer_truth(self, expected_quantities): + + LOSS_FUNCTION = 0 + LOSS = 1 + OUTPUT_LAYER_VAL = 2 + OBJECTIVE_MECH_VAL = 3 + INPUT_WEIGHTS = 4 + MIDDLE_WEIGHTS = 5 + OUTPUT_WEIGHTS = 6 + RESULTS = 7 input_layer = pnl.TransferMechanism(name='input_layer', function=pnl.Logistic, @@ -1824,7 +1957,7 @@ def test_multilayer(self): p = [input_layer, input_weights, hidden_layer_1, middle_weights, hidden_layer_2, output_weights, output_layer] backprop_pathway = comp.add_backpropagation_learning_pathway( pathway=p, - loss_function='sse', + loss_function=expected_quantities[LOSS_FUNCTION], learning_rate=1. ) @@ -1838,42 +1971,15 @@ def test_multilayer(self): objective_output_layer = comp.nodes[5] - expected_output = [ - (output_layer.get_output_values(comp), [np.array([0.22686074, 0.25270212, 0.91542149])]), - # error here? 
why still MSE - (objective_output_layer.output_ports[pnl.MSE].parameters.value.get(comp), np.array(0.04082589331852094)), - (input_weights.get_mod_matrix(comp), np.array([ - [ 0.09900247, 0.19839653, 0.29785764, 0.39739191, 0.49700232], - [ 0.59629092, 0.69403786, 0.79203411, 0.89030237, 0.98885379], - ])), - (middle_weights.get_mod_matrix(comp), np.array([ - [ 0.09490249, 0.10488719, 0.12074013, 0.1428774 ], - [ 0.29677354, 0.30507726, 0.31949676, 0.3404652 ], - [ 0.49857336, 0.50526254, 0.51830509, 0.53815062], - [ 0.70029406, 0.70544225, 0.71717037, 0.73594383], - [ 0.90192903, 0.90561554, 0.91609668, 0.93385292], - ])), - (output_weights.get_mod_matrix(comp), np.array([ - [-0.74447522, -0.71016859, 0.31575293], - [-0.50885177, -0.47444784, 0.56676582], - [-0.27333719, -0.23912033, 0.8178167 ], - [-0.03767547, -0.00389039, 1.06888608], - ])), - (comp.parameters.results.get(comp), [ - [np.array([0.8344837 , 0.87072018, 0.89997433])], - [np.array([0.77970193, 0.83263138, 0.90159627])], - [np.array([0.70218502, 0.7773823 , 0.90307765])], - [np.array([0.60279149, 0.69958079, 0.90453143])], - [np.array([0.4967927 , 0.60030321, 0.90610082])], - [np.array([0.4056202 , 0.49472391, 0.90786617])], - [np.array([0.33763025, 0.40397637, 0.90977675])], - [np.array([0.28892812, 0.33633532, 0.9117193 ])], - [np.array([0.25348771, 0.28791896, 0.9136125 ])], - [np.array([0.22686074, 0.25270212, 0.91542149])] - ]), - ] - # Test nparray output of log for Middle_Weights + expected_output = [ + (output_layer.get_output_values(comp), expected_quantities[OUTPUT_LAYER_VAL]), + (objective_output_layer.output_ports[LOSS].parameters.value.get(comp), + expected_quantities[OBJECTIVE_MECH_VAL]), + (input_weights.get_mod_matrix(comp), expected_quantities[INPUT_WEIGHTS]), + (middle_weights.get_mod_matrix(comp), expected_quantities[MIDDLE_WEIGHTS]), + (output_weights.get_mod_matrix(comp), expected_quantities[OUTPUT_WEIGHTS]), + (comp.parameters.results.get(comp), expected_quantities[RESULTS])] for i in range(len(expected_output)): val, expected = expected_output[i] @@ -1882,13 +1988,10 @@ # which WILL FAIL unless you gather higher precision values to use as reference np.testing.assert_allclose(val, expected, atol=1e-08, err_msg='Failed on expected_output[{0}]'.format(i)) - @pytest.mark.parametrize('models', [ - # [pnl.SYSTEM,pnl.COMPOSITION], - # [pnl.SYSTEM,'AUTODIFF'], - [pnl.COMPOSITION,'AUTODIFF'] - ]) + models = ['PYTORCH','LLVM'] + @pytest.mark.parametrize('models', models, ids=models) @pytest.mark.pytorch - def test_xor_training_identicalness_standard_composition_vs_autodiff(self, models): + def test_xor_training_identicalness_standard_composition_vs_PyTorch_and_LLVM(self, models): """Test equality of results for running 3-layered xor network as a standard Composition and as an AutodiffComposition in LLVM and PyTorch modes"""
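+ # The standard Composition below is built and trained unconditionally; each parametrized
+ # case ('LLVM' or 'PYTORCH') then builds the identical network as an AutodiffComposition,
+ # trains it with learn(execution_mode=pnlvm.ExecutionMode.LLVMRun or ExecutionMode.PyTorch),
+ # and asserts that its learned matrices and results match the standard Composition's.
num_epochs=2 @@ -1910,89 +2013,136 @@ def test_xor_training_identicalness_standard_composition_vs_autodiff(self, model # SET UP MODELS -------------------------------------------------------------------------------- - # STANDARD Composition - if pnl.COMPOSITION in models: + # STANDARD Composition (used in all comparisons) - input_comp = pnl.TransferMechanism(name='input_comp', + input_comp = pnl.TransferMechanism(name='input_comp', + default_variable=np.zeros(2)) + + hidden_comp = pnl.TransferMechanism(name='hidden_comp', + default_variable=np.zeros(10), + function=pnl.Logistic()) + + output_comp = pnl.TransferMechanism(name='output_comp', + default_variable=np.zeros(1), + 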
function=pnl.Logistic()) + + in_to_hidden_comp = pnl.MappingProjection(name='in_to_hidden_comp', + matrix=in_to_hidden_matrix.copy(), + sender=input_comp, + receiver=hidden_comp) + + hidden_to_out_comp = pnl.MappingProjection(name='hidden_to_out_comp', + matrix=hidden_to_out_matrix.copy(), + sender=hidden_comp, + receiver=output_comp) + + xor_comp = pnl.Composition() + + backprop_pathway = xor_comp.add_backpropagation_learning_pathway([input_comp, + in_to_hidden_comp, + hidden_comp, + hidden_to_out_comp, + output_comp], + learning_rate=10) + target_mech = backprop_pathway.target + inputs_dict = {"inputs": {input_comp:xor_inputs}, + "targets": {output_comp:xor_targets}, + "epochs": num_epochs} + result_comp = xor_comp.learn(inputs=inputs_dict) + + # AutodiffComposition using LLVM + if 'LLVM' in models: + + input_LLVM = pnl.TransferMechanism(name='input', default_variable=np.zeros(2)) - hidden_comp = pnl.TransferMechanism(name='hidden_comp', + hidden_LLVM = pnl.TransferMechanism(name='hidden', default_variable=np.zeros(10), function=pnl.Logistic()) - output_comp = pnl.TransferMechanism(name='output_comp', + output_LLVM = pnl.TransferMechanism(name='output', default_variable=np.zeros(1), function=pnl.Logistic()) - in_to_hidden_comp = pnl.MappingProjection(name='in_to_hidden_comp', + in_to_hidden_LLVM = pnl.MappingProjection(name='in_to_hidden', matrix=in_to_hidden_matrix.copy(), - sender=input_comp, - receiver=hidden_comp) + sender=input_LLVM, + receiver=hidden_LLVM) - hidden_to_out_comp = pnl.MappingProjection(name='hidden_to_out_comp', + hidden_to_out_LLVM = pnl.MappingProjection(name='hidden_to_out', matrix=hidden_to_out_matrix.copy(), - sender=hidden_comp, - receiver=output_comp) - - xor_comp = pnl.Composition() - - backprop_pathway = xor_comp.add_backpropagation_learning_pathway([input_comp, - in_to_hidden_comp, - hidden_comp, - hidden_to_out_comp, - output_comp], - learning_rate=10) - target_mech = backprop_pathway.target - inputs_dict = {"inputs": {input_comp:xor_inputs}, - "targets": {output_comp:xor_targets}, + sender=hidden_LLVM, + receiver=output_LLVM) + + xor_LLVM = pnl.AutodiffComposition(learning_rate=10, + optimizer_type='sgd') + + xor_LLVM.add_node(input_LLVM) + xor_LLVM.add_node(hidden_LLVM) + xor_LLVM.add_node(output_LLVM) + + xor_LLVM.add_projection(sender=input_LLVM, projection=in_to_hidden_LLVM, receiver=hidden_LLVM) + xor_LLVM.add_projection(sender=hidden_LLVM, projection=hidden_to_out_LLVM, receiver=output_LLVM) + xor_LLVM.infer_backpropagation_learning_pathways() + + inputs_dict = {"inputs": {input_LLVM:xor_inputs}, + "targets": {output_LLVM:xor_targets}, "epochs": num_epochs} - result_comp = xor_comp.learn(inputs=inputs_dict) + result_LLVM = xor_LLVM.learn(inputs=inputs_dict, execution_mode=pnlvm.ExecutionMode.LLVMRun) + + assert np.allclose(in_to_hidden_LLVM.parameters.matrix.get(xor_LLVM), + in_to_hidden_comp.get_mod_matrix(xor_comp)) + assert np.allclose(hidden_to_out_LLVM.parameters.matrix.get(xor_LLVM), + hidden_to_out_comp.get_mod_matrix(xor_comp)) + assert np.allclose(result_comp, result_LLVM) - # AutodiffComposition - if 'AUTODIFF' in models: + # AutodiffComposition using PyTorch + elif 'PYTORCH' in models: - input_autodiff = pnl.TransferMechanism(name='input', + input_PYTORCH = pnl.TransferMechanism(name='input', default_variable=np.zeros(2)) - hidden_autodiff = pnl.TransferMechanism(name='hidden', + hidden_PYTORCH = pnl.TransferMechanism(name='hidden', default_variable=np.zeros(10), function=pnl.Logistic()) - output_autodiff = 
pnl.TransferMechanism(name='output', + output_PYTORCH = pnl.TransferMechanism(name='output', default_variable=np.zeros(1), function=pnl.Logistic()) - in_to_hidden_autodiff = pnl.MappingProjection(name='in_to_hidden', + in_to_hidden_PYTORCH = pnl.MappingProjection(name='in_to_hidden', matrix=in_to_hidden_matrix.copy(), - sender=input_autodiff, - receiver=hidden_autodiff) + sender=input_PYTORCH, + receiver=hidden_PYTORCH) - hidden_to_out_autodiff = pnl.MappingProjection(name='hidden_to_out', + hidden_to_out_PYTORCH = pnl.MappingProjection(name='hidden_to_out', matrix=hidden_to_out_matrix.copy(), - sender=hidden_autodiff, - receiver=output_autodiff) + sender=hidden_PYTORCH, + receiver=output_PYTORCH) - xor_autodiff = pnl.AutodiffComposition(learning_rate=10, - optimizer_type='sgd') + xor_PYTORCH = pnl.AutodiffComposition(learning_rate=10, + optimizer_type='sgd') - xor_autodiff.add_node(input_autodiff) - xor_autodiff.add_node(hidden_autodiff) - xor_autodiff.add_node(output_autodiff) + xor_PYTORCH.add_node(input_PYTORCH) + xor_PYTORCH.add_node(hidden_PYTORCH) + xor_PYTORCH.add_node(output_PYTORCH) - xor_autodiff.add_projection(sender=input_autodiff, projection=in_to_hidden_autodiff, receiver=hidden_autodiff) - xor_autodiff.add_projection(sender=hidden_autodiff, projection=hidden_to_out_autodiff, receiver=output_autodiff) - xor_autodiff.infer_backpropagation_learning_pathways() + xor_PYTORCH.add_projection(sender=input_PYTORCH, projection=in_to_hidden_PYTORCH, receiver=hidden_PYTORCH) + xor_PYTORCH.add_projection(sender=hidden_PYTORCH, projection=hidden_to_out_PYTORCH, receiver=output_PYTORCH) + xor_PYTORCH.infer_backpropagation_learning_pathways() - inputs_dict = {"inputs": {input_autodiff:xor_inputs}, - "targets": {output_autodiff:xor_targets}, + inputs_dict = {"inputs": {input_PYTORCH:xor_inputs}, + "targets": {output_PYTORCH:xor_targets}, "epochs": num_epochs} - result_autodiff = xor_autodiff.learn(inputs=inputs_dict) + result_PYTORCH = xor_PYTORCH.learn(inputs=inputs_dict, + execution_mode=pnlvm.ExecutionMode.PyTorch) + + assert np.allclose(in_to_hidden_PYTORCH.parameters.matrix.get(xor_PYTORCH), + in_to_hidden_comp.get_mod_matrix(xor_comp)) + assert np.allclose(hidden_to_out_PYTORCH.parameters.matrix.get(xor_PYTORCH), + hidden_to_out_comp.get_mod_matrix(xor_comp)) + assert np.allclose(result_comp, result_PYTORCH) - # COMPARE WEIGHTS FOR PAIRS OF MODELS ---------------------------------------------------------- - if all(m in models for m in {pnl.COMPOSITION, 'AUTODIFF'}): - assert np.allclose(in_to_hidden_autodiff.parameters.matrix.get(xor_autodiff), in_to_hidden_comp.get_mod_matrix(xor_comp)) - assert np.allclose(hidden_to_out_autodiff.parameters.matrix.get(xor_autodiff), hidden_to_out_comp.get_mod_matrix(xor_comp)) - assert np.allclose(result_comp, result_autodiff) @pytest.mark.parametrize('configuration', [ 'Y UP', diff --git a/tests/composition/test_show_graph.py b/tests/composition/test_show_graph.py index 9d81254486b..a159f08eec9 100644 --- a/tests/composition/test_show_graph.py +++ b/tests/composition/test_show_graph.py @@ -72,7 +72,7 @@ def test_converging_pathways(self): class TestNested: def test_multiple_projections_to_node_of_nested_composition(self): - '''This is based on the N-back script''' + '''This is based on the Nback script''' stim = TransferMechanism(name='STIM', size=5) context = TransferMechanism(name='CONTEXT', size=5) diff --git a/tests/functions/test_buffer.py b/tests/functions/test_buffer.py index b088cf8a6a6..e3e8c4bb2c1 100644 --- 
a/tests/functions/test_buffer.py +++ b/tests/functions/test_buffer.py @@ -1,6 +1,5 @@ import numpy as np import pytest -from collections import deque from psyneulink.core.compositions.composition import Composition from psyneulink.core.components.functions.nonstateful.distributionfunctions import NormalDist @@ -13,103 +12,54 @@ class TestBuffer(): def test_buffer_standalone(self): B = Buffer() val = B.execute(1.0) - assert np.allclose(deque(np.atleast_1d(1.0)), val) + assert np.allclose(np.atleast_1d(1.0), val) @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_rate_float(self, benchmark): - B = Buffer(history=3, rate = 0.1) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 0.04, 0.05, 0.06], [ 0.7, 0.8, 0.9], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - - @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_rate_list(self, benchmark): - B = Buffer(history=3, rate = [0.1, 0.5, 0.9]) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 0.04, 1.25, 4.86], [ 0.7, 4. , 8.1], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - - @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_rate_ndarray(self, benchmark): - B = Buffer(history=3, rate = np.array([0.1, 0.5, 0.9])) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 0.04, 1.25, 4.86], [ 0.7, 4. , 8.1], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - - @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_noise_float(self, benchmark): - B = Buffer(history=3, rate = 1.0, noise=10.0) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 24., 25., 26.], [ 17., 18., 19.], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - - @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_noise_list(self, benchmark): - B = Buffer(history=3, rate = 1.0, noise=[10.0, 20.0, 30.0]) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 24., 45., 66.], [ 17., 28., 39.], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - - @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_noise_ndarray(self, benchmark): - B = Buffer(history=3, rate = 1.0, noise=[10.0, 20.0, 30.0]) - B.execute([1,2,3]) - B.execute([4,5,6]) - B.execute([7,8,9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([ 24., 45., 66.], [ 17., 28., 39.], [10, 11, 12])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) - + @pytest.mark.parametrize("rate, expected", + [ + (0.1, [[0.04, 0.05, 0.06], [0.7, 0.8, 0.9], [10, 11, 12]]), + ([0.1, 0.5, 0.9], [[0.04, 1.25, 4.86], [ 0.7, 4., 8.1], [10, 11, 12]]), + (np.array([0.1, 0.5, 0.9]), [[0.04, 1.25, 4.86], [ 0.7, 4., 8.1], [10, 11, 12]]), + ], ids=["float", "list", "ndarray"]) + def test_buffer_standalone_rate(self, benchmark, rate, expected): + B = Buffer(history=3, rate=rate) + B.execute([1, 2, 3]) + B.execute([4, 5, 6]) + B.execute([7, 8, 9]) + val = benchmark(B.execute, [10, 11, 12]) + assert np.allclose(expected, val) + + 
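The consolidated parametrization above preserves the original expected values, which follow directly from Buffer's semantics: on each execution the entries already in the history deque are multiplied elementwise by `rate` (and offset by `noise`, when one is given), the new input is appended, and only the `history` most recent entries are kept. A sketch of that arithmetic in plain numpy, as an illustrative reimplementation of the rate case rather than the PsyNeuLink source:

import numpy as np

def buffer_steps(inputs, rate, history=3):
    # Mimics Buffer(history=history, rate=rate): decay existing entries,
    # append the new input, keep only the `history` most recent entries.
    buf = []
    for x in inputs:
        buf = [np.asarray(rate) * entry for entry in buf]
        buf.append(np.asarray(x, dtype=float))
        buf = buf[-history:]
    return buf

val = buffer_steps([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], rate=0.1)
# -> [[0.04, 0.05, 0.06], [0.7, 0.8, 0.9], [10., 11., 12.]] (within float precision),
# the 'float' case asserted above; rate=[0.1, 0.5, 0.9] reproduces the 'list' case.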
@pytest.mark.parametrize("noise, expected", + [ + (10.0, [[ 24., 25., 26.], [17., 18., 19.], [10, 11, 12]]), + ([10.0, 20.0, 30.0], [[ 24., 45., 66.], [17., 28., 39.], [10, 11, 12]]), + (np.array([10.0, 20.0, 30.0]), [[ 24., 45., 66.], [17., 28., 39.], [10, 11, 12]]), + (NormalDist(seed=0, standard_deviation=0.1), [[4.02430687, 4.91927251, 5.95087965], + [7.09586966, 7.91823773, 8.86077491], + [10, 11, 12]]), + ], ids=["float", "list", "ndarray", "function"]) @pytest.mark.benchmark(group="BufferFunction") - def test_buffer_standalone_noise_function(self, benchmark): - B = Buffer(history=3, rate = 1.0, noise=NormalDist(standard_deviation=0.1)) + def test_buffer_standalone_noise_float(self, benchmark, noise, expected): + B = Buffer(history=3, rate=1.0, noise=noise) B.execute([1, 2, 3]) B.execute([4, 5, 6]) B.execute([7, 8, 9]) - val = B.execute([10,11,12]) - assert np.allclose(deque(np.atleast_1d([[4.02430687, 4.91927251, 5.95087965], - [7.09586966, 7.91823773, 8.86077491], - [10, 11, 12]])), val) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) + val = benchmark(B.execute, [10, 11, 12]) + assert np.allclose(expected, val) @pytest.mark.benchmark(group="BufferFunction") def test_buffer_standalone_noise_function_in_array(self, benchmark): B = Buffer(history=3) - # Set noise parameter ouside of a constructor to avoid problems - # with extra copying + # Set noise parameter outside of the constructor to avoid problems with extra copying B.parameters.noise.set([10, NormalDist(standard_deviation=0.1), 20]) B.execute([1, 2, 3]) B.execute([4, 5, 6]) B.execute([7, 8, 9]) - val = B.execute([10, 11, 12]) + val = benchmark(B.execute, [10, 11, 12]) expected_val = [[24, 4.693117564500052, 46], [17, 7.744647273059847, 29], [10, 11, 12]] for v_v, v_e in zip(val, expected_val): for v, e in zip(v_v, v_e): assert np.allclose(v, e) - if benchmark.enabled: - benchmark(B.execute, [1, 2, 3]) def test_buffer_standalone_noise_function_invocation(self): class CallCount: @@ -120,8 +70,8 @@ def __call__(self): return self.count counter_f = CallCount() - # Set noise parameter ouside of a constructor to avoid problems - # with extra copying. 
This test fails if noise is passed to constructor + # Set noise parameter outside of the constructor to avoid problems with extra copying + # This test fails if noise is passed to constructor B = Buffer(history=3) B.parameters.noise.set([10, counter_f, 20]) B.execute([1, 2, 3]) @@ -140,11 +90,10 @@ def test_buffer_initializer_len_3(self, benchmark): B = Buffer(default_variable=[[0.0], [1.0], [2.0]], initializer=[[0.0], [1.0], [2.0]], history=3) - assert np.allclose(B.execute(3.0), deque([[1.0], [2.0], np.array([3.])])) - assert np.allclose(B.execute(4.0), deque([[2.0], np.array([3.]), np.array([4.])])) - assert np.allclose(B.execute(5.0), deque([np.array([3.]), np.array([4.]), np.array([5.])])) - if benchmark.enabled: - benchmark(B.execute, 5.0) + assert np.allclose(B.execute(3.0), [[1.0], [2.0], np.array([3.])]) + assert np.allclose(B.execute(4.0), [[2.0], np.array([3.]), np.array([4.])]) + val = benchmark(B.execute, 5.0) + assert np.allclose(val, [np.array([3.]), np.array([4.]), np.array([5.])]) @pytest.mark.benchmark(group="BufferFunction") def test_buffer_as_function_of_processing_mech(self, benchmark): @@ -152,12 +101,11 @@ def test_buffer_as_function_of_processing_mech(self, benchmark): P = ProcessingMechanism(function=Buffer(default_variable=[[0.0]], initializer=[0.0], history=3)) - val = P.execute(1.0) + val = benchmark(P.execute, 1.0) # NOTE: actual output is [0, [[1]]] assert np.allclose(np.asfarray(val), [[0., 1.]]) - if benchmark.enabled: - benchmark(P.execute, 5.0) + # fails due to value and variable problems when Buffer is the function of a mechanism # P = ProcessingMechanism(function=Buffer(default_variable=[[0.0], [1.0], [2.0]], # initializer=[[0.0], [1.0], [2.0]], diff --git a/tests/functions/test_distribution.py b/tests/functions/test_distribution.py index 2b0d111d2c3..4bb805a095e 100644 --- a/tests/functions/test_distribution.py +++ b/tests/functions/test_distribution.py @@ -2,6 +2,8 @@ import pytest import sys +from packaging import version as pversion + import psyneulink.core.llvm as pnlvm import psyneulink.core.components.functions.nonstateful.distributionfunctions as Functions from psyneulink.core.globals.utilities import _SeededPhilox @@ -25,32 +27,36 @@ 0.5173675420165031, 0.06942854144616283, 6.302631815990666, 1.4934079600147951, 0.4288991185241868, 1.7740760781361433) dda_expected_small = (0.5828813465336954, 0.04801236718458773, - 0.532471083815943, 0.09633801362499317, 6.111833139205608, - 1.5821207676710864, 0.5392724012504414, 1.8065252817609618) + 0.532471083815943, 0.09633801555720854, 6.1142591416669765, + 1.5821207676710864, 0.5392724051148722, 1.806647390875747) + # Different libm implementations produce slightly different results -if sys.platform.startswith("win") or sys.platform.startswith("darwin"): +# Numpy 1.22+ uses new/optimized implementation of FP routines +# on processors that support AVX512 since 1.22 [0] +# [0] https://github.com/numpy/numpy/commit/1eff1c543a8f1e9d7ea29182b8c76db5a2efc3c2 +if sys.platform.startswith("win") or sys.platform.startswith("darwin") or \ + ( pversion.parse(np.version.version) >= pversion.parse('1.22') and pytest.helpers.numpy_uses_avx512()): dda_expected_small = (0.5828813465336954, 0.04801236718458773, - 0.5324710838150166, 0.09633802135385469, 6.119380538293901, - 1.58212076767016, 0.5392724012504414, 1.8065252817609618) + 0.5324710838150166, 0.09633802135385469, 6.117763080882898, + 1.58212076767016, 0.5392724012504414, 1.8064031532265) normal_expected_mt = (1.0890232855122397) uniform_expected_mt = 
(0.6879771504250405) normal_expected_philox = (0.5910357654927911) uniform_expected_philox = (0.6043448764869507) -llvm_expected = {} llvm_expected = {'fp64': {}, 'fp32': {}} llvm_expected['fp64'][dda_expected_small] = (0.5828813465336954, 0.04801236718458773, - 0.5324710838085324, 0.09633787836991654, 6.0158766570416775, - 1.5821207675877176, 0.5392731045768397, 1.8434859117411773) + 0.5324710838085324, 0.09633788030213193, 6.0183026674990625, + 1.5821207675877176, 0.5392731084412705, 1.843608020219776) # add fp32 results llvm_expected['fp32'][dda_expected_random] = (0.42365485429763794, 0.0, - 0.5173675417900085, 0.06942801177501678, 6.302331447601318, - 1.4934077262878418, 0.428894966840744, 1.7738982439041138) + 0.5173675417900085, 0.069428451359272, 6.302595138549805, + 1.4934077262878418, 0.42889538407325745, 1.7739042043685913) llvm_expected['fp32'][dda_expected_negative] = (0.4236549735069275, 5.960464477539063e-08, - 0.5173678398132324, 0.06942889094352722, 6.303247451782227, - 1.4934080839157104, 0.42889583110809326, 1.7739603519439697) + 0.5173678398132324, 0.06942932307720184, 6.302994251251221, + 1.4934080839157104, 0.4288962781429291, 1.7739406824111938) llvm_expected['fp32'][dda_expected_small] = None llvm_expected['fp32'][normal_expected_philox] = (0.5655658841133118) llvm_expected['fp32'][uniform_expected_philox] = (0.6180108785629272) @@ -110,8 +116,8 @@ def test_execute(func, variable, params, prng, llvm_skip, expected, benchmark, f # it to the mechanism above if func_mode == "PTX" and precision == 'fp32' and expected is dda_expected_negative: expected = (0.4236549735069275, 5.960464477539063e-08, - 0.5173678398132324, 0.06942889094352722, 6.303247451782227, - 1.4934064149856567, 0.42889145016670227, 1.7737685441970825) + 0.5173678398132324, 0.06942932307720184, 6.302994728088379, + 1.4934064149856567, 0.4288918972015381, 1.7737658023834229) expected = llvm_expected.get(precision, {}).get(expected, expected) if expected is None: @@ -122,12 +128,9 @@ def test_execute(func, variable, params, prng, llvm_skip, expected, benchmark, f f.parameters.random_state.set(prng([0])) ex = pytest.helpers.get_func_execution(f, func_mode) - res = ex(variable) + res = benchmark(ex, variable) if pytest.helpers.llvm_current_fp_precision() == 'fp32': assert np.allclose(res, expected) else: np.testing.assert_allclose(res, expected) - - if benchmark.enabled: - benchmark(ex, variable) diff --git a/tests/functions/test_fhn_integrator.py b/tests/functions/test_fhn_integrator.py index 445ec8d9697..d117ed40d16 100644 --- a/tests/functions/test_fhn_integrator.py +++ b/tests/functions/test_fhn_integrator.py @@ -56,11 +56,8 @@ def test_basic(func, variable, integration_method, params, expected, benchmark, res = EX(variable) res = EX(variable) - res = EX(variable) + res = benchmark(EX, variable) assert np.allclose(res[0], expected[0]) assert np.allclose(res[1], expected[1]) assert np.allclose(res[2], expected[2]) - - if benchmark.enabled: - benchmark(EX, variable) diff --git a/tests/functions/test_integrator.py b/tests/functions/test_integrator.py index 26600d7cc7c..ff007c6afe3 100644 --- a/tests/functions/test_integrator.py +++ b/tests/functions/test_integrator.py @@ -67,7 +67,6 @@ def AdaptiveIntFun(init, value, iterations, noise, rate, offset, **kwargs): return [3.59649986, 3.28818534, 2.45181396, 3.14321808, 1.56270704, 2.88397872, 1.62818492, 3.72575501, 2.80657186, 2.2131637] - def DriftIntFun(init, value, iterations, noise, **kwargs): assert iterations == 3 if np.isscalar(noise): @@ -108,7 +107,6 
@@ def LeakyFun(init, value, iterations, noise, **kwargs): else: return [3.12748415, 2.76778478, 2.45911505, 3.06686514, 1.6311395, 2.19281309, 1.61148745, 3.23404557, 2.81418859, 2.63042344] - def AccumulatorFun(init, value, iterations, noise, **kwargs): assert iterations == 3 @@ -169,7 +167,7 @@ def test_execute(func, func_mode, variable, noise, params, benchmark): if 'DriftOnASphereIntegrator' in func[0].componentName: if func_mode != 'Python': - pytest.skip("DriftDiffusionIntegrator not yet compiled") + pytest.skip("DriftOnASphereIntegrator not yet compiled") params.update({'dimension':len(variable) + 1}) else: if 'dimension' in params: @@ -193,14 +191,11 @@ def test_execute(func, func_mode, variable, noise, params, benchmark): ex(variable) ex(variable) - res = ex(variable) + res = benchmark(ex, variable) expected = func[1](f.initializer, variable, 3, noise, **params) for r, e in zip(res, expected): assert np.allclose(r, e) - if benchmark.enabled: - benchmark(ex, variable) - def test_integrator_function_no_default_variable_and_params_len_more_than_1(): I = Functions.AdaptiveIntegrator(rate=[.1, .2, .3]) @@ -263,6 +258,43 @@ def test_integrator_function_with_default_variable_and_params_of_different_lengt "NOISE_SCALAR", "NOISE_2", "NOISE_3", "NOISE_4" ] + +def test_DriftOnASphere_identicalness_against_reference_implementation(): + """Compare against reference implementation in nback-paper model (https://github.com/andrebeu/nback-paper).""" + + # PNL DriftOnASphere + DoS = Functions.DriftOnASphereIntegrator(dimension=5, initializer=np.array([.2] * (4)), noise=0.0) + results_dos = [] + for i in range(3): + results_dos.append(DoS(.1)) + + # nback-paper implementation + def spherical_drift(n_steps=3, dim=5, var=0, mean=.1): + def convert_spherical_to_angular(dim, ros): + ct = np.zeros(dim) + ct[0] = np.cos(ros[0]) + prod = np.product([np.sin(ros[k]) for k in range(1, dim - 1)]) + n_prod = prod + for j in range(dim - 2): + n_prod /= np.sin(ros[j + 1]) + amt = n_prod * np.cos(ros[j + 1]) + ct[j + 1] = amt + ct[dim - 1] = prod + return ct + # initialize the spherical coordinates to ensure each context run begins in a new random location on the unit sphere + ros = np.array([.2] *(dim - 1)) + slen = n_steps + ctxt = np.zeros((slen, dim)) + for i in range(slen): + noise = np.random.normal(mean, var, size=(dim - 1)) # add a separately-drawn Gaussian to each spherical coord + ros += noise + ctxt[i] = convert_spherical_to_angular(dim, ros) + return ctxt + results_sd = spherical_drift() + + assert np.allclose(np.array(results_dos), np.array(results_sd)) + + # FIX: CROSS WITH INITIALIZER SIZE: @pytest.mark.parametrize("params, error_msg, error_type", test_vars, ids=names) def test_drift_on_a_sphere_errors(params, error_msg, error_type): diff --git a/tests/functions/test_memory.py b/tests/functions/test_memory.py index 92d736fda8a..3eefdcc0b14 100644 --- a/tests/functions/test_memory.py +++ b/tests/functions/test_memory.py @@ -149,11 +149,9 @@ def test_basic(func, variable, params, expected, benchmark, func_mode): EX = pytest.helpers.get_func_execution(f, func_mode) EX(variable) - res = EX(variable) + res = benchmark(EX, variable) assert np.allclose(res[0], expected[0]) assert np.allclose(res[1], expected[1]) - if benchmark.enabled: - benchmark(EX, variable) #endregion diff --git a/tests/functions/test_optimization.py b/tests/functions/test_optimization.py index 2069c2b2356..6f4669f9766 100644 --- a/tests/functions/test_optimization.py +++ b/tests/functions/test_optimization.py @@ -82,13 +82,10 @@ def 
test_grid_search(obj_func, metric, normalize, direction, selection, benchmar seed=0, save_values=False) EX = pytest.helpers.get_func_execution(f, func_mode) - res = EX(variable) + res = benchmark(EX, variable) assert np.allclose(res[0], result[0]) assert np.allclose(res[1], result[1]) if func_mode == 'Python': assert np.allclose(res[2], result[2]) assert np.allclose(res[3], result[3]) - - if benchmark.enabled: - benchmark(EX, variable) diff --git a/tests/functions/test_selection.py b/tests/functions/test_selection.py index 8fe21b1c5b2..8fc4f3c6408 100644 --- a/tests/functions/test_selection.py +++ b/tests/functions/test_selection.py @@ -76,8 +76,6 @@ def test_basic(func, variable, params, expected, benchmark, func_mode): EX = pytest.helpers.get_func_execution(f, func_mode) EX(variable) - res = EX(variable) + res = benchmark(EX, variable) assert np.allclose(res, expected) - if benchmark.enabled: - benchmark(EX, variable) diff --git a/tests/functions/test_transfer.py b/tests/functions/test_transfer.py index e0c0066295e..9168e37c544 100644 --- a/tests/functions/test_transfer.py +++ b/tests/functions/test_transfer.py @@ -1,10 +1,9 @@ import numpy as np -import psyneulink.core.llvm as pnlvm -import psyneulink.core.components.functions.nonstateful.transferfunctions as Functions -import psyneulink.core.globals.keywords as kw import pytest -from math import e, pi, sqrt +import psyneulink.core.components.functions.nonstateful.transferfunctions as Functions +import psyneulink.core.globals.keywords as kw +import psyneulink.core.llvm as pnlvm SIZE=10 np.random.seed(0) @@ -18,16 +17,20 @@ RAND3 = np.random.rand() RAND4 = np.random.rand() -softmax_helper = RAND1 * test_var -softmax_helper = softmax_helper - np.max(softmax_helper) -softmax_helper = np.exp(softmax_helper) / np.sum(np.exp(softmax_helper)) +softmax_helper = RAND1 * test_var +softmax_helper = softmax_helper - np.max(softmax_helper) +softmax_helper = np.exp(softmax_helper) / np.sum(np.exp(softmax_helper)) +softmax_helper2 = np.array((softmax_helper, softmax_helper)).reshape(2, -1) tanh_helper = (RAND1 * (test_var + RAND2 - RAND3) + RAND4) tanh_helper = np.tanh(tanh_helper) -gaussian_helper = e**(-(test_var - RAND2)**2 / (2 * RAND1**2)) / sqrt(2 * pi * RAND1) +gaussian_helper = np.e**(-(test_var - RAND2)**2 / (2 * RAND1**2)) / np.sqrt(2 * np.pi * RAND1) gaussian_helper = RAND3 * gaussian_helper + RAND4 +relu_helper = np.maximum(RAND1 * (test_var - RAND2), RAND3 * RAND1 *(test_var - RAND2)) +logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3)) + def gaussian_distort_helper(seed): state = np.random.RandomState([seed]) # compensate for construction @@ -36,27 +39,48 @@ def gaussian_distort_helper(seed): test_data = [ - pytest.param(Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, test_var * RAND1 + RAND2, id="LINEAR"), - pytest.param(Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * np.exp(RAND2 * test_var), id="EXPONENTIAL"), - pytest.param(Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3)), id="LOGISTIC"), - pytest.param(Functions.Tanh, test_var, {'gain':RAND1, 'bias':RAND2, 'x_0':RAND3, 'offset':RAND4}, tanh_helper, id="TANH"), - pytest.param(Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, np.maximum(RAND1 * (test_var - RAND2), RAND3 * RAND1 *(test_var - RAND2)), id="RELU"), + pytest.param(Functions.Linear, test_var, {kw.SLOPE:RAND1, kw.INTERCEPT:RAND2}, test_var * 
RAND1 + RAND2, id="LINEAR"), + pytest.param(Functions.Exponential, test_var, {kw.SCALE:RAND1, kw.RATE:RAND2}, RAND1 * np.exp(RAND2 * test_var), id="EXPONENTIAL"), + pytest.param(Functions.Logistic, test_var, {kw.GAIN:RAND1, kw.X_0:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4}, logistic_helper, id="LOGISTIC"), + pytest.param(Functions.Tanh, test_var, {kw.GAIN:RAND1, kw.BIAS:RAND2, kw.X_0:RAND3, kw.OFFSET:RAND4}, tanh_helper, id="TANH"), + pytest.param(Functions.ReLU, test_var, {kw.GAIN:RAND1, kw.BIAS:RAND2, kw.LEAK:RAND3}, relu_helper, id="RELU"), + # Angle doesn't have a helper using 'test_var', hardcode the input as well pytest.param(Functions.Angle, [0.5488135, 0.71518937, 0.60276338, 0.54488318, 0.4236548, 0.64589411, 0.43758721, 0.891773, 0.96366276, 0.38344152], {}, [0.85314409, 0.00556188, 0.01070476, 0.0214405, 0.05559454, 0.08091079, 0.21657281, 0.19296643, 0.21343805, 0.92738261, 0.00483101], id="ANGLE"), - pytest.param(Functions.Gaussian, test_var, {'standard_deviation':RAND1, 'bias':RAND2, 'scale':RAND3, 'offset':RAND4}, gaussian_helper, id="GAUSSIAN"), - pytest.param(Functions.GaussianDistort, test_var.tolist(), {'bias': RAND1, 'variance':RAND2, 'offset':RAND3, 'scale':RAND4 }, gaussian_distort_helper(0), id="GAUSSIAN DISTORT GLOBAL SEED"), - pytest.param(Functions.GaussianDistort, test_var.tolist(), {'bias': RAND1, 'variance':RAND2, 'offset':RAND3, 'scale':RAND4, 'seed':0 }, gaussian_distort_helper(0), id="GAUSSIAN DISTORT"), - pytest.param(Functions.SoftMax, test_var, {'gain':RAND1, 'per_item': False}, softmax_helper, id="SOFT_MAX ALL"), - pytest.param(Functions.SoftMax, test_var, {'gain':RAND1, 'params':{kw.OUTPUT_TYPE:kw.MAX_VAL}, 'per_item': False}, np.where(softmax_helper == np.max(softmax_helper), np.max(softmax_helper), 0), id="SOFT_MAX MAX_VAL"), - pytest.param(Functions.SoftMax, test_var, {'gain':RAND1, 'params':{kw.OUTPUT_TYPE:kw.MAX_INDICATOR}, 'per_item': False}, np.where(softmax_helper == np.max(softmax_helper), 1, 0), id="SOFT_MAX MAX_INDICATOR"), - pytest.param(Functions.SoftMax, test_var, {'gain':RAND1, 'params':{kw.OUTPUT_TYPE:kw.PROB}, 'per_item': False}, + + pytest.param(Functions.Gaussian, test_var, {kw.STANDARD_DEVIATION:RAND1, kw.BIAS:RAND2, kw.SCALE:RAND3, kw.OFFSET:RAND4}, gaussian_helper, id="GAUSSIAN"), + pytest.param(Functions.GaussianDistort, test_var, {kw.BIAS: RAND1, kw.VARIANCE:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4 }, gaussian_distort_helper(0), id="GAUSSIAN DISTORT GLOBAL SEED"), + pytest.param(Functions.GaussianDistort, test_var, {kw.BIAS: RAND1, kw.VARIANCE:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4, 'seed':0 }, gaussian_distort_helper(0), id="GAUSSIAN DISTORT"), + + # SoftMax 1D input + pytest.param(Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.PER_ITEM:False}, softmax_helper, id="SOFT_MAX ALL"), + pytest.param(Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:False}, np.where(softmax_helper == np.max(softmax_helper), softmax_helper, 0), id="SOFT_MAX MAX_VAL"), + pytest.param(Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_INDICATOR, kw.PER_ITEM:False}, np.where(softmax_helper == np.max(softmax_helper), 1, 0), id="SOFT_MAX MAX_INDICATOR"), + pytest.param(Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.PROB, kw.PER_ITEM:False}, [0.0, 0.0, 0.0, 0.0, test_var[4], 0.0, 0.0, 0.0, 0.0, 0.0], id="SOFT_MAX PROB"), - pytest.param(Functions.LinearMatrix, test_var.tolist(), {'matrix':test_matrix.tolist()}, np.dot(test_var, test_matrix), id="LINEAR_MATRIX SQUARE"), - 
pytest.param(Functions.LinearMatrix, test_var.tolist(), {'matrix':test_matrix_l.tolist()}, np.dot(test_var, test_matrix_l), id="LINEAR_MATRIX WIDE"), - pytest.param(Functions.LinearMatrix, test_var.tolist(), {'matrix':test_matrix_s.tolist()}, np.dot(test_var, test_matrix_s), id="LINEAR_MATRIX TALL"), + + # SoftMax 2D testing per-item + pytest.param(Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.PER_ITEM:True}, [softmax_helper], id="SOFT_MAX ALL 2D"), + pytest.param(Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:True}, + [np.where(softmax_helper == np.max(softmax_helper), softmax_helper, 0)], id="SOFT_MAX MAX_VAL 2D"), + pytest.param(Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_INDICATOR, kw.PER_ITEM:True}, + [np.where(softmax_helper == np.max(softmax_helper), 1, 0)], id="SOFT_MAX MAX_INDICATOR 2D"), + pytest.param(Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.PROB, kw.PER_ITEM:True}, + [[0.0, 0.0, 0.0, 0.0, test_var[4], 0.0, 0.0, 0.0, 0.0, 0.0]], id="SOFT_MAX PROB 2D"), + + # SoftMax per-item with 2 elements in input + pytest.param(Functions.SoftMax, [test_var, test_var], {kw.GAIN:RAND1, kw.PER_ITEM: True}, softmax_helper2, id="SOFT_MAX ALL PER_ITEM"), + pytest.param(Functions.SoftMax, [test_var, test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM: True}, + np.where(softmax_helper2 == np.max(softmax_helper2), softmax_helper2, 0), id="SOFT_MAX MAX_VAL PER_ITEM"), + pytest.param(Functions.SoftMax, [test_var, test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_INDICATOR, kw.PER_ITEM: True}, + np.where(softmax_helper2 == np.max(softmax_helper2), 1, 0), id="SOFT_MAX MAX_INDICATOR PER_ITEM"), + + pytest.param(Functions.LinearMatrix, test_var, {kw.MATRIX:test_matrix}, np.dot(test_var, test_matrix), id="LINEAR_MATRIX SQUARE"), + pytest.param(Functions.LinearMatrix, test_var, {kw.MATRIX:test_matrix_l}, np.dot(test_var, test_matrix_l), id="LINEAR_MATRIX WIDE"), + pytest.param(Functions.LinearMatrix, test_var, {kw.MATRIX:test_matrix_s}, np.dot(test_var, test_matrix_s), id="LINEAR_MATRIX TALL"), ] @pytest.mark.function @@ -64,43 +88,72 @@ def gaussian_distort_helper(seed): @pytest.mark.benchmark @pytest.mark.parametrize("func, variable, params, expected", test_data) def test_execute(func, variable, params, expected, benchmark, func_mode): - if 'Angle' in func.componentName and func_mode != 'Python': - pytest.skip('Angle not yet supported by LLVM or PTX') benchmark.group = "TransferFunction " + func.componentName f = func(default_variable=variable, **params) ex = pytest.helpers.get_func_execution(f, func_mode) - res = ex(variable) + res = benchmark(ex, variable) assert np.allclose(res, expected) - if benchmark.enabled: - benchmark(ex, variable) -logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3)) tanh_derivative_helper = (RAND1 * (test_var + RAND2) + RAND3) tanh_derivative_helper = (1 - np.tanh(tanh_derivative_helper)**2) * RAND4 * RAND1 + derivative_test_data = [ - (Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, RAND1), - (Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)), - (Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)), - (Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, np.where(test_var > 0, RAND1, RAND1 * RAND3)), - (Functions.Tanh, test_var, {'gain':RAND1, 'bias':RAND2, 
'offset':RAND3, 'scale':RAND4}, tanh_derivative_helper), -] + (Functions.Linear, test_var, {kw.SLOPE:RAND1, kw.INTERCEPT:RAND2}, RAND1), + (Functions.Exponential, test_var, {kw.SCALE:RAND1, kw.RATE:RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)), + (Functions.Logistic, test_var, {kw.GAIN:RAND1, kw.X_0:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)), + (Functions.ReLU, test_var, {kw.GAIN:RAND1, kw.BIAS:RAND2, kw.LEAK:RAND3}, np.where((test_var - RAND2) > 0, RAND1, RAND1 * RAND3)), + (Functions.Tanh, test_var, {kw.GAIN:RAND1, kw.BIAS:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4}, tanh_derivative_helper), -derivative_names = [ - "LINEAR_DERIVATIVE", - "EXPONENTIAL_DERIVATIVE", - "LOGISTIC_DERIVATIVE", - "RELU_DERIVATIVE", - "TANH_DERIVATIVE", + # SoftMax per-item=False + (Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:False}, + [-0.010680386821751537, -0.011118109698906909, -0.01082040340318878, -0.010670257514724047, -0.010362498859374309, + -0.010933660158663306, -0.010397412260182806, -0.011602329078808718, 0.09684744183944892, -0.010262384043848513]), + (Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_INDICATOR, kw.PER_ITEM:False}, + [-0.010680386821751537, -0.011118109698906909, -0.01082040340318878, -0.010670257514724047, -0.010362498859374309, + -0.010933660158663306, -0.010397412260182806, -0.011602329078808718, 0.09684744183944892, -0.010262384043848513]), + (Functions.SoftMax, test_var, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.ALL, kw.PER_ITEM:False}, + [[ 0.08863569, -0.01005855, -0.00978921, -0.00965338, -0.00937495, -0.00989168, -0.00940653, -0.01049662, -0.01068039, -0.00928437], + [-0.01005855, 0.09185608, -0.01019041, -0.01004901, -0.00975917, -0.01029708, -0.00979205, -0.01092681, -0.01111811, -0.00966488], + [-0.00978921, -0.01019041, 0.08966934, -0.00977993, -0.00949785, -0.01002135, -0.00952985, -0.01063423, -0.0108204, -0.00940609], + [-0.00965338, -0.01004901, -0.00977993, 0.08856078, -0.00936606, -0.0098823, -0.00939761, -0.01048667, -0.01067026, -0.00927557], + [-0.00937495, -0.00975917, -0.00949785, -0.00936606, 0.08627659, -0.00959726, -0.00912656, -0.0101842, -0.0103625, -0.00900804], + [-0.00989168, -0.01029708, -0.01002135, -0.0098823, -0.00959726, 0.09050301, -0.0096296, -0.01074554, -0.01093366, -0.00950454], + [-0.00940653, -0.00979205, -0.00952985, -0.00939761, -0.00912656, -0.0096296, 0.08653653, -0.01021852, -0.01039741, -0.00903839], + [-0.01049662, -0.01092681, -0.01063423, -0.01048667, -0.0101842, -0.01074554, -0.01021852, 0.09538073, -0.01160233, -0.01008581], + [-0.01068039, -0.01111811, -0.0108204, -0.01067026, -0.0103625, -0.01093366, -0.01039741, -0.01160233, 0.09684744, -0.01026238], + [-0.00928437, -0.00966488, -0.00940609, -0.00927557, -0.00900804, -0.00950454, -0.00903839, -0.01008581, -0.01026238, 0.08553008]]), + + # SoftMax per-item=True 2D single element + (Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:True}, + [[-0.010680386821751537, -0.011118109698906909, -0.01082040340318878, -0.010670257514724047, -0.010362498859374309, + -0.010933660158663306, -0.010397412260182806,
-0.011602329078808718, 0.09684744183944892, -0.010262384043848513]]), + (Functions.SoftMax, [test_var], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.ALL, kw.PER_ITEM:True}, + [[ 0.08863569, -0.01005855, -0.00978921, -0.00965338, -0.00937495, -0.00989168, -0.00940653, -0.01049662, -0.01068039, -0.00928437], + [-0.01005855, 0.09185608, -0.01019041, -0.01004901, -0.00975917, -0.01029708, -0.00979205, -0.01092681, -0.01111811, -0.00966488], + [-0.00978921, -0.01019041, 0.08966934, -0.00977993, -0.00949785, -0.01002135, -0.00952985, -0.01063423, -0.0108204, -0.00940609], + [-0.00965338, -0.01004901, -0.00977993, 0.08856078, -0.00936606, -0.0098823, -0.00939761, -0.01048667, -0.01067026, -0.00927557], + [-0.00937495, -0.00975917, -0.00949785, -0.00936606, 0.08627659, -0.00959726, -0.00912656, -0.0101842, -0.0103625, -0.00900804], + [-0.00989168, -0.01029708, -0.01002135, -0.0098823, -0.00959726, 0.09050301, -0.0096296, -0.01074554, -0.01093366, -0.00950454], + [-0.00940653, -0.00979205, -0.00952985, -0.00939761, -0.00912656, -0.0096296, 0.08653653, -0.01021852, -0.01039741, -0.00903839], + [-0.01049662, -0.01092681, -0.01063423, -0.01048667, -0.0101842, -0.01074554, -0.01021852, 0.09538073, -0.01160233, -0.01008581], + [-0.01068039, -0.01111811, -0.0108204, -0.01067026, -0.0103625, -0.01093366, -0.01039741, -0.01160233, 0.09684744, -0.01026238], + [-0.00928437, -0.00966488, -0.00940609, -0.00927557, -0.00900804, -0.00950454, -0.00903839, -0.01008581, -0.01026238, 0.08553008]]), ] @pytest.mark.function @pytest.mark.transfer_function @pytest.mark.benchmark -@pytest.mark.parametrize("func, variable, params, expected", derivative_test_data, ids=derivative_names) -def test_execute_derivative(func, variable, params, expected, benchmark, func_mode): +@pytest.mark.parametrize("func, variable, params, expected", derivative_test_data, ids=lambda x: getattr(x, 'name', None) or getattr(x, 'get', lambda p, q: None)(kw.OUTPUT_TYPE, None)) +def test_transfer_derivative(func, variable, params, expected, benchmark, func_mode): + if func == Functions.SoftMax and params[kw.OUTPUT_TYPE] == kw.ALL and func_mode != "Python": + pytest.skip("Compiled derivative using 'ALL' is not implemented") + f = func(default_variable=variable, **params) benchmark.group = "TransferFunction " + func.componentName + " Derivative" if func_mode == 'Python': @@ -109,11 +162,46 @@ def test_execute_derivative(func, variable, params, expected, benchmark, func_mo ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative"})).execute elif func_mode == 'PTX': ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative"})).cuda_execute + else: + assert False, "unknown function mode: {}".format(func_mode) res = benchmark(ex, variable) assert np.allclose(res, expected) +derivative_out_test_data = [ + (Functions.Logistic, logistic_helper, {kw.GAIN:RAND1, kw.X_0:RAND2, kw.OFFSET:RAND3, kw.SCALE:RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)), + (Functions.ReLU, relu_helper, {kw.GAIN:RAND1, kw.BIAS:RAND2, kw.LEAK:RAND3}, np.where((test_var - RAND2) > 0, RAND1, RAND1 * RAND3)), + (Functions.SoftMax, softmax_helper, {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:False}, + [-0.010680386821751537, -0.011118109698906909, -0.01082040340318878, -0.010670257514724047, -0.010362498859374309, + -0.010933660158663306, -0.010397412260182806, -0.011602329078808718, 0.09684744183944892, -0.010262384043848513]), + (Functions.SoftMax, [softmax_helper], {kw.GAIN:RAND1, kw.OUTPUT_TYPE:kw.MAX_VAL, kw.PER_ITEM:True}, + 
[[-0.010680386821751537, -0.011118109698906909, -0.01082040340318878, -0.010670257514724047, -0.010362498859374309, + -0.010933660158663306, -0.010397412260182806, -0.011602329078808718, 0.09684744183944892, -0.010262384043848513]]), +] +@pytest.mark.function +@pytest.mark.transfer_function +@pytest.mark.benchmark +@pytest.mark.parametrize("func, variable, params, expected", derivative_out_test_data, ids=lambda x: getattr(x, 'name', None) or getattr(x, 'get', lambda p, q: None)(kw.OUTPUT_TYPE, None)) +def test_transfer_derivative_out(func, variable, params, expected, benchmark, func_mode): + if func == Functions.SoftMax and params[kw.OUTPUT_TYPE] == kw.ALL and func_mode != "Python": + pytest.skip("Compiled derivative using 'ALL' is not implemented") + + f = func(default_variable=variable, **params) + benchmark.group = "TransferFunction " + func.componentName + " Derivative" + if func_mode == 'Python': + def ex(x): + return f.derivative(input=None, output=x) + elif func_mode == 'LLVM': + ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative_out"})).execute + elif func_mode == 'PTX': + ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative_out"})).cuda_execute + else: + assert False, "unknown function mode: {}".format(func_mode) + + res = benchmark(ex, variable) + assert np.allclose(res, expected) + def test_transfer_with_costs_function(): f = Functions.TransferWithCosts() result = f(1) diff --git a/tests/functions/test_user_defined_func.py b/tests/functions/test_user_defined_func.py index 77d12fca243..84cd2f18a8d 100644 --- a/tests/functions/test_user_defined_func.py +++ b/tests/functions/test_user_defined_func.py @@ -604,6 +604,7 @@ def test_user_def_func_builtin_direct(func, args, expected, benchmark): val = benchmark(func, *args) assert np.allclose(val, expected) +@pytest.mark.composition @pytest.mark.benchmark(group="UDF as Composition Origin") def test_udf_composition_origin(comp_mode, benchmark): def myFunction(variable, context): @@ -616,6 +617,7 @@ def myFunction(variable, context): assert np.allclose(c.results[0][0], [3, 1]) +@pytest.mark.composition @pytest.mark.benchmark(group="UDF as Composition Terminal") def test_udf_composition_terminal(comp_mode, benchmark): def myFunction(variable, context): diff --git a/tests/llvm/test_builtins_intrinsics.py b/tests/llvm/test_builtins_intrinsics.py index 307ccdabc5d..0b7b4dc7bdd 100644 --- a/tests/llvm/test_builtins_intrinsics.py +++ b/tests/llvm/test_builtins_intrinsics.py @@ -25,8 +25,10 @@ (lambda x: 1.0 / np.sinh(x), (450,), "__pnl_builtin_csch", 1 / np.sinh(450)), #~900 is the limit after which exp(x) used in csch formula returns inf (lambda x: 1.0 / np.sinh(x), (900,), "__pnl_builtin_csch", 1 / np.sinh(900)), + (np.sin, (x,), "__pnl_builtin_sin", np.sin(x)), + (np.cos, (x,), "__pnl_builtin_cos", np.cos(x)), ], ids=["EXP", "Large EXP", "LOG", "POW", "TANH", "Large TANH", "COTH", "Large COTH", - "CSCH", "Large CSCH", "xLarge CSCH"]) + "CSCH", "Large CSCH", "xLarge CSCH", "SIN", "COS"]) def test_builtin_op(benchmark, op, args, builtin, result, func_mode): if func_mode == 'Python': f = op diff --git a/tests/llvm/test_builtins_mt_random.py b/tests/llvm/test_builtins_mt_random.py index 19dbeb7b818..09840843447 100644 --- a/tests/llvm/test_builtins_mt_random.py +++ b/tests/llvm/test_builtins_mt_random.py @@ -44,6 +44,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) res = [f(), f()] assert np.allclose(res, [3626764237, 1654615998]) @@ 
-88,6 +90,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) res = [f(), f()] assert np.allclose(res, [0.8444218515250481, 0.7579544029403025]) @@ -127,7 +131,65 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) res = [f(), f()] assert np.allclose(res, [0.4644982638709743, 0.6202001216069017]) benchmark(f) + +@pytest.mark.benchmark(group="Mersenne Twister Binomial distribution") +@pytest.mark.parametrize('mode', ['numpy', + pytest.param('LLVM', marks=pytest.mark.llvm), + pytest.helpers.cuda_param('PTX')]) +@pytest.mark.parametrize('n', [1]) +@pytest.mark.parametrize('p, exp', [ + (0, [0]), + (0.1, [0x20d00c]), + (0.33, [0xc224f70d]), + (0.5, [0xca76f71d]), + (0.66, [0x3ddb08f2]), + (0.95, [0xffffbffb]), + (1, [0xffffffff]), + ]) +# Python uses a different algorithm, so it is skipped in this test +def test_random_binomial(benchmark, mode, n, p, exp): + if mode == 'numpy': + # numpy promotes elements to int64 + state = np.random.RandomState([SEED]) + def f(): + return state.binomial(n, p) + elif mode == 'LLVM': + init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_init') + state = init_fun.byref_arg_types[0]() + init_fun(state, SEED) + + gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_binomial') + c_n = gen_fun.byref_arg_types[1](n) + c_p = gen_fun.byref_arg_types[2](p) + c_out = gen_fun.byref_arg_types[-1]() + def f(): + gen_fun(state, c_n, c_p, c_out) + return c_out.value + elif mode == 'PTX': + init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_init') + state_size = ctypes.sizeof(init_fun.byref_arg_types[0]) + gpu_state = pnlvm.jit_engine.pycuda.driver.mem_alloc(state_size) + init_fun.cuda_call(gpu_state, np.int32(SEED)) + + gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_mt_rand_binomial') + gpu_n = pnlvm.jit_engine.pycuda.driver.In(np.array([n], dtype=np.dtype(gen_fun.byref_arg_types[1]))) + gpu_p = pnlvm.jit_engine.pycuda.driver.In(np.array([p], dtype=np.dtype(gen_fun.byref_arg_types[2]))) + out = np.array([0.0], dtype=np.dtype(gen_fun.byref_arg_types[3])) + gpu_out = pnlvm.jit_engine.pycuda.driver.Out(out) + + def f(): + gen_fun.cuda_call(gpu_state, gpu_n, gpu_p, gpu_out) + return out[0] + else: + assert False, "Unknown mode: {}".format(mode) + + res = [f() for _ in range(32)] + res = int(''.join(str(x) for x in res), 2) + assert res == exp[n - 1] + benchmark(f) diff --git a/tests/llvm/test_builtins_philox_random.py index 479e91379e7..0398fb9eda0 100644 --- a/tests/llvm/test_builtins_philox_random.py +++ b/tests/llvm/test_builtins_philox_random.py @@ -47,6 +47,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) # Get >4 samples to force regeneration of Philox buffer res = [f(), f(), f(), f(), f(), f()] @@ -89,6 +91,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) # Get >4 samples to force regeneration of Philox buffer res = [f(), f(), f(), f(), f(), f()] @@ -129,6 +133,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) res = [f(), f()] assert np.allclose(res, [0.014067035665647709, 0.2577672456246177]) @@ -168,6 +174,8 @@ def f(): def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False,
"Unknown mode: {}".format(mode) res = [f(), f()] assert np.allclose(res, [0.13562285900115967, 0.014066934585571289]) @@ -179,7 +187,7 @@ def f(): pytest.param('LLVM', marks=pytest.mark.llvm), pytest.helpers.cuda_param('PTX')]) @pytest.mark.parametrize('fp_type', [pnlvm.ir.DoubleType(), pnlvm.ir.FloatType()], - ids=lambda x: str(x)) + ids=str) def test_random_normal(benchmark, mode, fp_type): if mode != 'numpy': # Instantiate builder context with the desired type @@ -208,11 +216,13 @@ def f(): init_fun.cuda_call(gpu_state, np.int64(SEED)) gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_philox_rand_normal') - out = np.asfarray([0.0], dtype=dtype) + out = np.array([0.0], dtype=np.dtype(gen_fun.byref_arg_types[1])) gpu_out = pnlvm.jit_engine.pycuda.driver.Out(out) def f(): gen_fun.cuda_call(gpu_state, gpu_out) return out[0] + else: + assert False, "Unknown mode: {}".format(mode) res = [f() for i in range(191000)] if fp_type is pnlvm.ir.DoubleType(): @@ -250,3 +260,67 @@ def f(): 2.000257730484009, -1.129721999168396]) assert not any(np.isnan(res)), list(np.isnan(res)).index(True) benchmark(f) + +@pytest.mark.benchmark(group="Philox Binomial distribution") +@pytest.mark.parametrize('mode', ['numpy', + pytest.param('LLVM', marks=pytest.mark.llvm), + pytest.helpers.cuda_param('PTX')]) +@pytest.mark.parametrize('fp_type', [pnlvm.ir.DoubleType(), pnlvm.ir.FloatType()], + ids=str) +@pytest.mark.parametrize('n', [1]) +@pytest.mark.parametrize('p, exp_64, exp_32', [ + (0, [0], [0]), + (0.1, [0xa0c0100], [0x20440250]), + (0.33, [0xa2c8186], [0x20440650]), + (0.5, [0xa2c81c6], [0x226c8650]), + (0.66, [0xf5d37e79], [0xdfbbf9af]), + (0.95, [0xf7f3ffff], [0xffbffdaf]), + (1, [0xffffffff], [0xffffffff]), + ]) +def test_random_binomial(benchmark, mode, fp_type, n, p, exp_64, exp_32): + if mode != 'numpy': + # Instantiate builder context with the desired type + pnlvm.LLVMBuilderContext(fp_type) + + # numpy always uses fp64 uniform sampling + exp = exp_64 if fp_type is pnlvm.ir.DoubleType() or mode == 'numpy' else exp_32 + if mode == 'numpy': + state = np.random.Philox([SEED]) + prng = np.random.Generator(state) + def f(): + return prng.binomial(n, p) + elif mode == 'LLVM': + init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_philox_rand_init') + c_state = init_fun.byref_arg_types[0]() + init_fun(c_state, SEED) + + gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_philox_rand_binomial') + c_n = gen_fun.byref_arg_types[1](n) + c_p = gen_fun.byref_arg_types[2](p) + c_out = gen_fun.byref_arg_types[-1]() + def f(): + gen_fun(c_state, c_n, c_p, c_out) + return c_out.value + elif mode == 'PTX': + init_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_philox_rand_init') + state_size = ctypes.sizeof(init_fun.byref_arg_types[0]) + gpu_state = pnlvm.jit_engine.pycuda.driver.mem_alloc(state_size) + init_fun.cuda_call(gpu_state, np.int64(SEED)) + + gen_fun = pnlvm.LLVMBinaryFunction.get('__pnl_builtin_philox_rand_binomial') + gpu_n = pnlvm.jit_engine.pycuda.driver.In(np.array([n], dtype=np.dtype(gen_fun.byref_arg_types[1]))) + gpu_p = pnlvm.jit_engine.pycuda.driver.In(np.array([p], dtype=np.dtype(gen_fun.byref_arg_types[2]))) + out = np.array([0.0], dtype=np.dtype(gen_fun.byref_arg_types[3])) + gpu_out = pnlvm.jit_engine.pycuda.driver.Out(out) + + def f(): + gen_fun.cuda_call(gpu_state, gpu_n, gpu_p, gpu_out) + return out[0] + else: + assert False, "Unknown mode: {}".format(mode) + + res = [f() for i in range(32)] + res = int(''.join(str(x) for x in res), 2) + assert res == exp[n - 1] + + benchmark(f) diff 
--git a/tests/log/test_log.py b/tests/log/test_log.py index 2229e2f3d30..ba103e5f582 100644 --- a/tests/log/test_log.py +++ b/tests/log/test_log.py @@ -1253,7 +1253,7 @@ def test_multilayer(self): p = [input_layer, input_weights, hidden_layer_1, middle_weights, hidden_layer_2, output_weights, output_layer] backprop_pathway = comp.add_backpropagation_learning_pathway( pathway=p, - loss_function='sse', + loss_function=pnl.Loss.L0, learning_rate=1. ) diff --git a/tests/log/test_rpc.py b/tests/log/test_rpc.py index 405706cf97c..c17b1cb292b 100644 --- a/tests/log/test_rpc.py +++ b/tests/log/test_rpc.py @@ -485,7 +485,8 @@ def test_multilayer(self): p = [input_layer, input_weights, hidden_layer_1, middle_weights, hidden_layer_2, output_weights, output_layer] backprop_pathway = comp.add_backpropagation_learning_pathway( pathway=p, - loss_function='sse', + # loss_function=pnl.Loss.SSE, + loss_function=pnl.Loss.L0, learning_rate=1. ) diff --git a/tests/mdf/test_mdf.py b/tests/mdf/test_mdf.py index 150f7c1964a..d2ea75142a0 100644 --- a/tests/mdf/test_mdf.py +++ b/tests/mdf/test_mdf.py @@ -1,3 +1,4 @@ +import copy import numpy as np import os import psyneulink as pnl @@ -11,6 +12,15 @@ from modeci_mdf.execution_engine import evaluate_onnx_expr # noqa: E402 +def get_onnx_fixed_noise_str(onnx_op, **kwargs): + # high precision printing needed because script will be executed from string + # 16 is insufficient on windows + with np.printoptions(precision=32): + return str( + evaluate_onnx_expr(f'onnx_ops.{onnx_op}', base_parameters=kwargs, evaluated_parameters=kwargs) + ) + + # stroop stimuli red = [1, 0] green = [0, 1] @@ -164,18 +174,25 @@ def test_write_json_file_multiple_comps( assert orig_results[composition_name] == final_results, f'{composition_name}:' +def _get_mdf_model_results(evaluable_graph): + return [ + [eo.curr_value for _, eo in evaluable_graph.enodes[node.id].evaluable_outputs.items()] + for node in evaluable_graph.scheduler.consideration_queue[-1] + ] + + # These runtime_params are necessary because noise seeding is not # replicable between numpy and onnx. # Values are generated from running onnx function RandomUniform and # RandomNormal with parameters used in model_integrators.py (seed 0). 
# RandomNormal values are different on mac versus linux and windows onnx_noise_data = { - 'onnx_ops.randomuniform': { + 'randomuniform': { 'A': {'low': -1.0, 'high': 1.0, 'seed': 0, 'shape': (1, 1)}, 'D': {'low': -0.5, 'high': 0.5, 'seed': 0, 'shape': (1, 1)}, 'E': {'low': -0.25, 'high': 0.5, 'seed': 0, 'shape': (1, 1)} }, - 'onnx_ops.randomnormal': { + 'randomnormal': { 'B': {'mean': -1.0, 'scale': 0.5, 'seed': 0, 'shape': (1, 1)}, 'C': {'mean': 0.0, 'scale': 0.25, 'seed': 0, 'shape': (1, 1)}, } @@ -187,18 +204,13 @@ def test_write_json_file_multiple_comps( for node, args in onnx_noise_data[func_type].items(): # generates output from onnx noise functions with seed 0 to be # passed in in runtime_params during psyneulink execution - onnx_integrators_fixed_seeded_noise[node] = evaluate_onnx_expr( - func_type, base_parameters=args, evaluated_parameters=args - ) + onnx_integrators_fixed_seeded_noise[node] = get_onnx_fixed_noise_str(func_type, **args) -# high precision printing needed because script will be executed from string -# 16 is insufficient on windows -with np.printoptions(precision=32): - integrators_runtime_params = ( - 'runtime_params={' - + ','.join([f'{k}: {{ "noise": {v} }}' for k, v in onnx_integrators_fixed_seeded_noise.items()]) - + '}' - ) +integrators_runtime_params = ( + 'runtime_params={' + + ','.join([f'{k}: {{ "noise": {v} }}' for k, v in onnx_integrators_fixed_seeded_noise.items()]) + + '}' +) @pytest.mark.parametrize( @@ -234,12 +246,56 @@ def test_mdf_equivalence(filename, composition_name, input_dict, simple_edge_for eg = ee.EvaluableGraph(m.graphs[0], verbose=True) eg.evaluate(initializer={f'{node}_InputPort_0': i for node, i in input_dict.items()}) - mdf_results = [ - [eo.curr_value for _, eo in eg.enodes[node.id].evaluable_outputs.items()] - for node in eg.scheduler.consideration_queue[-1] - ] + assert pnl.safe_equals(orig_results, _get_mdf_model_results(eg)) + + +ddi_termination_conds = [ + None, + ( + "pnl.Or(" + "pnl.Threshold(A, parameter='value', threshold=A.function.defaults.threshold, comparator='>=', indices=(0,))," + "pnl.Threshold(A, parameter='value', threshold=-1 * A.function.defaults.threshold, comparator='<=', indices=(0,))" + ")" + ), + 'pnl.AfterNCalls(A, 10)', +] + +# construct test data manually instead of with multiple @pytest.mark.parametrize +# so that other functions can use more appropriate termination conds +individual_functions_test_data = [ + ( + pnl.IntegratorMechanism, + pnl.DriftDiffusionIntegrator(rate=0.5, offset=1, non_decision_time=1, seed=0), + "{{A: {{'random_draw': {0} }} }}".format(get_onnx_fixed_noise_str('randomnormal', mean=0, scale=1, seed=0, shape=(1,))) + ) + (x,) + for x in ddi_termination_conds +] + + +@pytest.mark.parametrize( + 'mech_type, function, runtime_params, trial_termination_cond', + individual_functions_test_data +) +def test_mdf_equivalence_individual_functions(mech_type, function, runtime_params, trial_termination_cond): + import modeci_mdf.execution_engine as ee + + A = mech_type(name='A', function=copy.deepcopy(function)) + comp = pnl.Composition(pathways=[A]) + + try: + trial_termination_cond = eval(trial_termination_cond) + except TypeError: + pass + comp.scheduler.termination_conds = {pnl.TimeScale.TRIAL: trial_termination_cond} + + comp.run(inputs={A: [[1.0]]}, runtime_params=eval(runtime_params)) + + model = pnl.get_mdf_model(comp) + + eg = ee.EvaluableGraph(model.graphs[0], verbose=True) + eg.evaluate(initializer={'A_InputPort_0': 1.0}) - assert pnl.safe_equals(orig_results, mdf_results) + assert 
pnl.safe_equals(comp.results, _get_mdf_model_results(eg)) @pytest.mark.parametrize('filename', ['model_basic.py']) diff --git a/tests/mechanisms/test_control_mechanism.py b/tests/mechanisms/test_control_mechanism.py index d5fdfd66204..1c2ff3405ec 100644 --- a/tests/mechanisms/test_control_mechanism.py +++ b/tests/mechanisms/test_control_mechanism.py @@ -10,6 +10,7 @@ class TestLCControlMechanism: @pytest.mark.mechanism @pytest.mark.control_mechanism + @pytest.mark.composition @pytest.mark.benchmark(group="LCControlMechanism Default") def test_lc_control_mechanism_as_controller(self, benchmark): G = 1.0 @@ -83,16 +84,14 @@ def test_lc_control_mech_basic(self, benchmark, mech_mode): ) EX = pytest.helpers.get_mech_execution(LC, mech_mode) - val = EX([10.0]) + val = benchmark(EX, [10.0]) # All values are the same because LCControlMechanism assigns all of its ControlSignals to the same value # (the 1st item of its function's value). # FIX: 6/6/19 - Python returns 3d array but LLVM returns 2d array # (np.allclose bizarrely passes for LLVM because all the values are the same) assert np.allclose(val, [[[3.00139776]], [[3.00139776]], [[3.00139776]], [[3.00139776]]]) - if benchmark.enabled: - benchmark(EX, [10.0]) - + @pytest.mark.composition def test_lc_control_modulated_mechanisms_all(self): T_1 = pnl.TransferMechanism(name='T_1') @@ -110,7 +109,9 @@ def test_lc_control_modulated_mechanisms_all(self): assert T_2.parameter_ports[pnl.SLOPE].mod_afferents[0] in LC.control_signals[0].efferents +@pytest.mark.composition class TestControlMechanism: + def test_control_modulation(self): Tx = pnl.TransferMechanism(name='Tx') Ty = pnl.TransferMechanism(name='Ty') diff --git a/tests/mechanisms/test_ddm_mechanism.py b/tests/mechanisms/test_ddm_mechanism.py index 13f6b9703ae..6f39c04d19f 100644 --- a/tests/mechanisms/test_ddm_mechanism.py +++ b/tests/mechanisms/test_ddm_mechanism.py @@ -126,18 +126,15 @@ def test_threshold_stops_accumulation(self, mech_mode, variable, expected, bench decision_variables = [] time_points = [] - for i in range(5): - output = ex([variable]) - decision_variables.append(output[0][0]) - time_points.append(output[1][0]) + results = [] + for i in range(4): + results.append(ex([variable])) - # decision variable accumulation stops - assert np.allclose(decision_variables, expected) + results.append(benchmark(ex,[variable])) + # decision variable accumulation stops # time accumulation does not stop - assert np.allclose(time_points, [1.0, 2.0, 3.0, 4.0, 5.0]) - if benchmark.enabled: - benchmark(ex, [variable]) + assert np.allclose(results, [[[b], [a + 1.0]] for a,b in enumerate(expected)]) # def test_threshold_stops_accumulation_multiple_variables(self): # D = IntegratorMechanism(name='DDM', @@ -159,6 +156,7 @@ def test_threshold_stops_accumulation(self, mech_mode, variable, expected, bench # assert np.allclose(decision_variables_a, [2.0, 4.0, 5.0, 5.0, 5.0]) + @pytest.mark.composition def test_is_finished_stops_composition(self): D = DDM(name='DDM', function=DriftDiffusionIntegrator(threshold=10.0, time_step_size=1.0), @@ -183,6 +181,7 @@ def test_is_finished_stops_composition(self): # # sched = Scheduler(system=S) +@pytest.mark.composition class TestInputPorts: def test_regular_input_mode(self): @@ -256,10 +255,9 @@ def test_DDM_Integrator_Bogacz(benchmark, mech_mode, prng): T.parameters.random_state.set(_SeededPhilox([0])) ex = pytest.helpers.get_mech_execution(T, mech_mode) - val = ex(stim)[0] + ex(stim) + val = benchmark(ex, stim)[0] assert np.allclose(val, [1.0]) - if 
benchmark.enabled: - benchmark(ex, stim) # ------------------------------------------------------------------------------------------------ # # TEST 3 @@ -289,9 +287,9 @@ def test_DDM_Integrator_Bogacz(benchmark, mech_mode, prng): @pytest.mark.mechanism @pytest.mark.benchmark(group="DDM") @pytest.mark.parametrize("noise, expected", [ - (0., 10), - (np.sqrt(0.5), 8.194383551861414), - (np.sqrt(2.0), 6.388767103722829), + (0., 20), + (np.sqrt(0.5), 18.40852795454561), + (np.sqrt(2.0), 16.817055909091223), ], ids=["0", "0.5", "2.0"]) def test_DDM_noise(mech_mode, benchmark, noise, expected): T = DDM( @@ -305,10 +303,9 @@ def test_DDM_noise(mech_mode, benchmark, noise, expected): ) ex = pytest.helpers.get_mech_execution(T, mech_mode) - val = ex([10]) + ex([10]) + val = benchmark(ex, [10]) assert np.allclose(val[0][0], expected) - if benchmark.enabled: - benchmark(ex, [10]) # ------------------------------------------------------------------------------------------------ @@ -419,7 +416,7 @@ def test_DDM_input_fn(): @pytest.mark.mechanism @pytest.mark.benchmark(group="DDM") @pytest.mark.parametrize("rate, expected", [ - (5, 50), (5., 50), ([5], 50), (-5.0, -50), + (5, 100), (5., 100), ([5], 100), (-5.0, -100), ], ids=["int", "float", "list", "negative"]) # ****** # Should negative pass? @@ -437,10 +434,9 @@ def test_DDM_rate(benchmark, rate, expected, mech_mode): ) ex = pytest.helpers.get_mech_execution(T, mech_mode) - val = float(ex(stim)[0][0]) + ex(stim) + val = float(benchmark(ex, stim)[0][0]) assert val == expected - if benchmark.enabled: - benchmark(ex, stim) # ------------------------------------------------------------------------------------------------ # INVALID RATES: @@ -631,6 +627,7 @@ def test_WhenFinished_DDM_Analytical(): c.is_satisfied() +@pytest.mark.composition @pytest.mark.ddm_mechanism @pytest.mark.mechanism @pytest.mark.benchmark(group="DDM-comp") @@ -650,45 +647,58 @@ def test_DDM_in_composition(benchmark, comp_mode): C = pnl.Composition() C.add_linear_processing_pathway([M]) inputs = {M: [10]} - val = C.run(inputs, num_trials=2, execution_mode=comp_mode) + val = benchmark(C.run, inputs, num_trials=2, execution_mode=comp_mode) + # FIXME: Python version returns dtype=object val = np.asfarray(val) assert np.allclose(val[0], [2.0]) assert np.allclose(val[1], [0.2]) - if benchmark.enabled: - benchmark(C.run, inputs, num_trials=2, execution_mode=comp_mode) +@pytest.mark.composition @pytest.mark.ddm_mechanism -@pytest.mark.mechanism -def test_DDM_threshold_modulation(comp_mode): - M = pnl.DDM( - name='DDM', - function=pnl.DriftDiffusionAnalytical( - threshold=20.0, - ), - ) - monitor = pnl.TransferMechanism(default_variable=[[0.0]], - size=1, - function=pnl.Linear(slope=1, intercept=0), - output_ports=[pnl.RESULT], - name='monitor') +def test_DDM_threshold_modulation_analytical(comp_mode): + M = pnl.DDM(name='DDM', + function=pnl.DriftDiffusionAnalytical( + threshold=20.0, + ), + ) + + control = pnl.ControlMechanism(control_signals=[(pnl.THRESHOLD, M)]) + + C = pnl.Composition() + C.add_node(M, required_roles=[pnl.NodeRole.ORIGIN, pnl.NodeRole.TERMINAL]) + C.add_node(control) + inputs = {M:[1], control:[3]} + val = C.run(inputs, num_trials=1, execution_mode=comp_mode) + + # Default modulation is 'multiplicative' so the threshold is 20 * 3 = 60 + assert np.allclose(val[0], [60.0]) + assert np.allclose(val[1], [60.2]) + + +@pytest.mark.composition +@pytest.mark.ddm_mechanism +def test_DDM_threshold_modulation_integrator(comp_mode): + M = pnl.DDM(name='DDM', +
execute_until_finished=True, + function=pnl.DriftDiffusionIntegrator(threshold=20), + ) control = pnl.ControlMechanism( - monitor_for_control=monitor, control_signals=[(pnl.THRESHOLD, M)]) C = pnl.Composition() C.add_node(M, required_roles=[pnl.NodeRole.ORIGIN, pnl.NodeRole.TERMINAL]) - C.add_node(monitor) C.add_node(control) - inputs = {M:[1], monitor:[3]} + inputs = {M:[1], control:[3]} val = C.run(inputs, num_trials=1, execution_mode=comp_mode) - # FIXME: Python version returns dtype=object - val = np.asfarray(val) + assert np.allclose(val[0], [60.0]) - assert np.allclose(val[1], [60.2]) + assert np.allclose(val[1], [60.0]) + +@pytest.mark.composition @pytest.mark.parametrize(["noise", "threshold", "expected_results"],[ (1.0, 0.0, (0.0, 1.0)), (1.5, 2, (-2.0, 1.0)), @@ -772,7 +782,7 @@ def test_sequence_of_DDM_mechs_in_Composition_Pathway(): np.testing.assert_allclose(val, expected, atol=1e-08, err_msg='Failed on expected_output[{0}]'.format(i)) -@pytest.mark.mechanism +@pytest.mark.composition @pytest.mark.ddm_mechanism def test_DDMMechanism_LCA_equivalent(comp_mode): diff --git a/tests/mechanisms/test_episodic_memory.py b/tests/mechanisms/test_episodic_memory.py index ab27e385c9a..479becb96ee 100644 --- a/tests/mechanisms/test_episodic_memory.py +++ b/tests/mechanisms/test_episodic_memory.py @@ -48,15 +48,13 @@ @pytest.mark.parametrize('variable, func, params, expected', test_data, ids=names) def test_with_dictionary_memory(variable, func, params, expected, benchmark, mech_mode): f = func(seed=0, **params) - m = EpisodicMemoryMechanism(content_size=len(variable[0]), assoc_size=len(variable[1]), function=f) + m = EpisodicMemoryMechanism(size=len(variable[0]), assoc_size=len(variable[1]), function=f) EX = pytest.helpers.get_mech_execution(m, mech_mode) EX(variable) - res = EX(variable) + res = benchmark(EX, variable) assert np.allclose(res[0], expected[0]) assert np.allclose(res[1], expected[1]) - if benchmark.enabled: - benchmark(EX, variable) # TEST WITH ContentAddressableMemory *********************************************************************************** @@ -76,7 +74,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec # expected input_port names ['FIELD_0_INPUT'], # expected output_port names - ['RETREIVED_FIELD_0'], + ['RETRIEVED_FIELD_0'], # expected output [[0,0]] ), @@ -94,7 +92,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec # expected input_port names ['FIELD_0_INPUT', 'FIELD_1_INPUT', 'FIELD_2_INPUT'], # expected output_port names - ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1', 'RETREIVED_FIELD_2'], + ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1', 'RETRIEVED_FIELD_2'], # expected output [[0,0],[0,0],[0,0,0]] ), @@ -105,7 +103,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec {'default_variable': [[0],[0,0],[0,0,0]]}, [[10.],[20., 30.],[40., 50., 60.]], ['FIELD_0_INPUT', 'FIELD_1_INPUT', 'FIELD_2_INPUT'], - ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1', 'RETREIVED_FIELD_2'], + ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1', 'RETRIEVED_FIELD_2'], [[0],[0,0],[0,0,0]] ), ( @@ -117,7 +115,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec {'size':[1,2,3]}, [[10.],[20., 30.],[40., 50., 60.]], ['FIELD_0_INPUT', 'FIELD_1_INPUT', 'FIELD_2_INPUT'], - ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1', 'RETREIVED_FIELD_2'], + ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1', 'RETRIEVED_FIELD_2'], # [[10.],[20., 30.],[40., 50., 60.]] [[1], [2,3], [4,5,6]] # <- distance = 0 
to [[10.],[20., 30.],[40., 50., 60.]] ), @@ -130,7 +128,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec {'default_variable': [[0],[0,0],[0,0,0]], 'input_ports':['hello','world','goodbye']}, [[10.],[20., 30.],[40., 50., 60.]], ['hello', 'world', 'goodbye'], - ['RETREIVED_hello', 'RETREIVED_world', 'RETREIVED_goodbye'], + ['RETRIEVED_hello', 'RETRIEVED_world', 'RETRIEVED_goodbye'], [[1.],[2., 3.],[4., 5., 6.]] ), ( @@ -142,7 +140,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec {'size':[2,2,2]}, [[11,13], [22,23], [34, 35]], ['FIELD_0_INPUT', 'FIELD_1_INPUT', 'FIELD_2_INPUT'], - ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1', 'RETREIVED_FIELD_2'], + ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1', 'RETRIEVED_FIELD_2'], [[11,12], [22,23], [34, 35]], ), ( @@ -157,7 +155,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec {'default_variable':[[0,0],[0,0],[0,0]]}, [[10,20], [30,40], [50, 60]], ['FIELD_0_INPUT', 'FIELD_1_INPUT', 'FIELD_2_INPUT'], - ['RETREIVED_FIELD_0', 'RETREIVED_FIELD_1', 'RETREIVED_FIELD_2'], + ['RETRIEVED_FIELD_0', 'RETRIEVED_FIELD_1', 'RETRIEVED_FIELD_2'], [[10,20], [30,40], [50, 60]], ), ( @@ -168,7 +166,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec 'input_ports':['FIRST','SECOND']}, [[10,20], [30,40]], ['FIRST', 'SECOND'], - ['RETREIVED_FIRST', 'RETREIVED_SECOND'], + ['RETRIEVED_FIRST', 'RETRIEVED_SECOND'], [[0,0], [0,0]], ), ( @@ -180,7 +178,7 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec 'input_ports':['FIRST','SECOND']}, [[10,20], [30,40]], ['FIRST', 'SECOND'], - ['RETREIVED_FIRST', 'RETREIVED_SECOND'], + ['RETRIEVED_FIRST', 'RETRIEVED_SECOND'], [[10,20], [30,40]], ), ( @@ -191,27 +189,27 @@ def test_with_dictionary_memory(variable, func, params, expected, benchmark, mec 'input_ports':['FIRST','SECOND']}, [[10,20], [30,40]], ['FIRST', 'SECOND'], - ['RETREIVED_FIRST', 'RETREIVED_SECOND'], + ['RETRIEVED_FIRST', 'RETRIEVED_SECOND'], [[11,12],[22, 23]], ) ] # Allows names to be with each test_data set -names = [test_data[i][0] for i in range(len(test_data))] +names = [td[0] for td in test_data] @pytest.mark.parametrize('name, func, func_params, mech_params, test_var,' 'input_port_names, output_port_names, expected_output', test_data, ids=names) def test_with_contentaddressablememory(name, func, func_params, mech_params, test_var, input_port_names, output_port_names, expected_output, mech_mode): + if mech_mode != 'Python': + pytest.skip("Compiled execution not yet implemented for ContentAddressableMemory") + f = func(seed=0, **func_params) # EpisodicMemoryMechanism(function=f, **mech_params) em = EpisodicMemoryMechanism(function=f, **mech_params) assert em.input_ports.names == input_port_names assert em.output_ports.names == output_port_names - if mech_mode != 'Python': - pytest.skip("PTX not yet implemented for ContentAddressableMemory") - EX = pytest.helpers.get_mech_execution(em, mech_mode) diff --git a/tests/mechanisms/test_integrator_mechanism.py b/tests/mechanisms/test_integrator_mechanism.py index c5e974a4c20..41e247425cc 100644 --- a/tests/mechanisms/test_integrator_mechanism.py +++ b/tests/mechanisms/test_integrator_mechanism.py @@ -392,10 +392,8 @@ def test_integrator_multiple_input(self, benchmark, mech_mode): ) ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex([[1], [2]]) + val = benchmark(ex, [[1], [2]]) assert np.allclose(val, [[3]]) - if benchmark.enabled: - benchmark(ex, 
[[1], [2]]) @pytest.mark.mimo @pytest.mark.mechanism @@ -408,10 +406,8 @@ def test_integrator_multiple_output(self, benchmark, mech_mode): ) ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex([5]) + val = benchmark(ex, [5]) assert np.allclose(val, [[2.5], [2.5]]) - if benchmark.enabled: - benchmark(ex, [5]) @pytest.mark.mimo @pytest.mark.mechanism @@ -427,10 +423,8 @@ def test_integrator_multiple_input_output(self, benchmark, mech_mode): ) ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex([[1], [2]]) + val = benchmark(ex, [[1], [2]]) assert np.allclose(val, [[5], [3]]) - if benchmark.enabled: - benchmark(ex, [[1], [2]]) @pytest.mark.mechanism @pytest.mark.integrator_mechanism @@ -442,10 +436,9 @@ def test_FitzHughNagumo_simple_scalar(self, benchmark, mech_mode): function=FitzHughNagumoIntegrator()) ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex(var) - assert np.allclose(val[0], [0.05127053]) - if benchmark.enabled: - benchmark(ex, var) + ex(var) + val = benchmark(ex, var) + assert np.allclose(val, [[0.10501801629915011], [0.10501801629915011], [0.10501801629915011]]) @pytest.mark.mechanism @pytest.mark.integrator_mechanism @@ -457,10 +450,11 @@ def test_FitzHughNagumo_simple_vector(self, benchmark, mech_mode): function=FitzHughNagumoIntegrator) ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex(var) - assert np.allclose(val[0], [0.05127053, 0.15379818]) - if benchmark.enabled: - benchmark(ex, var) + ex(var) + val = benchmark(ex, var) + assert np.allclose(val, [[[0.10501801629915011, 0.3151109244983909]], + [[0.10501801629915011, 0.3151109244983909]], + [[0.10501801629915011, 0.3151109244983909]]]) @pytest.mark.mechanism @pytest.mark.integrator_mechanism @@ -602,10 +596,9 @@ def test_integrator_no_function(self, benchmark, mech_mode): I = IntegratorMechanism() ex = pytest.helpers.get_mech_execution(I, mech_mode) - val = ex([10]) - assert np.allclose(val, [[5.0]]) - if benchmark.enabled: - benchmark(ex, [10]) + ex([10]) + val = benchmark(ex, [10]) + assert np.allclose(val, [[7.5]]) class TestIntegratorInputs: # Part 1: VALID INPUT: @@ -1174,7 +1167,7 @@ def test_has_initializers(self): assert I.has_initializers assert hasattr(I, "reset_stateful_function_when") - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.integrator_mechanism @pytest.mark.parametrize('cond0, cond1, expected', [ (pnl.Never(), pnl.AtTrial(2), @@ -1218,6 +1211,7 @@ def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, assert np.allclose(expected, C.results) + @pytest.mark.composition def test_reset_stateful_function_when(self): I1 = IntegratorMechanism() I2 = IntegratorMechanism() diff --git a/tests/mechanisms/test_kwta.py b/tests/mechanisms/test_kwta.py index 8154995671b..cd1a999d1f1 100644 --- a/tests/mechanisms/test_kwta.py +++ b/tests/mechanisms/test_kwta.py @@ -58,7 +58,7 @@ def test_kwta_inputs_list_of_strings(self): ) K.execute(["one", "two", "three", "four"]) assert ('"Input to \'K\' ([\'one\' \'two\' \'three\' \'four\']) is incompatible with its corresponding ' - 'InputPort (K[InputPort-0]): \'cannot perform reduce with flexible type.\'"' in str(error_text.value)) + 'InputPort (K[InputPort-0]):' in str(error_text.value)) def test_kwta_var_list_of_strings(self): with pytest.raises(ParameterError) as error_text: diff --git a/tests/mechanisms/test_lca.py b/tests/mechanisms/test_lca.py index d79e1dbb667..1dc08750638 100644 --- a/tests/mechanisms/test_lca.py +++ b/tests/mechanisms/test_lca.py @@ -12,7 +12,8 @@ 
LCAMechanism, MAX_VS_AVG, MAX_VS_NEXT, CONVERGENCE class TestLCA: - @pytest.mark.mechanism + + @pytest.mark.composition @pytest.mark.lca_mechanism @pytest.mark.benchmark(group="LCAMechanism") def test_LCAMechanism_length_1(self, benchmark, comp_mode): @@ -38,7 +39,7 @@ def test_LCAMechanism_length_1(self, benchmark, comp_mode): # - - - - - - - - - - - - - - - - - - - - - - - - - - - C.run(inputs={T: [1.0]}, num_trials=3, execution_mode=comp_mode) + benchmark(C.run, inputs={T: [1.0]}, num_trials=3, execution_mode=comp_mode) # - - - - - - - TRIAL 1 - - - - - - - @@ -55,11 +56,9 @@ def test_LCAMechanism_length_1(self, benchmark, comp_mode): # new_transfer_input = 0.265 + ( 0.5 * 0.265 + 3.0 * 0.53 + 0.0 + 1.0)*0.1 + 0.0 = 0.53725 # f(new_transfer_input) = 0.53725 * 2.0 = 1.0745 - assert np.allclose(C.results, [[[0.2]], [[0.51]], [[0.9905]]]) - if benchmark.enabled: - benchmark(C.run, inputs={T: [1.0]}, num_trials=3, execution_mode=comp_mode) + assert np.allclose(C.results[:3], [[[0.2]], [[0.51]], [[0.9905]]]) - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.lca_mechanism @pytest.mark.benchmark(group="LCAMechanism") def test_LCAMechanism_length_2(self, benchmark, comp_mode): @@ -90,7 +89,7 @@ def test_LCAMechanism_length_2(self, benchmark, comp_mode): # - - - - - - - - - - - - - - - - - - - - - - - - - - - C.run(inputs={T: [1.0, 2.0]}, num_trials=3, execution_mode=comp_mode) + benchmark(C.run, inputs={T: [1.0, 2.0]}, num_trials=3, execution_mode=comp_mode) # - - - - - - - TRIAL 1 - - - - - - - @@ -116,10 +115,9 @@ def test_LCAMechanism_length_2(self, benchmark, comp_mode): # new_transfer_input_2 = 0.51 + ( 0.5 * 0.51 + 3.0 * 1.02 - 1.0*0.45 + 2.0)*0.1 + 0.0 = 0.9965 # f(new_transfer_input_2) = 0.9965 * 2.0 = 1.463 - assert np.allclose(C.results, [[[0.2, 0.4]], [[0.43, 0.98]], [[0.6705, 1.833]]]) - if benchmark.enabled: - benchmark(C.run, inputs={T: [1.0, 2.0]}, num_trials=3, execution_mode=comp_mode) + assert np.allclose(C.results[:3], [[[0.2, 0.4]], [[0.43, 0.98]], [[0.6705, 1.833]]]) + @pytest.mark.composition def test_equivalance_of_threshold_and_when_finished_condition(self): # Note: This tests the equivalence of results when: # execute_until_finished is True for the LCAMechanism (by default) @@ -152,18 +150,18 @@ def test_LCAMechanism_matrix(self): # Note: In the following tests, since the LCAMechanism's threshold is specified # it executes until it reaches the threshold.
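A minimal sketch of that threshold-terminated execution, using only constructs that appear in the tests below (illustrative, not part of the patch):

    lca = LCAMechanism(size=2, leak=0.5, threshold=0.7)  # execute_until_finished is True by default
    comp = Composition()
    comp.add_node(lca)
    # within a trial, the mechanism keeps integrating until an element reaches the 0.7 threshold
    result = comp.run(inputs={lca: [1, 0]})
    # per test_LCAMechanism_threshold below: np.allclose(result, [0.70005431, 0.29994569])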
- @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.lca_mechanism @pytest.mark.benchmark(group="LCAMechanism") def test_LCAMechanism_threshold(self, benchmark, comp_mode): lca = LCAMechanism(size=2, leak=0.5, threshold=0.7) comp = Composition() comp.add_node(lca) - result = comp.run(inputs={lca:[1,0]}, execution_mode=comp_mode) + + result = benchmark(comp.run, inputs={lca:[1,0]}, execution_mode=comp_mode) assert np.allclose(result, [0.70005431, 0.29994569]) - if benchmark.enabled: - benchmark(comp.run, inputs={lca:[1,0]}, execution_mode=comp_mode) + @pytest.mark.composition def test_LCAMechanism_threshold_with_max_vs_next(self): lca = LCAMechanism(size=3, leak=0.5, threshold=0.1, threshold_criterion=MAX_VS_NEXT) comp = Composition() @@ -171,6 +169,7 @@ def test_LCAMechanism_threshold_with_max_vs_next(self): result = comp.run(inputs={lca:[1,0.5,0]}) assert np.allclose(result, [[0.52490032, 0.42367594, 0.32874867]]) + @pytest.mark.composition def test_LCAMechanism_threshold_with_max_vs_avg(self): lca = LCAMechanism(size=3, leak=0.5, threshold=0.1, threshold_criterion=MAX_VS_AVG) comp = Composition() @@ -178,13 +177,14 @@ def test_LCAMechanism_threshold_with_max_vs_avg(self): result = comp.run(inputs={lca:[1,0.5,0]}) assert np.allclose(result, [[0.51180475, 0.44161738, 0.37374946]]) - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.lca_mechanism @pytest.mark.benchmark(group="LCAMechanism") def test_LCAMechanism_threshold_with_convergence(self, benchmark, comp_mode): lca = LCAMechanism(size=3, leak=0.5, threshold=0.01, threshold_criterion=CONVERGENCE) comp = Composition() comp.add_node(lca) + result = comp.run(inputs={lca:[0,1,2]}, execution_mode=comp_mode) assert np.allclose(result, [[0.19153799, 0.5, 0.80846201]]) if comp_mode is pnl.ExecutionMode.Python: @@ -192,7 +192,7 @@ def test_LCAMechanism_threshold_with_convergence(self, benchmark, comp_mode): if benchmark.enabled: benchmark(comp.run, inputs={lca:[0,1,2]}, execution_mode=comp_mode) - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.lca_mechanism def test_equivalance_of_threshold_and_termination_specifications_just_threshold(self, comp_mode): # Note: This tests the equivalence of using LCAMechanism-specific threshold arguments and @@ -215,6 +215,7 @@ def test_equivalance_of_threshold_and_termination_specifications_just_threshold( result2 = comp2.run(inputs={lca_termination:[1,0]}, execution_mode=comp_mode) assert np.allclose(result1, result2) + @pytest.mark.composition def test_equivalance_of_threshold_and_termination_specifications_max_vs_next(self): # Note: This tests the equivalence of using LCAMechanism-specific threshold arguments and # generic TransferMechanism termination_<*> arguments @@ -255,7 +256,7 @@ def test_equivalance_of_threshold_and_termination_specifications_max_vs_next(sel # result = comp.run(inputs={lca:[1,0]}) # assert np.allclose(result, [[0.71463572, 0.28536428]]) - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.lca_mechanism def test_LCAMechanism_DDM_equivalent(self, comp_mode): lca = LCAMechanism(size=2, leak=0., threshold=1, auto=0, hetero=0, @@ -268,6 +269,7 @@ def test_LCAMechanism_DDM_equivalent(self, comp_mode): class TestLCAReset: + @pytest.mark.composition def test_reset_run(self): L = LCAMechanism(name="L", diff --git a/tests/mechanisms/test_recurrent_transfer_mechanism.py b/tests/mechanisms/test_recurrent_transfer_mechanism.py index 6fc87408ffb..18d7eb883e0 100644 --- a/tests/mechanisms/test_recurrent_transfer_mechanism.py +++ 
b/tests/mechanisms/test_recurrent_transfer_mechanism.py @@ -21,6 +21,7 @@ RecurrentTransferError, RecurrentTransferMechanism from psyneulink.library.components.projections.pathway.autoassociativeprojection import AutoAssociativeProjection +@pytest.mark.composition class TestMatrixSpec: def test_recurrent_mech_matrix(self): @@ -99,35 +100,24 @@ def test_recurrent_mech_check_proj_attrs(self): @pytest.mark.mechanism @pytest.mark.recurrent_transfer_mechanism @pytest.mark.benchmark(group="RecurrentTransferMechanism") - def test_recurrent_mech_inputs_list_of_ints(self, benchmark, mech_mode): - R = RecurrentTransferMechanism( - name='R', - default_variable=[0, 0, 0, 0] - ) + @pytest.mark.parametrize("variable, params", + [ + pytest.param(([10, 12, 0, -1], [1, 2, 3, 0]), {'size': 4}, id="list_of_ints"), + pytest.param(([1.0, 1.2, 0., -1.3], [1., 5., 3., 0.]), {'size': 4}, id="list_of_floats"), + pytest.param(([10], [10]), {}, id="no_init_params"), + ]) + def test_recurrent_mech_inputs(self, benchmark, params, variable, mech_mode): + R = RecurrentTransferMechanism(name='R', **params) EX = pytest.helpers.get_mech_execution(R, mech_mode) - val1 = EX([10, 12, 0, -1]) - val2 = EX([1, 2, 3, 0]) + val1 = EX(variable[0]) + val2 = benchmark(EX, variable[1]) # The outputs match inputs because recurrent projection is - # not used when executing: mech is reset each time - np.testing.assert_allclose(val1, [[10.0, 12.0, 0, -1]]) - np.testing.assert_allclose(val2, [[1, 2, 3, 0]]) - if benchmark.enabled: - benchmark(EX, [1, 2, 3, 0]) - - @pytest.mark.mechanism - @pytest.mark.recurrent_transfer_mechanism - @pytest.mark.benchmark(group="RecurrentTransferMechanism") - def test_recurrent_mech_inputs_list_of_floats(self, benchmark, mech_mode): - R = RecurrentTransferMechanism( - name='R', - size=4 - ) - EX = pytest.helpers.get_mech_execution(R, mech_mode) - - val = benchmark(EX, [10.0, 10.0, 10.0, 10.0]) - np.testing.assert_allclose(val, [[10.0, 10.0, 10.0, 10.0]]) + # not used when executing standalone mechanism: + # the mechanism is reset each time + np.testing.assert_allclose(val1, [variable[0]]) + np.testing.assert_allclose(val2, [variable[1]]) @pytest.mark.mechanism @pytest.mark.recurrent_transfer_mechanism @@ -143,15 +133,16 @@ def test_recurrent_mech_integrator(self, benchmark, mech_mode): val1 = EX([[1.0, 2.0]]) val2 = EX([[1.0, 2.0]]) + # execute 10 times - for i in range(10): - val10 = EX([[1.0, 2.0]]) + for i in range(9): + EX([[1.0, 2.0]]) + + val10 = benchmark(EX, [[1.0, 2.0]]) assert np.allclose(val1, [[0.50249998, 0.50499983]]) assert np.allclose(val2, [[0.50497484, 0.50994869]]) assert np.allclose(val10, [[0.52837327, 0.55656439]]) - if benchmark.enabled: - benchmark(EX, [[1.0, 2.0]]) @pytest.mark.mechanism @pytest.mark.recurrent_transfer_mechanism @@ -168,14 +159,14 @@ def test_recurrent_mech_lci(self, benchmark, mech_mode): val1 = EX([[1.0, 2.0]]) val2 = EX([[1.0, 2.0]]) # execute 10 times - for i in range(10): - val10 = EX([[1.0, 2.0]]) + for i in range(9): + EX([[1.0, 2.0]]) + + val10 = benchmark(EX, [[1.0, 2.0]]) assert np.allclose(val1, [[0.1, 0.2]]) assert np.allclose(val2, [[0.196, 0.392]]) assert np.allclose(val10, [[0.96822561, 1.93645121]]) - if benchmark.enabled: - benchmark(EX, [[1.0, 2.0]]) # def test_recurrent_mech_inputs_list_of_fns(self): # R = RecurrentTransferMechanism( @@ -190,21 +181,6 @@ def test_recurrent_mech_lci(self, benchmark, mech_mode): # for i in range(len(val[0])): # np.testing.assert_allclose(val[0][i], expected[0][i]) - @pytest.mark.mechanism - 
@pytest.mark.recurrent_transfer_mechanism - @pytest.mark.benchmark(group="RecurrentTransferMechanism") - def test_recurrent_mech_no_inputs(self, benchmark, mech_mode): - R = RecurrentTransferMechanism( - name='R' - ) - np.testing.assert_allclose(R.defaults.variable, [[0]]) - EX = pytest.helpers.get_mech_execution(R, mech_mode) - - val = EX([10]) - np.testing.assert_allclose(val, [[10.]]) - if benchmark.enabled: - benchmark(EX, [1]) - def test_recurrent_mech_inputs_list_of_strings(self): with pytest.raises(MechanismError) as error_text: R = RecurrentTransferMechanism( @@ -214,8 +190,7 @@ def test_recurrent_mech_inputs_list_of_strings(self): ) R.execute(["one", "two", "three", "four"]) assert '"Input to \'R\' ([\'one\' \'two\' \'three\' \'four\']) is incompatible ' \ - 'with its corresponding InputPort (R[InputPort-0]): ' \ - '\'cannot perform reduce with flexible type.\'"' in str(error_text.value) + 'with its corresponding InputPort (R[InputPort-0]): ' in str(error_text.value) def test_recurrent_mech_var_list_of_strings(self): with pytest.raises(ParameterError) as error_text: @@ -628,6 +603,7 @@ def test_recurrent_mech_integration_rate_0_8_initial_1_2(self): # won't get executed if we only use the execute() method of Mechanism: thus, to test it we must use a Composition +@pytest.mark.composition def run_twice_in_composition(mech, input1, input2=None): if input2 is None: input2 = input1 @@ -637,6 +613,7 @@ def run_twice_in_composition(mech, input1, input2=None): return result[0] +@pytest.mark.composition class TestRecurrentTransferMechanismInProcess: simple_prefs = {REPORT_OUTPUT_PREF: False, VERBOSE_PREF: False} @@ -722,6 +699,7 @@ def test_recurrent_mech_process_proj_matrix_change(self): np.testing.assert_allclose(R.parameters.value.get(c), [[21, 3, 12, 35]]) +@pytest.mark.composition class TestRecurrentTransferMechanismInComposition: simple_prefs = {REPORT_OUTPUT_PREF: False, VERBOSE_PREF: False} @@ -957,6 +935,7 @@ def test_learning_of_orthognal_inputs(self): np.testing.assert_allclose(R.output_port.parameters.value.get(C),[0.0, 1.18518086, 0.0, 1.18518086]) +@pytest.mark.composition class TestRecurrentTransferMechanismReset: def test_reset_run(self): @@ -1024,6 +1003,7 @@ def test_clip_2d_array(self): assert np.allclose(R.execute([[-5.0, -1.0, 5.0], [5.0, -5.0, 1.0], [1.0, 5.0, 5.0]]), [[-2.0, -1.0, 2.0], [2.0, -2.0, 1.0], [1.0, 2.0, 2.0]]) +@pytest.mark.composition class TestRecurrentInputPort: def test_ris_simple(self): @@ -1061,6 +1041,7 @@ def my_fct(x): result = R2.execute([1,2]) np.testing.assert_allclose(result, [[0,0]]) + @pytest.mark.composition @pytest.mark.mechanism @pytest.mark.integrator_mechanism @pytest.mark.parametrize('cond0, cond1, expected', [ @@ -1107,6 +1088,7 @@ def test_reset_stateful_function_when_composition(self, comp_mode, cond0, cond1, assert np.allclose(expected, C.results) + @pytest.mark.composition @pytest.mark.mechanism @pytest.mark.integrator_mechanism @pytest.mark.parametrize('cond0, cond1, expected', [ @@ -1152,7 +1134,7 @@ def test_reset_stateful_function_when_has_initializers_composition(self, comp_mo assert np.allclose(exp, C.results) - @pytest.mark.mechanism + @pytest.mark.composition @pytest.mark.integrator_mechanism @pytest.mark.parametrize('until_finished, expected', [ (True, [[[[0.96875]]], [[[0.9990234375]]]]), # The 5th and the 10th iteration @@ -1177,6 +1159,7 @@ def test_max_executions_before_finished(self, comp_mode, until_finished, expecte assert np.allclose(expected[0], results) assert np.allclose(expected[1], results2) 
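A note on the marker changes in this file and the mechanism test files above: tests that construct and run a Composition gain @pytest.mark.composition, while @pytest.mark.mechanism stays reserved for standalone-mechanism tests. Assuming both markers are registered in the project's pytest configuration, the suite can then be sliced from the command line, e.g.:

    pytest -m composition          # run only the Composition-level tests
    pytest -m "not composition"    # skip the slower Composition runs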
+@pytest.mark.composition class TestDebugProperties: def test_defaults(self): diff --git a/tests/mechanisms/test_transfer_mechanism.py b/tests/mechanisms/test_transfer_mechanism.py index e6a295ce05a..fcbe979feca 100644 --- a/tests/mechanisms/test_transfer_mechanism.py +++ b/tests/mechanisms/test_transfer_mechanism.py @@ -54,17 +54,16 @@ def test_transfer_mech_inputs_list_of_floats(self, benchmark, mech_mode): T = TransferMechanism( name='T', default_variable=[0 for i in range(VECTOR_SIZE)], - integration_rate=1.0, + integration_rate=0.5, integrator_mode=True ) T.reset_stateful_function_when = Never() var = [10.0 for i in range(VECTOR_SIZE)] EX = pytest.helpers.get_mech_execution(T, mech_mode) - val = EX(var) - assert np.allclose(val, [[10.0 for i in range(VECTOR_SIZE)]]) - if benchmark.enabled: - benchmark(EX, var) + EX(var) + val = benchmark(EX, var) + assert np.allclose(val, [[7.5 for i in range(VECTOR_SIZE)]]) #@pytest.mark.mechanism #@pytest.mark.transfer_mechanism @@ -109,8 +108,7 @@ def test_transfer_mech_inputs_list_of_strings(self): ) T.execute(["one", "two", "three", "four"]) assert '"Input to \'T\' ([\'one\' \'two\' \'three\' \'four\']) is incompatible ' \ - 'with its corresponding InputPort (T[InputPort-0]): ' \ - '\'cannot perform reduce with flexible type.\'"' in str(error_text.value) + 'with its corresponding InputPort (T[InputPort-0]): ' in str(error_text.value) @pytest.mark.mechanism @pytest.mark.transfer_mechanism @@ -149,17 +147,16 @@ def test_transfer_mech_array_var_float_noise(self, benchmark, mech_mode): default_variable=[0 for i in range(VECTOR_SIZE)], function=Linear(), noise=5.0, - integration_rate=1.0, + integration_rate=0.5, integrator_mode=True ) T.reset_stateful_function_when = Never() EX = pytest.helpers.get_mech_execution(T, mech_mode) - var = [0 for i in range(VECTOR_SIZE)] - val = EX(var) - assert np.allclose(val, [[5.0 for i in range(VECTOR_SIZE)]]) - if benchmark.enabled: - benchmark(EX, var) + var = [1 for i in range(VECTOR_SIZE)] + EX(var) + val = benchmark(EX, var) + assert np.allclose(val, [[8.25 for i in range(VECTOR_SIZE)]]) @pytest.mark.mechanism @pytest.mark.transfer_mechanism @@ -203,18 +200,17 @@ def test_transfer_mech_array_var_normal_array_noise2(self, benchmark, mech_mode) name='T', default_variable=[0 for i in range(VECTOR_SIZE)], function=Linear(), - noise=[5.0 for i in range(VECTOR_SIZE)], - integration_rate=1.0, + noise=[5.0 + i for i in range(VECTOR_SIZE)], + integration_rate=0.3, integrator_mode=True ) T.reset_stateful_function_when = Never() EX = pytest.helpers.get_mech_execution(T, mech_mode) var = [0 for i in range(VECTOR_SIZE)] - val = EX(var) - assert np.allclose(val, [[5.0 for i in range(VECTOR_SIZE)]]) - if benchmark.enabled: - benchmark(EX, var) + EX(var) + val = benchmark(EX, var) + assert np.allclose(val, [[8.5 + (i * 1.7) for i in range(VECTOR_SIZE)]]) @pytest.mark.mechanism @pytest.mark.transfer_mechanism @@ -229,8 +225,8 @@ def test_transfer_mech_mismatched_shape_noise(self): integrator_mode=True ) T.execute() - assert 'Noise parameter' in str(error_text.value) and "does not match default variable" in str( - error_text.value) + assert 'Noise parameter' in str(error_text.value) + assert "does not match default variable" in str(error_text.value) @pytest.mark.mechanism @pytest.mark.transfer_mechanism @@ -412,87 +408,33 @@ def sum_all_elements(variable): @pytest.mark.mechanism @pytest.mark.transfer_mechanism @pytest.mark.benchmark(group="TransferMechanism Logistic") - def test_transfer_mech_logistic_fun(self, benchmark, 
@@ -412,87 +408,33 @@ def sum_all_elements(variable):
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
     @pytest.mark.benchmark(group="TransferMechanism Logistic")
-    def test_transfer_mech_logistic_fun(self, benchmark, mech_mode):
+    @pytest.mark.parametrize("func,variables,expected",
+                             [
+                              # Operations on vector elements are independent so we only provide one value
+                              pytest.param(Logistic, [0], [0.5], id="Logistic"),
+                              pytest.param(ReLU, [0, 1, -1], [0., 1, 0.], id="ReLU"),
+                              pytest.param(Exponential, [0, 1, -1], [1., 2.71828183, 0.36787944], id="Exponential"),
+                              pytest.param(SoftMax, [0, 1, -1], [1. / VECTOR_SIZE, 1. / VECTOR_SIZE, 1. / VECTOR_SIZE], id="SoftMax"),
+                             ])
+    def test_transfer_mech_func(self, benchmark, func, variables, expected, mech_mode):
         T = TransferMechanism(
             name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            function=Logistic(),
+            default_variable=np.zeros(VECTOR_SIZE),
+            function=func,
             integration_rate=1.0,
             integrator_mode=True
         )
         EX = pytest.helpers.get_mech_execution(T, mech_mode)
 
-        var = [0 for i in range(VECTOR_SIZE)]
-        val = EX(var)
-        assert np.allclose(val, [[0.5 for i in range(VECTOR_SIZE)]])
-        if benchmark.enabled:
-            benchmark(EX, var)
-
-    @pytest.mark.mechanism
-    @pytest.mark.transfer_mechanism
-    @pytest.mark.benchmark(group="TransferMechanism ReLU")
-    def test_transfer_mech_relu_fun(self, benchmark, mech_mode):
-
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            function=ReLU(),
-            integration_rate=1.0,
-            integrator_mode=True
-        )
-        EX = pytest.helpers.get_mech_execution(T, mech_mode)
-
-        val1 = EX([0 for i in range(VECTOR_SIZE)])
-        val2 = EX([1 for i in range(VECTOR_SIZE)])
-        val3 = EX([-1 for i in range(VECTOR_SIZE)])
-
-        assert np.allclose(val1, [[0.0 for i in range(VECTOR_SIZE)]])
-        assert np.allclose(val2, [[1.0 for i in range(VECTOR_SIZE)]])
-        assert np.allclose(val3, [[0.0 for i in range(VECTOR_SIZE)]])
-
-        if benchmark.enabled:
-            benchmark(EX, [0 for i in range(VECTOR_SIZE)])
-
-    @pytest.mark.mechanism
-    @pytest.mark.transfer_mechanism
-    @pytest.mark.benchmark(group="TransferMechanism Exponential")
-    def test_transfer_mech_exponential_fun(self, benchmark, mech_mode):
-
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            function=Exponential(),
-            integration_rate=1.0,
-            integrator_mode=True
-        )
-        EX = pytest.helpers.get_mech_execution(T, mech_mode)
-
-        var = [0 for i in range(VECTOR_SIZE)]
-        val = EX(var)
-        assert np.allclose(val, [[1.0 for i in range(VECTOR_SIZE)]])
-        if benchmark.enabled:
-            benchmark(EX, var)
-
-    @pytest.mark.mechanism
-    @pytest.mark.transfer_mechanism
-    @pytest.mark.benchmark(group="TransferMechanism SoftMax")
-    def test_transfer_mech_softmax_fun(self, benchmark, mech_mode):
-
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            function=SoftMax(),
-            integration_rate=1.0,
-            integrator_mode=True
-        )
-        EX = pytest.helpers.get_mech_execution(T, mech_mode)
+        vals = []
+        for var in variables[:-1]:
+            vals.append(EX([var] * VECTOR_SIZE))
+        vals.append(benchmark(EX, [variables[-1]] * VECTOR_SIZE))
 
-        var = [0 for i in range(VECTOR_SIZE)]
-        val = EX(var)
-        assert np.allclose(val, [[1.0 / VECTOR_SIZE for i in range(VECTOR_SIZE)]])
-        if benchmark.enabled:
-            benchmark(EX, var)
+        assert len(vals) == len(expected)
+        for val, exp in zip(vals, expected):
+            assert np.allclose(val, [[exp]] * VECTOR_SIZE)
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
@@ -577,10 +519,8 @@ def test_transfer_mech_array_assignments_mech_rate(self, benchmark, mech_mode):
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0., 0.19, 0.36, 0.51]])
-        if benchmark.enabled:
-            benchmark(EX, var)
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
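
Four near-identical single-function tests collapse above into one parametrized test with readable case ids. A sketch of the idiom with plain pytest (the lambdas are hypothetical stand-ins for Logistic, ReLU, and the rest):

import pytest

@pytest.mark.parametrize(
    "func, data, expected",
    [
        pytest.param(lambda x: x * x, 3, 9, id="square"),
        pytest.param(lambda x: x ** 3, 3, 27, id="cube"),
    ],
)
def test_func(func, data, expected):
    # each case is reported as test_func[square], test_func[cube], ...
    assert func(data) == expected

Feeding all but the last input as warm-up calls and benchmarking only the final one keeps the measured call independent of how many probe inputs a case supplies.
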
@@ -597,31 +537,30 @@ def test_transfer_mech_array_assignments_fct_rate(self, benchmark, mech_mode):
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0., 0.19, 0.36, 0.51]])
-        if benchmark.enabled:
-            benchmark(EX, var)
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
     @pytest.mark.benchmark(group="TransferMechanism Parameter Array Assignments")
     def test_transfer_mech_array_assignments_fct_over_mech_rate(self, benchmark, mech_mode):
 
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            integrator_mode=True,
-            integrator_function=AdaptiveIntegrator(rate=[i / 20 for i in range(VECTOR_SIZE)]),
-            integration_rate=[i / 10 for i in range(VECTOR_SIZE)]
-        )
+        with pytest.warns(UserWarning) as warnings:
+            T = TransferMechanism(
+                name='T',
+                default_variable=[0 for i in range(VECTOR_SIZE)],
+                integrator_mode=True,
+                integrator_function=AdaptiveIntegrator(rate=[i / 20 for i in range(VECTOR_SIZE)]),
+                integration_rate=[i / 10 for i in range(VECTOR_SIZE)]
+            )
+        assert any(str(w.message).startswith('Specification of the "integration_rate" parameter')
+                   for w in warnings), "Warnings: {}".format([str(w.message) for w in warnings])
         EX = pytest.helpers.get_mech_execution(T, mech_mode)
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0., 0.0975, 0.19, 0.2775]])
-        if benchmark.enabled:
-            benchmark(EX, var)
 
     def test_transfer_mech_array_assignments_wrong_size_mech_rate(self):
@@ -667,10 +606,8 @@ def test_transfer_mech_array_assignments_mech_init_val(self, benchmark, mech_mod
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.775, 0.8, 0.825]])
-        if benchmark.enabled:
-            benchmark(EX, var)
 
     @pytest.mark.mechanism
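
The fct_over_mech_rate hunk above now asserts that the conflicting rate specification emits a warning, scanning all captured records rather than assuming a fixed order. A sketch of the idiom with plain pytest (make_thing is a hypothetical stand-in for the constructor that warns):

import warnings
import pytest

def make_thing():
    warnings.warn('Specification of the "rate" parameter is redundant', UserWarning)
    return object()

def test_make_thing_warns():
    with pytest.warns(UserWarning) as records:
        make_thing()
    # don't rely on warning order; scan everything captured and echo it all
    # in the failure message if the expected warning is missing
    assert any(str(r.message).startswith('Specification of the "rate"')
               for r in records), "Warnings: {}".format([str(r.message) for r in records])
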
@@ -690,34 +627,35 @@ def test_transfer_mech_array_assignments_fct_initzr(self, benchmark, mech_mode):
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.775, 0.8, 0.825]])
-        if benchmark.enabled:
-            benchmark(EX, var)
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
     @pytest.mark.benchmark(group="TransferMechanism Parameter Array Assignments")
     def test_transfer_mech_array_assignments_fct_initlzr_over_mech_init_val(self, benchmark, mech_mode):
 
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            integrator_mode=True,
-            integrator_function=AdaptiveIntegrator(
-                default_variable=[0 for i in range(VECTOR_SIZE)],
-                initializer=[i / 10 for i in range(VECTOR_SIZE)]
-            ),
-            initial_value=[i / 10 for i in range(VECTOR_SIZE)]
-        )
-        EX = pytest.helpers.get_mech_execution(T, mech_mode)
+        with pytest.warns(UserWarning) as warnings:
+            T = TransferMechanism(
+                name='T',
+                default_variable=[0 for i in range(VECTOR_SIZE)],
+                integrator_mode=True,
+                integrator_function=AdaptiveIntegrator(
+                    default_variable=[0 for i in range(VECTOR_SIZE)],
+                    initializer=[i / 10 for i in range(VECTOR_SIZE)]
+                ),
+                initial_value=[i / 10 for i in range(VECTOR_SIZE)]
+            )
+        assert any(str(w.message).startswith('Specification of the "initial_value" parameter')
+                   for w in warnings), "Warnings: {}".format([str(w.message) for w in warnings])
+        EX = pytest.helpers.get_mech_execution(T, mech_mode)
 
         var = [1 for i in range(VECTOR_SIZE)]
+        EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.775, 0.8, 0.825]])
-        if benchmark.enabled:
-            benchmark(EX, var)
+
 
     def test_transfer_mech_array_assignments_wrong_size_mech_init_val(self):
@@ -806,10 +744,9 @@ def test_transfer_mech_array_assignments_mech_noise(self, benchmark, mech_mode):
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.9, 1.05, 1.2 ]])
-        if benchmark.enabled:
-            benchmark(EX, var)
+
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
@@ -827,10 +764,9 @@ def test_transfer_mech_array_assignments_fct_noise(self, benchmark, mech_mode):
 
         var = [1 for i in range(VECTOR_SIZE)]
         EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.9, 1.05, 1.2 ]])
-        if benchmark.enabled:
-            benchmark(EX, var)
+
 
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
@@ -838,21 +774,24 @@ def test_transfer_mech_array_assignments_fct_noise(self, benchmark, mech_mode):
     # FIXME: Incorrect T.integrator_function.defaults.variable reported
     def test_transfer_mech_array_assignments_fct_over_mech_noise(self, benchmark, mech_mode):
 
-        T = TransferMechanism(
-            name='T',
-            default_variable=[0 for i in range(VECTOR_SIZE)],
-            integrator_mode=True,
-            integrator_function=AdaptiveIntegrator(noise=[i / 20 for i in range(VECTOR_SIZE)]),
-            noise=[i / 10 for i in range(VECTOR_SIZE)]
-        )
-        EX = pytest.helpers.get_mech_execution(T, mech_mode)
+        with pytest.warns(UserWarning) as warnings:
+            T = TransferMechanism(
+                name='T',
+                default_variable=[0 for i in range(VECTOR_SIZE)],
+                integrator_mode=True,
+                integrator_function=AdaptiveIntegrator(noise=[i / 20 for i in range(VECTOR_SIZE)]),
+                noise=[i / 10 for i in range(VECTOR_SIZE)]
+            )
+        assert any(str(w.message).startswith('Specification of the "noise" parameter')
+                   for w in warnings), "Warnings: {}".format([str(w.message) for w in warnings])
+        EX = pytest.helpers.get_mech_execution(T, mech_mode)
 
         var = [1 for i in range(VECTOR_SIZE)]
+        EX(var)
-        val = EX(var)
+        val = benchmark(EX, var)
         assert np.allclose(val, [[ 0.75, 0.825, 0.9, 0.975]])
-        if benchmark.enabled:
-            benchmark(EX, var)
+
 
     # def test_transfer_mech_array_assignments_wrong_size_mech_noise(self, benchmark, mode):
     def test_transfer_mech_array_assignments_wrong_size_mech_noise(self):
@@ -903,15 +842,12 @@ def test_transfer_mech_integration_rate_0_8(self, benchmark, mech_mode):
         )
         EX = pytest.helpers.get_mech_execution(T, mech_mode)
 
-        val1 = T.execute([1 for i in range(VECTOR_SIZE)])
-        val2 = T.execute([1 for i in range(VECTOR_SIZE)])
+        val1 = EX([1 for i in range(VECTOR_SIZE)])
+        val2 = benchmark(EX, [1 for i in range(VECTOR_SIZE)])
 
         assert np.allclose(val1, [[0.8 for i in range(VECTOR_SIZE)]])
         assert np.allclose(val2, [[0.96 for i in range(VECTOR_SIZE)]])
 
-        if benchmark.enabled:
-            benchmark(T.execute, [0 for i in range(VECTOR_SIZE)])
-
     @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
     @pytest.mark.benchmark(group="TransferMechanism Linear TimeConstant=1")
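
The 0.8/0.96 pair in test_transfer_mech_integration_rate_0_8 is the usual exponential-smoothing closed form: with constant input x, zero start and rate r, the value after n executions is x * (1 - (1 - r) ** n). A quick check of that identity (a sanity check, not part of the test suite):

def smoothed(x, rate, n):
    # closed form of prev = (1 - rate) * prev + rate * x after n steps from 0
    return x * (1 - (1 - rate) ** n)

assert abs(smoothed(1.0, 0.8, 1) - 0.8) < 1e-12   # val1
assert abs(smoothed(1.0, 0.8, 2) - 0.96) < 1e-12  # val2 (the benchmarked call)
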
@@ -1430,6 +1366,7 @@ def test_previous_value_persistence_execute(self):
         # linear fn: 0.595*1.0 = 0.595
         assert np.allclose(T.integrator_function.previous_value, 0.595)
 
+    @pytest.mark.composition
     def test_previous_value_persistence_run(self):
         T = TransferMechanism(name="T",
                               initial_value=0.5,
@@ -1497,6 +1434,7 @@ def test_previous_value_reset_execute(self):
         assert np.allclose(T.integrator_function.previous_value, 0.46)  # property that looks at integrator, which updated with mech exec
         assert np.allclose(T.value, 0.46)  # on mechanism, but updates with exec
 
+    @pytest.mark.composition
     def test_reset_run(self):
         T = TransferMechanism(name="T",
                               initial_value=0.5,
@@ -1537,6 +1475,7 @@ def test_reset_run(self):
         # linear fn: 0.595*1.0 = 0.595
         assert np.allclose(T.integrator_function.parameters.previous_value.get(C), 0.595)
 
+    @pytest.mark.composition
     def test_reset_run_array(self):
         T = TransferMechanism(name="T",
                               default_variable=[0.0, 0.0, 0.0],
@@ -1577,6 +1516,7 @@ def test_reset_run_array(self):
         # linear fn: 0.595*1.0 = 0.595
         assert np.allclose(T.integrator_function.parameters.previous_value.get(C), [0.595, 0.595, 0.595])
 
+    @pytest.mark.composition
     def test_reset_run_2darray(self):
 
         initial_val = [[0.5, 0.5, 0.5]]
@@ -1629,6 +1569,7 @@ def test_reset_not_integrator(self):
         assert "not allowed because its `integrator_mode` parameter" in str(err_txt.value)
         assert "is currently set to \'False\'; try setting it to \'True\'" in str(err_txt.value)
 
+    @pytest.mark.composition
     def test_switch_mode(self):
         T = TransferMechanism(integrator_mode=True,
                               on_resume_integrator_mode=LAST_INTEGRATED_VALUE)
@@ -1659,6 +1600,7 @@ def test_switch_mode(self):
         C.run({T: [[1.0], [1.0], [1.0]]})
         assert np.allclose(T.parameters.value.get(C), [[0.984375]])
 
+    @pytest.mark.composition
     def test_initial_values_softmax(self):
         T = TransferMechanism(default_variable=[[0.0, 0.0], [0.0, 0.0]],
                               function=SoftMax(),
@@ -1695,6 +1637,7 @@ def test_set_integrator_mode_after_init(self):
         T.execute(1)
 
 
+@pytest.mark.composition
 class TestOnResumeIntegratorMode:
 
     def test_last_integrated_value_spec(self):
@@ -1777,7 +1720,6 @@ def test_reset_spec(self):
         # Trial 1: 0.5*0.5 + 0.5*2.0 = 1.25 * 1.0 = 1.25
         assert np.allclose(T.parameters.value.get(C), [[1.25]])
 
-    @pytest.mark.mechanism
     @pytest.mark.transfer_mechanism
     @pytest.mark.benchmark(group="TransferMechanism")
     # 'LLVM' mode is not supported, because synchronization of compiler and
diff --git a/tests/misc/test_parameters.py b/tests/misc/test_parameters.py
index 98af182a686..751a1f92a58 100644
--- a/tests/misc/test_parameters.py
+++ b/tests/misc/test_parameters.py
@@ -633,3 +633,41 @@ def set_p_default(obj, val):
     assert TestParent.defaults.p == 0
     assert TestChild.defaults.p == 1
     assert TestGrandchild.defaults.p == 20
+
+
+def test_dependent_parameter_validate():
+    # using 3 parameters to reduce chance of random success
+    class NewF(pnl.Function_Base):
+        class Parameters(pnl.Function_Base.Parameters):
+            a = pnl.Parameter(1)
+            b = pnl.Parameter(2, dependencies='a')
+            c = pnl.Parameter(3, dependencies='b')
+            d = pnl.Parameter(4, dependencies='c')
+
+            def _validate_b(self, b):
+                if b != self.a.default_value + 1:
+                    return 'invalid'
+
+            def _validate_c(self, c):
+                if c != self.b.default_value + 1:
+                    return 'invalid'
+
+            def _validate_d(self, d):
+                if d != self.c.default_value + 1:
+                    return 'invalid'
+
+        def __init__(self, **kwargs):
+            return super().__init__(0, {}, **kwargs)
+
+        def _function(self, variable=None, context=None, params=None):
+            return 0
+
+    pnl.ProcessingMechanism(function=NewF(a=2, b=3, c=4, d=5))
+
+    with pytest.raises(pnl.ParameterError) as err:
+        # b should be first error to occur
+        pnl.ProcessingMechanism(function=NewF(b=3, c=5, d=7))
+    assert re.match(
+        r"Value \(3\) assigned to parameter 'b'.*is not valid: invalid",
+        str(err.value)
+    )
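
test_dependent_parameter_validate relies on validators firing in dependency order, so 'b' fails before 'c' or 'd' even though all three are invalid. That ordering is just a topological sort of the declared dependencies; a generic sketch with the standard library, independent of PsyNeuLink's Parameter machinery:

from graphlib import TopologicalSorter

# each parameter maps to the set of parameters it depends on
dependencies = {'b': {'a'}, 'c': {'b'}, 'd': {'c'}}
order = list(TopologicalSorter(dependencies).static_order())
assert order == ['a', 'b', 'c', 'd']  # so 'b' is the first failure reported
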
benchmark.group = "Necker Cube {}-{}".format(n_nodes, n_time_steps) + # this code only works for N_PERCEPTS == 2 ALL_PERCEPTS = ['a', 'b'] @@ -124,13 +126,6 @@ def get_node(percept, node_id): for node_ in bp_comp.nodes } - # run the model - res = bp_comp.run(input_dict, num_trials=n_time_steps, execution_mode=comp_mode) - if pytest.helpers.llvm_current_fp_precision() == 'fp32': - assert np.allclose(res, expected) - else: - np.testing.assert_allclose(res, expected) - # Test that order of CIM ports follows order of Nodes in self.nodes for i in range(n_nodes): a_name = "a-{}".format(i) @@ -140,9 +135,12 @@ def get_node(percept, node_id): assert b_name in bp_comp.input_CIM.input_ports.names[i + n_nodes] assert b_name in bp_comp.output_CIM.output_ports.names[i + n_nodes] - if benchmark.enabled: - benchmark.group = "Necker Cube {}-{}".format(n_nodes, n_time_steps) - benchmark(bp_comp.run, input_dict, num_trials=n_time_steps, execution_mode=comp_mode) + # run the model + res = benchmark(bp_comp.run, input_dict, num_trials=n_time_steps, execution_mode=comp_mode) + if pytest.helpers.llvm_current_fp_precision() == 'fp32': + assert np.allclose(res, expected) + else: + np.testing.assert_allclose(res, expected) @pytest.mark.model @@ -222,7 +220,7 @@ def test_vectorized_necker_cube(benchmark, comp_mode): node4: np.random.random((1,16)) } - result = comp2.run(input_dict, num_trials=10, execution_mode=comp_mode) + result = benchmark(comp2.run, input_dict, num_trials=10, execution_mode=comp_mode) assert np.allclose(result, [[ 2636.29181172, -662.53579899, 2637.35386946, -620.15550833, -595.55319772, 2616.74310649, -442.74286574, 2588.4778162 , @@ -232,6 +230,3 @@ def test_vectorized_necker_cube(benchmark, comp_mode): 2590.69244696, -555.19824432, 2591.63200098, -509.58072358, -2618.88711219, 682.65814776, -2620.18294962, 640.09719335, 615.39758884, -2599.45663784, 462.67291695, -2570.99427346]]) - - if benchmark.enabled: - benchmark(comp2.run, input_dict, num_trials=10, execution_mode=comp_mode) diff --git a/tests/models/test_botvinick.py b/tests/models/test_botvinick.py index 02cfe6e3d9e..d7b1634a904 100644 --- a/tests/models/test_botvinick.py +++ b/tests/models/test_botvinick.py @@ -14,8 +14,6 @@ # Note that this script implements a slightly different Figure than in the original Figure in the paper. # However, this implementation is identical with a plot we created with an old MATLAB code which was used for the # conflict monitoring simulations. 
diff --git a/tests/models/test_botvinick.py b/tests/models/test_botvinick.py
index 02cfe6e3d9e..d7b1634a904 100644
--- a/tests/models/test_botvinick.py
+++ b/tests/models/test_botvinick.py
@@ -14,8 +14,6 @@
 # Note that this script implements a slightly different Figure than in the original Figure in the paper.
 # However, this implementation is identical with a plot we created with an old MATLAB code which was used for the
 # conflict monitoring simulations.
-import psyneulink.core.components.functions.nonstateful.objectivefunctions
-import psyneulink.core.components.functions.nonstateful.transferfunctions
 
 
 @pytest.mark.model
@@ -30,20 +28,20 @@ def test_botvinick_model(benchmark, comp_mode, reps):
     # Linear input layer
     # colors: ('red', 'green'), words: ('RED','GREEN')
     colors_input_layer = pnl.TransferMechanism(size=3,
-                                               function=psyneulink.core.components.Linear,
+                                               function=pnl.Linear,
                                                name='COLORS_INPUT')
 
     words_input_layer = pnl.TransferMechanism(size=3,
-                                              function=psyneulink.core.components.Linear,
+                                              function=pnl.Linear,
                                               name='WORDS_INPUT')
 
     task_input_layer = pnl.TransferMechanism(size=2,
-                                             function=psyneulink.core.components.Linear,
+                                             function=pnl.Linear,
                                              name='TASK_INPUT')
 
     #   Task layer, tasks: ('name the color', 'read the word')
     task_layer = pnl.RecurrentTransferMechanism(size=2,
-                                                function=psyneulink.core.components.Logistic,
+                                                function=pnl.Logistic,
                                                 hetero=-2,
                                                 integrator_mode=True,
                                                 integration_rate=0.01,
@@ -52,14 +50,14 @@ def test_botvinick_model(benchmark, comp_mode, reps):
     # Hidden layer
     # colors: ('red','green', 'neutral') words: ('RED','GREEN', 'NEUTRAL')
     colors_hidden_layer = pnl.RecurrentTransferMechanism(size=3,
-                                                         function=psyneulink.core.components.Logistic(x_0=4.0),  # bias 4.0 is -4.0 in the paper see Docs for description
+                                                         function=pnl.Logistic(x_0=4.0),  # bias 4.0 is -4.0 in the paper see Docs for description
                                                          integrator_mode=True,
                                                          hetero=-2,
                                                          integration_rate=0.01,  # cohen-huston text says 0.01
                                                          name='COLORS_HIDDEN')
 
     words_hidden_layer = pnl.RecurrentTransferMechanism(size=3,
-                                                        function=psyneulink.core.components.Logistic(x_0=4.0),
+                                                        function=pnl.Logistic(x_0=4.0),
                                                         integrator_mode=True,
                                                         hetero=-2,
                                                         integration_rate=0.01,
@@ -67,14 +65,14 @@ def test_botvinick_model(benchmark, comp_mode, reps):
 
     #   Response layer, responses: ('red', 'green')
     response_layer = pnl.RecurrentTransferMechanism(size=2,
-                                                    function=psyneulink.core.components.Logistic,
+                                                    function=pnl.Logistic,
                                                     hetero=-2.0,
                                                     integrator_mode=True,
                                                     integration_rate=0.01,
                                                     output_ports = [pnl.RESULT,
                                                                     {pnl.NAME: 'DECISION_ENERGY',
                                                                      pnl.VARIABLE: (pnl.OWNER_VALUE,0),
-                                                                     pnl.FUNCTION: psyneulink.core.components.Stability(
+                                                                     pnl.FUNCTION: pnl.Stability(
                                                                          default_variable = np.array([0.0, 0.0]),
                                                                          metric = pnl.ENERGY,
                                                                          matrix = np.array([[0.0, -4.0],
@@ -189,10 +187,12 @@ def run(mode):
 
         # Comp results include concatenation of both the above runs
         results.append(comp.results)
+        # cleanup the results of the most recently used context id
+        comp.results = []
 
         return results
 
-    res = run(comp_mode)
+    res = benchmark(run, comp_mode)
     # the corresponding output port indices in composition results
     # these were 0 and 1 in the prior version of the test
     response_results_index = 3
@@ -283,5 +283,3 @@ def run(mode):
     assert np.allclose(res[1][-1][response_decision_energy_index], [1.87232903])
     assert np.allclose(res[2][ntrials0 - 1][response_decision_energy_index], [0.94440397])
     assert np.allclose(res[2][-1][response_decision_energy_index], [0.90033387])
-    if benchmark.enabled:
-        benchmark(run, comp_mode)
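
The "cleanup the results" addition matters because benchmark() may invoke the run helper several times; anything left accumulated on the shared Composition would leak into the next measured round. A minimal sketch of the same pattern with a plain accumulator (the Comp class is a hypothetical stand-in):

class Comp:
    # hypothetical stand-in for a Composition that accumulates results
    def __init__(self):
        self.results = []

    def run(self, n):
        self.results.extend(range(n))
        return list(self.results)

comp = Comp()

def run_once():
    out = comp.run(3)
    comp.results = []  # cleanup so a repeated (benchmarked) call starts fresh
    return out

assert run_once() == [0, 1, 2]
assert run_once() == [0, 1, 2]  # identical when the harness calls it again
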
diff --git a/tests/models/test_greedy_agent.py b/tests/models/test_greedy_agent.py
index 1ee9c192628..a12a4f99dd4 100644
--- a/tests/models/test_greedy_agent.py
+++ b/tests/models/test_greedy_agent.py
@@ -52,15 +52,8 @@ def test_simplified_greedy_agent(benchmark, comp_mode):
     for projection in greedy_action_mech.projections:
         agent_comp.add_projection(projection)
 
-    run_results = agent_comp.run(inputs={player:[[619,177]],
-                                         prey:[[419,69]]},
-                                 execution_mode=comp_mode)
+    run_results = benchmark(agent_comp.run, inputs={player:[[619,177]],prey:[[419,69]]}, execution_mode=comp_mode)
     assert np.allclose(run_results, [[-200, -108]])
-    if benchmark.enabled:
-        benchmark(agent_comp.run, **{'inputs':{
-            player:[[619,177]],
-            prey:[[419,69]],
-        }, 'execution_mode':comp_mode})
 
 @pytest.mark.model
 @pytest.mark.benchmark(group="Greedy Agant Random")
@@ -94,19 +87,8 @@ def test_simplified_greedy_agent_random(benchmark, comp_mode):
     for projection in greedy_action_mech.projections:
         agent_comp.add_projection(projection)
 
-    run_results = agent_comp.run(inputs={player:[[619,177]],
-                                         prey:[[419,69]]},
-                                 execution_mode=comp_mode)
-    # KDM 12/4/19: modified results due to global seed offset of
-    # GaussianDistort assignment.
-    # to produce old numbers, run get_global_seed once before creating
-    # each Mechanism with GaussianDistort above
+    run_results = benchmark(agent_comp.run, inputs={player:[[619, 177]], prey:[[419, 69]]}, execution_mode=comp_mode)
     assert np.allclose(run_results, [[-199.5484223217141, -107.79361870517444]])
-    if benchmark.enabled:
-        benchmark(agent_comp.run, **{'inputs':{
-            player:[[619,177]],
-            prey:[[419,69]],
-        }, 'execution_mode':comp_mode})
 
 @pytest.mark.model
 @pytest.mark.benchmark(group="Predator Prey")
@@ -179,7 +161,7 @@ def action_fn(variable):
     # note: unitization is done in main loop
     greedy_action_mech = pnl.ProcessingMechanism(function=action_fn, input_ports=["predator", "player", "prey"],
-                                                 default_variable=[[0,0],[0,0],[0,0]], name="ACTION")
+                                                 default_variable=[[0, 1], [0, -1], [1, 0]], name="ACTION")
 
     direct_move = ComparatorMechanism(name='DIRECT MOVE',sample=player_pos, target=prey_pos)
@@ -232,7 +214,7 @@ def action_fn(variable):
         predator_pos:[[-0.03479106, -0.47666293]],
         prey_pos:[[-0.60836214, 0.1760381 ]],
     }
-    run_results = agent_comp.run(inputs=input_dict, num_trials=2, execution_mode=mode)
+    run_results = benchmark(agent_comp.run, inputs=input_dict, num_trials=2, execution_mode=mode)
 
     if len(samples) == 2:
         if prng == 'Default':
@@ -247,12 +229,9 @@ def action_fn(variable):
         else:
             assert False, "Unknown PRNG!"
 
-    if mode == pnl.ExecutionMode.Python:
-        # FIXEM: The results are 'close' for both Philox and MT,
+    if mode == pnl.ExecutionMode.Python and not benchmark.enabled:
+        # FIXME: The results are 'close' for both Philox and MT,
         #        because they're dominated by costs
         assert np.allclose(np.asfarray(ocm.function.saved_values).flatten(),
                            [-2.66258741, -22027.9970321, -22028.17515945, -44053.59867802,
                             -22028.06045185, -44053.4048842, -44053.40736234, -66078.90687915])
-
-    if benchmark.enabled:
-        benchmark(agent_comp.run, inputs=input_dict, execution_mode=mode)
diff --git a/tests/ports/test_input_ports.py b/tests/ports/test_input_ports.py
index a2c1d807a71..bd2568b018a 100644
--- a/tests/ports/test_input_ports.py
+++ b/tests/ports/test_input_ports.py
@@ -112,19 +112,21 @@ def test_default_input(self, default_input):
         comp = pnl.Composition(nodes=(m, pnl.NodeRole.INTERNAL))
         assert pnl.NodeRole.INTERNAL in comp.get_roles_by_node(m)
         assert pnl.NodeRole.INPUT not in comp.get_roles_by_node(m)
-        assert not m.path_afferents
+
+        assert not m.path_afferents  # No path_afferents since internal_only is set by default_input
+
         if default_input is None:
-            with pytest.warns(UserWarning) as warning:  # Warn, since default_input is NOT set
+            with pytest.warns(UserWarning) as warnings:  # Warn, since default_input is NOT set
                 comp.run()
-            assert repr(warning[1].message.args[0]) == '"InputPort (\'INTERNAL_NODE\') of \'TransferMechanism-0\' ' \
-                                                       'doesn\'t have any afferent Projections."'
-            assert m.input_port.value == variable  # For Mechanisms other than controller, default_variable seems
-            assert m.value == variable  # to still be used even though default_input is NOT set
+            assert any(repr(w.message.args[0]) == '"InputPort (\'INTERNAL_NODE\') of \'TransferMechanism-0\' '
+                                                  'doesn\'t have any afferent Projections."'
+                       for w in warnings)
         else:
-            assert not m.path_afferents  # No path_afferents since internal_only is set by default_input
             comp.run()  # No warning since default_input is set
-            assert m.input_port.value == variable
-            assert m.value == variable
+
+        assert m.input_port.value == variable  # For Mechanisms other than controller, default_variable seems
+        assert m.value == variable  # to still be used even though default_input is NOT set
 
     def test_no_efferents(self):
         A = pnl.InputPort()
diff --git a/tests/ports/test_output_ports.py b/tests/ports/test_output_ports.py
index 71761b860f4..e8a1c1fe977 100644
--- a/tests/ports/test_output_ports.py
+++ b/tests/ports/test_output_ports.py
@@ -31,6 +31,7 @@ def test_output_port_variable_spec(self, mech_mode):
         for i, e in zip(res, expected):
             assert np.array_equal(i, e)
 
+    @pytest.mark.composition
     @pytest.mark.mechanism
     @pytest.mark.parametrize('spec, expected1, expected2',
                              [((pnl.OWNER_VALUE, 0), [1], [1]),
diff --git a/tests/projections/test_projection_specifications.py b/tests/projections/test_projection_specifications.py
index 02edd207534..a7338f2efe7 100644
--- a/tests/projections/test_projection_specifications.py
+++ b/tests/projections/test_projection_specifications.py
@@ -480,8 +480,7 @@ def test_no_warning_when_matrix_specified(self):
         )
         c.add_linear_processing_pathway([m0, p0, m1])
         for warn in w:
-            if r'elementwise comparison failed; returning scalar instead' in warn.message.args[0]:
-                raise
+            assert 'elementwise comparison failed; returning scalar instead' not in warn.message.args[0]
 
     # KDM: this is a good candidate for pytest.parametrize
     def test_masked_mapping_projection(self):
diff --git a/tests/scheduling/test_condition.py b/tests/scheduling/test_condition.py
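
The test_no_warning_when_matrix_specified fix above replaces a bare raise (which, with no active exception, would itself fail with an unrelated RuntimeError) by a negative assertion that names the offending warning in the failure report. A self-contained sketch of the same scan with the standard library:

import warnings

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    warnings.warn("something unrelated", UserWarning)

for warn in w:
    # on failure, pytest shows the warning text instead of a bare RuntimeError
    assert 'elementwise comparison failed; returning scalar instead' not in str(warn.message)
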
index f9684059100..aedb26b04b2 100644
--- a/tests/scheduling/test_condition.py
+++ b/tests/scheduling/test_condition.py
@@ -82,6 +82,7 @@ def func(a, b, c=True):
         assert not cond.is_satisfied(False, c=False)
         assert not cond.is_satisfied(False, c=False, extra_arg=True)
 
+@pytest.mark.composition
 class TestGeneric:
     def test_WhileNot_AtPass(self):
         comp = Composition()
@@ -115,6 +116,7 @@ def test_WhileNot_AtPass_in_middle(self):
         expected_output = [A, A, set(), A, A]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+@pytest.mark.composition
 class TestRelative:
 
     def test_Any_end_before_one_finished(self):
@@ -211,6 +213,7 @@ def test_NWhen_AfterNCalls(self, n, expected_output):
 
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+@pytest.mark.composition
 class TestTime:
 
     def test_BeforeTimeStep(self):
@@ -480,6 +483,7 @@ def test_AfterNTrials(self):
         expected_output = [set(), A, A, A, A]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+@pytest.mark.composition
 class TestComponentBased:
 
     def test_BeforeNCalls(self):
@@ -562,6 +566,7 @@ def test_AfterNCalls(self):
 
 class TestConvenience:
 
+    @pytest.mark.composition
     def test_AtTrialStart(self):
         comp = Composition()
         A = TransferMechanism(name='A')
@@ -579,6 +584,7 @@ def test_AtTrialStart(self):
         expected_output = [A, B, A, A]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+    @pytest.mark.composition
     def test_composite_condition_multi(self):
         comp = Composition()
         A = TransferMechanism(function=Linear(slope=5.0, intercept=2.0), name='A')
@@ -613,6 +619,7 @@ def test_composite_condition_multi(self):
         ]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+    @pytest.mark.composition
    def test_AfterNCallsCombined(self):
         comp = Composition()
         A = TransferMechanism(function=Linear(slope=5.0, intercept=2.0), name='A')
@@ -640,6 +647,7 @@ def test_AfterNCallsCombined(self):
         ]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+    @pytest.mark.composition
     def test_AllHaveRun(self):
         comp = Composition()
         A = TransferMechanism(function=Linear(slope=5.0, intercept=2.0), name='A')
@@ -667,6 +675,7 @@ def test_AllHaveRun(self):
         ]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+    @pytest.mark.composition
     def test_AllHaveRun_2(self):
         comp = Composition()
         A = TransferMechanism(function=Linear(slope=5.0, intercept=2.0), name='A')
@@ -692,6 +701,7 @@ def test_AllHaveRun_2(self):
         ]
         assert output == pytest.helpers.setify_expected_output(expected_output)
 
+    @pytest.mark.composition
     @pytest.mark.parametrize(
         'parameter, indices, default_variable, integration_rate, expected_results',
         [
@@ -723,6 +733,7 @@ def test_Threshold_parameters(
 
         np.testing.assert_array_equal(comp.results, expected_results)
 
+    @pytest.mark.composition
     @pytest.mark.parametrize(
         'comparator, increment, threshold, expected_results',
         [
@@ -755,6 +766,7 @@ def test_Threshold_comparators(
 
         np.testing.assert_array_equal(comp.results, expected_results)
 
+    @pytest.mark.composition
     @pytest.mark.parametrize(
         'comparator, increment, threshold, atol, rtol, expected_results',
         [
@@ -790,6 +802,7 @@ def test_Threshold_tolerances(
 
         np.testing.assert_array_equal(comp.results, expected_results)
 
+@pytest.mark.composition
 class TestWhenFinished:
 
     @classmethod
@@ -984,6 +997,7 @@ class TestAbsolute:
     B = TransferMechanism(name='scheduler-pytests-B')
     C = TransferMechanism(name='scheduler-pytests-C')
 
+    @pytest.mark.composition
     @pytest.mark.parametrize(
         'conditions, termination_conds',
         [
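
The @pytest.mark.composition marks sprinkled through this file (and the rest of the diff) let CI select or deselect Composition-backed tests wholesale, for example with `pytest -m composition` or `pytest -m "not composition"`. A sketch of registering such a custom mark so pytest does not warn about it (PsyNeuLink's own configuration presumably already does this):

# conftest.py (sketch)
def pytest_configure(config):
    config.addinivalue_line(
        "markers",
        "composition: test constructs and runs a Composition",
    )
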
@@ -1036,6 +1050,7 @@ def test_TimeInterval_linear_everynms(self, conditions, termination_conds):
             for i in range(1, len(executions)):
                 assert (executions[i] - executions[i - 1]) == cond.repeat
 
+    @pytest.mark.composition
     @pytest.mark.parametrize(
         'conditions, termination_conds',
         [
diff --git a/tests/scheduling/test_scheduler.py b/tests/scheduling/test_scheduler.py
index 459512441f6..69aba949a04 100644
--- a/tests/scheduling/test_scheduler.py
+++ b/tests/scheduling/test_scheduler.py
@@ -1209,6 +1209,31 @@ def test_partial_override_composition(self):
         # two executions of B
         assert output == [.75]
 
+    def test_termination_conditions_after_recreating_scheduler(self):
+        comp = Composition()
+        A = TransferMechanism()
+        comp.scheduler.termination_conds = {TimeScale.TRIAL: AfterNCalls(A, 3)}
+        B = TransferMechanism()
+        for m in [A, B]:
+            comp.add_node(m)
+
+        comp.run(inputs={A: 1, B: 1})
+
+        expected_output = [{A, B}, {A, B}, {A, B}]
+        assert comp.scheduler.execution_list[comp.default_execution_id] == expected_output
+
+    def test_termination_conditions_in_composition_constructor(self):
+        A = TransferMechanism()
+        comp = Composition(termination_processing={TimeScale.TRIAL: AfterNCalls(A, 3)})
+        B = TransferMechanism()
+        for m in [A, B]:
+            comp.add_node(m)
+
+        comp.run(inputs={A: 1, B: 1})
+
+        expected_output = [{A, B}, {A, B}, {A, B}]
+        assert comp.scheduler.execution_list[comp.default_execution_id] == expected_output
+
 
 def _get_vertex_feedback_type(graph, sender_port, receiver_mech):
     # there is only one projection per pair
@@ -1498,7 +1523,7 @@ def test_inline_control_mechanism_example(self):
         }
         assert comp.scheduler.dependency_dict == expected_dependencies
 
-    @pytest.mark.mechanism
+    @pytest.mark.composition
     @pytest.mark.transfer_mechanism
     @pytest.mark.parametrize('timescale, expected',
                              [(TimeScale.TIME_STEP, [[0.5], [0.4375]]),
@@ -1567,7 +1592,8 @@ def test_scheduler_conditions(self, comp_mode, condition, scale, expected_result
                                          time_step_size=1.0),
                        reset_stateful_function_when=pnl.AtTrialStart(),
                        execute_until_finished=False,
-                       output_ports=[pnl.DECISION_VARIABLE, pnl.RESPONSE_TIME],
+                       # Use only the decision variable in this test
+                       output_ports=[pnl.DECISION_VARIABLE],
                        name='DDM')
 
     response = pnl.ProcessingMechanism(size=2, name="GATE")
diff --git a/tutorial_requirements.txt b/tutorial_requirements.txt
index 6c0b32c13fd..8e08358b08f 100644
--- a/tutorial_requirements.txt
+++ b/tutorial_requirements.txt
@@ -1,3 +1,3 @@
 graphviz<0.21.0
 jupyter<=1.0.0
-matplotlib<3.5.4
+matplotlib<3.6.4
diff --git a/versioneer.py b/versioneer.py
index 64fea1c8927..13901fcd1b9 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -418,7 +418,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
     return stdout, p.returncode
 
 
-LONG_VERSION_PY['git'] = '''
+LONG_VERSION_PY['git'] = r'''
 # This file helps to compute a version number in source trees obtained from
 # git-archive tarball (such as those provided by githubs download-from-tag
 # feature). Distribution tarballs (built by setup.py sdist) and build
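
The final hunk turns versioneer's embedded code template into a raw string. The template is full of backslashes (regex patterns and similar escapes), which plain string literals reinterpret and which newer Python versions flag as invalid escape sequences; a raw triple-quoted literal keeps them verbatim. Minimal illustration:

template = r'''tag_regex = "\d+\.\d+"'''
assert "\\d+" in template  # backslashes survive literally; no escape warning
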