Merge pull request #610 from BindsNET/hananel
Gymnasium and MSTDP
Hananel-Hazan committed Jan 16, 2023
2 parents 8321825 + 7171592 commit 54e5dec
Showing 9 changed files with 1,070 additions and 704 deletions.
2 changes: 1 addition & 1 deletion bindsnet/environment/dot_simulator.py
@@ -6,7 +6,7 @@
import numpy as np
import pandas as pd
import torch
-from gym import spaces
+from gymnasium import spaces

# Mappings for changing direction if reflected.
# Cannot cross a row boundary moving right or left.
20 changes: 14 additions & 6 deletions bindsnet/environment/environment.py
@@ -1,7 +1,7 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, Tuple

-import gym
+import gymnasium as gym
import numpy as np
import torch

@@ -59,7 +59,13 @@ class GymEnvironment(Environment):
A wrapper around the OpenAI ``gym`` environments.
"""

-    def __init__(self, name: str, encoder: Encoder = NullEncoder(), **kwargs) -> None:
+    def __init__(
+        self,
+        name: str,
+        render_mode: str = "rgb_array",
+        encoder: Encoder = NullEncoder(),
+        **kwargs,
+    ) -> None:
# language=rst
"""
Initializes the environment wrapper. This class makes the
@@ -82,7 +88,7 @@ def __init__(self, name: str, encoder: Encoder = NullEncoder(), **kwargs) -> None:
2D inputs.
"""
self.name = name
-        self.env = gym.make(name)
+        self.env = gym.make(name, render_mode=render_mode)
self.action_space = self.env.action_space

self.encoder = encoder
@@ -94,6 +100,7 @@ def __init__(self, name: str, encoder: Encoder = NullEncoder(), **kwargs) -> None:
self.history_length = kwargs.get("history_length", None)
self.delta = kwargs.get("delta", 1)
self.add_channel_dim = kwargs.get("add_channel_dim", True)
+        self.seed = kwargs.get("seed", None)

if self.history_length is not None and self.delta is not None:
self.history = {
@@ -122,7 +129,8 @@ def step(self, a: int) -> Tuple[torch.Tensor, float, bool, Dict[Any, Any]]:
:return: Observation, reward, done flag, and information dictionary.
"""
# Call gym's environment step function.
-        self.obs, self.reward, self.done, info = self.env.step(a)
+        self.obs, self.reward, terminated, truncated, info = self.env.step(a)
+        self.done = terminated or truncated

if self.clip_rewards:
self.reward = np.sign(self.reward)
@@ -162,15 +170,15 @@ def step(self, a: int) -> Tuple[torch.Tensor, float, bool, Dict[Any, Any]]:
# Return converted observations and other information.
return self.obs, self.reward, self.done, info

-    def reset(self) -> torch.Tensor:
+    def reset(self, seed=None) -> torch.Tensor:
# language=rst
"""
Wrapper around the OpenAI ``gym`` environment ``reset()`` function.
:return: Observation from the environment.
"""
# Call gym's environment reset function.
-        self.obs = self.env.reset()
+        self.obs, self.info = self.env.reset(seed=seed)
self.preprocess()

self.history = {i: torch.Tensor() for i in self.history}
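Note: the step() and reset() changes above track the Gymnasium API, which splits gym's single done flag into separate terminated and truncated flags and moves seeding into reset(). A minimal sketch of the upstream behavior the wrapper now adapts to, with CartPole-v1 used purely as an example environment:

import gymnasium as gym

env = gym.make("CartPole-v1", render_mode="rgb_array")
obs, info = env.reset(seed=0)  # reset() now returns (observation, info)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
done = terminated or truncated  # the old single done flag is split in two
env.close()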
18 changes: 12 additions & 6 deletions bindsnet/learning/learning.py
@@ -1528,12 +1528,18 @@ def _connection_update(self, **kwargs) -> None:

# Parse keyword arguments.
reward = kwargs["reward"]
-        a_plus = torch.tensor(
-            kwargs.get("a_plus", 1.0), device=self.connection.w.device
-        )
-        a_minus = torch.tensor(
-            kwargs.get("a_minus", -1.0), device=self.connection.w.device
-        )
+        a_plus = kwargs.get("a_plus", 1.0)
+        if isinstance(a_plus, dict):
+            for k, v in a_plus.items():
+                a_plus[k] = torch.tensor(v, device=self.connection.w.device)
+        else:
+            a_plus = torch.tensor(a_plus, device=self.connection.w.device)
+        a_minus = kwargs.get("a_minus", -1.0)
+        if isinstance(a_minus, dict):
+            for k, v in a_minus.items():
+                a_minus[k] = torch.tensor(v, device=self.connection.w.device)
+        else:
+            a_minus = torch.tensor(a_minus, device=self.connection.w.device)

# Compute weight update based on the eligibility value of the past timestep.
update = reward * self.eligibility
32 changes: 32 additions & 0 deletions bindsnet/network/network.py
@@ -405,7 +405,39 @@ def run(
self.layers[l].s[:, unclamp[t]] = 0

# Run synapse updates.
if "a_minus" in kwargs:
A_Minus = kwargs["a_minus"]
kwargs.pop("a_minus")
if isinstance(A_Minus, dict):
A_MD = True
else:
A_MD = False
else:
A_Minus = None

if "a_plus" in kwargs:
A_Plus = kwargs["a_plus"]
kwargs.pop("a_plus")
if isinstance(A_Plus, dict):
A_PD = True
else:
A_PD = False
else:
A_Plus = None

for c in self.connections:
if A_Minus != None and ((isinstance(A_Minus, float)) or (c in A_Minus)):
if A_MD:
kwargs["a_minus"] = A_Minus[c]
else:
kwargs["a_minus"] = A_Minus

if A_Plus != None and ((isinstance(A_Plus, float)) or (c in A_Plus)):
if A_PD:
kwargs["a_plus"] = A_Plus[c]
else:
kwargs["a_plus"] = A_Plus

self.connections[c].update(
mask=masks.get(c, None), learning=self.learning, **kwargs
)
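Note: with this block, a_plus and a_minus passed to Network.run() may be plain floats (forwarded unchanged to every connection) or dicts keyed by connection, in which case each connection's update rule receives its own value. A hedged usage sketch for reward-modulated STDP; the layer names, sizes, and hyperparameters are placeholders, not taken from this commit:

import torch

from bindsnet.learning import MSTDP
from bindsnet.network import Network
from bindsnet.network.nodes import Input, LIFNodes
from bindsnet.network.topology import Connection

network = Network(dt=1.0)
network.add_layer(Input(n=100), name="X")
network.add_layer(LIFNodes(n=10), name="Y")
network.add_connection(
    Connection(
        source=network.layers["X"],
        target=network.layers["Y"],
        update_rule=MSTDP,
        nu=1e-2,
    ),
    source="X",
    target="Y",
)

# Connections are keyed by (source_name, target_name) tuples, so the
# per-connection a_plus / a_minus dicts use the same keys.
spikes = torch.bernoulli(0.1 * torch.ones(250, 1, 100)).byte()  # [time, batch, n]
network.run(
    inputs={"X": spikes},
    time=250,
    reward=1.0,                  # MSTDP reads a reward signal from kwargs
    a_plus={("X", "Y"): 1.0},    # per-connection LTP amplitude
    a_minus={("X", "Y"): -1.0},  # per-connection LTD amplitude
)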
10 changes: 6 additions & 4 deletions bindsnet/pipeline/base_pipeline.py
@@ -1,4 +1,4 @@
-import collections
+import collections.abc
import time
from typing import Any, Dict, Tuple

@@ -25,11 +25,11 @@ def recursive_to(item, device):
return item.to(device)
elif isinstance(item, (string_classes, int, float, bool)):
return item
-    elif isinstance(item, collections.Mapping):
+    elif isinstance(item, collections.abc.Mapping):
return {key: recursive_to(item[key], device) for key in item}
elif isinstance(item, tuple) and hasattr(item, "_fields"):
return type(item)(*(recursive_to(i, device) for i in item))
-    elif isinstance(item, collections.Sequence):
+    elif isinstance(item, collections.abc.Sequence):
return [recursive_to(i, device) for i in item]
else:
raise NotImplementedError(f"Target type {type(item)} not supported.")
@@ -89,6 +89,7 @@ def __init__(self, network: Network, **kwargs) -> None:

self.print_interval = kwargs.get("print_interval", None)
self.test_interval = kwargs.get("test_interval", None)
+        self.plot_interval = kwargs.get("plot_interval", None)
self.step_count = 0
self.init_fn()
self.clock = time.time()
@@ -133,7 +134,8 @@ def step(self, batch: Any, **kwargs) -> Any:
)
self.clock = time.time()

-        self.plots(batch, step_out)
+        if self.plot_interval is not None and self.step_count % self.plot_interval == 0:
+            self.plots(batch, step_out)

if self.save_interval is not None and self.step_count % self.save_interval == 0:
self.network.save(self.save_dir)
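Note: the collections.abc changes are a straight compatibility fix; the Mapping and Sequence aliases in the top-level collections module were removed in Python 3.10, so the ABCs must be imported from collections.abc. A quick illustrative check:

import collections.abc

# Both checks behave as before; only the import location changed.
print(isinstance({"a": 1}, collections.abc.Mapping))    # True
print(isinstance([1, 2, 3], collections.abc.Sequence))  # True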
6 changes: 5 additions & 1 deletion examples/breakout/breakout_stdp.py
@@ -35,7 +35,7 @@
network.add_connection(middle_out, source="Hidden Layer", target="Output Layer")

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment = GymEnvironment("BreakoutDeterministic-v4", render_mode="human")
environment.reset()

# Build pipeline from specified components.
@@ -69,6 +69,10 @@ def run_pipeline(pipeline, episode_count):
print(f"Episode {i} total reward:{total_reward}")


+# enable MSTDP
+environment_pipeline.network.learning = True


print("Training: ")
run_pipeline(environment_pipeline, episode_count=100)

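Note: setting network.learning = True before run_pipeline() switches on the MSTDP weight updates for the training episodes; the flag is forwarded as the learning argument to each connection's update() inside Network.run(). A hedged sketch of the train/then-freeze pattern this enables; the evaluation phase below is illustrative and not part of the committed script:

# Train with plasticity enabled.
environment_pipeline.network.learning = True
run_pipeline(environment_pipeline, episode_count=100)

# Evaluate with weights frozen (illustrative only).
environment_pipeline.network.learning = False
run_pipeline(environment_pipeline, episode_count=10)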
13 changes: 7 additions & 6 deletions examples/breakout/random_network_baseline.py
@@ -55,7 +55,7 @@
input_exc_conn = Connection(
source=layers["X"],
target=layers["E"],
w=0.01 * torch.rand(layers["X"].n, layers["E"].n),
w=0.1 * torch.rand(layers["X"].n, layers["E"].n),
wmax=0.02,
norm=0.01 * layers["X"].n,
)
@@ -64,7 +64,7 @@
exc_readout_conn = Connection(
source=layers["E"],
target=layers["R"],
w=0.01 * torch.rand(layers["E"].n, layers["R"].n),
w=0.1 * torch.rand(layers["E"].n, layers["R"].n),
update_rule=Hebbian,
nu=[1e-2, 1e-2],
norm=0.5 * layers["E"].n,
@@ -95,16 +95,16 @@
network.add_monitor(voltages[layer], name="%s_voltages" % layer)

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment = GymEnvironment("BreakoutDeterministic-v4", render_mode="human")
environment.reset()

pipeline = EnvironmentPipeline(
network,
environment,
encoding=bernoulli,
-    time=1,
-    history=5,
-    delta=10,
+    history_length=1,
+    delta=1,
+    time=100,
plot_interval=plot_interval,
print_interval=print_interval,
render_interval=render_interval,
@@ -119,6 +119,7 @@
avg_lengths = []

i = 0
+# pipeline.reset_state_variables()
try:
while i < n:
result = pipeline.env_step()
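Note: the pipeline keyword arguments were also updated (history_length and delta in place of history, and the per-step simulation time raised from 1 to 100). For the environment wrapper itself, a hedged sketch of constructing it directly with the kwargs handled in environment.py above; the specific values are placeholders:

from bindsnet.environment import GymEnvironment

# render_mode is forwarded to gymnasium.make(); seed, history_length, and delta
# are read from **kwargs in GymEnvironment.__init__.
env = GymEnvironment(
    "BreakoutDeterministic-v4",
    render_mode="rgb_array",  # use "human" to open a window, as in the examples
    history_length=1,
    delta=1,
    seed=0,
)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())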
