In [1]:
import torch

In [8]:

from __future__ import print_function
import math

import numpy as np

import matplotlib.pyplot as plt

def maxEntropy(n,k):
  """
  Upper bound on the total entropy (in bits) of a layer with n units of which
  k are winners.

  Every unit is modelled as an independent binary variable that is on with
  probability ``k / n``; the result is n times the binary entropy of that
  probability.

  :param n: number of units (must be non-zero, it is used as a divisor)
  :param k: number of winning units
  :return: ``n * H(k / n)``; 0 when ``k / n`` is not strictly inside (0, 1)
  """
  density = float(k)/n

  # The binary entropy is only evaluated strictly inside (0, 1); at or outside
  # the end points the per-unit entropy is taken to be 0, which also keeps
  # log() away from zero.
  perUnit = 0
  if 0.0 < density < 1.0:
    complement = 1 - density
    perUnit = (-density) * math.log(density,2) - complement * math.log(complement,2)

  return n*perUnit


def binaryEntropy(x):
  """
  Element-wise entropy (in bits) of independent binary random variables.

  :param x: (torch tensor) probability of each variable being 1
  :return: tuple ``(entropy, total)`` where ``entropy`` is a tensor shaped like
           ``x`` holding the entropy of every variable and ``total`` is the
           sum of all its elements
  """
  complement = 1 - x
  perUnit = -x*x.log2() - complement*complement.log2()

  # p == 0 or p == 1 produces 0 * log2(0) = NaN above; such variables carry no
  # uncertainty, so their entropy is forced to 0.
  degenerate = x*complement == 0
  perUnit[degenerate] = 0

  return perUnit, perUnit.sum()

In [9]:
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2018, Numenta, Inc.  Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program.  If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

from __future__ import print_function

import torch



class k_winners(torch.autograd.Function):
  """
  A simple K-winner take all autograd function for creating layers with sparse
  output. Operates on 2D inputs of shape (batch, units).

   .. note::
      Code adapted from this excellent tutorial:
      https://github.com/jcjohnson/pytorch-examples
  """


  @staticmethod
  def forward(ctx, x, dutyCycles, k, boostStrength):
    r"""
    Use the boost strength to compute a boost factor for each unit represented
    in x. These factors are used to increase the impact of each unit to improve
    their chances of being chosen. This encourages participation of more columns
    in the learning process.

    The boosting function is a curve defined as: boostFactors = exp[ -
    boostStrength * (dutyCycle - targetDensity)] Intuitively this means that
    units that have been active (i.e. in the top-k) at the target activation
    level have a boost factor of 1, meaning their activity is not boosted.
    Units whose duty cycle is below the target are boosted depending on how
    infrequently they have been active. Units that have been active more than
    the target activation level have a boost factor below 1, meaning their
    activity is suppressed and they are less likely to be in the top-k.

    Note that we do not transmit the boosted values. We only use boosting to
    determine the winning units.

    The target activation density for each unit is k / number of units. The
    boostFactor depends on the dutyCycle via an exponential function:

            boostFactor
                ^
                |
                |\
                | \
          1  _  |  \
                |    _
                |      _ _
                |          _ _ _ _
                +--------------------> dutyCycle
                   |
              targetDensity

    :param ctx:
      Place where we can store information we will need to compute the gradients
      for the backward pass.
    :param x:
      Current activity of each unit, shape (batch, units).
    :param dutyCycles:
      The averaged duty cycle of each unit.
    :param k:
      The activity of the top k units will be allowed to remain, the rest are
      set to zero.
    :param boostStrength:
      A boost strength of 0.0 has no effect on x.
    :return:
      A tensor representing the activity of x after k-winner take all.
    """
    if boostStrength > 0.0:
      targetDensity = float(k) / x.size(1)
      boostFactors = torch.exp((targetDensity - dutyCycles) * boostStrength)
      boosted = x.detach() * boostFactors
    else:
      boosted = x.detach()

    # The winners are selected on the boosted activity, but the values that
    # are passed on are the original (un-boosted) activations of x. Everything
    # that is not a winner stays 0.
    _, indices = boosted.topk(k, dim=1, sorted=False)
    res = torch.zeros_like(x)
    res.scatter_(1, indices, x.gather(1, indices))

    ctx.save_for_backward(indices)
    return res


  @staticmethod
  def backward(ctx, grad_output):
    """
    In the backward pass the incoming gradient is passed through unchanged for
    the winning units and set to 0 for all the others. The remaining forward
    arguments (dutyCycles, k, boostStrength) receive no gradient.
    """
    indices, = ctx.saved_tensors

    # The buffer must NOT be created with requires_grad=True: writing in place
    # into a leaf that requires grad raises as soon as backward runs with grad
    # mode enabled (e.g. create_graph=True).
    grad_x = torch.zeros_like(grad_output)
    grad_x.scatter_(1, indices, grad_output.gather(1, indices))

    return grad_x, None, None, None



class k_winners2d(torch.autograd.Function):
  """
  K-winner take all autograd function for CNN 2D inputs shaped
  (batch, Channel, H, W). The k winners are chosen per sample across all
  channels and positions.

  .. seealso::
       Function :class:`k_winners`
  """


  @staticmethod
  def forward(ctx, x, dutyCycles, k, boostStrength):
    """
    Keep the k most active units of every sample and zero out the rest. When
    boosting is enabled the winners are chosen on the boosted activity, but the
    values written to the output are always the original values of x.
    See :meth:`k_winners.forward` for more details on boosting.

    :param ctx:
      Autograd context; the winner indices are stored on it for the backward
      pass.
    :param x:
      Current activity of each unit.
    :param dutyCycles:
      The averaged duty cycle of each unit (must broadcast against x).
    :param k:
      Number of units per sample whose activity is allowed to remain.
    :param boostStrength:
      A boost strength of 0.0 has no effect on x.
    :return:
      A tensor shaped like x representing its activity after k-winner take all.
    """
    numSamples = x.shape[0]

    candidates = x.detach()
    if boostStrength > 0.0:
      unitsPerSample = x.shape[1] * x.shape[2] * x.shape[3]
      targetDensity = float(k) / unitsPerSample
      boostFactors = torch.exp((targetDensity - dutyCycles) * boostStrength)
      candidates = candidates * boostFactors

    # Work on one flat row per sample so that a single topk / scatter handles
    # the whole batch.
    flatShape = (numSamples, -1)
    flatCandidates = candidates.reshape(flatShape)
    flatInput = x.reshape(flatShape)

    _, indices = flatCandidates.topk(k, dim=1, sorted=False)
    ctx.save_for_backward(indices)

    # Copy the un-boosted winner values into an otherwise all-zero output.
    winnersOnly = torch.zeros_like(flatCandidates)
    winnersOnly.scatter_(1, indices, flatInput.gather(1, indices))
    return winnersOnly.reshape(x.shape)


  @staticmethod
  def backward(ctx, grad_output):
    """
    Route the incoming gradient to the winning units only; every other unit
    gets a gradient of 0. No gradient is returned for dutyCycles, k and
    boostStrength.
    """
    indices, = ctx.saved_tensors

    flatGrad = grad_output.reshape((grad_output.shape[0], -1))
    routed = torch.zeros_like(flatGrad, requires_grad=False)
    routed.scatter_(1, indices, flatGrad.gather(1, indices))

    return routed.reshape(grad_output.shape), None, None, None

In [10]:
# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2019, Numenta, Inc.  Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program.  If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------
from __future__ import print_function
import abc

import torch
import torch.nn as nn
import torch.nn.functional as F

# from pytorch.duty_cycle_metrics import (
#   maxEntropy, binaryEntropy
# )
# from pytorch.functions import k_winners, k_winners2d



def updateBoostStrength(m):
  """
  Scale the boost strength of a KWinner module by its boost strength factor.

  Intended to be passed to :meth:`torch.nn.Module.apply` after each epoch,
  for example: ``m.apply(updateBoostStrength)``. Modules that do not derive
  from :class:`KWinnersBase`, and modules that are not in training mode, are
  left untouched.

  :param m: candidate module visited by ``apply``
  """
  if not isinstance(m, KWinnersBase):
    return
  if not m.training:
    return

  m.boostStrength = m.boostStrength * m.boostStrengthFactor



class KWinnersBase(nn.Module):
  """
  Common state and bookkeeping shared by the K-winner layers: layer size,
  number of winners, boosting parameters and the learning iteration counter.
  Subclasses provide the ``dutyCycle`` buffer and :meth:`updateDutyCycle`.
  """
  __metaclass__ = abc.ABCMeta


  def __init__(self, n, k, kInferenceFactor=1.0, boostStrength=1.0,
               boostStrengthFactor=1.0, dutyCyclePeriod=1000):
    """
    :param n:
      Number of units
    :type n: int
    :param k:
      Number of top units whose activity is kept; all other units are set to
      zero
    :type k: int
    :param kInferenceFactor:
      Factor by which k is scaled during inference (training=False).
    :type kInferenceFactor: float
    :param boostStrength:
      Boost strength, must be >= 0.0 (0.0 implies no boosting).
    :type boostStrength: float
    :param boostStrengthFactor:
      Factor the boost strength is multiplied by on every call to
      :meth:`updateBoostStrength`, [0..1]
    :type boostStrengthFactor: float
    :param dutyCyclePeriod:
      The period used to calculate duty cycles
    :type dutyCyclePeriod: int
    """
    super(KWinnersBase, self).__init__()
    assert boostStrength >= 0.0

    # Layer geometry
    self.n, self.k = n, k
    self.kInferenceFactor = kInferenceFactor

    # Number of samples seen so far while training
    self.learningIterations = 0

    # Boosting related parameters
    self.dutyCyclePeriod = dutyCyclePeriod
    self.boostStrengthFactor = boostStrengthFactor
    self.boostStrength = boostStrength


  def getLearningIterations(self):
    """
    Returns the value of the learning iteration counter
    """
    return self.learningIterations


  @abc.abstractmethod
  def updateDutyCycle(self, x):
    """
    Fold the activity of the current batch into the duty cycle estimates.
    Duty cycles are updated according to the following formula:

    .. math::
        dutyCycle = \\frac{dutyCycle \\times \\left( period - batchSize \\right)
                            + newValue}{period}

    :param x:
      Current activity of each unit
    """
    raise NotImplementedError


  def updateBoostStrength(self):
    """
    Multiply the boost strength by the boost strength factor. Does nothing
    unless the module is in training mode.
    """
    if not self.training:
      return
    self.boostStrength = self.boostStrength * self.boostStrengthFactor


  def entropy(self):
    """
    Returns the current total entropy of this layer, computed from its duty
    cycles; 0 when the layer is not sparse (k >= n).
    """
    if not (self.k < self.n):
      return 0

    _, total = binaryEntropy(self.dutyCycle)
    return total


  def maxEntropy(self):
    """
    Returns the maximum total entropy we can expect from this layer
    """
    return maxEntropy(self.n, self.k)



class KWinners(KWinnersBase):
  """
  K-Winner layer for flat (batch, units) activations.
  See :class:`k_winners` for the underlying autograd function.
  """


  def __init__(self, n, k, kInferenceFactor=1.0, boostStrength=1.0,
               boostStrengthFactor=1.0, dutyCyclePeriod=1000):
    """
    :param n:
      Number of units
    :type n: int
    :param k:
      Number of top units whose activity is kept; all other units are set to
      zero
    :type k: int
    :param kInferenceFactor:
      Factor by which k is scaled during inference (training=False).
    :type kInferenceFactor: float
    :param boostStrength:
      Boost strength (0.0 implies no boosting).
    :type boostStrength: float
    :param boostStrengthFactor:
      Boost strength factor to use [0..1]
    :type boostStrengthFactor: float
    :param dutyCyclePeriod:
      The period used to calculate duty cycles
    :type dutyCyclePeriod: int
    """
    super(KWinners, self).__init__(
      n=n,
      k=k,
      kInferenceFactor=kInferenceFactor,
      boostStrength=boostStrength,
      boostStrengthFactor=boostStrengthFactor,
      dutyCyclePeriod=dutyCyclePeriod,
    )

    # One duty cycle estimate per unit; a buffer so it is saved with the
    # module state and moved between devices together with it.
    self.register_buffer("dutyCycle", torch.zeros(self.n))


  def forward(self, x):
    """
    Apply k-winner take all to x. Falls back to a standard RELU when the layer
    is not sparse (k >= n). During training the duty cycles are updated from
    the layer output; during inference k is scaled by ``kInferenceFactor``
    (and capped at n).
    """
    if self.k >= self.n:
      return F.relu(x)

    if self.training:
      out = k_winners.apply(x, self.dutyCycle, self.k, self.boostStrength)
      self.updateDutyCycle(out)
      return out

    inferenceK = min(int(round(self.k * self.kInferenceFactor)), self.n)
    return k_winners.apply(x, self.dutyCycle, inferenceK, self.boostStrength)


  def updateDutyCycle(self, x):
    """
    Fold the current batch into the per-unit duty cycles. A unit counts as
    active for a sample when its value in x is greater than 0.

    :param x: layer output for the current batch, shape (batch, units)
    """
    batchSize = x.shape[0]
    self.learningIterations += batchSize

    # Until a full period worth of samples has been seen, average over the
    # samples seen so far.
    period = min(self.dutyCyclePeriod, self.learningIterations)
    activeCounts = x.gt(0).sum(dim=0, dtype=torch.float)

    self.dutyCycle.mul_(period - batchSize).add_(activeCounts).div_(period)



class KWinners2d(KWinnersBase):
  """
  K-Winner layer for convolutional (batch, channels, H, W) activations.
  See :class:`k_winners2d` for the underlying autograd function.
  """


  def __init__(self, n, k, channels, kInferenceFactor=1.0, boostStrength=1.0,
               boostStrengthFactor=1.0, dutyCyclePeriod=1000):
    """
    :param n:
      Number of units. Usually the output of the max pool or whichever layer
      preceding the KWinners2d layer.
    :type n: int
    :param k:
      Number of top units whose activity is kept; all other units are set to
      zero
    :type k: int
    :param channels:
      Number of channels (filters) in the convolutional layer.
    :type channels: int
    :param kInferenceFactor:
      Factor by which k is scaled during inference (training=False).
    :type kInferenceFactor: float
    :param boostStrength:
      Boost strength (0.0 implies no boosting).
    :type boostStrength: float
    :param boostStrengthFactor:
      Boost strength factor to use [0..1]
    :type boostStrengthFactor: float
    :param dutyCyclePeriod:
      The period used to calculate duty cycles
    :type dutyCyclePeriod: int
    """
    super(KWinners2d, self).__init__(
      n=n,
      k=k,
      kInferenceFactor=kInferenceFactor,
      boostStrength=boostStrength,
      boostStrengthFactor=boostStrengthFactor,
      dutyCyclePeriod=dutyCyclePeriod,
    )

    self.channels = channels

    # One duty cycle estimate per channel, shaped (1, channels, 1, 1) so it
    # broadcasts against (batch, channels, H, W) activations.
    self.register_buffer("dutyCycle", torch.zeros((1, channels, 1, 1)))


  def forward(self, x):
    """
    Apply k-winner take all to x. Falls back to a standard RELU when the layer
    is not sparse (k >= n). During training the duty cycles are updated from
    the layer output; during inference k is scaled by ``kInferenceFactor``
    (and capped at n).
    """
    if self.k >= self.n:
      return F.relu(x)

    if self.training:
      out = k_winners2d.apply(x, self.dutyCycle, self.k, self.boostStrength)
      self.updateDutyCycle(out)
      return out

    inferenceK = min(int(round(self.k * self.kInferenceFactor)), self.n)
    return k_winners2d.apply(x, self.dutyCycle, inferenceK, self.boostStrength)


  def updateDutyCycle(self, x):
    """
    Fold the current batch into the per-channel duty cycles. The activity of a
    channel is the number of its units with a value greater than 0, summed
    over the batch and divided by the number of positions (H * W).

    :param x: layer output for the current batch, shape (batch, channels, H, W)
    """
    batchSize = x.shape[0]
    self.learningIterations += batchSize

    # Until a full period worth of samples has been seen, average over the
    # samples seen so far.
    period = min(self.dutyCyclePeriod, self.learningIterations)

    unitsPerChannel = float(x.shape[2] * x.shape[3])
    channelActivity = x.gt(0).sum(dim=(0, 2, 3), dtype=torch.float) / unitsPerChannel

    self.dutyCycle.mul_(period - batchSize)
    # The flat alias of the buffer lets the (channels,) vector be added in
    # place.
    self.dutyCycle.reshape(-1).add_(channelActivity)
    self.dutyCycle.div_(period)


  def entropy(self):
    """
    Returns the current total entropy of this layer: the entropy of the
    per-channel duty cycles scaled by the number of units per channel
    (n / channels).
    """
    perChannelTotal = super(KWinners2d, self).entropy()
    return perChannelTotal * self.n / self.channels

In [13]:

# ----------------------------------------------------------------------
# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2018, Numenta, Inc.  Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program.  If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
# ----------------------------------------------------------------------

from __future__ import print_function, division

import collections
import torch

import torch.nn as nn

import Sparse as htm



class SparseNet(nn.Module):
  """
  Sparse network made of optional k-sparse CNN layers followed by optional
  k-sparse linear layers and a final fully connected output layer.
  """

  def __init__(self,
               n=2000,
               k=200,
               outChannels=0,
               c_k=0,
               kernelSize=5,
               stride=1,
               inputSize=28*28,
               outputSize=10,
               kInferenceFactor=1.0,
               weightSparsity=0.5,
               weightSparsityCNN=0.5,
               boostStrength=1.0,
               boostStrengthFactor=1.0,
               dropout=0.0,
               useBatchNorm=True,
               normalizeWeights=False,
               useSoftmax=True,
               padding=0,
               maxPoolKernel=2):
    """
    A network with one or more hidden layers, which can be a sequence of
    k-sparse CNN followed by a sequence of k-sparse linear layer with optional
    batch norm and dropout inside each k-sparse linear layer.

        [CNNSDR] x len(outChannels)
            |
        [Flatten]
            |
        [LinearSDR] x len(n)
            |
        [Linear => Softmax]

    where CNNSDR is ``Conv2d => BatchNorm2d => MaxPool2d => KWinners2d|ReLU``
    and LinearSDR is ``Linear => BatchNorm1d => Dropout => KWinners|ReLU``.

    :param n:
      Number of units in each fully connected k-sparse linear layer.
      Use 0 to disable the linear layer
    :type n: int or list[int]
    :param k:
      Number of ON (non-zero) units per iteration in each k-sparse linear layer.
      The sparsity of this layer will be k / n. If k >= n, the layer acts as a
      traditional fully connected RELU layer
    :type k: int or list[int]
    :param outChannels:
      Number of channels (filters) in each k-sparse convolutional layer.
      Use 0 to disable the CNN layer
    :type outChannels: int or list[int]
    :param c_k:
      Number of ON (non-zero) units per iteration in each k-sparse convolutional
      layer. The sparsity of this layer will be c_k / c_n. If c_k >= c_n, the
      layer acts as a traditional convolutional layer.
    :type c_k: int or list[int]
    :param kernelSize:
      Kernel size to use in each k-sparse convolutional layer.
    :type kernelSize: int or list[int]
    :param stride:
      Stride value to use in each k-sparse convolutional layer.
    :type stride: int or list[int]
    :param inputSize:
      If the CNN layer is enabled this parameter holds a tuple representing
      (in_channels,height,width). Otherwise it will hold the total
      dimensionality of input vector of the first linear layer. The data is
      flattened (all dimensions but the batch one) before it is passed to the
      linear layers.
    :type inputSize: int or tuple[int,int,int]
    :param outputSize:
      Total dimensionality of output vector
    :type outputSize: int
    :param kInferenceFactor:
      During inference (training=False) we increase k by this factor.
    :type kInferenceFactor: float
    :param weightSparsity:
      Pct of weights that are allowed to be non-zero in each linear layer.
    :type weightSparsity: float or list[float]
    :param weightSparsityCNN:
      Pct of weights that are allowed to be non-zero in each convolutional layer.
    :type weightSparsityCNN: float or list[float]
    :param boostStrength:
      boost strength (0.0 implies no boosting).
    :type boostStrength: float
    :param boostStrengthFactor:
      boost strength is multiplied by this factor after each epoch.
      A value < 1.0 will decrement it every epoch.
    :type boostStrengthFactor: float
    :param dropout:
      dropout probability applied inside every k-sparse linear layer.
      A value 0.0 implies no dropout
    :type dropout: float
    :param useBatchNorm:
      If True, applies batchNorm for each layer.
    :type useBatchNorm: bool
    :param normalizeWeights:
      If True, each LinearSDR layer will have its weights normalized to the
      number of non-zeros instead of the whole input size
    :type normalizeWeights: bool
    :param useSoftmax:
      If True, use (log) soft max to compute probabilities
    :type useSoftmax: bool
    :param padding:
      cnn layer Zero-padding added to both sides of the input
    :type padding: int or list[int]
    :param maxPoolKernel:
      The size of the window to take a max over
    :type maxPoolKernel: int
    """
    super(SparseNet, self).__init__()

    # ``collections.Sequence`` was removed in Python 3.10; the ABCs live in
    # ``collections.abc``. Fall back to the old location for Python 2.
    try:
      from collections.abc import Sequence
    except ImportError:
      from collections import Sequence

    # Validate CNN sdr params
    if isinstance(inputSize, Sequence):
      assert inputSize[1] == inputSize[2], "sparseCNN only supports square images"

    # Scalar CNN parameters are broadcast to one value per CNN layer
    if type(outChannels) is not list:
      outChannels = [outChannels]
    if type(c_k) is not list:
      c_k = [c_k] * len(outChannels)
    assert(len(outChannels) == len(c_k))
    if type(kernelSize) is not list:
      kernelSize = [kernelSize] * len(outChannels)
    assert(len(outChannels) == len(kernelSize))
    if type(stride) is not list:
      stride = [stride] * len(outChannels)
    assert(len(outChannels) == len(stride))
    if type(padding) is not list:
      padding = [padding] * len(outChannels)
    assert(len(outChannels) == len(padding))
    if type(weightSparsityCNN) is not list:
      weightSparsityCNN = [weightSparsityCNN] * len(outChannels)
    assert(len(outChannels) == len(weightSparsityCNN))
    for i in range(len(outChannels)):
      assert (weightSparsityCNN[i] >= 0)

    # Validate linear sdr params; scalars are broadcast to one value per
    # linear layer
    if type(n) is not list:
      n = [n]
    if type(k) is not list:
      k = [k] * len(n)
    assert(len(n) == len(k))
    for i in range(len(n)):
      assert(k[i] <= n[i])
    if type(weightSparsity) is not list:
      weightSparsity = [weightSparsity] * len(n)
    assert(len(n) == len(weightSparsity))
    for i in range(len(n)):
      assert (weightSparsity[i] >= 0)

    self.k = k
    self.kInferenceFactor = kInferenceFactor
    self.n = n
    self.outChannels = outChannels
    self.c_k = c_k
    self.inputSize = inputSize
    self.weightSparsity = weightSparsity   # Pct of weights that are non-zero
    self.boostStrengthFactor = boostStrengthFactor
    self.boostStrength = boostStrength
    self.kernelSize = kernelSize
    self.stride = stride
    self.padding = padding
    self.learningIterations = 0

    inputFeatures = inputSize
    outputLength = inputFeatures
    cnnSdr = nn.Sequential()

    # CNN Layers
    for i in range(len(outChannels)):
      if outChannels[i] != 0:
        inChannels, h, w = inputFeatures
        cnn = nn.Conv2d(in_channels=inChannels,
                        out_channels=outChannels[i],
                        kernel_size=kernelSize[i],
                        padding=padding[i],
                        stride=stride[i])

        if 0 < weightSparsityCNN[i] < 1:
          sparseCNN = htm.SparseWeights2d(cnn, weightSparsityCNN[i])
          cnnSdr.add_module("cnnSdr{}_cnn".format(i + 1), sparseCNN)
        else:
          cnnSdr.add_module("cnnSdr{}_cnn".format(i + 1), cnn)

        # Batch Norm
        if useBatchNorm:
          bn = nn.BatchNorm2d(outChannels[i], affine=False)
          cnnSdr.add_module("cnnSdr{}_bn".format(i + 1), bn)

        # Max pool
        maxpool = nn.MaxPool2d(kernel_size=maxPoolKernel)
        cnnSdr.add_module("cnnSdr{}_maxpool".format(i + 1), maxpool)

        # Width of the (square) feature map after the convolution and after
        # the max pool. MaxPool2d defaults its stride to the kernel size, so
        # the pooled width is the convolution width floor-divided by
        # maxPoolKernel (not by a hard-coded 2).
        wout = (w + 2 * padding[i] - kernelSize[i]) // stride[i] + 1
        maxpoolWidth = wout // maxPoolKernel
        outputLength = maxpoolWidth * maxpoolWidth * outChannels[i]
        if 0 < c_k[i] < outputLength:
          kwinner = KWinners2d(n=outputLength, k=c_k[i],
                               channels=outChannels[i],
                               kInferenceFactor=kInferenceFactor,
                               boostStrength=boostStrength,
                               boostStrengthFactor=boostStrengthFactor)
          cnnSdr.add_module("cnnSdr{}_kwinner".format(i + 1), kwinner)
        else:
          cnnSdr.add_module("cnnSdr{}_relu".format(i + 1), nn.ReLU())

        # Feed this layer output into next layer input
        inputFeatures = (outChannels[i], maxpoolWidth, maxpoolWidth)

    if len(cnnSdr) > 0:
      # The linear layers see the flattened output of the last CNN layer
      inputFeatures = outputLength
      self.cnnSdr = cnnSdr
    else:
      self.cnnSdr = None

    # Flatten input before passing to linear layers
    self.flatten = torch.nn.Flatten()

    # Linear layers
    self.linearSdr = nn.Sequential()

    for i in range(len(n)):
      if n[i] != 0:
        linear = nn.Linear(inputFeatures, n[i])
        if 0 < weightSparsity[i] < 1:
          linear = htm.SparseWeights(linear, weightSparsity=weightSparsity[i])
          if normalizeWeights:
            linear.apply(htm.normalizeSparseWeights)
        self.linearSdr.add_module("linearSdr{}".format(i + 1), linear)

        if useBatchNorm:
          self.linearSdr.add_module("linearSdr{}_bn".format(i + 1),
                                    nn.BatchNorm1d(n[i], affine=False))

        if dropout > 0.0:
          self.linearSdr.add_module("linearSdr{}_dropout".format(i + 1),
                                    nn.Dropout(dropout))

        if 0 < k[i] < n[i]:
          kwinner = KWinners(n=n[i], k=k[i],
                             kInferenceFactor=kInferenceFactor,
                             boostStrength=boostStrength,
                             boostStrengthFactor=boostStrengthFactor)
          self.linearSdr.add_module("linearSdr{}_kwinner".format(i + 1), kwinner)
        else:
          self.linearSdr.add_module("linearSdr{}_relu".format(i + 1), nn.ReLU())

        # Feed this layer output into next layer input
        inputFeatures = n[i]

    # Add one fully connected layer after all hidden layers
    self.fc = nn.Linear(inputFeatures, outputSize)

    # Use softmax to compute probabilities
    if useSoftmax:
      self.softmax = nn.LogSoftmax(dim=1)
    else:
      self.softmax = None


  def postEpoch(self):
    """
    Housekeeping to run after every epoch: decay the boost strength of all the
    k-winner layers and re-apply the weight sparsity masks.
    """
    # The k-winner layers of this network are instances of the KWinners /
    # KWinners2d classes defined in this file, so the boost strength update
    # must be the one that checks against this file's KWinnersBase.
    self.apply(updateBoostStrength)
    self.apply(htm.rezeroWeights)


  def forward(self, x):
    """
    Run x through the CNN layers (if any), flatten, then through the linear
    layers, the output layer and the optional log soft max. While training,
    the learning iteration counter is advanced by the batch size.
    """
    if self.cnnSdr is not None:
      x = self.cnnSdr(x)
    x = self.flatten(x)
    x = self.linearSdr(x)
    x = self.fc(x)

    if self.softmax is not None:
      x = self.softmax(x)

    if self.training:
      batchSize = x.shape[0]
      self.learningIterations += batchSize

    return x


  def getLearningIterations(self):
    """
    Returns the number of samples seen so far while training
    """
    return self.learningIterations

  def maxEntropy(self):
    """
    Returns the sum of the maximum entropy of every sub-module that exposes a
    ``maxEntropy`` method
    """
    entropy = 0
    for module in self.modules():
      # modules() also yields the network itself; skip it to avoid recursion
      if module is self:
        continue
      if hasattr(module, "maxEntropy"):
        entropy += module.maxEntropy()

    return entropy

  def entropy(self):
    """
    Returns the current entropy: the sum of the entropy of every sub-module
    that exposes an ``entropy`` method
    """
    entropy = 0
    for module in self.modules():
      # modules() also yields the network itself; skip it to avoid recursion
      if module is self:
        continue
      if hasattr(module, "entropy"):
        entropy += module.entropy()

    return entropy


  def pruneWeights(self, minWeight):
    """
    Prune all the weights whose absolute magnitude is less than minWeight
    :param minWeight: min weight to prune. If zero then no pruning
    :type minWeight: float
    """
    if minWeight == 0.0:
      return

    # Every parameter with 'weight' in its name is pruned in place
    for name, w in self.named_parameters():
      if 'weight' not in name:
        continue
      # 1 for the weights at or above the threshold, 0 for the others
      mask = torch.ge(torch.abs(w.data), minWeight)
      w.data.mul_(mask.to(w.dtype))

  def pruneDutycycles(self, threshold=0.0):
    """
    Prune all the units with dutycycles whose absolute magnitude is less than
    the given threshold. The work is delegated to every sub-module that
    exposes a ``pruneDutycycles`` method.
    :param threshold: min threshold to prune. If less than zero then no pruning
    :type threshold: float
    """
    if threshold < 0.0:
      return

    for m in self.modules():
      if m is self:
        continue
      if hasattr(m, 'pruneDutycycles'):
        m.pruneDutycycles(threshold)

In [28]:
# Build a network with a single 32-filter sparse CNN layer (25 winners) on
# 1x24x24 inputs; every other argument keeps its default (2000-unit linear
# layer with 200 winners, 10 outputs). The bare `net` on the last line makes
# the notebook print the module tree.
net = SparseNet(outChannels=[32], inputSize=(1,24,24), c_k=25)
net

SparseNet(
  (cnnSdr): Sequential(
    (cnnSdr1_cnn): SparseWeights2d(
      (module): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    )
    (cnnSdr1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (cnnSdr1_maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnnSdr1_kwinner): KWinners2d()
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linearSdr): Sequential(
    (linearSdr1): SparseWeights(
      (module): Linear(in_features=3200, out_features=2000, bias=True)
    )
    (linearSdr1_bn): BatchNorm1d(2000, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
    (linearSdr1_kwinner): KWinners()
  )
  (fc): Linear(in_features=2000, out_features=10, bias=True)
  (softmax): LogSoftmax(dim=1)
)

In [27]:
# Scratch check of nn.Hardsigmoid on two random values.
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the kernel session, and no seed is set, so the printed values differ
# on every run.
m = nn.Hardsigmoid()
input = torch.randn(2)
output = m(input)

print(input)
print(output)

tensor([ 0.4544, -1.6328])
tensor([0.5757, 0.2279])


In [35]:
# Take the first module of the CNN Sequential (the module registered as
# "cnnSdr1_cnn") for inspection in the following cells.
a = net.cnnSdr[0]
a

SparseWeights2d(
  (module): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
)

In [34]:
# Shape of the wrapper's `zeroWts` tensor.
# NOTE(review): presumably a (2, N) table of (filter, weight) indices that are
# forced to zero — confirm against the Sparse module.
a.zeroWts.shape

torch.Size([2, 384])

In [42]:
# First row of `zeroWts`.
# NOTE(review): presumably the filter index of every zeroed weight — confirm
# against the Sparse module.
a.zeroWts[0]

tensor([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
         6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
        12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13,
        13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
        18, 18, 18, 18, 18, 18, 18, 18, 

In [39]:
# Shape of the weight tensor of the wrapped Conv2d.
a.module.weight.data.shape

torch.Size([32, 1, 5, 5])

In [48]:
# Number of `zeroWts` columns (384) divided by the number of filters (32).
384/32

12.0