In [1]:
# https://github.com/mineshmathew/pyTorch_RNN_Examples/blob/master/BinaryStringAddition/AddBinaryStrings.py.ipynb
# ============================================================================
# Make a simple RNN learn binary addition
# ============================================================================
# author  mineshmathew.github.io
# ============================================================================

from __future__ import print_function
import numpy as np
from time import sleep
import random
import sys
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.random.manual_seed(10)

def getSample(stringLength, testFlag):
  #takes stringlength as input
  #returns a sample for the network - an input sequence - x and its target -y
  #x is a T*2 array, T is the length of the string and 2 since we take one bit each from each string
  #testFlag if set prints the input numbers and its sum in both decimal and binary form
  lowerBound=pow(2,stringLength-1)+1
  upperBound=pow(2,stringLength)

  num1=random.randint(lowerBound,upperBound)
  num2=random.randint(lowerBound,upperBound)

  num3=num1+num2
  num3Binary=(bin(num3)[2:])
  num1Binary=(bin(num1)[2:])
  num2Binary=(bin(num2)[2:])

  if testFlag==1:
    print('input numbers and their sum are: ', num1, ', ', num2, ', ', num3)
    print ('binary strings are: ', num1Binary, ', ' , num2Binary, ', ' , num3Binary)
  len_num1= (len(num1Binary))
  len_num2= (len(num2Binary))
  len_num3= (len(num3Binary))

  # since num3 will be the largest, we pad other numbers with zeros to that num3_len
  num1Binary= ('0'*(len(num3Binary)-len(num1Binary))+num1Binary)
  num2Binary= ('0'*(len(num3Binary)-len(num2Binary))+num2Binary)

  #num1Binary = np.array(list(reversed(num1Binary)))
  #num2Binary = np.array(list(reversed(num2Binary)))
  #num3Binary = np.array(list(reversed(num3Binary)))

  # forming the input sequence
  # the input at first timestep is the least significant bits of the two input binary strings
  # x will be then a len_num3 ( or T ) * 2 array
  x=np.zeros((len_num3,2),dtype=np.int)
  for i in range(0, len_num3):
    x[i,0]=num1Binary[len_num3-1-i] # note that MSB of the binary string should be the last input along the time axis
    x[i,1]=num2Binary[len_num3-1-i]
  # target vector is the sum in binary
  # convert binary string in <string> to a numpy 1D array
  #https://stackoverflow.com/questions/29091869/convert-bitstring-string-of-1-and-0s-to-numpy-array
  # print('num3Binary')
  #y=np.array(map(int, num3Binary[::-1]))
  y = np.fromiter(num3Binary[::-1], dtype=np.int)
  # print('x and y are')
  # print (x)
  # print (y)
  if testFlag==1:
    print('a,b,c current  are: {},{},{}'.format(np.array(x[:,0]),
                                              np.array(x[:,1]),
                                              np.array(y)))
  return x,y

class Adder (nn.Module):
  def __init__(self, inputDim, hiddenDim, outputDim):
    super(Adder, self).__init__()
    self.inputDim=inputDim
    self.hiddenDim=hiddenDim
    self.outputDim=outputDim
    self.rnn=nn.RNN(inputDim, hiddenDim)
    self.outputLayer=nn.Linear(hiddenDim, outputDim)
    self.sigmoid=nn.Sigmoid()
  def forward(self, x):
    #size of x is T x B x featDim
    #B=1 is dummy batch dimension added, because pytorch mandates it
    #if you want B as first dimension of x then specift batchFirst=True when LSTM is initalized
    #T,D  = x.size(0), x.size(1)
    #batch is a must
    rnnOut,_ = self.rnn(x) #x has two  dimensions  seqLen *batch* FeatDim=2
    T,B,D  = rnnOut.size(0),rnnOut.size(1),rnnOut.size(2)
    rnnOut = rnnOut.contiguous()
        # before  feeding to linear layer we squash one dimension
    rnnOut = rnnOut.view(B*T, D)
    outputLayerActivations=self.outputLayer(rnnOut)
    #reshape activations to T*B*outputlayersize
    outputLayerActivations=outputLayerActivations.view(T,B,-1).squeeze(1)
    outputSigmoid=self.sigmoid(outputLayerActivations)
    return outputSigmoid

featDim=2 #two bits each from each of the String
outputDim=1 #one output node which would output a zero or 1
lstmSize=16

lossFunction = nn.MSELoss()
model = Adder(featDim, lstmSize, outputDim)
print ('Model initialized')
#optimizer = optim.SGD(model.parameters(), lr=3e-2, momentum=0.8)
optimizer = optim.SGD(model.parameters(), lr=0.1)
#optimizer=optim.Adam(model.parameters(), lr=0.1)
epochs=15000
### epochs ##
#totalLoss= float("inf")
#print("Avg. Loss for last 500 samples = %lf"%(totalLoss))
totalLoss=0
for i in range(0,epochs): # average the loss over 200 samples
    stringLen=8
    testFlag=0
    x,y=getSample(stringLen, testFlag)

    model.zero_grad()

    x_var=autograd.Variable(torch.from_numpy(x).unsqueeze(1).float()) #convert to torch tensor and variable
    # unsqueeze() is used to add the extra dimension since
    # your input need to be of t*batchsize*featDim; you cant do away with the batch in pytorch
    seqLen=x_var.size(0)
    x_var= x_var.contiguous()
    y_var=autograd.Variable(torch.from_numpy(y).float())
    finalScores = model(x_var)
    #finalScores=finalScores.

    loss=lossFunction(finalScores,y_var)
    totalLoss+=loss.data
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

totalLoss=totalLoss/epochs
print('Final total loss is:' + str(totalLoss))


Model initialized
Final total loss is:tensor(0.2468)


  return F.mse_loss(input, target, reduction=self.reduction)


In [2]:
###### Testing the model ######

stringLen=5
testFlag=1
# test the network on 10 random binary string addition cases where stringLen=4
for i in range (0,10):
	x,y=getSample(stringLen,testFlag)
	print('x and y are: {}, {}'.format(x,y))
	x_var=autograd.Variable(torch.from_numpy(x).unsqueeze(1).float())
	y_var=autograd.Variable(torch.from_numpy(y).float())
	seqLen=x_var.size(0)
	x_var= x_var.contiguous()
	finalScores = model(x_var).data.t()
	print('model output: {}'.format(finalScores))
	bits=np.round(finalScores)
	print('result is {}'.format(bits))

	#print('sum predicted by RNN is ',bits[::-1])
	print('--------------------------------------------')

input numbers and their sum are:  29 ,  17 ,  46
binary strings are:  11101 ,  10001 ,  101110
a,b,c current  are: [1 0 1 1 1 0],[1 0 0 0 1 0],[0 1 1 1 0 1]
x and y are: [[1 1]
 [0 0]
 [1 0]
 [1 0]
 [1 1]
 [0 0]], [0 1 1 1 0 1]


RuntimeError: t() expects a tensor with <= 2 dimensions, but self is 3D