In [5]:
from __future__ import print_function
import numpy as np
from time import sleep
import random
import sys
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
random.seed(10)

def getSample(stringLength, testFlag):
  lowerBound=pow(2,stringLength-1)+1
  upperBound=pow(2,stringLength)

  num1=random.randint(lowerBound,upperBound)
  num2=random.randint(lowerBound,upperBound)

  num3=num1+num2
  num3Binary=(bin(num3)[2:])
  num1Binary=(bin(num1)[2:])
  num2Binary=(bin(num2)[2:])

  if testFlag==1:
    print('input numbers and their sum  are', num1, ' ', num2, ' ', num3)
    print ('binary strings are', num1Binary, ' ' , num2Binary, ' ' , num3Binary)
  len_num3= (len(num3Binary))

  # since num3 will be the largest, we pad other numbers with zeros to that num3_len
  num1Binary= ('0'*(len(num3Binary)-len(num1Binary))+num1Binary)
  num2Binary= ('0'*(len(num3Binary)-len(num2Binary))+num2Binary)

  # forming the input sequence
  # the input at first timestep is the least significant bits of the two input binary strings
  # x will be then a len_num3 ( or T ) * 2 array
  x=np.zeros((len_num3,2),dtype=np.float32)
  for i in range(0, len_num3):
    x[i,0]=num1Binary[len_num3-1-i] # note that MSB of the binary string should be the last input along the time axis
    x[i,1]=num2Binary[len_num3-1-i]
  y = np.fromiter(num3Binary[::-1], dtype=np.int)
  return x,y

class Model(nn.Module):
  def __init__(self, inputDim, hiddenDim, outputDim):
    super(Model, self).__init__()
    self.inputDim = inputDim
    self.hiddenDim = hiddenDim
    self.outputDim = outputDim
    self.rnn = nn.LSTM(inputDim, hiddenDim)
    self.outputLayer = nn.Linear(hiddenDim, outputDim)
    self.sigmoid = nn.Sigmoid()
  def forward(self, x):
    #size of x is T x B x featDim
    #B = 1 is dummy batch dimension added, because pytorch mandates it
    #if you want B as first dimension of x then specify batchFirst=True when LSTM is initalized
    #T,D  = x.size(0), x.size(1)
    #batch is a must
    out,hidden = self.rnn(x) #x has two  dimensions  seqLen *batch* FeatDim=2
    T,B,D  = out.size(0), out.size(1), out.size(2)
    out = out.contiguous()
    out = out.view(B*T, D)
    outputLayerActivations = self.outputLayer(out)
    outputSigmoid = self.sigmoid(outputLayerActivations)
    return outputSigmoid

featDim = 2 # two bits each from each of the String
outputDim = 1 # one output node which would output a zero or 1

rnnSize=10

lossFunction = nn.MSELoss()
model = Model(featDim, rnnSize, outputDim)
optimizer=optim.Adam(model.parameters(),lr=0.001)
epochs=500
totalLoss= float("inf")

print(" Avg. Loss for last 500 samples = %lf"%(totalLoss))
totalLoss=0
for i in range(0,epochs): # average the loss over 200 samples
    stringLen=4
    testFlag=0
    x,y = getSample(stringLen, testFlag)

    model.zero_grad()

    x_var=autograd.Variable(torch.from_numpy(x).unsqueeze(1).float()) #convert to torch tensor and variable
    # unsqueeze() is used to add the extra dimension since
    # your input need to be of t*batchsize*featDim; you cant do away with the batch in pytorch
    seqLen = x_var.size(0)
    x_var = x_var.contiguous()
    y_var = autograd.Variable(torch.from_numpy(y).float())
    finalScores = model(x_var)

    loss = lossFunction(finalScores,y_var)
    totalLoss+=loss.data
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

totalLoss = totalLoss/epochs
print('Final total loss is:' + str(totalLoss))

 Avg. Loss for last 500 samples = inf
Final total loss is:tensor(0.2328)


In [6]:
###### Testing the model ######

stringLen=7
testFlag=1
# test the network on 10 random binary string addition cases where stringLen=4
for i in range (0,10):
	x,y=getSample(stringLen,testFlag)
	x_var=autograd.Variable(torch.from_numpy(x).unsqueeze(1).float())
	y_var=autograd.Variable(torch.from_numpy(y).float())
	seqLen=x_var.size(0)
	x_var= x_var.contiguous()
	finalScores = model(x_var).data.t()
	#print(finalScores)
	bits=finalScores.gt(0.5)
	bits=bits[0].numpy()

	print ('sum predicted by RNN is ',bits[::-1])
	print('##################################################')


input numbers and their sum  are 83   93   176
binary strings are 1010011   1011101   10110000
sum predicted by RNN is  [ True  True  True  True  True  True  True  True]
##################################################
input numbers and their sum  are 100   93   193
binary strings are 1100100   1011101   11000001
sum predicted by RNN is  [ True  True  True  True  True  True  True  True]
##################################################
input numbers and their sum  are 79   72   151
binary strings are 1001111   1001000   10010111
sum predicted by RNN is  [ True  True  True  True  True  True  True  True]
##################################################
input numbers and their sum  are 72   118   190
binary strings are 1001000   1110110   10111110
sum predicted by RNN is  [ True  True  True  True  True  True  True  True]
##################################################
input numbers and their sum  are 116   69   185
binary strings are 1110100   1000101   10111001
sum predicted by R