# Subset Sum Problem
The [subset sum problem](https://en.wikipedia.org/wiki/Subset_sum_problem) asks: given a set of numbers, find a subset whose elements add up to a given target number.

## Implementation

For example, let us take a set $S$ and a target $T$:

$$
T = 7.0
$$

\begin{equation*}
S = 
\begin{bmatrix}
1.0 & 2.0 & 3.0 & 4.0 & 5.0 \\
\end{bmatrix}
\end{equation*}

Our goal is to find a mask $M$ such that the dot product of $M$ and $S$ equals the target. Here is an example of a mask that adds up to our target.

\begin{equation*}
M = 
\begin{bmatrix}
0.0 & 0.0 & 1.0 & 1.0 & 0.0 \\
\end{bmatrix}
\end{equation*}

We can verify that 

$$ T = M \cdot S $$


In [1]:
import tensorflow as tf

In [2]:
@tf.function
def compute_subset_sum(S, M):
    """Return the dot product of the set ``S`` and the mask ``M``.

    ``S`` and ``M`` are contracted over one axis with ``tf.tensordot``; for
    the 1-D tensors used in this notebook that is the sum of the elements of
    ``S`` weighted by the matching entries of ``M``.
    """
    masked_total = tf.tensordot(S, M, axes=1)
    return masked_total

# Example set and a hand-picked mask that selects the elements 3 and 4.
S = tf.Variable([1,2,3,4,5],dtype=tf.float32)
M = tf.Variable([0,0,1,1,0],dtype=tf.float32)

# Both gradients are requested in a single tape.gradient call, so the tape
# does not have to be persistent and its resources are released right after
# that call instead of lingering until the tape is garbage collected.
with tf.GradientTape() as tape:
    T_ = compute_subset_sum(S, M)

grad_S, grad_M = tape.gradient(T_, [S, M])

print(T_)
print(grad_S)  # gradient of T_ with respect to S
print(grad_M)  # gradient of T_ with respect to M

tf.Tensor(7.0, shape=(), dtype=float32)
tf.Tensor([0. 0. 1. 1. 0.], shape=(5,), dtype=float32)
tf.Tensor([1. 2. 3. 4. 5.], shape=(5,), dtype=float32)


## Training

However, if we train as is, $M$ does not converge to a 0/1 mask. Instead it becomes a vector of real-valued weights, so $M \cdot S$ is merely a linear combination of the inputs that happens to equal $T$.

In [3]:
opt = tf.keras.optimizers.Adam(learning_rate=5e-3)


@tf.function
def train_step(S, M, T):
    """Apply one Adam update to the mask ``M`` so that ``S . M`` approaches ``T``.

    The loss is ``tf.nn.l2_loss`` of the difference between the predicted
    subset sum and ``T``. Only ``M`` receives a gradient update; ``S`` is
    used as-is.

    Returns:
        A ``(loss, T_)`` pair: the loss and the predicted sum, both computed
        before the update is applied.
    """
    with tf.GradientTape() as tape:
        T_ = compute_subset_sum(S, M)
        residual = T_ - T
        loss = tf.nn.l2_loss(residual)

    # Differentiate with respect to the mask only, then step the optimizer.
    grad_M = tape.gradient(loss, M)
    opt.apply_gradients([(grad_M, M)])

    return loss, T_

# Start from an all-ones mask and fit it so that S . M approaches T.
S = tf.Variable([1.0, 2.0, 3.0, 4.0, 5.0], dtype=tf.float32)
M = tf.Variable(tf.ones([5], dtype=tf.float32))
T = 7

for i in range(1000):
    loss, T_ = train_step(S, M, T)
    if i % 100 != 0:
        continue
    # Every 100th step: also report the sum obtained with the rounded mask.
    actual = compute_subset_sum(S, tf.round(M))
    tf.print(loss, M, T_, actual)

32 [0.994999826 0.994999826 0.994999826 0.994999826 0.994999826] 15 15
2.11328506 [0.601275 0.601274908 0.601274729 0.601274788 0.601274908] 9.05586243 15
0.0201957505 [0.479682386 0.479682267 0.479682088 0.479682148 0.479682267] 7.20097637 0
1.51871245e-05 [0.467018306 0.467018187 0.467018 0.467018068 0.467018187] 7.00551128 0
7.27595761e-12 [0.466667056 0.466666937 0.466666758 0.466666818 0.466666937] 7.00000381 0
5.57065505e-12 [0.466666609 0.46666649 0.466666311 0.466666371 0.46666649] 6.99999666 0
5.57065505e-12 [0.466666609 0.46666649 0.466666311 0.466666371 0.46666649] 6.99999666 0
5.57065505e-12 [0.466666609 0.46666649 0.466666311 0.466666371 0.46666649] 6.99999666 0
5.57065505e-12 [0.466666609 0.46666649 0.466666311 0.466666371 0.46666649] 6.99999666 0
5.57065505e-12 [0.466666609 0.46666649 0.466666311 0.466666371 0.46666649] 6.99999666 0


### Bistable loss
To force the values to be close to 0 and 1, we introduce the [Bistable Loss](notebooks/boolean-satisfiability.ipynb)

In [4]:
from library.loss import bistable_loss

On retraining, we find that each element of the mask is now closer to 0 or 1.

In [5]:
opt = tf.keras.optimizers.Adam(learning_rate=5e-3)


@tf.function
def train_step(S, M, T):
    """Apply one Adam update to ``M`` using the target loss plus a mask penalty.

    The loss is the sum of two terms:

    * ``tf.nn.l2_loss`` of the difference between the predicted subset sum
      and ``T``;
    * ten times the summed ``bistable_loss`` of ``M`` (``bistable_loss`` is
      imported from ``library.loss``; the notebook text says it is meant to
      push values towards 0 and 1).

    Returns:
        A ``(loss, T_)`` pair: the combined loss and the predicted sum, both
        computed before the update is applied.
    """
    with tf.GradientTape() as tape:
        T_ = compute_subset_sum(S, M)
        target_loss = tf.nn.l2_loss(T_ - T)
        mask_penalty = tf.reduce_sum(bistable_loss(M)) * 10
        loss = target_loss + mask_penalty

    # Differentiate with respect to the mask only, then step the optimizer.
    grad_M = tape.gradient(loss, M)
    opt.apply_gradients([(grad_M, M)])

    return loss, T_

# Same setup as the plain run: all-ones starting mask, target of 7.
S = tf.Variable([1.0, 2.0, 3.0, 4.0, 5.0], dtype=tf.float32)
M = tf.Variable(tf.ones([5], dtype=tf.float32))
T = 7

for i in range(1000):
    loss, T_ = train_step(S, M, T)
    if i % 100 != 0:
        continue
    # Every 100th step: also report the sum obtained with the rounded mask.
    actual = compute_subset_sum(S, tf.round(M))
    tf.print(loss, M, T_, actual)

32 [0.994999826 0.994999826 0.994999826 0.994999826 0.994999826] 15 15
5.2937994 [0.669517 0.625239313 0.614203155 0.609240353 0.606423259] 9.26705265 15
2.96882701 [0.707282722 0.499178141 0.474773556 0.466818601 0.463050485] 7.31969452 1
2.40181923 [0.987101078 0.454701811 0.422654927 0.420172453 0.420870364] 6.95283318 1
2.37065029 [1.01076198 0.424444467 0.392945588 0.407994449 0.419168055] 6.76682663 1
2.31917667 [1.01266503 0.372789234 0.362577498 0.415547 0.444725811] 6.73213863 1
2.06843424 [1.01433361 0.235568792 0.310641587 0.442599148 0.502211452] 6.69892883 6
1.52477646 [1.01065075 0.0557946712 0.212935656 0.499629229 0.609795094] 6.80628681 6
1.02521729 [0.999320805 0.00256707473 0.0553062148 0.550387084 0.734053254] 7.03983212 10
0.856600106 [0.989116728 -0.0184933655 -0.0223721061 0.554791 0.823360503] 7.2201438 10


### One hot softmax

To further make sure that the mask remains either 0 or 1, we increase the dimensionality of $M$ and apply softmax along the vertical axis.

\begin{equation*}
M = 
\begin{bmatrix}
0 & 0 & 1 & 1 & 0 \\
\end{bmatrix}
\end{equation*}

becomes

\begin{equation*}
M = 
\begin{bmatrix}
1 & 1 & 0 & 0 & 1 \\
0 & 0 & 1 & 1 & 0 \\
\end{bmatrix}
\end{equation*}

Therefore, $\bar{T}$ becomes

$$ M_s = \mathrm{softmax}(M, \text{axis} = \text{vertical}) $$

$$ \bar{T} = \frac{(M_s[1] \cdot S) + ((1 - M_s[0]) \cdot S)}{2} $$

In [6]:
@tf.function
def compute_subset_sum_v2(S, M):
    """Return the subset sum of ``S`` for a two-channel mask ``M``.

    ``M`` is transposed first, so that index 0 and index 1 of the result
    select the two mask channels. The sum is estimated twice, once from
    channel 1 directly and once from the complement of channel 0, and the
    two estimates are averaged.
    """
    channels = tf.transpose(M)
    excluded = channels[0]
    included = channels[1]

    from_included = tf.tensordot(S, included, axes=1)
    from_excluded = tf.tensordot(S, 1 - excluded, axes=1)
    return (from_included + from_excluded) / 2

# Example set and the one-hot encoding of the mask [0, 0, 1, 1, 0]:
# one row per element of S, one column per channel.
S = tf.Variable([1,2,3,4,5],dtype=tf.float32)
M = tf.Variable(tf.one_hot([0,0,1,1,0], 2),dtype=tf.float32)

# Both gradients are requested in a single tape.gradient call, so the tape
# does not have to be persistent and its resources are released right after
# that call instead of lingering until the tape is garbage collected.
with tf.GradientTape() as tape:
    M_s = tf.nn.softmax(M, axis=1)
    T_ = compute_subset_sum_v2(S, M_s)

grad_S, grad_M = tape.gradient(T_, [S, M])

tf.print(tf.transpose(M))
tf.print(T_)
tf.print(grad_S)  # gradient of T_ with respect to S
tf.print(grad_M)  # gradient of T_ with respect to the raw (pre-softmax) M

[[1 1 0 0 1]
 [0 0 1 1 0]]
7.2689414
[0.268941402 0.268941402 0.731058598 0.731058598 0.268941402]
[[-0.196611926 0.196611941]
 [-0.393223852 0.393223882]
 [-0.589835823 0.589835823]
 [-0.786447763 0.786447704]
 [-0.983059645 0.983059704]]


In [7]:
opt = tf.keras.optimizers.Adam(learning_rate=5e-3)


@tf.function
def train_step(S, M, T):
    """Apply one Adam update to the raw two-channel mask ``M``.

    ``M`` is passed through a softmax over axis 1 before use. The loss is
    the sum of two terms:

    * ``tf.nn.l2_loss`` of the difference between the predicted subset sum
      (from ``compute_subset_sum_v2``) and ``T``;
    * ten times the summed ``bistable_loss`` of the softmaxed mask.

    Gradients are taken with respect to the raw ``M``, not the softmax
    output.

    Returns:
        A ``(loss, T_)`` pair: the combined loss and the predicted sum, both
        computed before the update is applied.
    """
    with tf.GradientTape() as tape:
        M_s = tf.nn.softmax(M, axis=1)
        T_ = compute_subset_sum_v2(S, M_s)
        target_loss = tf.nn.l2_loss(T_ - T)
        mask_penalty = tf.reduce_sum(bistable_loss(M_s)) * 10
        loss = target_loss + mask_penalty

    # Differentiate with respect to the raw mask, then step the optimizer.
    grad_M = tape.gradient(loss, M)
    opt.apply_gradients([(grad_M, M)])

    return loss, T_

# Every row of the initial raw mask is the one-hot vector for class 1.
S = tf.Variable([1.0, 2.0, 3.0, 4.0, 5.0], dtype=tf.float32)
M = tf.Variable(tf.one_hot([1] * 5, depth=2), dtype=tf.float32)
T = 7

for i in range(1000):
    loss, T_ = train_step(S, M, T)
    if i % 100 != 0:
        continue
    # Every 100th step: softmax the mask both ways, once in its original
    # layout for rounding and once transposed for printing.
    M_s = tf.nn.softmax(M, axis=1)
    M_T = tf.nn.softmax(tf.transpose(M), axis=0)
    actual = compute_subset_sum_v2(S, tf.round(M_s))
    tf.print(loss, M_T[1], T_, actual)

# NOTE: M_T was last assigned at the final logging step (i == 900), so this
# prints the mask from that step rather than the mask after all 1000 updates.
tf.print(M_T)

11.7297249 [0.729087949 0.729087889 0.729087889 0.729087889 0.729087889] 10.9658794 15
6.08404255 [0.849486172 0.522940934 0.5265553 0.527815342 0.528452575] 8.25115776 15
4.63299894 [0.936596215 0.315846235 0.405601978 0.432577103 0.444250286] 6.74399376 1
3.96075273 [0.961368561 0.159341335 0.334769219 0.4513897 0.492678] 6.5517168 1
3.10632539 [0.971199095 0.0969442874 0.204626277 0.5061602 0.608011901] 6.83957767 10
2.3400321 [0.975647 0.0659312457 0.110726014 0.537623584 0.725107] 7.21328068 10
1.93451571 [0.978034198 0.0484895259 0.0714 0.491285 0.801038265] 7.26118088 6
1.38491344 [0.980378 0.0397343 0.0554133914 0.338027656 0.86306566] 6.89832544 6
0.804450035 [0.983304262 0.0364712402 0.050469853 0.196737602 0.912564933] 6.55917883 6
0.570799768 [0.985896528 0.0352818146 0.0493095517 0.140285924 0.939776659] 6.46486235 6
[[0.0141034378 0.964718223 0.950690448 0.859714091 0.0602233037]
 [0.985896528 0.0352818146 0.0493095517 0.140285924 0.939776659]]
