# The Knapsack Problem DQM
CDL Quantum Hackathon 2021

In [1]:
# Ziwei Qiu, ziweiqiu@g.harvard.edu
import os
# Step up one directory before importing `utils` -- presumably the notebook sits in
# a subfolder of the project root, next to which `utils/` and `data/` live (TODO confirm).
# NOTE(review): not idempotent -- re-running this cell climbs one more level each
# time, so restart the kernel before running it again.
os.chdir('..')
from utils.data import read_profit_optimization_data
from dimod import DiscreteQuadraticModel
from dimod import ExactSolver
import sys
from dwave.system import LeapHybridDQMSampler
from neal import SimulatedAnnealingSampler
from math import log2, floor
import dimod
import os  # NOTE(review): repeats the `import os` at the top of this cell; harmless
import numpy as np
import pandas as pd

## Construct DQM

In [2]:
def build_knapsack_dqm_fixedbound(values, weights, weight_capacity, bound, verbose = False):
    """Construct a DQM for the bounded knapsack problem where all items share one bound.

    The biases set below are the expansion of
        lagrange * (sum_i weights[i]*x_i - sum_k y[k]*s_k)**2 - sum_i values[i]*x_i
    where x_i in {0, ..., bound} is the number of pieces of item i and s_k in {0, 1}
    are binary slack variables whose coefficients y[k] add up to weight_capacity.

    Args:
        values (array-like):
            Array of values associated with the items
        weights (array-like):
            Array of weights associated with the items
        weight_capacity (int):
            Maximum allowable weight; must be at least 1
        bound (int):
            Maximum allowable pieces for each item
        verbose (bool):
            If True, print the Lagrange multiplier that is used
    Returns:
        dqm: Discrete quadratic model instance
        x: list holding what dqm.add_variable returned for the item variables
           labelled 'x0', 'x1', ..., in item order
    Raises:
        ValueError: if weight_capacity is smaller than 1
    """
    if weight_capacity < 1:
        # log2() below would otherwise fail with an opaque "math domain error"
        # (or yield a nonsensical slack encoding for 0 < capacity < 1).
        raise ValueError('weight_capacity must be at least 1')

    num_cases = bound + 1 # also take into account the value 0
    pieces = range(num_cases)
    # Array copy of the piece counts for the vectorised linear biases. Multiplying
    # a plain Python int by a `range` raises TypeError, so `values[k]` must be
    # multiplied by this array rather than by `pieces` itself.
    piece_counts = np.array(pieces)

    # First guess the lagrange
    lagrange = max(values)*0.5
    if verbose:
        print('lagrange:',lagrange)

    # Number of objects
    x_size = len(values)

    # Lucas's algorithm introduces additional slack variables to
    # handle the inequality. M+1 binary slack variables are needed to
    # represent the sum using a set of powers of 2.
    M = floor(log2(weight_capacity))
    num_slack_variables = M + 1

    # Slack variable list for Lucas's algorithm. The last variable has
    # a special value because it terminates the sequence.
    y = [2**n for n in range(M)]
    y.append(weight_capacity + 1 - 2**M)

    ##@  Discrete Quadratic Model @##
    dqm = DiscreteQuadraticModel()

    #@ Add variables @##
    x = []
    for k in range(x_size):
        x.append(dqm.add_variable(num_cases, label='x' + str(k)))

    for k in range(num_slack_variables):
        dqm.add_variable(2, label='y' + str(k)) # either 0 or 1

    ##@ Hamiltonian xi-xi terms ##
    for k in range(x_size):
        dqm.set_linear('x' + str(k),
                       lagrange * (weights[k]**2) * (piece_counts**2) - values[k]*piece_counts)

    # Hamiltonian xi-xj terms
    for i in range(x_size):
        for j in range(i + 1, x_size):
            coupling = 2 * lagrange * weights[i] * weights[j]
            biases_dict = {(piece1, piece2): coupling*piece1*piece2
                           for piece1 in pieces
                           for piece2 in pieces}
            dqm.set_quadratic('x' + str(i), 'x' + str(j), biases_dict)

    # Hamiltonian y-y terms (case 0 contributes nothing, case 1 contributes y[k]**2)
    for k in range(num_slack_variables):
        dqm.set_linear('y' + str(k), lagrange*np.array([0,1])* (y[k]**2))

    # Hamiltonian yi-yj terms (only non-zero when both slack bits are set)
    for i in range(num_slack_variables):
        for j in range(i + 1, num_slack_variables):
            dqm.set_quadratic('y' + str(i), 'y' + str(j), {(1,1):2 * lagrange * y[i] * y[j]})

    # Hamiltonian x-y terms (only the slack case 1 carries a bias)
    for i in range(x_size):
        for j in range(num_slack_variables):
            coupling = -2 * lagrange * weights[i] * y[j]
            biases_dict = {(piece1, 1): coupling*piece1 for piece1 in pieces}
            dqm.set_quadratic('x' + str(i), 'y' + str(j), biases_dict)

    return dqm, x

In [3]:
# Use the Andrew Lucas log trick
def build_knapsack_dqm_variablebound2(values, weights, weight_capacity, bound, verbose = False):
    """Construct a DQM for the bounded knapsack problem with a separate bound per item.

    The biases set below are the expansion of
        lagrange * (sum_i weights[i]*x_i - sum_k y[k]*s_k)**2 - sum_i values[i]*x_i
    where x_i in {0, ..., bound[i]} is the number of pieces of item i and
    s_k in {0, 1} are binary slack variables whose coefficients y[k] add up to
    weight_capacity.

    Args:
        values (array-like):
            Array of values associated with the items
        weights (array-like):
            Array of weights associated with the items
        weight_capacity (int):
            Maximum allowable weight; must be at least 1
        bound (array-like):
            Maximum allowable pieces for each item, one entry per item
        verbose (bool):
            If True, print the Lagrange multiplier that is used
    Returns:
        dqm: Discrete quadratic model instance
        x: list holding what dqm.add_variable returned for the item variables
           labelled 'x0', 'x1', ..., in item order
    Raises:
        ValueError: if weight_capacity is smaller than 1
    """
    if weight_capacity < 1:
        # log2() below would otherwise fail with an opaque "math domain error"
        # (or yield a nonsensical slack encoding for 0 < capacity < 1).
        raise ValueError('weight_capacity must be at least 1')

    num_cases = [b+1 for b in bound] # also take into account the value 0

    # First guess the lagrange
    lagrange = max(values)*0.5
    if verbose:
        print('lagrange:',lagrange)

    # Number of objects
    x_size = len(values)

    # Lucas's algorithm introduces additional slack variables to
    # handle the inequality. M+1 binary slack variables are needed to
    # represent the sum using a set of powers of 2.
    M = floor(log2(weight_capacity))
    num_slack_variables = M + 1

    # Slack variable list for Lucas's algorithm. The last variable has
    # a special value because it terminates the sequence.
    y = [2**n for n in range(M)]
    y.append(weight_capacity + 1 - 2**M)

    ##@  Discrete Quadratic Model @##
    dqm = DiscreteQuadraticModel()

    #@ Add variables @##
    x = []
    for k in range(x_size):
        x.append(dqm.add_variable(num_cases[k], label='x' + str(k)))

    for k in range(num_slack_variables):
        dqm.add_variable(2, label='y' + str(k)) # either 0 or 1

    ##@ Hamiltonian xi-xi terms ##
    for k in range(x_size):
        # Array of piece counts for item k. Multiplying a plain Python int by a
        # `range` raises TypeError, so `values[k]` is multiplied by the array.
        piece_counts = np.array(range(num_cases[k]))
        dqm.set_linear('x' + str(k),
                       lagrange * (weights[k]**2) * (piece_counts**2) - values[k]*piece_counts)

    # Hamiltonian xi-xj terms
    for i in range(x_size):
        for j in range(i + 1, x_size):
            coupling = 2 * lagrange * weights[i] * weights[j]
            biases_dict = {(piece1, piece2): coupling*piece1*piece2
                           for piece1 in range(num_cases[i])
                           for piece2 in range(num_cases[j])}
            dqm.set_quadratic('x' + str(i), 'x' + str(j), biases_dict)

    # Hamiltonian y-y terms (case 0 contributes nothing, case 1 contributes y[k]**2)
    for k in range(num_slack_variables):
        dqm.set_linear('y' + str(k), lagrange*np.array([0,1])* (y[k]**2))

    # Hamiltonian yi-yj terms (only non-zero when both slack bits are set)
    for i in range(num_slack_variables):
        for j in range(i + 1, num_slack_variables):
            dqm.set_quadratic('y' + str(i), 'y' + str(j), {(1,1):2 * lagrange * y[i] * y[j]})

    # Hamiltonian x-y terms (only the slack case 1 carries a bias)
    for i in range(x_size):
        for j in range(num_slack_variables):
            coupling = -2 * lagrange * weights[i] * y[j]
            biases_dict = {(piece1, 1): coupling*piece1 for piece1 in range(num_cases[i])}
            dqm.set_quadratic('x' + str(i), 'y' + str(j), biases_dict)

    return dqm,x

# Without the Andrew Lucas log trick
def build_knapsack_dqm_variablebound(values, weights, weight_capacity, bound, verbose = False):
    """Build a DQM for the bounded knapsack problem using one slack variable per weight.

    Item k becomes a discrete variable 'xk' with bound[k]+1 cases (0..bound[k]
    pieces). Every integer n in 1..weight_capacity gets a binary variable 'yn'.
    Penalties are weighted by A1 = 8*max(values) and A2 = 2*max(values).

    Args:
        values (array-like):
            Array of values associated with the items
        weights (array-like):
            Array of weights associated with the items
        weight_capacity (int):
            Maximum allowable weight
        bound (array-like):
            Maximum allowable pieces for each item
        verbose (bool):
            Accepted for symmetry with the other builders; not used here
    Returns:
        Discrete quadratic model instance
        x: list holding what dqm.add_variable returned for each item variable
    """
    case_counts = [b+1 for b in bound] # also take into account the value 0

    # Lagrange multipliers A>max(values)>0
    A1 = max(values)*8
    A2 = max(values)*2

    num_of_items = len(values)
    total_levels = weight_capacity+1   # exclusive upper end of the 'y' label range

    ##@  Discrete Quadratic Model @##
    dqm = DiscreteQuadraticModel()

    #@ Add variables @##
    x = [dqm.add_variable(case_counts[item], label='x' + str(item)) # number of discrete values
         for item in range(num_of_items)]

    for level in range(1, total_levels):
        dqm.add_variable(2, label='y' + str(level)) # either 0 or 1, 2 values possible

    ##@ Hamiltonian xi-xi terms ##
    for item in range(num_of_items):
        counts = np.array(range(case_counts[item]))
        penalty = A2 * (weights[item]**2) * (counts**2)
        dqm.set_linear('x' + str(item), penalty - values[item]*counts)

    # Hamiltonian y-y terms
    for level in range(1, total_levels):
        dqm.set_linear('y' + str(level), np.array([0,1])* (level**2*A2-A1))

    # Hamiltonian yi-yj terms
    # NOTE(review): the linear and x-y terms look like the expansion of
    # A1*(1 - sum y)**2 + A2*(sum n*y_n - sum w*x)**2, which would give
    # 2*A1 + 2*A2*m*n here; the code scales the m*n part by A1 as well.
    # Kept unchanged (it only affects states with more than one y set) -- confirm intent.
    for n in range(1, total_levels):
        for m in range(n + 1, total_levels):
            dqm.set_quadratic('y' + str(n), 'y' + str(m), {(1,1):2 * A1 * (1+m*n)})

    # Hamiltonian xi-xj terms
    for first in range(num_of_items):
        for second in range(first + 1, num_of_items):
            pair_biases = {
                (p, q): (2 * A2 * weights[first] * weights[second])*p*q
                for p in range(case_counts[first])
                for q in range(case_counts[second])
            }
            dqm.set_quadratic('x' + str(first), 'x' + str(second), pair_biases)

    # Hamiltonian x-y terms (only the slack case 1 carries a bias)
    for item in range(num_of_items):
        for level in range(1, total_levels):
            cross_biases = {
                (p, 1): -2 * A2 * weights[item] * level* p
                for p in range(case_counts[item])
            }
            dqm.set_quadratic('x' + str(item), 'y' + str(level), cross_biases)

    return dqm, x

In [4]:
def solve_dqm(dqm, x, sampler = None, verbose = False):
    """Sample a DQM and return the chosen case of every variable listed in x.

    Args:
        dqm: model handed to the sampler's sample_dqm method
        x: iterable of variable labels to read out of the sample
        sampler: object exposing sample_dqm(dqm); a LeapHybridDQMSampler is
            created when None is given
        verbose: currently unused -- the solution is always printed
    Returns:
        list with the value assigned to each label of x, in the order of x,
        taken from the `first` record of the returned sampleset
    """
    active_sampler = LeapHybridDQMSampler() if sampler is None else sampler
    top_sample = active_sampler.sample_dqm(dqm).first.sample
    best_solution = [top_sample[label] for label in x]
    print(best_solution)
    return best_solution

# Implementation

In [5]:
# Random toy instance: item values and weights are drawn from 1..9 and the
# capacity from 12..39 (randint excludes the upper limit).
# No seed is set, so each run produces a different instance.
num_of_items = 12
values = list(np.random.randint(1, 10, size=num_of_items))
weights = list(np.random.randint(1, 10, size=num_of_items))
weight_capacity = np.random.randint(12, 40)

print('values:', values)
print('weights:', weights)
print('weight_capacity:', weight_capacity)

values: [6, 7, 4, 4, 6, 3, 4, 6, 6, 7, 9, 4]
weights: [4, 3, 8, 8, 3, 3, 9, 4, 4, 2, 7, 5]
weight_capacity: 23


### Solve Bounded Knapsack Problem 1: All Items have the Same Bound

In [6]:
# Every item may be packed between 0 and fixed_bound times.
fixed_bound = 3
print('fixed bound:', fixed_bound)

dqm, x = build_knapsack_dqm_fixedbound(values, weights, weight_capacity, fixed_bound)
best_solution = solve_dqm(dqm, x)

# Score the returned piece counts against the instance data.
total_weights = sum(w * count for w, count in zip(weights, best_solution))
total_value = sum(v * count for v, count in zip(values, best_solution))

print('Total weight:', total_weights)
print('Total value:', total_value)

fixed bound: 3
[0, 1, 0, 0, 1, 0, 0, 0, 1, 3, 1, 0]
Total weight: 23
Total value: 49


### Solve Bounded Knapsack Problem 2: Each Item has Different Bounds (without Lucas log trick)

In [7]:
# Draw a separate bound in 2..5 for every item
# (swap in [3 for i in range(num_of_items)] for a uniform bound).
variable_bounds = list(np.random.randint(2, 6, size=num_of_items))
print('variable bounds:', variable_bounds)

# Build with one slack variable per unit of capacity (no log trick).
dqm, x = build_knapsack_dqm_variablebound(values, weights, weight_capacity, variable_bounds)
best_solution = solve_dqm(dqm, x)

# Score the returned piece counts against the instance data.
total_weights = sum(w * count for w, count in zip(weights, best_solution))
total_value = sum(v * count for v, count in zip(values, best_solution))

print('Total weight:', total_weights)
print('Total value:', total_value)

variable bounds: [4, 4, 3, 5, 2, 5, 4, 3, 3, 4, 2, 3]
[0, 0, 0, 0, 1, 0, 0, 0, 2, 2, 1, 0]
Total weight: 22
Total value: 41


### Solve Bounded Knapsack Problem 2: Each Item has Different Bounds (with Lucas log trick)

In [8]:
# Draw a fresh bound in 2..5 for every item
# (swap in [3 for i in range(num_of_items)] for a uniform bound).
variable_bounds = list(np.random.randint(2, 6, size=num_of_items))
print('variable bounds:', variable_bounds)

# Build with the logarithmic slack encoding (Lucas log trick).
dqm, x = build_knapsack_dqm_variablebound2(values, weights, weight_capacity, variable_bounds)
best_solution = solve_dqm(dqm, x)

# Score the returned piece counts against the instance data.
total_weights = sum(w * count for w, count in zip(weights, best_solution))
total_value = sum(v * count for v, count in zip(values, best_solution))

print('Total weight:', total_weights)
print('Total value:', total_value)

variable bounds: [3, 5, 4, 2, 5, 5, 3, 3, 3, 4, 4, 2]
[0, 2, 0, 0, 0, 0, 0, 1, 0, 3, 1, 0]
Total weight: 23
Total value: 50


# Grocery Data

In [9]:
# Read the profit and cost sequences (presumably one entry per product) from the
# mock CSV, located relative to the current working directory.
profit, cost = read_profit_optimization_data(
    os.path.join(os.getcwd(), 'data/small-cost-mock.csv')
)

# Random budget: the mean cost times 50, scaled by a uniform draw from [0, 1).
budget = np.mean(cost)*50*np.random.rand()

print('average cost: ', np.mean(cost))
print('average profit: ', np.mean(profit))
print('budget: ', budget)

average cost:  11.723847457000542
average profit:  23.447694914001083
budget:  255.10316954983296


The knapsack models need integer values, weights, and capacity, so we multiply the profits, costs, and budget by 100 and truncate them to integers.

In [10]:
# Scale by 100 and convert with int(), which truncates toward zero, so anything
# beyond the second decimal place is dropped.
profit_integers = np.array([int(amount*100) for amount in profit])
cost_integers = np.array([int(amount*100) for amount in cost])
budget_integer = int(budget*100)

### Solve Bounded Knapsack Problem 1: All Items have the Same Bound

In [11]:
# Every product may be chosen between 0 and fixed_bound times.
fixed_bound = 50
print('fixed bound:', fixed_bound)

# The model is built on the integer-scaled data ...
dqm, x = build_knapsack_dqm_fixedbound(profit_integers, cost_integers, budget_integer, fixed_bound)
best_solution = solve_dqm(dqm, x)

# ... while the totals are reported in the original (unscaled) units.
total_costs = sum(cost[i]*count for i, count in enumerate(best_solution))
total_profit = sum(profit[i]*count for i, count in enumerate(best_solution))

print('Total cost:', total_costs)
print('Total profit:', total_profit)

fixed bound: 50
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 23, 27]
Total cost: 213.16192623039748
Total profit: 426.32385246079497


### Solve Bounded Knapsack Problem 2: Each Item has Different Bounds

In [12]:
# Draw a separate bound in 10..49 for every product, then build the model that
# uses the log trick for its slack variables.
variable_bounds = list(np.random.randint(10, 50, size=len(profit)))
print('variable bounds:', variable_bounds)

dqm, x = build_knapsack_dqm_variablebound2(profit_integers, cost_integers, budget_integer, variable_bounds)
best_solution = solve_dqm(dqm, x)

# Totals are reported in the original (unscaled) units.
total_costs = sum(cost[i]*count for i, count in enumerate(best_solution))
total_profit = sum(profit[i]*count for i, count in enumerate(best_solution))

print('Total cost:', total_costs)
print('Total profit:', total_profit)

variable bounds: [15, 45, 32, 24, 46, 30, 32, 15, 20, 32, 23, 38, 32, 20, 43, 45, 40, 24, 43, 41]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 24, 14]
Total cost: 154.17784972465597
Total profit: 308.35569944931194


In [13]:
# Reuse the bounds drawn in the previous cell, but build the model without the log trick.
# NOTE(review): build_knapsack_dqm_variablebound adds one binary slack variable per
# unit of capacity and a quadratic term for every pair of them, so with
# budget_integer = int(budget*100) the model is very large; the recorded run of
# this cell ended in a KeyboardInterrupt.
print('variable bounds:', variable_bounds)

dqm, x = build_knapsack_dqm_variablebound(profit_integers, cost_integers, budget_integer, variable_bounds)
best_solution = solve_dqm(dqm, x)

# Totals are reported in the original (unscaled) units.
total_costs = sum(cost[i]*count for i, count in enumerate(best_solution))
total_profit = sum(profit[i]*count for i, count in enumerate(best_solution))

print('Total cost:', total_costs)
print('Total profit:', total_profit)

variable bounds: [15, 45, 32, 24, 46, 30, 32, 15, 20, 32, 23, 38, 32, 20, 43, 45, 40, 24, 43, 41]


KeyboardInterrupt: 