# The Knapsack Problem DQM
CDL Quantum Hackathon 2021

In [1]:
# Ziwei Qiu, ziweiqiu@g.harvard.edu
from dimod import DiscreteQuadraticModel
from dimod import ExactSolver
import sys
from dwave.system import LeapHybridDQMSampler
from neal import SimulatedAnnealingSampler
from math import log2, floor
import dimod
import os
import numpy as np
import pandas as pd

## Construct DQM

In [2]:
def build_knapsack_dqm_fixedbound(values, weights, weight_capacity, bound, verbose = False):
    """Construct DQM for the generalized knapsack problem
    Args:
        values (array-like):
            Array of values associated with the items
        weights (array-like):
            Array of weights associated with the items
        weight_capacity (int):
            Maximum allowable weight
        bound(int):
            Maximum allowable pieces for each item
    Returns:
        Discrete quadratic model instance
        x: variable
    """
    bound += 1 # also take into account the value 0
    pieces = range(bound)
    
    # First guess the lagrange
    lagrange = max(values)*0.5
    if verbose:
        print('lagrange:',lagrange)

    # Number of objects
    x_size = len(values)

    # Lucas's algorithm introduces additional slack variables to
    # handle the inequality. M+1 binary slack variables are needed to
    # represent the sum using a set of powers of 2.
    M = floor(log2(weight_capacity))
    num_slack_variables = M + 1

    # Slack variable list for Lucas's algorithm. The last variable has
    # a special value because it terminates the sequence.
    y = [2**n for n in range(M)]
    y.append(weight_capacity + 1 - 2**M)
    
    ##@  Discrete Quadratic Model @##
    dqm = DiscreteQuadraticModel()
    x = []
    #@ Add variables @##
    for k in range(x_size):
        x.append(dqm.add_variable(bound, label='x' + str(k)))

    for k in range(num_slack_variables):
        dqm.add_variable(2, label='y' + str(k)) # either 0 or 1

    ##@ Hamiltonian xi-xi terms ##
    for k in range(x_size):
        dqm.set_linear('x' + str(k), lagrange * (weights[k]**2) * (np.array(pieces)**2) - values[k]*pieces)


    # # Hamiltonian xi-xj terms
    for i in range(x_size):
        for j in range(i + 1, x_size):
            biases_dict = {}
            for piece1 in pieces:
                for piece2 in pieces:
                    biases_dict[(piece1, piece2)]=(2 * lagrange * weights[i] * weights[j])*piece1*piece2

            dqm.set_quadratic('x' + str(i), 'x' + str(j), biases_dict)

    # Hamiltonian y-y terms
    for k in range(num_slack_variables):
        dqm.set_linear('y' + str(k), lagrange*np.array([0,1])* (y[k]**2))

    # Hamiltonian yi-yj terms 
    for i in range(num_slack_variables):
        for j in range(i + 1, num_slack_variables): 
            dqm.set_quadratic('y' + str(i), 'y' + str(j), {(1,1):2 * lagrange * y[i] * y[j]})

    # Hamiltonian x-y terms
    for i in range(x_size):
        for j in range(num_slack_variables):
            biases_dict = {}
            for piece1 in pieces:
                biases_dict[(piece1, 1)]=-2 * lagrange * weights[i] * y[j]*piece1

            dqm.set_quadratic('x' + str(i), 'y' + str(j), biases_dict) 
    
    return dqm, x

In [3]:
def build_knapsack_dqm_variablebound(values, weights, weight_capacity, bound, verbose = False):
    """Construct DQM for the generalized knapsack problem
    Args:
        values (array-like):
            Array of values associated with the items
        weights (array-like):
            Array of weights associated with the items
        weight_capacity (int):
            Maximum allowable weight
        bound(array-like):
            Maximum allowable pieces for each item
    Returns:
        Discrete quadratic model instance
        x: varibles
    """
    bound = [b+1 for b in bound] # also take into account the value 0
    
    # Lagrange multipliers A>max(values)>0
    A1 = max(values)*8
    A2 = max(values)*2
    
    ##@  Discrete Quadratic Model @##
    dqm = DiscreteQuadraticModel()

    x = []
    #@ Add variables @##
    for k in range(num_of_items):
        x.append(dqm.add_variable(bound[k], label='x' + str(k))) # number of discrete values 

    for n in range(1,weight_capacity+1):
        dqm.add_variable(2, label='y' + str(n)) # either 0 or 1, 2 values possible

    ##@ Hamiltonian xi-xi terms ##
    for k in range(num_of_items):
        pieces = range(bound[k])
    #     dqm.set_linear('x' + str(k),  - values[k]*pieces)
        dqm.set_linear('x' + str(k), A2 * (weights[k]**2) * (np.array(pieces)**2) - values[k]*np.array(pieces))

    # Hamiltonian y-y terms
    for n in range(1,weight_capacity+1):
        dqm.set_linear('y' + str(n), np.array([0,1])* (n**2*A2-A1))

    # Hamiltonian yi-yj terms 
    for n in range(1,weight_capacity+1):
        for m in range(n + 1, weight_capacity+1): 
            dqm.set_quadratic('y' + str(n), 'y' + str(m), {(1,1):2 * A1 * (1+m*n)})

    # # Hamiltonian xi-xj terms
    for i in range(num_of_items):
        for j in range(i + 1, num_of_items):
            biases_dict = {}
            for piece1 in range(bound[i]):
                for piece2 in range(bound[j]):
                    biases_dict[(piece1, piece2)]=(2 * A2 * weights[i] * weights[j])*piece1*piece2
            dqm.set_quadratic('x' + str(i), 'x' + str(j), biases_dict)

    # Hamiltonian x-y terms
    for i in range(num_of_items):
        for n in range(1,weight_capacity+1):
            biases_dict = {}
            for piece1 in range(bound[i]):
                biases_dict[(piece1, 1)]=-2 * A2 * weights[i] * n* piece1

            dqm.set_quadratic('x' + str(i), 'y' + str(n), biases_dict) 
            
    return dqm, x

In [56]:
def solve_dqm(dqm, x, sampler = None, verbose = False):
    if sampler is None:
        sampler = LeapHybridDQMSampler()
    sampleset = sampler.sample_dqm(dqm)
    best_solution = sampleset.first.sample    
    best_solution = [best_solution[i] for i in x]
    print(best_solution)
    
    return best_solution

# Implementation

In [58]:
num_of_items = 12
values = list(np.random.randint(1,10, size=(num_of_items)))
weights = list(np.random.randint(1,10, size=(num_of_items)))
weight_capacity = np.random.randint(12, 40)
print('values:',values)
print('weights:',weights)
print('weight_capacity:',weight_capacity)

values: [5, 5, 1, 7, 9, 1, 7, 4, 7, 1, 7, 8]
weights: [3, 8, 5, 1, 4, 6, 8, 1, 7, 6, 4, 3]
weight_capacity: 17


### Solve Bounded Knapsack Problem 1: All Items have the Same Bound

In [59]:
fixed_bound = 3
print('fixed bound:',fixed_bound)

(dqm,x) = build_knapsack_dqm_fixedbound(values, weights, weight_capacity, fixed_bound)
best_solution = solve_dqm(dqm,x)

total_weights = sum([weights[i]*best_solution[i] for i in range(len(x))])
total_value = sum([values[i]*best_solution[i] for i in range(len(x))])
    
print('Total weight:',total_weights)
print('Total value:',total_value)

fixed bound: 3
[0, 0, 0, 3, 1, 0, 0, 3, 0, 0, 0, 2]
Total weight: 16
Total value: 58


### Solve Bounded Knapsack Problem 2: Each Item has Different Bounds

In [60]:
# Define an array of bounds
variable_bounds = list(np.random.randint(2, 6,size=(num_of_items)))
# variable_bounds = [3 for i in range(num_of_items)]
print('variable bounds:',variable_bounds)

(dqm,x) = build_knapsack_dqm_variablebound(values, weights, weight_capacity, variable_bounds)
best_solution = solve_dqm(dqm,x)

total_weights = sum([weights[i]*best_solution[i] for i in range(len(x))])
total_value = sum([values[i]*best_solution[i] for i in range(len(x))])
    
print('Total weight:',total_weights)
print('Total value:',total_value)

variable bounds: [5, 2, 2, 3, 5, 2, 3, 2, 2, 4, 5, 3]
[0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 1]
Total weight: 17
Total value: 43


# Grocery Data

In [61]:
# Load Data
supplier_data = os.path.join(os.getcwd(),'data/small-cost-mock.csv')
supplier_df = pd.read_csv(supplier_data)
supplier_df.head()

Unnamed: 0.1,Unnamed: 0,item0,item1,item2,item3,item4,item5,item6,item7,item8,...,item10,item11,item12,item13,item14,item15,item16,item17,item18,item19
0,supplier0,25.651062,16.88558,9.874869,4.073876,11.845272,4.486785,2.600526,10.754414,11.321333,...,8.981416,18.169675,-1.0,6.955355,-1.0,7.081792,23.158746,22.546562,-1.0,12.837133
1,supplier1,25.449085,23.421934,-1.0,3.909275,16.661481,3.3467,2.515956,-1.0,10.940784,...,-1.0,18.169675,-1.0,9.397129,3.742742,7.081792,23.397496,20.312218,-1.0,15.849929
2,supplier2,-1.0,-1.0,-1.0,-1.0,14.708964,-1.0,2.431386,14.080521,11.987294,...,-1.0,19.207942,10.194032,9.619108,4.668781,6.809415,27.694995,24.780906,-1.0,-1.0
3,supplier3,18.783848,16.522449,9.714302,4.032726,14.839132,-1.0,2.262246,10.089192,10.560235,...,-1.0,-1.0,11.566305,8.805184,4.630196,6.319138,27.933745,-1.0,27.917692,15.980921
4,supplier4,19.389779,21.424714,7.225514,4.69113,12.496111,3.751246,2.410244,13.082689,-1.0,...,8.309908,20.419254,10.58611,8.95317,-1.0,6.75494,26.262495,24.374662,20.938269,16.766867


In [62]:
profit_data = os.path.join(os.getcwd(),'data/small-price-mock.csv')
profit_df = pd.read_csv(profit_data)
profit_df.head()

Unnamed: 0.1,Unnamed: 0,item0,item1,item2,item3,item4,item5,item6,item7,item8,...,item10,item11,item12,item13,item14,item15,item16,item17,item18,item19
0,price,61.562548,56.212641,24.663087,11.653756,39.987555,10.768284,6.241263,34.325428,28.769505,...,25.785897,51.49805,27.759133,23.08586,11.390282,16.996301,67.040988,59.474175,67.002461,40.240482


In [63]:
item_list = list(supplier_df.columns[1:])
item_avg_cost=[np.int64(round(np.average([i for i in supplier_df[item] if i>0])*100)) for item in item_list] # unit: cent
item_profit = [np.int64(round(p*100)) for p in list(profit_df.iloc[0,1:])] # unit: cent
budget = round(sum(item_avg_cost)*50*np.random.rand())

print('average cost:', item_avg_cost)
print('item profit:',item_profit)
print('budget:',budget)

average cost: [2210, 2021, 899, 437, 1444, 375, 240, 1266, 1097, 1794, 933, 1948, 1011, 830, 416, 648, 2510, 2222, 2594, 1515]
item profit: [6156, 5621, 2466, 1165, 3999, 1077, 624, 3433, 2877, 4945, 2579, 5150, 2776, 2309, 1139, 1700, 6704, 5947, 6700, 4024]
budget: 627919


### Solve Bounded Knapsack Problem 1: All Items have the Same Bound

In [64]:
fixed_bound = 50
print('fixed bound:',fixed_bound)

(dqm,x) = build_knapsack_dqm_fixedbound(item_profit, item_avg_cost, budget, fixed_bound)
best_solution = solve_dqm(dqm,x)

total_costs = sum([item_avg_cost[i]*best_solution[i] for i in range(len(x))])
total_profit = sum([item_profit[i]*best_solution[i] for i in range(len(x))])
    
print('Total cost:',total_costs)
print('Total profit:',total_profit)

fixed bound: 50
[18, 16, 11, 16, 22, 30, 36, 32, 1, 5, 38, 2, 27, 33, 19, 46, 44, 50, 19, 11]
Total cost: 610474
Total profit: 1649902


### Solve Bounded Knapsack Problem 2: Each Item has Different Bounds

In [None]:
# Define an array of bounds
variable_bounds = list(np.random.randint(10, 50,size=(num_of_items)))
print('variable bounds:',variable_bounds)

(dqm,x) = build_knapsack_dqm_variablebound(item_profit, item_avg_cost, budget, variable_bounds)
best_solution = solve_dqm(dqm,x)

total_costs = sum([item_avg_cost[i]*best_solution[i] for i in range(len(x))])
total_profit = sum([item_profit[i]*best_solution[i] for i in range(len(x))])
    
print('Total cost:',total_costs)
print('Total profit:',total_profit)

variable bounds: [12, 13, 38, 36, 14, 30, 46, 16, 11, 13, 33, 17]
