# The Set Cover Problem as a Binary Quadratic Model (BQM)
CDL Quantum Hackathon 2021

In [62]:
# Ziwei Qiu, ziweiqiu@g.harvard.edu
from dimod import BinaryQuadraticModel
from dimod import ExactSolver
from neal import SimulatedAnnealingSampler
from itertools import combinations
from dwave.system import LeapHybridSampler
import os
import numpy as np
import pandas as pd

In [140]:
def build_setcover_bqm(U, V, verbose = False, A = 2, B = 1):
    """Construct BQM for the set cover problem

    The model is the expansion (using x*x == x for binary variables) of

        A * sum_a (1 - sum_m y_(a,m))**2
      + A * sum_a (sum_m m*y_(a,m) - sum_i I[i][a]*x_i)**2
      + B * sum_i x_i

    where x_i == 1 selects subset V[i], I[i][a] is 1 when U[a] is in V[i],
    and y_(a,m) == 1 states that element a is covered by exactly m selected
    subsets. The constant A*len(U) coming from the first square is omitted,
    so every energy is shifted by that amount.

    Args:
        U (array-like):
            A set of elements defining the universe
        V (array of sets):
            Array of subsets
        verbose (bool):
            If True, print the indicator matrix and the variable labels.
        A (number):
            Weight of the two squared penalty terms.
        B (number):
            Cost added for every selected subset. Must satisfy A > B > 0.
    Returns:
        tuple: (bqm, x) where bqm is the binary quadratic model instance and
        x is the list of subset-selection variable labels 'x_1', 'x_2', ...
        in the same order as V.
    Raises:
        ValueError: if the weights do not satisfy A > B > 0.
    """
    import warnings

    if not (A > B > 0):
        raise ValueError('Lagrange multipliers must satisfy A > B > 0')

    num_sets = len(V)
    num_elements = len(U)

    # Create indicator variables: I[i][a] == 1 iff element U[a] belongs to V[i]
    I = [[1 if element in subset else 0 for element in U] for subset in V]

    if verbose:
        print('Indicator variables: I_i,a',I)

    # An element that no subset contains makes the cover constraint unsatisfiable;
    # the model is still built, but its minimum will not be a valid cover.
    uncovered = [U[a] for a in range(num_elements) if not any(row[a] for row in I)]
    if uncovered:
        warnings.warn('No subset contains these elements, so no set cover exists: '
                      + repr(uncovered))

    ##@  Binary Quadratic Model @##
    bqm = BinaryQuadraticModel('BINARY')

    # Add linear terms
    # x linear terms: A for every element the subset contains, plus the cost B
    x = [bqm.add_variable('x_'+str(i+1), A*sum(I[i])+B) for i in range(num_sets)]
    if verbose:
        print('x variables:',x)

    # y_am linear terms: A*m**2 from the second square, -A from the first
    y = []
    for a in range(1, num_elements+1):
        y.append([bqm.add_variable('y_('+str(a)+', '+str(m)+')', A*(m**2-1))
                  for m in range(1, num_sets+1)])
    if verbose:
        print('y variables:',y)

    # Add quadratic terms

    # x_i-x_j terms: weighted by the number of elements the two subsets share.
    # Disjoint subsets would get a zero bias, so no interaction is created.
    for i, j in combinations(range(num_sets), 2):
        overlap = sum(p*q for p, q in zip(I[i], I[j]))
        if overlap:
            bqm.set_quadratic(x[i], x[j], 2*A*overlap)

    # y_am - y_an terms: 2A from the first square plus 2A*m*n from the second
    for a in range(num_elements):
        for m, n in combinations(range(1, num_sets+1), 2):
            bqm.set_quadratic(y[a][m-1], y[a][n-1], 2*A*(1+m*n))

    # x_i-y_am terms: only present when subset i actually contains element a
    for i in range(num_sets):
        for a in range(num_elements):
            if not I[i][a]:
                continue
            for m in range(1, num_sets+1):
                bqm.set_quadratic(x[i], y[a][m-1], -2*A*m)

    return bqm, x

def solve_bqm(bqm, x, sampler):
    """Sample a BQM and read out the best assignment of the x variables

    Args:
        bqm:
            Model handed unchanged to ``sampler.sample``
        x (iterable):
            Variable labels to read out, in the desired output order
        sampler:
            Object exposing ``sample(bqm)``; the returned sample set must
            provide ``first.sample``, a mapping from label to value
    Returns:
        list of int: value of each label in ``x`` taken from the first
        sample of the response. The list is also printed.
    """
    response = sampler.sample(bqm)
    first_sample = response.first.sample
    # Samplers may hand back fixed-width numpy scalars; normalise to plain ints
    # so the printed list reads as 0/1 regardless of the numpy version.
    best_solution = [int(first_sample[label]) for label in x]
    print(best_solution)

    return best_solution

# Implementation

In [58]:
# Define a simple set cover problem
# Fixed seed so that Restart & Run All reproduces the same random instance.
rng = np.random.default_rng(2021)

# Universe: the distinct values among 10 draws from 0..9, as plain Python ints.
U = sorted({int(value) for value in rng.integers(10, size=10)})

# Five subsets, each made from 8 draws (with replacement) of universe elements.
# NOTE: nothing here forces the subsets to cover U, so a cover may not exist.
V = [{U[int(i)] for i in rng.integers(len(U), size=8)} for _ in range(5)]

print('The universe is',U)
print('Number of elements in the universe: {:d}'.format(len(U)))

print('There are {:d} collections:'.format(len(V)),V)
print('Number of sets: N={:d}'.format(len(V)))

The universe is [0, 2, 3, 6, 7, 8]
Number of elements in the universe: 6
There are 5 collections: [{0, 3, 6, 7, 8}, {8, 0, 3, 7}, {2, 3, 6, 7}, {0, 2, 6}, {0, 8, 3, 7}]
Number of sets: N=5


### Solve the Set Cover Problem with Simulated Annealing

In [59]:
# Build the model for the random instance and sample it with the classical
# simulated-annealing sampler; solve_bqm prints the selection it returns.
bqm, x = build_setcover_bqm(U, V)
best_solution = solve_bqm(bqm=bqm, x=x, sampler=SimulatedAnnealingSampler())

[0, 1, 1, 0, 0]


### Solve the Set Cover Problem with Quantum Annealing (Leap Hybrid Solver)

In [60]:
# Rebuild the same model and hand it to the Leap hybrid sampler instead;
# solve_bqm prints the selection it returns.
bqm, x = build_setcover_bqm(U, V)
best_solution = solve_bqm(bqm=bqm, x=x, sampler=LeapHybridSampler())

[1, 0, 0, 1, 0]


# Grocery Data 
## Small dataset

In [162]:
# Load Data
# Small supplier-by-item cost table, read from ./data under the current working directory.
supplier_data = os.path.join(os.getcwd(), 'data/small-cost-mock.csv')
df = pd.read_csv(filepath_or_buffer=supplier_data)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,item0,item1,item2,item3,item4,item5,item6,item7,item8,...,item10,item11,item12,item13,item14,item15,item16,item17,item18,item19
0,supplier0,25.651062,16.88558,9.874869,4.073876,11.845272,4.486785,2.600526,10.754414,11.321333,...,8.981416,18.169675,-1.0,6.955355,-1.0,7.081792,23.158746,22.546562,-1.0,12.837133
1,supplier1,25.449085,23.421934,-1.0,3.909275,16.661481,3.3467,2.515956,-1.0,10.940784,...,-1.0,18.169675,-1.0,9.397129,3.742742,7.081792,23.397496,20.312218,-1.0,15.849929
2,supplier2,-1.0,-1.0,-1.0,-1.0,14.708964,-1.0,2.431386,14.080521,11.987294,...,-1.0,19.207942,10.194032,9.619108,4.668781,6.809415,27.694995,24.780906,-1.0,-1.0
3,supplier3,18.783848,16.522449,9.714302,4.032726,14.839132,-1.0,2.262246,10.089192,10.560235,...,-1.0,-1.0,11.566305,8.805184,4.630196,6.319138,27.933745,-1.0,27.917692,15.980921
4,supplier4,19.389779,21.424714,7.225514,4.69113,12.496111,3.751246,2.410244,13.082689,-1.0,...,8.309908,20.419254,10.58611,8.95317,-1.0,6.75494,26.262495,24.374662,20.938269,16.766867


In [163]:
# The supplier-name column (read below as 'Unnamed: 0') is a label, not an item,
# so keep it out of the universe even if it is not the first column.
U = [column for column in df.columns[1:] if column != 'Unnamed: 0'] # This is our inventory

# A positive entry means the supplier offers the item (absent items show up as -1.0).
# One vectorised comparison replaces a per-cell df.loc lookup.
in_stock = (df[U] > 0).to_numpy()
V = [{item for item, stocked in zip(U, row) if stocked} for row in in_stock] # This is a list of suppliers
print('There are {:d} items in the universe.\n'.format(len(U)))
print('There are {:d} suppliers.\n'.format(len(V)))

# Build the BQM
bqm,x = build_setcover_bqm(U, V)

# Solve with the Leap hybrid sampler
print('Solution:')
best_solution = solve_bqm(bqm, x, LeapHybridSampler())
print('There are {:d} suppliers selected.'.format(sum(best_solution)))
# best_solution is ordered like the rows of df, so pair each flag with its supplier name
suppliers = [name for name, chosen in zip(df['Unnamed: 0'], best_solution) if chosen > 0]
print('Selected Suppliers:', suppliers)

There are 20 items in the universe.

There are 10 suppliers.

Solution:
[1, 0, 0, 0, 0, 0, 1, 1, 0, 0]
There are 3 suppliers selected.
Selected Suppliers: ['supplier0', 'supplier6', 'supplier7']


## Medium dataset

In [164]:
# Load Data
# Medium supplier-by-item cost table, read from ./data under the current working directory.
supplier_data = os.path.join(os.getcwd(), 'data/medium-cost-mock.csv')
df = pd.read_csv(filepath_or_buffer=supplier_data)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,item0,item1,item2,item3,item4,item5,item6,item7,item8,...,item90,item91,item92,item93,item94,item95,item96,item97,item98,item99
0,supplier0,-1.0,12.793319,4.783076,18.163539,13.265165,25.160152,25.060539,5.880502,20.488834,...,14.473348,21.006017,4.114744,-1.0,1.613352,22.400665,5.290139,18.496649,13.885133,11.715602
1,supplier1,0.228314,13.933317,4.123341,17.177921,12.499867,27.041285,18.548745,6.389392,-1.0,...,17.283707,-1.0,4.724336,10.983356,-1.0,16.800498,6.289387,22.968367,-1.0,-1.0
2,supplier2,0.23312,12.413319,3.875941,18.163539,16.581456,29.627842,21.903305,6.389392,24.991874,...,16.721635,17.912108,-1.0,-1.0,-1.0,22.750675,5.936711,24.187926,13.422296,-1.0
3,supplier3,0.285993,12.413319,4.989243,12.813039,13.520264,30.568409,24.073903,6.898282,24.991874,...,17.705261,14.981035,3.962346,-1.0,1.776168,17.675524,-1.0,18.699909,12.380911,13.460479
4,supplier4,0.307622,13.426651,5.030476,15.488289,12.627417,23.514161,21.311324,-1.0,-1.0,...,14.192312,18.237782,4.610037,10.889481,1.465338,19.075566,6.406946,24.391186,13.769424,12.089504


In [165]:
# The supplier-name column (read below as 'Unnamed: 0') is a label, not an item,
# so keep it out of the universe even if it is not the first column.
U = [column for column in df.columns[1:] if column != 'Unnamed: 0'] # This is our inventory

# A positive entry means the supplier offers the item (absent items show up as -1.0).
# One vectorised comparison replaces a per-cell df.loc lookup.
in_stock = (df[U] > 0).to_numpy()
V = [{item for item, stocked in zip(U, row) if stocked} for row in in_stock] # This is a list of suppliers
print('There are {:d} items in the universe.\n'.format(len(U)))
print('There are {:d} suppliers.\n'.format(len(V)))

# Build the BQM
bqm,x = build_setcover_bqm(U, V)

# Solve with the Leap hybrid sampler
print('Solution:')
best_solution = solve_bqm(bqm, x, LeapHybridSampler())
print('There are {:d} suppliers selected.'.format(sum(best_solution)))
# best_solution is ordered like the rows of df, so pair each flag with its supplier name
suppliers = [name for name, chosen in zip(df['Unnamed: 0'], best_solution) if chosen > 0]
print('Selected Suppliers:', suppliers)

There are 100 items in the universe.

There are 40 suppliers.

Solution:
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1]
There are 35 suppliers selected.
Selected Suppliers: ['supplier0', 'supplier1', 'supplier2', 'supplier3', 'supplier4', 'supplier5', 'supplier6', 'supplier7', 'supplier8', 'supplier9', 'supplier10', 'supplier11', 'supplier13', 'supplier14', 'supplier16', 'supplier17', 'supplier18', 'supplier20', 'supplier22', 'supplier23', 'supplier24', 'supplier25', 'supplier26', 'supplier27', 'supplier28', 'supplier29', 'supplier30', 'supplier31', 'supplier33', 'supplier34', 'supplier35', 'supplier36', 'supplier37', 'supplier38', 'supplier39']


## Large dataset

In [160]:
# Load Data
# Large supplier-by-item cost table, read from ./data under the current working directory.
supplier_data = os.path.join(os.getcwd(), 'data/large-cost-mock.csv')
df = pd.read_csv(filepath_or_buffer=supplier_data)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,item0,item1,item2,item3,item4,item5,item6,item7,item8,...,item190,item191,item192,item193,item194,item195,item196,item197,item198,item199
0,supplier0,9.525326,28.172914,21.403286,1.194258,18.956791,11.873612,24.224098,9.660749,11.985156,...,-1.0,4.633874,19.557551,-1.0,1.213841,14.849395,16.399104,8.250454,14.382623,11.668045
1,supplier1,9.525326,30.844483,17.300989,-1.0,-1.0,12.293917,27.198987,12.508128,9.797389,...,24.112108,6.327979,-1.0,3.71458,-1.0,-1.0,-1.0,-1.0,16.711238,-1.0
2,supplier2,10.704652,24.529865,22.29509,1.433109,17.248972,9.456859,27.623972,9.253981,8.941307,...,29.032947,5.231793,17.256662,-1.0,-1.0,16.134439,21.136624,-1.0,-1.0,12.372151
3,supplier3,10.523217,-1.0,16.587547,1.446379,-1.0,-1.0,-1.0,10.16921,11.699795,...,25.58836,5.331447,19.174069,-1.0,1.03595,16.277222,-1.0,9.333847,17.670079,10.762765
4,supplier4,8.164565,25.744215,17.836072,1.247336,21.006174,10.927926,22.099177,11.186131,9.226667,...,-1.0,-1.0,18.215366,4.473473,0.952237,17.847831,-1.0,9.000496,15.478442,9.555726


In [None]:
# The supplier-name column (read below as 'Unnamed: 0') is a label, not an item,
# so keep it out of the universe even if it is not the first column.
U = [column for column in df.columns[1:] if column != 'Unnamed: 0'] # This is our inventory

# A positive entry means the supplier offers the item (absent items show up as -1.0).
# One vectorised comparison replaces a per-cell df.loc lookup.
in_stock = (df[U] > 0).to_numpy()
V = [{item for item, stocked in zip(U, row) if stocked} for row in in_stock] # This is a list of suppliers
print('There are {:d} items in the universe.\n'.format(len(U)))
print('There are {:d} suppliers.'.format(len(V)))

# Build the BQM
bqm,x = build_setcover_bqm(U, V)

# Solve with the Leap hybrid sampler; report the same summary as the smaller datasets
print('Solution:')
best_solution = solve_bqm(bqm, x, LeapHybridSampler())
print('There are {:d} suppliers selected.'.format(sum(best_solution)))

# best_solution is ordered like the rows of df, so pair each flag with its supplier name
suppliers = [name for name, chosen in zip(df['Unnamed: 0'], best_solution) if chosen > 0]
print('Selected Suppliers:', suppliers)

## Extra Large Dataset

In [None]:
# Load Data
# Extra-large cost table, read from ./data under the current working directory.
# NOTE(review): presumably the same supplier-by-item layout as the other files - confirm.
supplier_data = os.path.join(os.getcwd(), 'data/extra_large-cost-mock.csv')
df = pd.read_csv(filepath_or_buffer=supplier_data)
df.head(n=5)

In [None]:
# The supplier-name column (read below as 'Unnamed: 0') is a label, not an item,
# so keep it out of the universe even if it is not the first column.
U = [column for column in df.columns[1:] if column != 'Unnamed: 0'] # This is our inventory

# A positive entry is taken to mean the supplier offers the item.
# One vectorised comparison replaces a per-cell df.loc lookup.
in_stock = (df[U] > 0).to_numpy()
V = [{item for item, stocked in zip(U, row) if stocked} for row in in_stock] # This is a list of suppliers
print('There are {:d} items in the universe.\n'.format(len(U)))
print('There are {:d} suppliers.'.format(len(V)))

# Build the BQM
bqm,x = build_setcover_bqm(U, V)

# Solve with the Leap hybrid sampler; report the same summary as the smaller datasets
print('Solution:')
best_solution = solve_bqm(bqm, x, LeapHybridSampler())
print('There are {:d} suppliers selected.'.format(sum(best_solution)))

# best_solution is ordered like the rows of df, so pair each flag with its supplier name
suppliers = [name for name, chosen in zip(df['Unnamed: 0'], best_solution) if chosen > 0]
print('Selected Suppliers:', suppliers)