# Problem 2
**Author:** Sergio López Baños (/LopezBanos)

## Import Packages

In [1]:
import dimod           # D-WAVE share API for samplers [1]
import neal
import numpy as np     # Python Scientific Package     [2]
import pandas as pd    # Pandas (Data management)      [3]
from utils import path_for_any_os
from dimod import ConstrainedQuadraticModel,BinaryQuadraticModel, Binary, quicksum

## Loading and Cleaning Data
I defined a function in `utils.py` that allow me to read the instances.txt files for any system as long as they are saved in the same way I did, i.e.,  <br>
*instances -> QKP -> example.txt*

In [3]:
# Path to file
path = path_for_any_os('QKP', 'r_10_100_13.txt')

# Name of the file
NAME = pd.read_csv(path, skiprows=0, delim_whitespace=True, nrows=1, header=None).iloc[0].to_list()[0]

# Number of decision variables
N = int(pd.read_csv(path, skiprows=1, delim_whitespace=True, nrows=1, header=None).iloc[0].to_list()[0])

# Objective function coefficients (No constraints)
df_obj = pd.read_csv(path, skiprows=2, delim_whitespace=True, nrows=N, header=None)

# Sanity check
print('The total number of decision variables is ', N)

The total number of decision variables is  10


In [4]:
df_obj

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,91,78.0,22.0,4.0,48.0,85.0,46.0,81.0,3.0,26.0
1,55,23.0,35.0,44.0,5.0,91.0,95.0,26.0,40.0,
2,92,11.0,20.0,43.0,71.0,83.0,27.0,65.0,,
3,7,57.0,33.0,38.0,57.0,63.0,82.0,,,
4,100,87.0,91.0,83.0,44.0,48.0,,,,
5,69,57.0,79.0,89.0,21.0,,,,,
6,9,40.0,22.0,26.0,,,,,,
7,50,6.0,7.0,,,,,,,
8,71,52.0,,,,,,,,
9,17,,,,,,,,,


In [5]:
#########################
# Knapsack Coefficients # 
#########################
# Diagonal terms (i=j)
v = df_obj.iloc[0].to_list()

# Crossed terms (i != j)
vv = []
for i in range(1,N):
    line_list = df_obj.iloc[i].to_list()
    # Clean nan values
    cleanedList = [x for x in line_list if str(x) != 'nan']
    vv.append(cleanedList)

# Convert the list of list into a np.array (either you can have a list of list and iterate over it
# where the first list correspond to all the combinations of the first variable x1x2 x1x3 x1x4 ...)
vv = [item for sublist in vv for item in sublist]
# Sanity Check
print('The diagonal terms are stored in the following list: ',v)
print('The crossed terms are stored in the following list:')
print(vv)

The diagonal terms are stored in the following list:  [91.0, 78.0, 22.0, 4.0, 48.0, 85.0, 46.0, 81.0, 3.0, 26.0]
The crossed terms are stored in the following list:
[55.0, 23.0, 35.0, 44.0, 5.0, 91.0, 95.0, 26.0, 40.0, 92.0, 11.0, 20.0, 43.0, 71.0, 83.0, 27.0, 65.0, 7.0, 57.0, 33.0, 38.0, 57.0, 63.0, 82.0, 100.0, 87.0, 91.0, 83.0, 44.0, 48.0, 69.0, 57.0, 79.0, 89.0, 21.0, 9.0, 40.0, 22.0, 26.0, 50.0, 6.0, 7.0, 71.0, 52.0, 17.0]


In [6]:
#########################
#      Constraints      # 
#########################
# Constraints coefficients
df_constraints = pd.read_csv(path, delim_whitespace=True, skiprows=N+5, nrows=1, header=None)
a = df_constraints.iloc[0].to_list()

# Constraint upper bound
df_upper_bound = pd.read_csv(path, delim_whitespace=True, skiprows=N+4, nrows=1, header=None)
b = df_upper_bound.iloc[0].to_list()[0]

## Build the CQM model (fastest way to build a QUBO model using D-Wave)

In [7]:
# Build the CQM
cqm = ConstrainedQuadraticModel()

In [8]:
# Create the variables classes(CQM admits integer variables)
bin_variables = [Binary('x_{}'.format(i+1)) for i in range(N)]
index_columns = ['energy', 'num_occurrences','is_feasible', 'is_satisfied']
for i in range(N):
    index_columns.append('x_{}'.format(i+1))
obj_weight_value = 1.0   # In this case this is irrelevant since we have a single objective, but in case we had more we can set how relevant each objective is by adjusting this weight

In [9]:
# Set the objectives. In this case: maximize value which is analogous as minimize the
# same function with a minus in front.

# Lineal objective
lineal_objective = -obj_weight_value * quicksum(v[i] * bin_variables[i] for i in range(N))

# Crossed-terms
quadratic_objective = 0
slice_counter = 0
for i in range(N-1):
    quadratic_objective-=obj_weight_value * quicksum(vv[slice_counter + j] * bin_variables[i] * bin_variables[j] for j in range(N-1-i))
    slice_counter += 9-i


In [10]:
# Add terms of objective function
lineal_objective
quadratic_objective
objective = lineal_objective + quadratic_objective
objective

BinaryQuadraticModel({'x_1': -146.0, 'x_2': -89.0, 'x_3': -55.0, 'x_4': -87.0, 'x_5': -69.0, 'x_6': -85.0, 'x_7': -46.0, 'x_8': -81.0, 'x_9': -3.0, 'x_10': -26.0}, {('x_2', 'x_1'): -115.0, ('x_3', 'x_1'): -42.0, ('x_3', 'x_2'): -77.0, ('x_4', 'x_1'): -144.0, ('x_4', 'x_2'): -130.0, ('x_4', 'x_3'): -129.0, ('x_5', 'x_1'): -74.0, ('x_5', 'x_2'): -128.0, ('x_5', 'x_3'): -136.0, ('x_5', 'x_4'): -133.0, ('x_6', 'x_1'): -100.0, ('x_6', 'x_2'): -123.0, ('x_6', 'x_3'): -85.0, ('x_6', 'x_4'): -74.0, ('x_7', 'x_1'): -145.0, ('x_7', 'x_2'): -33.0, ('x_7', 'x_3'): -89.0, ('x_8', 'x_1'): -97.0, ('x_8', 'x_2'): -117.0, ('x_9', 'x_1'): -57.0}, -0.0, 'BINARY')

In [11]:
objective.shape

(10, 20)

In [12]:
# Add the objective to the CQM
cqm.set_objective(objective)

# Set the constraints. In this case: maximum weight constraint
cqm.add_constraint(quicksum(a[i]*bin_variables[i] for i in range(N)) <= b, label = 'max_weight')

'max_weight'

### Warning:
If the problem has many variables it is not recomendable to do an exact sample as we would spent tons of computational resources for it. Instead we should look for an heuristic such as simulated annealing. Furthermore, if you try to solve cqm model with more than 32 variables you will get the following error: <br>
`maximum supported dimension for an ndarray is 32, found 100`

In [13]:
# Finally, run the algorithm and get the results
cqm_exactsolver = dimod.ExactCQMSolver()   # Select the class
results = cqm_exactsolver.sample_cqm(cqm)  # Select a method for that class 
print(results.info)

{'constraint_labels': ['max_weight']}


In [14]:
results

SampleSet(rec.array([([0, 0, 0, 0, 0, 0, 0, 0, 0, 0],     0., 1, [ True],  True),
           ([0, 0, 1, 0, 0, 0, 0, 0, 0, 0],   -89., 1, [ True],  True),
           ([1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  -146., 1, [ True],  True),
           ...,
           ([0, 1, 1, 1, 1, 1, 1, 1, 1, 1], -1795., 1, [False], False),
           ([1, 1, 0, 1, 1, 1, 1, 1, 1, 1], -1903., 1, [False], False),
           ([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], -2715., 1, [False], False)],
          dtype=[('sample', '<i8', (10,)), ('energy', '<f8'), ('num_occurrences', '<i8'), ('is_satisfied', '?', (1,)), ('is_feasible', '?')]), Variables(['x_1', 'x_10', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8', 'x_9']), {'constraint_labels': ['max_weight']}, 'INTEGER')

In [15]:
results_table = results.to_pandas_dataframe()
results_table = results_table[index_columns]
results_table = results_table.sort_values(by=['energy'])
pd.set_option('display.max_rows', 7)
# We are only interested in feasible solutions i.e. those that satisfy the constraints
results_table[results_table.is_feasible == True].head(5)

Unnamed: 0,energy,num_occurrences,is_feasible,is_satisfied,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10
223,-2162.0,1,True,True,1,1,1,1,1,0,1,1,0,0
239,-2089.0,1,True,True,1,1,1,1,0,1,1,1,0,0
95,-1867.0,1,True,True,1,1,1,1,1,0,1,0,0,0
159,-1849.0,1,True,True,1,1,1,1,1,0,0,1,0,0
254,-1817.0,1,True,True,1,0,1,1,1,1,1,1,0,0


## From cqm_to_bqm:
The instances Fujitsu has proposed have many variables. For this reason it is a good idea to solve the problem with an heuristic method such as simulated annealing.

In [19]:
bqm, invert = dimod.cqm_to_bqm(cqm)
#results = dimod.SimulatedAnnealingSampler().sample(bqm, num_reads=50)
results = neal.sampler.SimulatedAnnealingSampler().sample(bqm, num_reads=1000, num_sweeps=100*N, num_sweeps_per_beta=10*N)

In [20]:
results_table = results.to_pandas_dataframe()
results_table = results_table[index_columns]
results_table = results_table.sort_values(by=['energy'])
pd.set_option('display.max_rows', 7)
pd.set_option('display.max_columns', 500)
# We are only interested in feasible solutions i.e. those that satisfy the constraints
# results_table[results_table.is_feasible == True].head(5)

In [21]:
results_table.head(5)

Unnamed: 0,energy,num_occurrences,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10
375,-1867.0,1,1,1,1,1,1,0,1,0,0,0
630,-1867.0,1,1,1,1,1,1,0,1,0,0,0
84,-1849.0,1,1,1,1,1,1,0,0,1,0,0
580,-1849.0,1,1,1,1,1,1,0,0,1,0,0
722,-1817.0,1,1,0,1,1,1,1,1,1,0,0


## Read .csv
The following cell can be used to load any .csv file.

In [25]:
df_sol = pd.read_csv('QKP_csv/r_100_50_4.csv')
#df_sol = df_sol[index_columns]
df_sol = df_sol.sort_values(by=['energy'])
pd.set_option('display.max_rows', 7)
pd.set_option('display.max_columns', 500)
df_sol

Unnamed: 0.1,Unnamed: 0,energy,num_occurrences,x_1,x_2,x_3,x_4,x_5,x_6,x_7,x_8,x_9,x_10,x_11,x_12,x_13,x_14,x_15,x_16,x_17,x_18,x_19,x_20,x_21,x_22,x_23,x_24,x_25,x_26,x_27,x_28,x_29,x_30,x_31,x_32,x_33,x_34,x_35,x_36,x_37,x_38,x_39,x_40,x_41,x_42,x_43,x_44,x_45,x_46,x_47,x_48,x_49,x_50,x_51,x_52,x_53,x_54,x_55,x_56,x_57,x_58,x_59,x_60,x_61,x_62,x_63,x_64,x_65,x_66,x_67,x_68,x_69,x_70,x_71,x_72,x_73,x_74,x_75,x_76,x_77,x_78,x_79,x_80,x_81,x_82,x_83,x_84,x_85,x_86,x_87,x_88,x_89,x_90,x_91,x_92,x_93,x_94,x_95,x_96,x_97,x_98,x_99,x_100
0,37,-102160.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,1,1,0,1,1,0,0,0,1,1,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,1,0,0,1
1,12,-99511.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,1,1,0,0,0,1,1,0,1,0,0,1,1,0,1,0,1,0,1,1,0,0,0,0,0,0,1,1,0,1,0,1,1,0,0,0,1,1
2,9,-94080.0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,0,0,1,1,1,1,0,0,0,0,0,1,1,0,1,0,1,1,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47,40,796.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0
48,11,1823.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1
49,13,17316.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,0,0,1,1,0,0,1


## Bibliography
**[1]** *dimod Documentation*: https://readthedocs.org/projects/test-projecttemplate-dimod/downloads/pdf/latest/ <br>
**[2]** *Harris, C.R., Millman, K.J., van der Walt, S.J. et al. Array programming with NumPy. Nature 585, 357–362 (2020). DOI: 10.1038/s41586-020-2649-2.* <br>
**[3]** *McKinney, W. (2010). Data Structures for Statistical Computing in Python. In Proceedings of the Python in Science Conferences. https://doi.org/10.25080/majora-92bf1922-00a* <br>