In [1]:
import sys
import pathlib
sys.path.append(str(pathlib.PurePath(pathlib.Path.cwd().parent)))

import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import dimod
import dwave.samplers
import dwave.system
from dwave.system import DWaveSampler, FixedEmbeddingComposite
import dwave.inspector
import dwave_networkx as dnx
import minorminer
import embera

import SALib as salib
from SALib.sample import sobol as salib_sample_sobol
from SALib.analyze import delta, dgsm, fast, ff, hdmr, morris, pawn, rbd_fast, sobol

from src.particle_funcs import distance_matrix as distance_matrix
import src.leap_funcs.qubo.q_matrix as q_matrix
from src.leap_funcs.qubo import filter_samples

from src import h5py_funcs
from src.h5py_funcs import discoveries, init_custom_getstates, io, parameterstudy_using_info_file

In [2]:
num_particles = 5

part_coords_n = np.zeros((num_particles,2))
part_coords_nm1 = np.zeros((num_particles,2))

for i in range(np.shape(part_coords_n)[0]):
    part_coords_n[i,:] = [0, i]
    part_coords_nm1[i,:] = [0.5*1, i]

#fig_initial, axs_initial = plt.subplots(1,1)
#axs_initial.scatter(part_coords_n[:,0], part_coords_n[:,1], label="n")
#axs_initial.scatter(part_coords_nm1[:,0], part_coords_nm1[:,1], label="n-1")
#fig_initial.legend()
#fig_initial.show()

correct_sol = np.zeros(num_particles*num_particles)
for i in range(1, num_particles+1):
    correct_sol[(i-1)*num_particles + i -1] = 1.

print(correct_sol)

distances = distance_matrix.calc_phi_ij(part_coords_n, part_coords_nm1)
Q_dist_diag = q_matrix.q_dist_diag(distances)
Q_part = q_matrix.q_part(np.shape(distances)[0])
Q_pos = q_matrix.q_pos(np.shape(distances)[0])
Q_array = Q_dist_diag + Q_part + Q_pos
Q_dict = q_matrix.Q_convert_to_dict(Q_array)
#with np.printoptions(precision=3, linewidth=1000, threshold=1e6):
#    print(Q_array)

[1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 1.]


In [3]:
problem = {
    'num_vars': 2,
    'names': ['num_reads', 'num_sweeps'],
    'sets': np.array([[100,100], [1000,100], [2500,100], [5000,100], [100,1000], [1000,1000], [2500,1000], [5000,1000], [100,2500], [1000,2500], [2500,2500], [5000,2500]])
}

problem_salib = {
    'num_vars': 2,
    'names': ['num_reads', 'num_sweeps'],
    'bounds': [[100,5000], [100,2500]]
}
samples_salib = salib.sample.sobol.sample(problem_salib, 2)

#print(type(problem['sets']),problem['sets'])
#print(type(problem['sets'][0]), problem['sets'][0])
#print(type(samples_salib), samples_salib)
#print(type(samples_salib[0]), samples_salib[0])
#print(type(samples_salib[0][0]), samples_salib[0][0])

In [4]:
metadata_dict = {'num_particles': num_particles,
                 'num_species': 2,
                 'num_reads': 100,
                 #'deeper dict': {'key1': 'value1', 'key2': 'value2'}}
                 'num_sweeps': 100}
problem_dict = {'num_vars': 2,
                'names': ['num_reads', 'num_sweeps'],
                'bounds': [[100,5000], [100,2500]]}
parametersets_array = np.array([[100,100], [1000,100], [2500,100], [5000,100], [100,1000], [1000,1000], [2500,1000], [5000,1000], [100,2500], [1000,2500], [2500,2500], [5000,2500]])
#np.array([[100,100], [1000,100], [5000,100], [100,1000], [1000,1000], [5000,1000], [100,2500], [1000,2500], [5000,2500]])

folder_path_name = 'test_workflow_sa_out'

h5py_funcs.parameterstudy_using_info_file.prepare_info_file(metadata_dict=metadata_dict, problem_dict=problem_dict, parametersets_array=parametersets_array, folder_path_name=folder_path_name)

Folder test_workflow_sa_out does not exist. Create it.
Created new info-file parameterstudy_info.h5 in folder test_workflow_sa_out.


In [5]:
#a = np.datetime64('now')
#print(a, type(a), a.dtype)
#b = np.datetime_as_string(a)
#print(b, type(b), b.dtype)
#c = b.encode('utf-8')
#print(c, type(c))#, c.dtype)
#d = np.array([c])
#print(d, d[0], type(d), d.dtype)
#e = np.array(c)
#print(e, type(e), e.dtype)
#f = np.array([c, c, c, c])
#print(f, f[0], type(f), f.dtype)

In [6]:
with h5py.File('test_workflow_sa_out/parameterstudy_info.h5', 'r') as f:
    h5py_funcs.discoveries.discover_hdf5_file_structure(f, print_dataset_values=True)
    a = f['/parametersets/study']
    print(a.fields('sets')[()])

Filename:  test_workflow_sa_out/parameterstudy_info.h5
 - parametersets   <HDF5 group "/parametersets" (2 members)>

 - Group: /parametersets
 | - Attributes:
 |   -  num_particles : 5
 |   -  num_species : 2
 |   -  num_reads : 100
 |   -  num_sweeps : 100
 |   -  num_vars : 2
 |   -  names : ['num_reads' 'num_sweeps']
 |   -  bounds : [[ 100 5000]
 [ 100 2500]]
 | - Dataset: /parametersets/study
 |   - No attributes
 |   - shape:  (12,)
 |   - size:   12
 |   - ndim:   1
 |   - dtype:  [('sets', [('num_reads', '<i4'), ('num_sweeps', '<i4')], (1,)), ('identifiers', 'S13')]
 |   - nbytes: 252
 |     <HDF5 dataset "study": shape (12,), type "|V21">
[([( 100,  100)], b'zz_9819295194') ([(1000,  100)], b'zz_4422635505')
 ([(2500,  100)], b'zz_8725244622') ([(5000,  100)], b'zz_3269527356')
 ([( 100, 1000)], b'zz_2462195497') ([(1000, 1000)], b'zz_8023850743')
 ([(2500, 1000)], b'zz_3420268429') ([(5000, 1000)], b'zz_8928048588')
 ([( 100, 2500)], b'zz_0253663889') ([(1000, 2500)], b'zz_10

In [7]:
#ids = ['000000000000']*12
#
#recarray_dtype = np.dtype([('parametersets', [(name, np.float64) for name in problem_dict['names']],(1,)), ('identifiers', 'U{}'.format(len(ids[0])))])
#print(recarray_dtype)
#print(recarray_dtype.names)
#print(recarray_dtype)
#
#recarray_shape = (np.shape(samples_salib)[0],)
#print(recarray_shape)
#
#a = np.array([((0,1),'0000000000'),((2,3),'0000000001')], dtype=recarray_dtype)
#print(a, a.dtype, a.shape)
#b = np.rec.array([((0,1),'0000000000'),((2,3),'0000000001')], dtype=recarray_dtype)
#print(b, b.dtype, b.shape)
#print(b.parametersets)
#print(b.identifiers)
#print(np.shape(samples_salib), np.shape(ids))
#recarray_dtype = np.dtype([('parametersets', [('a', np.float64), ('b', np.float64)],(1,)), ('identifiers', 'U{}'.format(len(ids[0])))])
#c = np.rec.fromarrays([np.array([1., 1.2, 1.3]), np.array([1.4, 1.5, 1.6]), np.array(['0000000000', '0000000000', '0000000000'])], shape=(3,), formats=['f8','f8','U10'])
#print(c, c.dtype, c.shape)
#d = np.rec.fromarrays([np.array([np.array([1.]), np.array([2.])]).reshape(2,1), np.array(['0000000000'])], dtype=[('a', [('aa','f8'),('ab','f8')],(2,1)), ('b', 'U10', (1,1))])
#print(d, d.dtype, d.shape)
#print(d.a)
#print(d.a.aa)
#e_data = [([(1.,2.)], '0000000000'),
#          ([(3.,4.)], '0000000001')]
#e_dtype = [('a', [('aa','f'),('ab','f')],(1,)),
#           ('b', 'U10')]
#e = np.array(e_data, dtype=e_dtype)
#print(e, e.dtype, e.shape)
#print(e['a'])
#print(e['a']['aa'])
#
#f_data = [((1.,2.),'0000000000')]
#f_data_2 = [(tuple(samples_salib[i,j] for j in range(np.shape(samples_salib)[1])), ids[i]) for i in range(np.shape(samples_salib)[0])]
#f_dtype = [('a', [('aa','f'),('ab','f')],(1,)),
#           ('b', 'U10')]
#f = np.rec.fromrecords(f_data_2, dtype=f_dtype)
#print(f, f.dtype, f.shape)
#print(f['a'])
#print(f['a']['aa'])

In [8]:
#print(problem['names'])
data_file_name = 'parameterstudy_data.h5'
data_file_name_path = os.path.join(folder_path_name, data_file_name)
info_file_name = 'parameterstudy_info.h5'
info_file_name_path = os.path.join(folder_path_name, info_file_name)
read_parametersets = {}
with h5py.File(os.path.join(folder_path_name, info_file_name), 'r') as f:
    read_parametersets['sets'] = f['/parametersets/study'].fields('sets')[()]
    read_parametersets['identifiers'] = f['/parametersets/study'].fields('identifiers')[()]
    read_parametersets['info'] = {key: value for key, value in f['/parametersets'].attrs.items()}
#print(read_parametersets)
#print(read_parametersets['sets']['num_reads'])
#print(read_parametersets['sets'].shape)
print('Number of parametersets:', len(read_parametersets['sets']))
print('number of identifiers:', len(read_parametersets['identifiers']))
if len(problem['sets']) == len(read_parametersets['identifiers']):
    num_runs = len(problem['sets'])
else:
    raise ValueError('Number of identifiers does not match number of parametersets ({} != {})'.format(len(problem['sets']), len(read_parametersets['identifiers'])))
failed_runs = {}
for i in range(num_runs):
    try:
        print('start run {} of {}'.format(i+1, num_runs))
        kwargs = {key: read_parametersets['sets'][key][i][0] for key in read_parametersets['sets'].dtype.names} # ['sets'][key][i][0] : [0] is required because have shape (1,) 
        #print(kwargs)
        set_identifier = read_parametersets['identifiers'][i]
        sampler_sa = dwave.samplers.SimulatedAnnealingSampler()    
        h5py_funcs.parameterstudy_using_info_file.update_timestamp_in_info_file(file_name_path=info_file_name_path, set_identifier=set_identifier, name='start')
        samples_sa = sampler_sa.sample_qubo(Q_dict, **kwargs)
        print('  successfully finished sampling')
        print('  start writing data to file')

        dict_data = {'sampleset_sa': samples_sa.to_serializable(pack_samples=False), 'set_identifier': set_identifier}
        h5py_funcs.io.write_to_hdf5_file(file_name_path=data_file_name_path, dict_data=dict_data, data_name='sampleset_sa', name_suffix='', overwrite_data_in_file=False)
        h5py_funcs.parameterstudy_using_info_file.update_timestamp_in_info_file(file_name_path=info_file_name_path, set_identifier=set_identifier, name='finish')
        print('  successfully finished writing data to file')
    except Exception as e:
        print('  failed run {} of {}, check message for info, samples might still be vaild'.format(i+1, num_runs))
        failed_runs[f'{i}'] = {'set_identifier': set_identifier, 'Message': e, 'kwargs': kwargs}
        continue
print('number of failed runs: {}'.format(len(failed_runs)), ' Failed runs:', failed_runs)

Number of parametersets: 12
number of identifiers: 12
start run 1 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 2 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 3 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 4 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 5 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 6 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 7 of 12
  successfully finished sampling
  start writing data to file
  successfully finished writing data to file
start run 8 of 12
  successfully finished sampling
  start writing data

In [9]:
read_parametersets['sets'].dtype.names

('num_reads', 'num_sweeps')

In [10]:
ids_valid_solutions= filter_samples.indices_of_valid_solutions(samples_sa, sort_energies=False)
print(ids_valid_solutions)
#print(samples_sa.record[ids_valid_solutions])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 133, 134, 135, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 222, 223, 224, 225, 226, 22

In [11]:
for i, sol in enumerate(samples_sa.record):
    if i in ids_valid_solutions:
        if np.array_equal(correct_sol, sol[0]):
            print('sol', i, 'is valid and optimal')
            print(' ', sol)
#        else: 
#            print('sol', i, 'is valid but not optimal')
#            print(' ', sol)
#

sol 0 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 2 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 3 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 4 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 6 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 8 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 10 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 11 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1], -7.5, 1)
sol 12 is valid and optimal
  ([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,

In [12]:
unique_sols, indices, occurences = np.unique(samples_sa.record.sample, axis=0, return_index=True, return_counts=True)
print(unique_sols)
print(indices)
print(occurences)

[[0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1]
 [0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1]
 [0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0]
 [0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1 0]
 [0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0]
 [0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 

In [13]:
print(np.argsort(occurences))
indices_sorted_for_occurences_low_high = indices[np.argsort(occurences)[::-1]]
print(indices_sorted_for_occurences_low_high[:100])
#samples_sa.record[indices_sorted_for_occurences_low_high]

[ 0 14 21 12 22 10  9 16  4 27  1  7 20  6  2  8 23 18 13 29  5 28 11 32
 26 17 35 33 19 25 31  3 15 24 34 30 36]
[   0    1   17    5    9  513  221   86  131  330  136  526  246  129
   21 1505  610   47 1672 2176 2064  643 3120 3473 3452  807 4062  658
 2436  483 1335 1871 2990 2191 2690 4078 1290]


In [14]:
ids_valid_solutions= filter_samples.indices_of_valid_solutions(samples_sa, sort_energies=False)
print(ids_valid_solutions)
for i in range(5):
    print(indices_sorted_for_occurences_low_high[i] in ids_valid_solutions, 
          indices_sorted_for_occurences_low_high[i], 
          samples_sa.record[indices_sorted_for_occurences_low_high[i]])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 133, 134, 135, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 222, 223, 224, 225, 226, 22

In [15]:
h5py_funcs.io.write_to_hdf5_file(file_name_path='test.h5', dict_data=samples_sa.to_serializable(pack_samples=False), data_name='sampleset_sa', overwrite_data_in_file=True)

KeyError: 'set_identifier'