In [10]:
import time
import dask

import matplotlib.pyplot as plt
from pennylane import numpy as np
import pennylane as qml
from pennylane import qchem

##############################################################################
# .. warning::
#
#    This demonstration contains features like ``qml.ExpvalCost`` that are now deprecated and will soon be removed from PennyLane.
#
# This tutorial requires the ``pennylane-forest`` and ``dask``
# packages, which are installed separately using:
#
# .. code-block:: bash
#
#    pip install pennylane-forest
#    pip install "dask[delayed]"
#
# Finding the qubit Hamiltonians of :math:`H_{2}`
# -----------------------------------------------
#
# The objective of this tutorial is to evaluate the potential energy surface of molecular
# hydrogen. This is achieved by finding the ground state energy of :math:`H_{2}` as we increase
# the bond length between the hydrogen atoms.
#
# Each inter-atomic distance results in a different qubit Hamiltonian. To find the corresponding
# Hamiltonian, we use the :func:`~.pennylane.qchem.molecular_hamiltonian` function of the
# :mod:`~.pennylane.qchem` package. Further details on the mapping from the electronic
# Hamiltonian of a molecule to a qubit Hamiltonian can be found in the
# :doc:`tutorial_quantum_chemistry` and :doc:`tutorial_vqe`
# tutorials.
#
# We begin by creating a dictionary containing a selection of bond lengths and corresponding data
# files saved in `XYZ <https://en.wikipedia.org/wiki/XYZ_file_format>`__ format. These files
# follow a standard format for specifying the geometry of a molecule and can be downloaded as a
# Zip from :download:`here <../demonstrations/vqe_parallel/vqe_parallel.zip>`.

data = {  # keys: atomic separations (in Angstroms), values: corresponding files
    0.3: "vqe_parallel/h2_0.30.xyz",
    0.5: "vqe_parallel/h2_0.50.xyz",
    0.7: "vqe_parallel/h2_0.70.xyz",
    0.9: "vqe_parallel/h2_0.90.xyz",
    1.1: "vqe_parallel/h2_1.10.xyz",
    1.3: "vqe_parallel/h2_1.30.xyz",
    1.5: "vqe_parallel/h2_1.50.xyz",
    1.7: "vqe_parallel/h2_1.70.xyz",
    1.9: "vqe_parallel/h2_1.90.xyz",
    2.1: "vqe_parallel/h2_2.10.xyz",
}

##############################################################################
# The next step is to create the qubit Hamiltonians for each value of the inter-atomic distance.
# We do this by first reading the molecular geometry from the external file using the
# :func:`~.pennylane.qchem.read_structure` function and passing the atomic symbols
# and coordinates to :func:`~.pennylane.qchem.molecular_hamiltonian`.


hamiltonians = []

for separation, file in data.items():
    symbols, coordinates = qchem.read_structure(file)
    h = qchem.molecular_hamiltonian(symbols, coordinates, name=str(separation))[0]
    hamiltonians.append(h)

##############################################################################
# Each Hamiltonian can be written as a linear combination of fifteen tensor products of Pauli
# matrices. Let's take a look more closely at one of the Hamiltonians:

h = hamiltonians[0]

print("Number of terms: {}\n".format(len(h.ops)))
for op in h.ops:
    print("Measurement {} on wires {}".format(op.name, op.wires))

##############################################################################
# .. rst-class:: sphx-glr-script-out
#
#  Out:
#
#  .. code-block:: none
#
#    Number of terms: 15
#
#    Measurement Identity on wires <Wires = [0]>
#    Measurement PauliZ on wires <Wires = [0]>
#    Measurement PauliZ on wires <Wires = [1]>
#    Measurement PauliZ on wires <Wires = [2]>
#    Measurement PauliZ on wires <Wires = [3]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 1]>
#    Measurement ['PauliY', 'PauliX', 'PauliX', 'PauliY'] on wires <Wires = [0, 1, 2, 3]>
#    Measurement ['PauliY', 'PauliY', 'PauliX', 'PauliX'] on wires <Wires = [0, 1, 2, 3]>
#    Measurement ['PauliX', 'PauliX', 'PauliY', 'PauliY'] on wires <Wires = [0, 1, 2, 3]>
#    Measurement ['PauliX', 'PauliY', 'PauliY', 'PauliX'] on wires <Wires = [0, 1, 2, 3]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 2]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 3]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [1, 2]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [1, 3]>
#    Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [2, 3]>

##############################################################################
# Defining the energy function
# ----------------------------
#
# The fifteen Pauli terms comprising each Hamiltonian can conventionally be evaluated in a
# sequential manner: we evaluate one expectation value at a time before moving on to the next.
# However, this task is highly suited to parallelization. With access to multiple QPUs,
# we can split up evaluating the terms between the QPUs and gain an increase in processing speed.
#
#
# .. note::
#    Some of the Pauli terms commute, and so they can be evaluated in practice with fewer than
#    fifteen quantum circuit runs. Nevertheless, these quantum circuit runs can still be
#    parallelized to multiple QPUs.
#
# Let's suppose we have access to two quantum devices. In this tutorial we consider two
# simulators from Rigetti: ``4q-qvm`` and ``9q-square-qvm``, but we could also run on hardware
# devices from Rigetti or other providers.
#
# We can evaluate the expectation value of each Hamiltonian with eight terms run on
# one device and seven terms run on the other, as summarized by the diagram below:
#
# .. figure:: /demonstrations/vqe_parallel/vqe_diagram.png
#    :width: 65%
#    :align: center
#
# To do this, start by instantiating a device for each term:

dev1 = [qml.device("forest.qvm", device="4q-qvm") for _ in range(8)]
dev2 = [qml.device("forest.qvm", device="9q-square-qvm") for _ in range(7)]
devs = dev1 + dev2

##############################################################################
# .. note::
#
#     For the purposes of this demonstration, we are simulating the QPUs using the
#     ``forest.qvm`` simulator. To run this demonstration on hardware, simply
#     swap ``forest.qvm`` for ``forest.qpu`` and specify the hardware device to run on.
#
#     Please refer to the `Rigetti website <https://rigetti.com/>`__ for an up-to-date
#     list on available QPUs.
#
# .. warning::
#    Rigetti's QVM and Quil Compiler services must be running for this tutorial to execute. They
#    can be installed by consulting the `Rigetti documentation
#    <http://docs.rigetti.com/qcs/>`__ or, for users with Docker, by running:
#
#    .. code-block:: bash
#
#        docker run -d -p 5555:5555 rigetti/quilc -R -p 5555
#        docker run -d -p 5000:5000 rigetti/qvm -S -p 5000
#
# We must also define a circuit to prepare the ground state, which is a superposition of the
# Hartree-Fock (:math:`|1100\rangle`) and doubly-excited (:math:`|0011\rangle`) configurations.
# The simple circuit below is able to prepare states of the form :math:`\alpha |1100\rangle +
# \beta |0011\rangle` and hence encode the ground state wave function of the hydrogen molecule. The
# circuit has a single free parameter, which controls a Y-rotation on the third qubit.


def circuit(param, H):
    qml.BasisState(np.array([1, 1, 0, 0], requires_grad=False), wires=[0, 1, 2, 3])
    qml.RY(param, wires=2)
    qml.CNOT(wires=[2, 3])
    qml.CNOT(wires=[2, 0])
    qml.CNOT(wires=[3, 1])
    return qml.expval(H)


##############################################################################
# The ground state for each inter-atomic distance is characterized by a different Y-rotation angle.
# The values of these Y-rotations can be found by minimizing the ground state energy as outlined in
# :doc:`tutorial_vqe`. In this tutorial, we load pre-optimized rotations and focus on
# comparing the speed of evaluating the potential energy surface with sequential and parallel
# evaluation. These parameters can be downloaded by clicking :download:`here
# <../demonstrations/vqe_parallel/RY_params.npy>`.

params = np.load("vqe_parallel/RY_params.npy")

##############################################################################
# Calculating the potential energy surface
# ----------------------------------------
# The most vanilla execution of these 10 energy surfaces is using the standard PennyLane functionalities by executing the QNodes.
# Internally, this creates a measurement for each term in the Hamiltonian that are then sequentially computed.

Number of terms: 15

Measurement Identity on wires <Wires = [0]>
Measurement PauliZ on wires <Wires = [0]>
Measurement PauliZ on wires <Wires = [1]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 1]>
Measurement ['PauliY', 'PauliX', 'PauliX', 'PauliY'] on wires <Wires = [0, 1, 2, 3]>
Measurement ['PauliY', 'PauliY', 'PauliX', 'PauliX'] on wires <Wires = [0, 1, 2, 3]>
Measurement ['PauliX', 'PauliX', 'PauliY', 'PauliY'] on wires <Wires = [0, 1, 2, 3]>
Measurement ['PauliX', 'PauliY', 'PauliY', 'PauliX'] on wires <Wires = [0, 1, 2, 3]>
Measurement PauliZ on wires <Wires = [2]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 2]>
Measurement PauliZ on wires <Wires = [3]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [0, 3]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [1, 2]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [1, 3]>
Measurement ['PauliZ', 'PauliZ'] on wires <Wires = [2, 3]>


In [11]:
H = hamiltonians[0]

In [17]:
obs_groupings, coeffs_groupings = qml.grouping.group_observables(H.ops, H.coeffs, "commuting")

In [18]:
len(obs_groupings)

2

In [88]:
print("Evaluating the potential energy surface sequantially")
t0 = time.time()

energies_seq = []
for i, (h, param) in enumerate(zip(hamiltonians, params)):
    print(f"{i+1} / {len(params)}: Sequential execution; Running for inter-atomic distance {list(data.keys())[i]} Å")
    energies_seq.append(qml.QNode(circuit, devs[0])(param, h))

dt_seq = time.time() - t0

print(f"Evaluation time: {dt_seq:.2f} s")

Evaluating the potential energy surface sequantially
1 / 10: Sequential execution; Running for inter-atomic distance 0.3 Å
2 / 10: Sequential execution; Running for inter-atomic distance 0.5 Å
3 / 10: Sequential execution; Running for inter-atomic distance 0.7 Å
4 / 10: Sequential execution; Running for inter-atomic distance 0.9 Å
5 / 10: Sequential execution; Running for inter-atomic distance 1.1 Å
6 / 10: Sequential execution; Running for inter-atomic distance 1.3 Å
7 / 10: Sequential execution; Running for inter-atomic distance 1.5 Å
8 / 10: Sequential execution; Running for inter-atomic distance 1.7 Å
9 / 10: Sequential execution; Running for inter-atomic distance 1.9 Å
10 / 10: Sequential execution; Running for inter-atomic distance 2.1 Å
Evaluation time: 21.01 s


In [89]:
print("Evaluating the potential energy surface sequantially")
t0 = time.time()

energies_seq = []
for i, (h, param) in enumerate(zip(hamiltonians, params)):
    H_par = qml.Hamiltonian(h.coeffs, h.ops, grouping_type="qwc")
    print(f"{i+1} / {len(params)}: Sequential execution; Running for inter-atomic distance {list(data.keys())[i]} Å")
    energies_seq.append(qml.QNode(circuit, devs[0])(param, h))

dt_seq = time.time() - t0

print(f"Evaluation time: {dt_seq:.2f} s")

Evaluating the potential energy surface sequantially
1 / 10: Sequential execution; Running for inter-atomic distance 0.3 Å
2 / 10: Sequential execution; Running for inter-atomic distance 0.5 Å
3 / 10: Sequential execution; Running for inter-atomic distance 0.7 Å
4 / 10: Sequential execution; Running for inter-atomic distance 0.9 Å
5 / 10: Sequential execution; Running for inter-atomic distance 1.1 Å
6 / 10: Sequential execution; Running for inter-atomic distance 1.3 Å
7 / 10: Sequential execution; Running for inter-atomic distance 1.5 Å
8 / 10: Sequential execution; Running for inter-atomic distance 1.7 Å
9 / 10: Sequential execution; Running for inter-atomic distance 1.9 Å
10 / 10: Sequential execution; Running for inter-atomic distance 2.1 Å
Evaluation time: 20.99 s


In [90]:
H = hamiltonians[0]
qml.grouping.group_observables(H.ops, H.coeffs, "qwc")

([[Identity(wires=[0]),
   PauliY(wires=[0]) @ PauliX(wires=[1]) @ PauliX(wires=[2]) @ PauliY(wires=[3])],
  [PauliY(wires=[0]) @ PauliY(wires=[1]) @ PauliX(wires=[2]) @ PauliX(wires=[3])],
  [PauliX(wires=[0]) @ PauliX(wires=[1]) @ PauliY(wires=[2]) @ PauliY(wires=[3])],
  [PauliX(wires=[0]) @ PauliY(wires=[1]) @ PauliY(wires=[2]) @ PauliX(wires=[3])],
  [PauliZ(wires=[0]),
   PauliZ(wires=[1]),
   PauliZ(wires=[0]) @ PauliZ(wires=[1]),
   PauliZ(wires=[2]),
   PauliZ(wires=[0]) @ PauliZ(wires=[2]),
   PauliZ(wires=[3]),
   PauliZ(wires=[0]) @ PauliZ(wires=[3]),
   PauliZ(wires=[1]) @ PauliZ(wires=[2]),
   PauliZ(wires=[1]) @ PauliZ(wires=[3]),
   PauliZ(wires=[2]) @ PauliZ(wires=[3])]],
 [tensor([1.30072384, 0.04020463], requires_grad=True),
  tensor([-0.04020463], requires_grad=True),
  tensor([-0.04020463], requires_grad=True),
  tensor([0.04020463], requires_grad=True),
  tensor([ 0.25869154,  0.25869154,  0.18800464, -0.54995735,  0.14527089,
          -0.54995735,  0.18547552,  

In [91]:
H = hamiltonians[0]
qml.grouping.group_observables(H.ops, H.coeffs, "commuting")

([[Identity(wires=[0]),
   PauliZ(wires=[0]),
   PauliZ(wires=[1]),
   PauliZ(wires=[0]) @ PauliZ(wires=[1]),
   PauliZ(wires=[2]),
   PauliZ(wires=[0]) @ PauliZ(wires=[2]),
   PauliZ(wires=[3]),
   PauliZ(wires=[0]) @ PauliZ(wires=[3]),
   PauliZ(wires=[1]) @ PauliZ(wires=[2]),
   PauliZ(wires=[1]) @ PauliZ(wires=[3]),
   PauliZ(wires=[2]) @ PauliZ(wires=[3])],
  [PauliY(wires=[0]) @ PauliX(wires=[1]) @ PauliX(wires=[2]) @ PauliY(wires=[3]),
   PauliY(wires=[0]) @ PauliY(wires=[1]) @ PauliX(wires=[2]) @ PauliX(wires=[3]),
   PauliX(wires=[0]) @ PauliX(wires=[1]) @ PauliY(wires=[2]) @ PauliY(wires=[3]),
   PauliX(wires=[0]) @ PauliY(wires=[1]) @ PauliY(wires=[2]) @ PauliX(wires=[3])]],
 [tensor([ 1.30072384,  0.25869154,  0.25869154,  0.18800464, -0.54995735,
           0.14527089, -0.54995735,  0.18547552,  0.18547552,  0.14527089,
           0.19623437], requires_grad=True),
  tensor([ 0.04020463, -0.04020463, -0.04020463,  0.04020463], requires_grad=True)])

In [83]:
def compute_energy_parallel(H, devs, param):
    assert len(H.ops) == len(devs)
    results = []

    for i in range(len(H.ops)):
        qnode = qml.QNode(circuit, devs[i])
        results.append(dask.delayed(qnode)(param, H.ops[i]))

    result = H.coeffs @ dask.compute(*results, scheduler="threads")
    return result

##############################################################################
# We can now compute all 10 different energy surfaces sequentially, where each execution is making use of 
# parallel device execution.

print("Evaluating the potential energy surface in parallel")
t0 = time.time()

energies_par = []
for i, (h, param) in enumerate(zip(hamiltonians, params)):
    print(f"{i+1} / {len(params)}: Parallel execution; Running for inter-atomic distance {list(data.keys())[i]} Å")
    energies_par.append(compute_energy_parallel(h, devs, param))

dt_par = time.time() - t0

print(f"Evaluation time: {dt_par:.2f} s")

Evaluating the potential energy surface in parallel
1 / 10: Parallel execution; Running for inter-atomic distance 0.3 Å
2 / 10: Parallel execution; Running for inter-atomic distance 0.5 Å
3 / 10: Parallel execution; Running for inter-atomic distance 0.7 Å
4 / 10: Parallel execution; Running for inter-atomic distance 0.9 Å
5 / 10: Parallel execution; Running for inter-atomic distance 1.1 Å
6 / 10: Parallel execution; Running for inter-atomic distance 1.3 Å
7 / 10: Parallel execution; Running for inter-atomic distance 1.5 Å
8 / 10: Parallel execution; Running for inter-atomic distance 1.7 Å
9 / 10: Parallel execution; Running for inter-atomic distance 1.9 Å
10 / 10: Parallel execution; Running for inter-atomic distance 2.1 Å
Evaluation time: 72.24 s


In [86]:
def compute_energy_parallel_optimized(H, devs, param):
    assert len(H.ops) == len(devs)
    results = []

    obs_groupings, coeffs_groupings = qml.grouping.group_observables(H.ops, H.coeffs, "qwc")

    for i, (obs, coeffs) in enumerate(zip(obs_groupings, coeffs_groupings)):
        H_part = qml.Hamiltonian(coeffs, obs)
        qnode = qml.QNode(circuit, devs[i])
        results.append(dask.delayed(qnode)(param, H_part))

    result = qml.math.sum(dask.compute(*results, scheduler="threads"))
    return result

print("Evaluating the potential energy surface in parallel with measurement optimization")
t0 = time.time()

energies_par_opt = []
for i, (h, param) in enumerate(zip(hamiltonians, params)):
    print(f"{i+1} / {len(params)}: Parallel execution and measurement optimization; Running for inter-atomic distance {list(data.keys())[i]} Å")
    energies_par_opt.append(compute_energy_parallel_optimized(h, devs, param))

dt_par_opt = time.time() - t0

print(f"Evaluation time: {dt_par_opt:.2f} s")

Evaluating the potential energy surface in parallel with measurement optimization
1 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.3 Å
2 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.5 Å
3 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.7 Å
4 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.9 Å
5 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.1 Å
6 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.3 Å
7 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.5 Å
8 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.7 Å
9 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.9 Å
10 / 10: Parallel execution and measurement o

In [87]:
def compute_energy_parallel_optimized(H, devs, param):
    assert len(H.ops) == len(devs)
    results = []

    obs_groupings, coeffs_groupings = qml.grouping.group_observables(H.ops, H.coeffs, "commuting")

    for i, (obs, coeffs) in enumerate(zip(obs_groupings, coeffs_groupings)):
        H_part = qml.Hamiltonian(coeffs, obs)
        qnode = qml.QNode(circuit, devs[i])
        results.append(dask.delayed(qnode)(param, H_part))

    result = qml.math.sum(dask.compute(*results, scheduler="threads"))
    return result

print("Evaluating the potential energy surface in parallel with measurement optimization")
t0 = time.time()

energies_par_opt = []
for i, (h, param) in enumerate(zip(hamiltonians, params)):
    print(f"{i+1} / {len(params)}: Parallel execution and measurement optimization; Running for inter-atomic distance {list(data.keys())[i]} Å")
    energies_par_opt.append(compute_energy_parallel_optimized(h, devs, param))

dt_par_opt = time.time() - t0

print(f"Evaluation time: {dt_par_opt:.2f} s")

Evaluating the potential energy surface in parallel with measurement optimization
1 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.3 Å
2 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.5 Å
3 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.7 Å
4 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 0.9 Å
5 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.1 Å
6 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.3 Å
7 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.5 Å
8 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.7 Å
9 / 10: Parallel execution and measurement optimization; Running for inter-atomic distance 1.9 Å
10 / 10: Parallel execution and measurement o

# More general example

In [1]:
import time
import dask

import matplotlib.pyplot as plt
from pennylane import numpy as np
import pennylane as qml

In [2]:
symbols, coordinates = (['H', 'Be', 'H'], np.array([0., 0., -0.66140414, 0., 0., 0., 0., 0., 0.66140414]))
H, n_wires = qml.qchem.molecular_hamiltonian(symbols, coordinates)
devs = [qml.device("forest.qvm", device=f"{n_wires}q-qvm") for _ in range(5)]

In [3]:
def circuit(params, H):
    qml.StronglyEntanglingLayers(params, wires=range(n_wires))
    return qml.expval(H)

params = np.random.rand(2,n_wires,3)


In [5]:
# Vanilla execution
dev = devs[0]

t0 = time.time()
E_seq = qml.QNode(circuit, dev)(params, H)
dt_seq = time.time() - t0

print(f"Evaluation time with standard expval: {dt_seq:.2f} s")

KeyboardInterrupt: 

In [4]:
# dasked execution
def compute_energy_parallel(H, devs, param):
    results = []

    for i in range(len(H.ops)):
        qnode = qml.QNode(circuit, devs[i%len(devs)])
        results.append(dask.delayed(qnode)(param, H.ops[i]))

    result = H.coeffs @ dask.compute(*results, scheduler="threads")
    return result


t0 = time.time()
E_par = compute_energy_parallel(H, devs, params)
dt_par = time.time() - t0

print(f"Evaluation time with dask: {dt_par:.2f} s")

RPCError: Unhandled error in host program:
At line 240: The name "ro" has been DECLAREd more than once.

In [22]:
# dasked and optimized execution
devs = [qml.device("forest.qvm", device="10q-qvm") for _ in range(5)]
def compute_energy_parallel_optimized(H, devs, param):
    results = []

    obs_groupings, coeffs_groupings = qml.grouping.group_observables(H.ops, H.coeffs, "qwc")

    for i, (obs, coeffs) in enumerate(zip(obs_groupings, coeffs_groupings)):
        H_part = qml.Hamiltonian(coeffs, obs)
        qnode = qml.QNode(circuit, devs[i%len(devs)])
        results.append(dask.delayed(qnode)(param, H_part))

    result = qml.math.sum(dask.compute(*results, scheduler="threads"))
    return result

t0 = time.time()
E_par_opt = compute_energy_parallel_optimized(H, devs, params)
dt_par_opt = time.time() - t0

print(f"Evaluation time with dask and measurement optimization: {dt_par_opt:.2f} s")

Evaluation time with dask and measurement optimization: 12.10 s


In [23]:
# dasked and optimized execution

devs = [qml.device("forest.qvm", device="10q-qvm") for _ in range(5)]
def compute_energy_parallel_optimized(H, devs, param):
    results = []

    obs_groupings, coeffs_groupings = qml.grouping.group_observables(H.ops, H.coeffs, "commuting")

    for i, (obs, coeffs) in enumerate(zip(obs_groupings, coeffs_groupings)):
        H_part = qml.Hamiltonian(coeffs, obs)
        qnode = qml.QNode(circuit, devs[i%len(devs)])
        results.append(dask.delayed(qnode)(param, H_part))

    result = qml.math.sum(dask.compute(*results, scheduler="threads"))
    return result

t0 = time.time()
E_par_opt = compute_energy_parallel_optimized(H, devs, params)
dt_par_opt = time.time() - t0

print(f"Evaluation time with dask + optimization: {dt_par_opt:.2f} s")

Evaluation time with dask + optimization: 12.46 s
