Add device and gradient expansions to the new batch-execution pipeline #1651
Changes from 63 commits
```diff
@@ -81,6 +81,7 @@
   `qml.beta.QNode`, and `@qml.beta.qnode`.
   [(#1642)](https://github.com/PennyLaneAI/pennylane/pull/1642)
   [(#1646)](https://github.com/PennyLaneAI/pennylane/pull/1646)
+  [(#1651)](https://github.com/PennyLaneAI/pennylane/pull/1651)

   It differs from the standard QNode in several ways:
```
```
@@ -106,21 +107,52 @@
```

  significant performance improvement when executing the QNode on remote
  quantum hardware.

  - When decomposing the circuit, the default decomposition strategy will prioritize
    decompositions that result in the smallest number of parametrized operations
    required to satisfy the differentiation method. Additional decompositions required
    to satisfy the native gate set of the quantum device will be performed later, by the
    device at execution time. While this may lead to a slight increase in classical processing,
    it significantly reduces the number of circuit evaluations needed to compute
    gradients of complex unitaries.

  In an upcoming release, this QNode will replace the existing one. If you come across any bugs
  while using this QNode, please let us know via a [bug
  report](https://github.com/PennyLaneAI/pennylane/issues/new?assignees=&labels=bug+%3Abug%3A&template=bug_report.yml&title=%5BBUG%5D)
  on our GitHub bug tracker.

  Currently, this beta QNode does not support the following features:

  - Circuit decompositions
  - Non-mutability via the `mutable` keyword argument
  - Viewing specifications with `qml.specs`
  - The `reversible` QNode differentiation method
  - The ability to specify a `dtype` when using PyTorch and TensorFlow

  It is also not tested with the `qml.qnn` module.
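The benefit of the gradient-first decomposition strategy can be made concrete with a back-of-the-envelope sketch. This is not PennyLane code; `shift_rule_evaluations` is a hypothetical helper, and the two-evaluations-per-parameter figure assumes the standard two-term parameter-shift rule:

```python
def shift_rule_evaluations(num_trainable_params, shifts_per_param=2):
    """Circuit evaluations needed for a two-term parameter-shift gradient."""
    return num_trainable_params * shifts_per_param

# A complex unitary with its own gradient rule may count as a single
# parametrized operation, while its device-level decomposition could
# contain many parametrized native gates.
gradient_first = shift_rule_evaluations(3)   # decompose only as far as the gradient needs
device_first = shift_rule_evaluations(30)    # fully decomposed to native gates first

assert gradient_first < device_first
```

Under these (illustrative) numbers, prioritizing the differentiation method's decomposition needs 6 gradient circuits instead of 60, at the cost of the device repeating its own expansion at execution time.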
* Two new methods were added to the Device API, allowing PennyLane devices
  increased control over circuit decompositions.
  [(#1651)](https://github.com/PennyLaneAI/pennylane/pull/1651)

  - `Device.expand_fn(tape) -> tape`: expands a tape such that it is supported by the device. By
    default, performs the standard device-specific gate set decomposition done in the default
    QNode. Devices may overwrite this method in order to define their own decomposition logic.

Review discussion:

> can a user overwrite this logic?

> Oh, this is a good point 🤔 At the moment yes, but it looks a bit hacky. You could do
> something like this:
>
> ```python
> >>> def my_custom_expand_fn(tape, **kwargs):
> ...     print("hello")
> ...     return tape
> >>> qnode.device.expand_fn = my_custom_expand_fn
> >>> qnode(0.5)
> hello
> tensor(0.87758256, requires_grad=True)
> ```
>
> Hmmm 🤔 Do you think this will be useful?

> @glassnotes I think @anthayes92 might be on to something re: custom decompositions....

> I mean, we could even support something like how you currently 'register' QNode execution
> wrappers while writing a batch transform:
>
> ```python
> dev = qml.device("default.qubit", wires=2)
>
> @dev.custom_expand
> def my_expansion_function(tape, **kwargs):
>     ...
>     return tape
>
> # from now on, the custom expansion is called whenever
> # the device is executed.
> ```
>
> This is more powerful (too powerful?) compared to a dictionary of gates to custom
> decompositions. But I still have some question marks:

> How would custom decompositions be specified in these cases? What if we did something like
> this, which combines a few of the ideas floating around:
>
> ```python
> custom_decomps = {qml.Hadamard: h_func, qml.CNOT: cnot_func}
>
> def custom_expand(tape, custom_decomps):
>     # applies custom decompositions
>     ...
>
> qnode.device.set_expand_fn(custom_expand)
> ```
>
> but where the
>
> ```python
> def set_expand_fn(custom_decomps):
>     for op, decomp in custom_decomps.items():
>         # Ensure all the operations in the decomposition are valid for the device
>         ...
>         # Register the new decompositions to the operators
>         if decomp_is_valid:
>             op.register_new_decomposition(decomp)
> ```
>
> If we do this kind of validation, it ensures that we can apply the expansion after the
> gradient tapes have already been constructed, but with the guarantee that they'll still run
> on the device.

> I really like @glassnotes' suggestion of having expansion after the gradient tapes have
> already been constructed! So would the logic here look like: if custom gates are unsupported
> then decompose to device native gates, so that this is where the guarantee they'll still run
> on the device comes from?
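The "overwrite `expand_fn`" pattern from the thread above can be sketched with a toy stand-in class. `ToyDevice` and its list-based "tapes" are hypothetical, not the real PennyLane `Device` API; the point is only that an instance attribute shadows the default method:

```python
class ToyDevice:
    """Stand-in for a device that exposes an overridable expand_fn."""

    def expand_fn(self, tape, **kwargs):
        # Default behaviour: return the tape unchanged.
        return tape

    def execute(self, tape):
        # The device always routes execution through expand_fn first.
        tape = self.expand_fn(tape)
        return sum(tape)  # toy "execution": just sum the entries


def my_custom_expand_fn(tape, **kwargs):
    # Custom expansion: drop zero entries (standing in for trivial gates).
    return [op for op in tape if op != 0]


dev = ToyDevice()
# Monkey-patch the instance, as in the review thread. Because this is an
# instance attribute, Python calls the plain function (no bound `self`).
dev.expand_fn = my_custom_expand_fn

result = dev.execute([1, 0, 2])  # expands to [1, 2], then sums to 3
```

A decorator-based `@dev.custom_expand` registration, as proposed later in the thread, would be a thin wrapper around exactly this attribute assignment.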
    Note that the numerical result after applying this method should remain unchanged; PennyLane
    will assume that the expanded tape returns exactly the same value as the original tape when
    executed.
  - `Device.batch_transform(tape) -> (tapes, processing_fn)`: pre-processes the tape in the case
    where the device needs to generate multiple circuits to execute from the input circuit. The
    requirement of a post-processing function makes this distinct from the `expand_fn` method
    above.

    By default, this method applies the transform

    .. math:: \left\langle \sum_i c_i h_i \right\rangle \rightarrow \sum_i c_i \left\langle h_i \right\rangle

    if `expval(H)` is present on devices that do not natively support Hamiltonians with
    non-commuting terms.
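The linearity this default transform relies on can be checked numerically. A small sketch with NumPy (an assumption of this example, not part of the changelog), using Pauli matrices as the Hamiltonian terms:

```python
import numpy as np

# Two non-commuting terms and their coefficients: H = 0.5 X - 0.3 Z
X = np.array([[0, 1], [1, 0]], dtype=complex)
Z = np.array([[1, 0], [0, -1]], dtype=complex)
coeffs = [0.5, -0.3]
terms = [X, Z]

# A normalized test state |psi> = (|0> + |1>) / sqrt(2)
psi = np.array([1.0, 1.0], dtype=complex) / np.sqrt(2)


def expval(op, state):
    """Expectation value <state|op|state> (real part)."""
    return float(np.real(state.conj() @ op @ state))


# <sum_i c_i h_i>, evaluated as a single observable ...
H = sum(c * h for c, h in zip(coeffs, terms))
lhs = expval(H, psi)

# ... equals sum_i c_i <h_i>: one expectation per term, one circuit each
rhs = sum(c * expval(h, psi) for c, h in zip(coeffs, terms))

assert np.isclose(lhs, rhs)
```

Each term on the right-hand side can be measured in its own circuit, which is why the device returns multiple tapes together with a `processing_fn` that recombines the results with the coefficients.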
<h3>Improvements</h3>

* The tests for qubit operations are split into multiple files.
```diff
@@ -48,7 +48,10 @@ def gradient_expand(tape, depth=10):
         and ((supported_op(obj) and trainable_op(obj)) or not trainable_op(obj))
     )

-        return tape.expand(depth=depth, stop_at=stop_cond)
+        new_tape = tape.expand(depth=depth, stop_at=stop_cond)
+        params = new_tape.get_parameters(trainable_only=False)
+        new_tape.trainable_params = qml.math.get_trainable_indices(params)
+        return new_tape

     return tape
```

Review discussion on lines +52 to +53:

> Turns out these two lines are required to solve a bug I discovered while writing the tests

> is this related to the …

> I really hesitate to say this, but I often don't fully understand the trainable_params
> setting 😬 Each autodiff framework is different, it is affected by expansion, by higher order
> derivatives, etc. So a lot of the current logic is simply guided by 'this causes the tests to
> pass, for all interfaces, for all QNode variations, for all order derivatives, for all
> differentiation methods'.

> This is how I feel any time I have to write interface tests 😓

> Oh ok, could this be something to think about addressing for the upcoming planning? Or is it
> the case that since it only affects devs and we can generally muddle through that it's lower
> priority

> we all feel it 😿
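A rough sketch of what "recompute the trainable indices after expansion" amounts to. This uses a toy `requires_grad` flag and a hypothetical `get_trainable_indices` analogue, standing in for `qml.math.get_trainable_indices`, which inspects framework-specific tensors:

```python
class Param:
    """Toy parameter carrying an autodiff-style trainability flag."""

    def __init__(self, value, requires_grad=False):
        self.value = value
        self.requires_grad = requires_grad


def get_trainable_indices(params):
    # Analogue of qml.math.get_trainable_indices for this toy type:
    # the set of positions whose parameters are marked trainable.
    return {i for i, p in enumerate(params) if p.requires_grad}


# Expansion can change the parameter list (e.g. one unitary's parameters
# are replaced by those of its decomposition), so the trainable indices
# must be recomputed from the *new* tape's parameters.
expanded_params = [Param(0.1, True), Param(0.2, False), Param(0.3, True)]
trainable = get_trainable_indices(expanded_params)  # {0, 2}
```

Without this step, the expanded tape would inherit trainable indices that refer to positions in the old parameter list, which is consistent with the bug described in the thread.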
```diff
@@ -174,7 +174,7 @@ def grad_fn(dy):

     # Generate and execute the required gradient tapes
     if _n == max_diff:
-        with qml.tape.Unwrap(*tapes):
+        with qml.tape.Unwrap(*tapes, set_trainable=False):

             vjp_tapes, processing_fn = qml.gradients.batch_vjp(
                 tapes,
                 dy,
```

Review discussion:

> A bug I discovered. I think by now, almost all cases of … In a new PR, I will remove the …
Review discussion:

> when do decompositions generally occur? e.g. if I was running a simple optimisation with a PL
> circuit, at what level does this happen on hardware/simulator?

> - `expansion_strategy="device"`: decomposition happens in the QNode, during construction, by
>   querying the device for its supported gate set. This is beneficial in terms of overhead
>   (since the decomposition only happens once), but results in future quantum
>   transforms/compilations working with a potentially very big/deep circuit.
>
> - `expansion_strategy="gradient"`: decomposition happens in the QNode, during construction, by
>   querying the gradient transform. Typically, the decomposed circuit will not be as deep as
>   the device-decomposed one, since a lot of complex unitaries have gradient rules defined.
>   Later on, further decompositions may be required on the device to get the circuit down to
>   native gate sets. This is beneficial in terms of a reduction in quantum resources, at the
>   expense of moving the device decomposition down to every evaluation of the device (so
>   additional classical overhead).

> Seems to be yet another benefit for caching parametric circuits to reuse device translations.

> Yep, precisely 💯 I would even argue, this is only fully solved by parametric compilation.

> Followup question: suppose I do something like … (or alternatively the gradient strategy).
> When does decomposition happen currently vs. in this new PR w.r.t. the compilation transform?
> As you suggest @josh146, we would want compilation to happen before either the `device` or
> `gradient` strategy, so that compilation is acting on a smaller circuit rather than the
> full-depth expanded one, consequently leading to a smaller circuit that gets expanded /
> gradient transformed. (That said, it's possible that a decomposition leads to optimizations in
> the compilation pipeline that might not otherwise be recognized...)

> Thanks for the detailed explanation @josh146, that makes things very clear! The gradient
> transform continues to impress me!

> @glassnotes, correct me if wrong, but `compile()` is a qfunc transform, not a QNode transform?
> So the following order is needed: … and just based on the ordering, the `compile` transform
> would always occur prior to the QNode's expansions.

> Yes, you're 100% correct, my bad 🤦‍♀️
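The ordering conclusion above can be sketched with toy list-based "transforms". Both helpers are purely illustrative stand-ins (not PennyLane's `compile` or device expansion), with circuits modelled as lists of gate names:

```python
def compile_transform(ops):
    """Toy qfunc transform: cancel adjacent gate/inverse pairs."""
    out = []
    for op in ops:
        if out and (out[-1] == op + "^-1" or op == out[-1] + "^-1"):
            out.pop()  # adjacent inverses cancel
        else:
            out.append(op)
    return out


def device_expand(ops, decomps):
    """Toy device expansion: replace each op by its native decomposition."""
    return [g for op in ops for g in decomps.get(op, [op])]


# Hypothetical native decomposition of a big unitary "U"
decomps = {"U": ["RZ", "RY", "RZ"]}
circuit = ["U", "U^-1", "CNOT"]

# The qfunc transform runs first, on the small pre-expansion circuit ...
compiled = compile_transform(circuit)        # "U" and "U^-1" cancel
# ... and only then does the device expansion act on the compiled result.
expanded = device_expand(compiled, decomps)
```

Running expansion first would instead hand the compiler six `RZ`/`RY` gates whose cancellation is much harder to recognize, which is the cost the thread discusses.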