diff --git a/pennylane/gradients/__init__.py b/pennylane/gradients/__init__.py index 3cbfaf03468..105201a1711 100644 --- a/pennylane/gradients/__init__.py +++ b/pennylane/gradients/__init__.py @@ -15,5 +15,7 @@ import pennylane as qml from . import finite_difference +from . import parameter_shift from .finite_difference import finite_diff, finite_diff_coeffs, generate_shifted_tapes +from .parameter_shift import param_shift diff --git a/pennylane/gradients/finite_difference.py b/pennylane/gradients/finite_difference.py index 557ed7035cc..cc91ba7fa80 100644 --- a/pennylane/gradients/finite_difference.py +++ b/pennylane/gradients/finite_difference.py @@ -16,12 +16,15 @@ of a quantum tape. """ # pylint: disable=protected-access,too-many-arguments +import functools + import numpy as np from scipy.special import factorial import pennylane as qml +@functools.lru_cache(maxsize=None) def finite_diff_coeffs(n, approx_order, strategy): r"""Generate the finite difference shift values and corresponding term coefficients for a given derivative order, approximation accuracy, diff --git a/pennylane/gradients/parameter_shift.py b/pennylane/gradients/parameter_shift.py new file mode 100644 index 00000000000..38005ddc334 --- /dev/null +++ b/pennylane/gradients/parameter_shift.py @@ -0,0 +1,516 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module contains functions for computing the parameter-shift gradient +of a qubit-based quantum tape. +""" +# pylint: disable=protected-access,too-many-arguments +import numpy as np + +import pennylane as qml + +from .finite_difference import finite_diff, generate_shifted_tapes + + +NONINVOLUTORY_OBS = { + "Hermitian": lambda obs: obs.__class__(obs.matrix @ obs.matrix, wires=obs.wires), + "SparseHamiltonian": lambda obs: obs.__class__(obs.matrix @ obs.matrix, wires=obs.wires), + "Projector": lambda obs: obs, +} +"""Dict[str, callable]: mapping from a non-involutory observable name +to a callable that accepts an observable object, and returns the square +of that observable. +""" + + +def _square_observable(obs): + """Returns the square of an observable.""" + + if isinstance(obs, qml.operation.Tensor): + # Observable is a tensor, we must consider its + # component observables independently. Note that + # we assume all component observables are on distinct wires. + + components_squared = [] + + for comp in obs.obs: + + try: + components_squared.append(NONINVOLUTORY_OBS[comp.name](comp)) + except KeyError: + # component is involutory + pass + + return qml.operation.Tensor(*components_squared) + + return NONINVOLUTORY_OBS[obs.name](obs) + + +def _get_operation_recipe(tape, t_idx, shift=np.pi / 2): + """Utility function to return the parameter-shift rule + of the operation corresponding to trainable parameter + t_idx on tape. + + If the corresponding operation has grad_recipe=None, then + the default two-term parameter-shift rule is assumed. 
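+
+    As an illustrative sketch (an assumption based on the default two-term
+    rule documented in ``param_shift`` below): for an operation with two
+    distinct eigenvalues and the default ``shift=np.pi / 2``, the returned
+    recipe is expected to take the form
+    ``[[0.5, 1.0, np.pi / 2], [-0.5, 1.0, -np.pi / 2]]``,
+    i.e. one ``[coefficient, multiplier, shift]`` triple per term.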
+ """ + # get the index of the parameter in the tape + parameter_idx = list(tape.trainable_params)[t_idx] + + # get the corresponding operation + op = tape._par_info[parameter_idx]["op"] + + # get the corresponding operation parameter index + # (that is, index of the parameter within the operation) + op_p_idx = tape._par_info[parameter_idx]["p_idx"] + + # return the parameter-shift gradient for that + # operation parameter. + return op.get_parameter_shift(op_p_idx, shift=shift) + + +def _process_gradient_recipe(gradient_recipe, tol=1e-10): + """Utility function to process gradient recipes.""" + gradient_recipe = np.array(gradient_recipe).T + # remove all small coefficients, shifts, and multipliers + gradient_recipe[np.abs(gradient_recipe) < tol] = 0 + # remove columns where the coefficients are 0 + gradient_recipe = gradient_recipe[:, ~(gradient_recipe[0] == 0)] + # sort columns according to abs(shift) + return gradient_recipe[:, np.argsort(np.abs(gradient_recipe)[-1])] + + +def _gradient_analysis(tape, use_graph=True): + """Update the parameter information dictionary of the tape with + gradient information of each parameter.""" + + if getattr(tape, "_gradient_fn", None) is param_shift: + # gradient analysis has already been performed on this tape + return + + tape._gradient_fn = param_shift + + for idx, info in tape._par_info.items(): + + if idx not in tape.trainable_params: + info["grad_method"] = None + else: + op = tape._par_info[idx]["op"] + + if op.grad_method == "F": + info["grad_method"] = "F" + else: + info["grad_method"] = tape._grad_method( + idx, use_graph=use_graph, default_method="A" + ) + + +def expval_param_shift(tape, argnum=None, shift=np.pi / 2, gradient_recipes=None, f0=None): + r"""Generate the parameter-shift tapes and postprocessing methods required + to compute the gradient of a gate parameter with respect to an + expectation value. + + Args: + tape (.QuantumTape): quantum tape to differentiate + argnum (int or list[int] or None): Trainable parameter indices to differentiate + with respect to. If not provided, the derivatives with respect to all + trainable indices are returned. + shift (float): The shift value to use for the two-term parameter-shift formula. + Only valid if the operation in question supports the two-term parameter-shift + rule (that is, it has two distinct eigenvalues) and ``gradient_recipes`` + is ``None``. + gradient_recipes (tuple(list[list[float]] or None)): List of gradient recipes + for the parameter-shift method. One gradient recipe must be provided + per trainable parameter. + f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided, + and the gradient recipe contains an unshifted term, this value is used, + saving a quantum evaluation. + + Returns: + tuple[list[QuantumTape], function]: A tuple containing a + list of generated tapes, in addition to a post-processing + function to be applied to the results of the evaluated tapes. 
+ """ + argnum = argnum or tape.trainable_params + + gradient_tapes = [] + gradient_coeffs = [] + shapes = [] + unshifted_coeffs = [] + + for idx, _ in enumerate(tape.trainable_params): + + if idx not in argnum: + # parameter has zero gradient + shapes.append(0) + gradient_coeffs.append([]) + continue + + # get the gradient recipe for the trainable parameter + recipe = gradient_recipes[argnum.index(idx)] + recipe = recipe or _get_operation_recipe(tape, idx, shift=shift) + recipe = _process_gradient_recipe(recipe) + coeffs, multipliers, shifts = recipe + + if shifts[0] == 0 and multipliers[0] == 1: + # Gradient recipe includes a term with zero shift. + + if not unshifted_coeffs and f0 is None: + # Ensure that the unshifted tape is appended + # to the gradient tapes, if not already. + gradient_tapes.append(tape) + + # Store the unshifted coefficient. We know that + # it will always be the first coefficient due to processing. + unshifted_coeffs.append(coeffs[0]) + coeffs, multipliers, shifts = recipe[:, 1:] + + # generate the gradient tapes + gradient_coeffs.append(coeffs) + g_tapes = generate_shifted_tapes(tape, idx, shifts, multipliers) + + gradient_tapes.extend(g_tapes) + shapes.append(len(g_tapes)) + + def processing_fn(results): + grads = [] + start = 1 if unshifted_coeffs and f0 is None else 0 + r0 = f0 or results[0] + + for i, s in enumerate(shapes): + + if s == 0: + # parameter has zero gradient + g = qml.math.zeros_like(results[0]) + grads.append(g) + continue + + res = results[start : start + s] + start = start + s + + # compute the linear combination of results and coefficients + res = qml.math.stack(res) + g = sum([c * r for c, r in zip(gradient_coeffs[i], res)]) + + if unshifted_coeffs: + # add on the unshifted term + g = g + unshifted_coeffs[i] * r0 + + grads.append(g) + + # The following is for backwards compatibility; currently, + # the device stacks multiple measurement arrays, even if not the same + # size, resulting in a ragged array. + # In the future, we might want to change this so that only tuples + # of arrays are returned. + for i, g in enumerate(grads): + g = qml.math.convert_like(g, res[0]) + if hasattr(g, "dtype") and g.dtype is np.dtype("object"): + grads[i] = qml.math.hstack(g) + + return qml.math.T(qml.math.stack(grads)) + + return gradient_tapes, processing_fn + + +def var_param_shift(tape, argnum, shift=np.pi / 2, gradient_recipes=None, f0=None): + r"""Generate the parameter-shift tapes and postprocessing methods required + to compute the gradient of a gate parameter with respect to a + variance value. + + Args: + tape (.QuantumTape): quantum tape to differentiate + argnum (int or list[int] or None): Trainable parameter indices to differentiate + with respect to. If not provided, the derivative with respect to all + trainable indices are returned. + shift (float): The shift value to use for the two-term parameter-shift formula. + Only valid if the operation in question supports the two-term parameter-shift + rule (that is, it has two distinct eigenvalues) and ``gradient_recipes`` + is ``None``. + gradient_recipes (tuple(list[list[float]] or None)): List of gradient recipes + for the parameter-shift method. One gradient recipe must be provided + per trainable parameter. + f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided, + and the gradient recipe contains an unshifted term, this value is used, + saving a quantum evaluation. 
+ + Returns: + tuple[list[QuantumTape], function]: A tuple containing a + list of generated tapes, in addition to a post-processing + function to be applied to the results of the evaluated tapes. + """ + argnum = argnum or tape.trainable_params + + # Determine the locations of any variance measurements in the measurement queue. + var_mask = [m.return_type is qml.operation.Variance for m in tape.measurements] + var_idx = np.where(var_mask)[0] + + # Get <A>, the expectation value of the tape with unshifted parameters. + expval_tape = tape.copy(copy_operations=True) + + # Convert all variance measurements on the tape into expectation values + for i in var_idx: + obs = expval_tape._measurements[i].obs + expval_tape._measurements[i] = qml.measure.MeasurementProcess( + qml.operation.Expectation, obs=obs + ) + + gradient_tapes = [expval_tape] + + # evaluate the analytic derivative of <A> + pdA_tapes, pdA_fn = expval_param_shift(expval_tape, argnum, shift, gradient_recipes, f0) + gradient_tapes.extend(pdA_tapes) + + # Store the number of first derivative tapes, so that we know + # the number of results to post-process later. + tape_boundary = len(pdA_tapes) + 1 + + # If there are non-involutory observables A present, we must compute d<A^2>/dp. + # Get the indices in the measurement queue of all non-involutory + # observables. + non_involutory = [] + + for i in var_idx: + obs_name = tape.observables[i].name + + if isinstance(obs_name, list): + # Observable is a tensor product, we must investigate all constituent observables. + if any(name in NONINVOLUTORY_OBS for name in obs_name): + non_involutory.append(i) + + elif obs_name in NONINVOLUTORY_OBS: + non_involutory.append(i) + + # For involutory observables (A^2 = I) we have d<A^2>/dp = 0. + involutory = set(var_idx) - set(non_involutory) + + if non_involutory: + expval_sq_tape = tape.copy(copy_operations=True) + + for i in non_involutory: + # We need to calculate d<A^2>/dp; to do so, we replace the + # non-involutory observables A in the queue with A^2. + obs = _square_observable(expval_sq_tape._measurements[i].obs) + expval_sq_tape._measurements[i] = qml.measure.MeasurementProcess( + qml.operation.Expectation, obs=obs + ) + + # Non-involutory observables are present; the partial derivative of + # <A^2> may be non-zero. Here, we calculate the analytic derivatives of the + # squared observables. + pdA2_tapes, pdA2_fn = expval_param_shift( + expval_sq_tape, argnum, shift, gradient_recipes, f0 + ) + gradient_tapes.extend(pdA2_tapes) + + def processing_fn(results): + # We need to expand the dimensions of the variance mask, + # and convert it to be the same type as the results. + mask = qml.math.convert_like(qml.math.reshape(var_mask, [-1, 1]), results[0]) + f0 = qml.math.expand_dims(results[0], -1) + + pdA = pdA_fn(results[1:tape_boundary]) + pdA2 = 0 + + if non_involutory: + # compute d<A^2>/dp for the non-involutory observables + pdA2 = pdA2_fn(results[tape_boundary:]) + + if involutory: + # if involutory observables are present, ensure they have zero gradient. + # + # For the pdA2_tapes, we have replaced non-involutory + # observables with their square (A -> A^2). However, + # involutory observables have been left as-is (A), and have + # not been replaced by their square (A^2 = I). As a result, + # components of the gradient vector will not be correct. We + # need to replace the gradient value with 0 (the known, + # correct gradient for involutory variables).
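+                #
+                # Concrete sketch: for measurements
+                # [var(PauliZ(0)), var(Hermitian(A, 1))], the mask below is
+                # m = [True, False], so the PauliZ row of pdA2 is zeroed out
+                # while the Hermitian row is kept.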
+ + m = [tape.observables[i].name not in NONINVOLUTORY_OBS for i in var_idx] + m = qml.math.convert_like(m, pdA2) + pdA2 = qml.math.where(qml.math.reshape(m, [-1, 1]), 0, pdA2) + + # return d(var(A))/dp = d<A^2>/dp - 2 * <A> * d<A>/dp for the variances (mask==True) + # and d<A>/dp for plain expectations (mask==False) + return qml.math.where(mask, pdA2 - 2 * f0 * pdA, pdA) + + return gradient_tapes, processing_fn + + +def param_shift( + tape, argnum=None, shift=np.pi / 2, gradient_recipes=None, fallback_fn=finite_diff, f0=None +): + r"""Generate the parameter-shift tapes and postprocessing methods required + to compute the gradient of an expectation value with respect to the + gate parameters. + + Args: + tape (.QuantumTape): quantum tape to differentiate + argnum (int or list[int] or None): Trainable parameter indices to differentiate + with respect to. If not provided, the derivatives with respect to all + trainable indices are returned. + shift (float): The shift value to use for the two-term parameter-shift formula. + Only valid if the operation in question supports the two-term parameter-shift + rule (that is, it has two distinct eigenvalues) and ``gradient_recipes`` + is ``None``. + gradient_recipes (tuple(list[list[float]] or None)): List of gradient recipes + for the parameter-shift method. One gradient recipe must be provided + per trainable parameter. + + This is a tuple with one nested list per parameter. For + parameter :math:`\phi_k`, the nested list contains elements of the form + :math:`[c_i, a_i, s_i]` where :math:`i` is the index of the + term, resulting in a gradient recipe of + + .. math:: \frac{\partial}{\partial\phi_k}f = \sum_{i} c_i f(a_i \phi_k + s_i). + + If ``None``, the default gradient recipe containing the two terms + :math:`[c_0, a_0, s_0]=[1/2, 1, \pi/2]` and :math:`[c_1, a_1, + s_1]=[-1/2, 1, -\pi/2]` is assumed for every parameter. + fallback_fn (None or Callable): a fallback gradient function to use for + any parameters that do not support the parameter-shift rule. + f0 (tensor_like[float] or None): Output of the evaluated input tape. If provided, + and the gradient recipe contains an unshifted term, this value is used, + saving a quantum evaluation. + + Returns: + tuple[list[QuantumTape], function]: A tuple containing a + list of generated tapes, in addition to a post-processing + function to be applied to the results of the evaluated tapes. + + For a variational evolution :math:`U(\mathbf{p}) \vert 0\rangle` with + :math:`N` parameters :math:`\mathbf{p}`, + consider the expectation value of an observable :math:`O`: + + .. math:: + + f(\mathbf{p}) = \langle \hat{O} \rangle(\mathbf{p}) = \langle 0 \vert + U(\mathbf{p})^\dagger \hat{O} U(\mathbf{p}) \vert 0\rangle. + + + The gradient of this expectation value can be calculated using :math:`2N` expectation + values via the parameter-shift rule: + + .. math:: + + \frac{\partial f}{\partial \mathbf{p}} = \frac{1}{2\sin s} \left[ f(\mathbf{p} + s) - + f(\mathbf{p} - s) \right]. + + **Gradients of variances** + + For a variational evolution :math:`U(\mathbf{p}) \vert 0\rangle` with + :math:`N` parameters :math:`\mathbf{p}`, + consider the variance of an observable :math:`O`: + + .. math:: + + g(\mathbf{p})=\langle \hat{O}^2 \rangle (\mathbf{p}) - [\langle \hat{O} + \rangle(\mathbf{p})]^2. + + We can relate this directly to the parameter-shift rule by noting that + + .. math::
+ + \frac{\partial g}{\partial \mathbf{p}} = \frac{\partial}{\partial + \mathbf{p}} \langle \hat{O}^2 \rangle (\mathbf{p}) + - 2 f(\mathbf{p}) \frac{\partial f}{\partial \mathbf{p}}. + + This results in :math:`4N + 1` evaluations. + + In the case where :math:`O` is involutory (:math:`\hat{O}^2 = I`), the first term in the above + expression vanishes, and we are simply left with + + .. math:: + + \frac{\partial g}{\partial \mathbf{p}} = - 2 f(\mathbf{p}) + \frac{\partial f}{\partial \mathbf{p}}, + + allowing us to compute the gradient using :math:`2N + 1` evaluations. + + **Example** + + >>> dev = qml.device("default.qubit", wires=1) + >>> params = np.array([0.1, 0.2, 0.3]) + >>> with qml.tape.JacobianTape() as tape: + ... qml.RX(params[0], wires=0) + ... qml.RY(params[1], wires=0) + ... qml.RX(params[2], wires=0) + ... qml.expval(qml.PauliZ(0)) + ... qml.var(qml.PauliZ(0)) + >>> tape.trainable_params = {0, 1, 2} + >>> gradient_tapes, fn = qml.gradients.param_shift(tape) + >>> res = dev.batch_execute(gradient_tapes) + >>> fn(res) + [[-0.38751721 -0.18884787 -0.38355704] + [ 0.69916862 0.34072424 0.69202359]] + """ + + # perform gradient method validation + if any(m.return_type is qml.operation.State for m in tape.measurements): + raise ValueError( + "Computing the gradient of circuits that return the state is not supported." + ) + + _gradient_analysis(tape) + gradient_tapes = [] + + # TODO: replace the JacobianTape._grad_method_validation + # functionality before deprecation. + method = "analytic" if fallback_fn is None else "best" + diff_methods = tape._grad_method_validation(method) + all_params_grad_method_zero = all(g == "0" for g in diff_methods) + if not tape.trainable_params or all_params_grad_method_zero: + return gradient_tapes, lambda _: np.zeros([tape.output_dim, len(tape.trainable_params)]) + + # TODO: replace the JacobianTape._choose_params_with_methods + # functionality before deprecation. + method_map = dict(tape._choose_params_with_methods(diff_methods, argnum)) + + # If there are unsupported operations, call the fallback gradient function + unsupported_params = {idx for idx, g in method_map.items() if g == "F"} + + if unsupported_params: + g_tapes, fallback_proc_fn = fallback_fn(tape, argnum=unsupported_params) + gradient_tapes.extend(g_tapes) + fallback_len = len(g_tapes) + + # remove finite difference parameters from the method map + method_map = {t_idx: dm for t_idx, dm in method_map.items() if dm != "F"} + + # Generate parameter-shift gradient tapes + argnum = [i for i, dm in method_map.items() if dm == "A"] + + if gradient_recipes is None: + gradient_recipes = [None] * len(argnum) + + if any(m.return_type is qml.operation.Variance for m in tape.measurements): + g_tapes, fn = var_param_shift(tape, argnum, shift, gradient_recipes, f0) + else: + g_tapes, fn = expval_param_shift(tape, argnum, shift, gradient_recipes, f0) + + gradient_tapes.extend(g_tapes) + + if unsupported_params: + # If there are unsupported parameters, we must process + # the quantum results separately, once for the fallback + # function and once for the parameter-shift rule, and recombine.
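+        # Sketch of why simple addition suffices below: each method returns a
+        # full-width Jacobian that is zero in the columns of the parameters it
+        # did not differentiate, so the elementwise sum recombines the two
+        # disjoint Jacobians into one.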
+ + def processing_fn(results): + unsupported_grads = fallback_proc_fn(results[:fallback_len]) + supported_grads = fn(results[fallback_len:]) + return unsupported_grads + supported_grads + + return gradient_tapes, processing_fn + + return gradient_tapes, fn diff --git a/tests/gradients/test_parameter_shift.py b/tests/gradients/test_parameter_shift.py new file mode 100644 index 00000000000..ad2cc2bd009 --- /dev/null +++ b/tests/gradients/test_parameter_shift.py @@ -0,0 +1,916 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the gradients.parameter_shift module.""" +import pytest + +import pennylane as qml +from pennylane import numpy as np +from pennylane.gradients import param_shift +from pennylane.gradients.parameter_shift import _gradient_analysis + + +class TestGradAnalysis: + """Tests for parameter gradient methods""" + + def test_non_differentiable(self): + """Test that a non-differentiable parameter is + correctly marked""" + psi = np.array([1, 0, 1, 0]) / np.sqrt(2) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(psi, wires=[0, 1]) + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.probs(wires=[0, 1]) + + _gradient_analysis(tape) + + assert tape._par_info[0]["grad_method"] is None + assert tape._par_info[1]["grad_method"] == "A" + assert tape._par_info[2]["grad_method"] == "A" + + def test_analysis_caching(self, mocker): + """Test that the gradient analysis is only executed once per tape""" + psi = np.array([1, 0, 1, 0]) / np.sqrt(2) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(psi, wires=[0, 1]) + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.probs(wires=[0, 1]) + + spy = mocker.spy(tape, "_grad_method") + _gradient_analysis(tape) + spy.assert_called() + + assert tape._par_info[0]["grad_method"] is None + assert tape._par_info[1]["grad_method"] == "A" + assert tape._par_info[2]["grad_method"] == "A" + + spy = mocker.spy(tape, "_grad_method") + _gradient_analysis(tape) + spy.assert_not_called() + + def test_independent(self): + """Test that an independent variable is properly marked + as having a zero gradient""" + + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.expval(qml.PauliY(0)) + + _gradient_analysis(tape) + + assert tape._par_info[0]["grad_method"] == "A" + assert tape._par_info[1]["grad_method"] == "0" + + def test_independent_no_graph_mode(self): + """In non-graph mode, it is impossible to determine + if a parameter is independent or not""" + + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.expval(qml.PauliY(0)) + + _gradient_analysis(tape, use_graph=False) + + assert tape._par_info[0]["grad_method"] == "A" + assert tape._par_info[1]["grad_method"] == "A" + + def test_finite_diff(self, monkeypatch): + """If an op has grad_method=F, this should be respected""" + monkeypatch.setattr(qml.RX, 
"grad_method", "F") + + psi = np.array([1, 0, 1, 0]) / np.sqrt(2) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(psi, wires=[0, 1]) + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.probs(wires=[0, 1]) + + _gradient_analysis(tape) + + assert tape._par_info[0]["grad_method"] is None + assert tape._par_info[1]["grad_method"] == "F" + assert tape._par_info[2]["grad_method"] == "A" + + +def grad_fn(tape, dev, fn=qml.gradients.param_shift, **kwargs): + """Utility function to automate execution and processing of gradient tapes""" + tapes, fn = fn(tape, **kwargs) + return fn(dev.batch_execute(tapes)) + + +class TestShiftedTapes: + """Tests for the generation of shifted tapes""" + + def test_behaviour(self): + """Test that the function behaves as expected""" + + with qml.tape.JacobianTape() as tape: + qml.PauliZ(0) + qml.RX(1.0, wires=0) + qml.CNOT(wires=[0, 2]) + qml.Rot(2.0, 3.0, 4.0, wires=0) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {0, 2} + gradient_recipes = [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[1, 1, 1], [2, 2, 2], [3, 3, 3]]] + tapes, _ = qml.gradients.param_shift(tape, gradient_recipes=gradient_recipes) + + assert len(tapes) == 5 + assert tapes[0].get_parameters(trainable_only=False) == [0.2 * 1.0 + 0.3, 2.0, 3.0, 4.0] + assert tapes[1].get_parameters(trainable_only=False) == [0.5 * 1.0 + 0.6, 2.0, 3.0, 4.0] + assert tapes[2].get_parameters(trainable_only=False) == [1.0, 2.0, 1 * 3.0 + 1, 4.0] + assert tapes[3].get_parameters(trainable_only=False) == [1.0, 2.0, 2 * 3.0 + 2, 4.0] + assert tapes[4].get_parameters(trainable_only=False) == [1.0, 2.0, 3 * 3.0 + 3, 4.0] + + +class TestParamShift: + """Unit tests for the param_shift function""" + + def test_state_non_differentiable_error(self): + """Test error raised if attempting to differentiate with + respect to a state""" + psi = np.array([1, 0, 1, 0]) / np.sqrt(2) + + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.state() + + with pytest.raises(ValueError, match=r"return the state is not supported"): + qml.gradients.param_shift(tape) + + def test_independent_parameter(self, mocker): + """Test that an independent parameter is skipped + during the Jacobian computation.""" + spy = mocker.spy(qml.gradients.parameter_shift, "expval_param_shift") + + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.expval(qml.PauliZ(0)) + + dev = qml.device("default.qubit", wires=2) + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 2 + + res = fn(dev.batch_execute(tapes)) + assert res.shape == (1, 2) + + # only called for parameter 0 + assert spy.call_args[0][0:2] == (tape, [0]) + + def test_no_trainable_parameters(self): + """Test that if the tape has no trainable parameters, no + subroutines are called and the returned Jacobian is empty""" + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[1]) + qml.expval(qml.PauliZ(0)) + + dev = qml.device("default.qubit", wires=2) + tape.trainable_params = {} + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 0 + + res = fn(dev.batch_execute(tapes)) + assert res.size == 0 + assert np.all(res == np.array([[]])) + + def test_y0(self, mocker): + """Test that if the gradient recipe has a zero-shift component, then + the tape is executed only once using the current parameter + values.""" + dev = qml.device("default.qubit", wires=2) + + with qml.tape.JacobianTape() as tape: + 
qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[0]) + qml.expval(qml.PauliZ(0)) + + gradient_recipes = [[[-1e7, 1, 0], [1e7, 1, 1e7]], [[-1e7, 1, 0], [1e7, 1, 1e7]]] + tapes, fn = qml.gradients.param_shift(tape, gradient_recipes=gradient_recipes) + + # one tape per parameter, plus one global call + assert len(tapes) == tape.num_params + 1 + + def test_y0_provided(self, mocker): + """Test that if the original tape output is provided, then + the tape is executed only once using the current parameter + values.""" + dev = qml.device("default.qubit", wires=2) + + with qml.tape.JacobianTape() as tape: + qml.RX(0.543, wires=[0]) + qml.RY(-0.654, wires=[0]) + qml.expval(qml.PauliZ(0)) + + gradient_recipes = [[[-1e7, 1, 0], [1e7, 1, 1e7]], [[-1e7, 1, 0], [1e7, 1, 1e7]]] + f0 = dev.execute(tape) + tapes, fn = qml.gradients.param_shift(tape, gradient_recipes=gradient_recipes, f0=f0) + + # one tape per parameter; no extra unshifted tape, since f0 is provided + assert len(tapes) == tape.num_params + + fn(dev.batch_execute(tapes)) + + def test_independent_parameters_analytic(self): + """Test the case where expectation values are independent of some parameters. For those + parameters, the gradient should be evaluated to zero without executing the device.""" + dev = qml.device("default.qubit", wires=2) + + with qml.tape.JacobianTape() as tape1: + qml.RX(1, wires=[0]) + qml.RX(1, wires=[1]) + qml.expval(qml.PauliZ(0)) + + with qml.tape.JacobianTape() as tape2: + qml.RX(1, wires=[0]) + qml.RX(1, wires=[1]) + qml.expval(qml.PauliZ(1)) + + tapes, fn = qml.gradients.param_shift(tape1) + j1 = fn(dev.batch_execute(tapes)) + + # We should only be executing the device to differentiate 1 parameter (2 executions) + assert dev.num_executions == 2 + + tapes, fn = qml.gradients.param_shift(tape2) + j2 = fn(dev.batch_execute(tapes)) + + exp = -np.sin(1) + + assert np.allclose(j1, [exp, 0]) + assert np.allclose(j2, [0, exp]) + + +class TestParameterShiftRule: + """Tests for the parameter shift implementation""" + + @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) + @pytest.mark.parametrize("shift", [np.pi / 2, 0.3, np.sqrt(2)]) + @pytest.mark.parametrize("G", [qml.RX, qml.RY, qml.RZ, qml.PhaseShift]) + def test_pauli_rotation_gradient(self, mocker, G, theta, shift, tol): + """Tests that the automatic gradients of Pauli rotations are correct.""" + spy = mocker.spy(qml.gradients.parameter_shift, "_get_operation_recipe") + dev = qml.device("default.qubit", wires=1) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + G(theta, wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1} + + tapes, fn = qml.gradients.param_shift(tape, shift=shift) + assert len(tapes) == 2 + + autograd_val = fn(dev.batch_execute(tapes)) + manualgrad_val = ( + tape.execute(dev, params=[theta + np.pi / 2]) + - tape.execute(dev, params=[theta - np.pi / 2]) + ) / 2 + assert np.allclose(autograd_val, manualgrad_val, atol=tol, rtol=0) + + assert spy.call_args[1]["shift"] == shift + + # compare to finite differences + tapes, fn = qml.gradients.finite_diff(tape) + numeric_val = fn(dev.batch_execute(tapes)) + assert np.allclose(autograd_val, numeric_val, atol=tol, rtol=0) + + @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, 2 * np.pi, 7)) + @pytest.mark.parametrize("shift", [np.pi / 2, 0.3, np.sqrt(2)]) + def test_Rot_gradient(self, mocker, theta, shift, tol): + """Tests that the automatic gradient of an arbitrary Euler-angle-parameterized gate is correct.""" + spy =
mocker.spy(qml.gradients.parameter_shift, "_get_operation_recipe") + dev = qml.device("default.qubit", wires=1) + params = np.array([theta, theta ** 3, np.sqrt(2) * theta]) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + qml.Rot(*params, wires=[0]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {1, 2, 3} + + tapes, fn = qml.gradients.param_shift(tape, shift=shift) + assert len(tapes) == 2 * len(tape.trainable_params) + + autograd_val = fn(dev.batch_execute(tapes)) + manualgrad_val = np.zeros_like(autograd_val) + + for idx in list(np.ndindex(*params.shape)): + s = np.zeros_like(params) + s[idx] += np.pi / 2 + + forward = tape.execute(dev, params=params + s) + backward = tape.execute(dev, params=params - s) + + manualgrad_val[0, idx] = (forward - backward) / 2 + + assert np.allclose(autograd_val, manualgrad_val, atol=tol, rtol=0) + assert spy.call_args[1]["shift"] == shift + + # compare to finite differences + tapes, fn = qml.gradients.finite_diff(tape) + numeric_val = fn(dev.batch_execute(tapes)) + assert np.allclose(autograd_val, numeric_val, atol=tol, rtol=0) + + @pytest.mark.parametrize("G", [qml.CRX, qml.CRY, qml.CRZ]) + def test_controlled_rotation_gradient(self, G, tol): + """Test gradient of controlled rotation gates""" + dev = qml.device("default.qubit", wires=2) + b = 0.123 + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + G(b, wires=[0, 1]) + qml.expval(qml.PauliX(0)) + + tape.trainable_params = {1} + + res = tape.execute(dev) + assert np.allclose(res, -np.cos(b / 2), atol=tol, rtol=0) + + tapes, fn = qml.gradients.param_shift(tape) + grad = fn(dev.batch_execute(tapes)) + expected = np.sin(b / 2) / 2 + assert np.allclose(grad, expected, atol=tol, rtol=0) + + # compare to finite differences + tapes, fn = qml.gradients.finite_diff(tape) + numeric_val = fn(dev.batch_execute(tapes)) + assert np.allclose(grad, numeric_val, atol=tol, rtol=0) + + @pytest.mark.parametrize("theta", np.linspace(-2 * np.pi, np.pi, 7)) + def test_CRot_gradient(self, theta, tol): + """Tests that the automatic gradient of an arbitrary controlled Euler-angle-parameterized + gate is correct.""" + dev = qml.device("default.qubit", wires=2) + a, b, c = np.array([theta, theta ** 3, np.sqrt(2) * theta]) + + with qml.tape.JacobianTape() as tape: + qml.QubitStateVector(np.array([1.0, -1.0]) / np.sqrt(2), wires=0) + qml.CRot(a, b, c, wires=[0, 1]) + qml.expval(qml.PauliX(0)) + + tape.trainable_params = {1, 2, 3} + + res = tape.execute(dev) + expected = -np.cos(b / 2) * np.cos(0.5 * (a + c)) + assert np.allclose(res, expected, atol=tol, rtol=0) + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 4 * len(tape.trainable_params) + + grad = fn(dev.batch_execute(tapes)) + expected = np.array( + [ + [ + 0.5 * np.cos(b / 2) * np.sin(0.5 * (a + c)), + 0.5 * np.sin(b / 2) * np.cos(0.5 * (a + c)), + 0.5 * np.cos(b / 2) * np.sin(0.5 * (a + c)), + ] + ] + ) + assert np.allclose(grad, expected, atol=tol, rtol=0) + + # compare to finite differences + tapes, fn = qml.gradients.finite_diff(tape) + numeric_val = fn(dev.batch_execute(tapes)) + assert np.allclose(grad, numeric_val, atol=tol, rtol=0) + + def test_gradients_agree_finite_differences(self, tol): + """Tests that the parameter-shift rule agrees with the first and second + order finite differences""" + params = np.array([0.1, -1.6, np.pi / 5]) + + with qml.tape.JacobianTape() as tape: + qml.RX(params[0], wires=[0]) + 
qml.CNOT(wires=[0, 1]) + qml.RY(-1.6, wires=[0]) + qml.RY(params[1], wires=[1]) + qml.CNOT(wires=[1, 0]) + qml.RX(params[2], wires=[0]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + + tape.trainable_params = {0, 2, 3} + dev = qml.device("default.qubit", wires=2) + + grad_F1 = grad_fn(tape, dev, fn=qml.gradients.finite_diff, approx_order=1) + grad_F2 = grad_fn( + tape, dev, fn=qml.gradients.finite_diff, approx_order=2, strategy="center" + ) + grad_A = grad_fn(tape, dev) + + # gradients computed with different methods must agree + assert np.allclose(grad_A, grad_F1, atol=tol, rtol=0) + assert np.allclose(grad_A, grad_F2, atol=tol, rtol=0) + + def test_variance_gradients_agree_finite_differences(self, tol): + """Tests that the variance parameter-shift rule agrees with the first and second + order finite differences""" + params = np.array([0.1, -1.6, np.pi / 5]) + + with qml.tape.JacobianTape() as tape: + qml.RX(params[0], wires=[0]) + qml.CNOT(wires=[0, 1]) + qml.RY(-1.6, wires=[0]) + qml.RY(params[1], wires=[1]) + qml.CNOT(wires=[1, 0]) + qml.RX(params[2], wires=[0]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)), qml.var(qml.PauliZ(0) @ qml.PauliX(1)) + + tape.trainable_params = {0, 2, 3} + dev = qml.device("default.qubit", wires=2) + + grad_F1 = grad_fn(tape, dev, fn=qml.gradients.finite_diff, approx_order=1) + grad_F2 = grad_fn( + tape, dev, fn=qml.gradients.finite_diff, approx_order=2, strategy="center" + ) + grad_A = grad_fn(tape, dev) + + # gradients computed with different methods must agree + assert np.allclose(grad_A, grad_F1, atol=tol, rtol=0) + assert np.allclose(grad_A, grad_F2, atol=tol, rtol=0) + + def test_fallback(self, mocker, tol): + """Test that fallback gradient functions are correctly used""" + spy = mocker.spy(qml.gradients, "finite_diff") + dev = qml.device("default.qubit.autograd", wires=2) + x = 0.543 + y = -0.654 + + params = np.array([x, y], requires_grad=True) + + class RY(qml.RY): + grad_method = "F" + + def cost_fn(params): + with qml.tape.JacobianTape() as tape: + qml.RX(params[0], wires=[0]) + RY(params[1], wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + qml.var(qml.PauliX(1)) + + tapes, fn = param_shift(tape, fallback_fn=qml.gradients.finite_diff) + assert len(tapes) == 5 + + # check that the fallback method was called for the specified argnums + spy.assert_called() + assert spy.call_args[1]["argnum"] == {1} + + return fn(dev.batch_execute(tapes)) + + res = cost_fn(params) + assert res.shape == (2, 2) + + expected = np.array([[-np.sin(x), 0], [0, -2 * np.cos(y) * np.sin(y)]]) + assert np.allclose(res, expected, atol=tol, rtol=0) + + # double check the derivative + jac = qml.jacobian(cost_fn)(params) + assert np.allclose(jac[0, 0, 0], -np.cos(x), atol=tol, rtol=0) + assert np.allclose(jac[1, 1, 1], -2 * np.cos(2 * y), atol=tol, rtol=0) + + def test_single_expectation_value(self, tol): + """Tests correct output shape and evaluation for a tape + with a single expval output""" + dev = qml.device("default.qubit", wires=2) + x = 0.543 + y = -0.654 + + with qml.tape.JacobianTape() as tape: + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0) @ qml.PauliX(1)) + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 4 + + res = fn(dev.batch_execute(tapes)) + assert res.shape == (1, 2) + + expected = np.array([[-np.sin(y) * np.sin(x), np.cos(y) * np.cos(x)]]) + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_multiple_expectation_values(self, tol): + """Tests 
correct output shape and evaluation for a tape + with multiple expval outputs""" + dev = qml.device("default.qubit", wires=2) + x = 0.543 + y = -0.654 + + with qml.tape.JacobianTape() as tape: + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + qml.expval(qml.PauliX(1)) + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 4 + + res = fn(dev.batch_execute(tapes)) + assert res.shape == (2, 2) + + expected = np.array([[-np.sin(x), 0], [0, np.cos(y)]]) + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_var_expectation_values(self, tol): + """Tests correct output shape and evaluation for a tape + with expval and var outputs""" + dev = qml.device("default.qubit", wires=2) + x = 0.543 + y = -0.654 + + with qml.tape.JacobianTape() as tape: + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + qml.var(qml.PauliX(1)) + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 5 + + res = fn(dev.batch_execute(tapes)) + assert res.shape == (2, 2) + + expected = np.array([[-np.sin(x), 0], [0, -2 * np.cos(y) * np.sin(y)]]) + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_prob_expectation_values(self, tol): + """Tests correct output shape and evaluation for a tape + with prob and expval outputs""" + dev = qml.device("default.qubit", wires=2) + x = 0.543 + y = -0.654 + + with qml.tape.JacobianTape() as tape: + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.expval(qml.PauliZ(0)) + qml.probs(wires=[0, 1]) + + tapes, fn = qml.gradients.param_shift(tape) + assert len(tapes) == 4 + + res = fn(dev.batch_execute(tapes)) + assert res.shape == (5, 2) + + expected = ( + np.array( + [ + [-2 * np.sin(x), 0], + [ + -(np.cos(y / 2) ** 2 * np.sin(x)), + -(np.cos(x / 2) ** 2 * np.sin(y)), + ], + [ + -(np.sin(x) * np.sin(y / 2) ** 2), + (np.cos(x / 2) ** 2 * np.sin(y)), + ], + [ + (np.sin(x) * np.sin(y / 2) ** 2), + (np.sin(x / 2) ** 2 * np.sin(y)), + ], + [ + (np.cos(y / 2) ** 2 * np.sin(x)), + -(np.sin(x / 2) ** 2 * np.sin(y)), + ], + ] + ) + / 2 + ) + + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_involutory_variance(self, tol): + """Tests qubit observables that are involutory""" + dev = qml.device("default.qubit", wires=1) + a = 0.54 + + with qml.tape.JacobianTape() as tape: + qml.RX(a, wires=0) + qml.var(qml.PauliZ(0)) + + res = tape.execute(dev) + expected = 1 - np.cos(a) ** 2 + assert np.allclose(res, expected, atol=tol, rtol=0) + + # circuit jacobians + tapes, fn = qml.gradients.param_shift(tape) + gradA = fn(dev.batch_execute(tapes)) + assert len(tapes) == 1 + 2 * 1 + + tapes, fn = qml.gradients.finite_diff(tape) + gradF = fn(dev.batch_execute(tapes)) + assert len(tapes) == 2 + + expected = 2 * np.sin(a) * np.cos(a) + + assert gradF == pytest.approx(expected, abs=tol) + assert gradA == pytest.approx(expected, abs=tol) + + def test_non_involutory_variance(self, tol): + """Tests a qubit Hermitian observable that is not involutory""" + dev = qml.device("default.qubit", wires=1) + A = np.array([[4, -1 + 6j], [-1 - 6j, 2]]) + a = 0.54 + + with qml.tape.JacobianTape() as tape: + qml.RX(a, wires=0) + qml.var(qml.Hermitian(A, 0)) + + tape.trainable_params = {0} + + res = tape.execute(dev) + expected = (39 / 2) - 6 * np.sin(2 * a) + (35 / 2) * np.cos(2 * a) + assert np.allclose(res, expected, atol=tol, rtol=0) + + # circuit jacobians + tapes, fn = qml.gradients.param_shift(tape) + gradA = fn(dev.batch_execute(tapes)) + 
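+        # Expected tape count: 1 unshifted evaluation, plus 2 shifted tapes
+        # for d<A>/dp and 2 more for d<A^2>/dp (Hermitian is non-involutory).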
assert len(tapes) == 1 + 4 * 1 + + tapes, fn = qml.gradients.finite_diff(tape) + gradF = fn(dev.batch_execute(tapes)) + assert len(tapes) == 2 + + expected = -35 * np.sin(2 * a) - 12 * np.cos(2 * a) + assert gradA == pytest.approx(expected, abs=tol) + assert gradF == pytest.approx(expected, abs=tol) + + def test_involutory_and_noninvolutory_variance(self, tol): + """Tests a qubit Hermitian observable that is not involutory alongside + an involutory observable.""" + dev = qml.device("default.qubit", wires=2) + A = np.array([[4, -1 + 6j], [-1 - 6j, 2]]) + a = 0.54 + + with qml.tape.JacobianTape() as tape: + qml.RX(a, wires=0) + qml.RX(a, wires=1) + qml.var(qml.PauliZ(0)) + qml.var(qml.Hermitian(A, 1)) + + tape.trainable_params = {0, 1} + + res = tape.execute(dev) + expected = [1 - np.cos(a) ** 2, (39 / 2) - 6 * np.sin(2 * a) + (35 / 2) * np.cos(2 * a)] + assert np.allclose(res, expected, atol=tol, rtol=0) + + # circuit jacobians + tapes, fn = qml.gradients.param_shift(tape) + gradA = fn(dev.batch_execute(tapes)) + assert len(tapes) == 1 + 2 * 4 + + tapes, fn = qml.gradients.finite_diff(tape) + gradF = fn(dev.batch_execute(tapes)) + assert len(tapes) == 1 + 2 + + expected = [2 * np.sin(a) * np.cos(a), -35 * np.sin(2 * a) - 12 * np.cos(2 * a)] + assert np.diag(gradA) == pytest.approx(expected, abs=tol) + assert np.diag(gradF) == pytest.approx(expected, abs=tol) + + def test_expval_and_variance(self, tol): + """Test that the qnode works for a combination of expectation + values and variances""" + dev = qml.device("default.qubit", wires=3) + + a = 0.54 + b = -0.423 + c = 0.123 + + with qml.tape.JacobianTape() as tape: + qml.RX(a, wires=0) + qml.RY(b, wires=1) + qml.CNOT(wires=[1, 2]) + qml.RX(c, wires=2) + qml.CNOT(wires=[0, 1]) + qml.var(qml.PauliZ(0)) + qml.expval(qml.PauliZ(1)) + qml.var(qml.PauliZ(2)) + + res = tape.execute(dev) + expected = np.array( + [ + np.sin(a) ** 2, + np.cos(a) * np.cos(b), + 0.25 * (3 - 2 * np.cos(b) ** 2 * np.cos(2 * c) - np.cos(2 * b)), + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0) + + # # circuit jacobians + tapes, fn = qml.gradients.param_shift(tape) + gradA = fn(dev.batch_execute(tapes)) + + tapes, fn = qml.gradients.finite_diff(tape) + gradF = fn(dev.batch_execute(tapes)) + + expected = np.array( + [ + [2 * np.cos(a) * np.sin(a), -np.cos(b) * np.sin(a), 0], + [ + 0, + -np.cos(a) * np.sin(b), + 0.5 * (2 * np.cos(b) * np.cos(2 * c) * np.sin(b) + np.sin(2 * b)), + ], + [0, 0, np.cos(b) ** 2 * np.sin(2 * c)], + ] + ).T + assert gradA == pytest.approx(expected, abs=tol) + assert gradF == pytest.approx(expected, abs=tol) + + def test_projector_variance(self, tol): + """Test that the variance of a projector is correctly returned""" + dev = qml.device("default.qubit", wires=2) + P = np.array([1]) + x, y = 0.765, -0.654 + + with qml.tape.JacobianTape() as tape: + qml.RX(x, wires=0) + qml.RY(y, wires=1) + qml.CNOT(wires=[0, 1]) + qml.var(qml.Projector(P, wires=0) @ qml.PauliX(1)) + + tape.trainable_params = {0, 1} + + res = tape.execute(dev) + expected = 0.25 * np.sin(x / 2) ** 2 * (3 + np.cos(2 * y) + 2 * np.cos(x) * np.sin(y) ** 2) + assert np.allclose(res, expected, atol=tol, rtol=0) + + # # circuit jacobians + tapes, fn = qml.gradients.param_shift(tape) + gradA = fn(dev.batch_execute(tapes)) + + tapes, fn = qml.gradients.finite_diff(tape) + gradF = fn(dev.batch_execute(tapes)) + + expected = np.array( + [ + [ + 0.5 * np.sin(x) * (np.cos(x / 2) ** 2 + np.cos(2 * y) * np.sin(x / 2) ** 2), + -2 * np.cos(y) * np.sin(x / 2) ** 4 * np.sin(y), + ] + ] + ) + 
assert gradA == pytest.approx(expected, abs=tol) + assert gradF == pytest.approx(expected, abs=tol) + + +class TestParamShiftGradients: + """Test that the transform is differentiable""" + + def test_autograd(self, tol): + """Tests that the output of the parameter-shift transform + can be differentiated using autograd, yielding second derivatives.""" + dev = qml.device("default.qubit.autograd", wires=2) + params = np.array([0.543, -0.654], requires_grad=True) + + def cost_fn(x): + with qml.tape.JacobianTape() as tape: + qml.RX(x[0], wires=[0]) + qml.RY(x[1], wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.var(qml.PauliZ(0) @ qml.PauliX(1)) + + tape.trainable_params = {0, 1} + tapes, fn = qml.gradients.param_shift(tape) + jac = fn(dev.batch_execute(tapes)) + return jac + + res = qml.jacobian(cost_fn)(params) + x, y = params + expected = np.array( + [ + [2 * np.cos(2 * x) * np.sin(y) ** 2, np.sin(2 * x) * np.sin(2 * y)], + [np.sin(2 * x) * np.sin(2 * y), -2 * np.cos(x) ** 2 * np.cos(2 * y)], + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_tf(self, tol): + """Tests that the output of the finite-difference transform + can be differentiated using TF, yielding second derivatives.""" + tf = pytest.importorskip("tensorflow") + + dev = qml.device("default.qubit.tf", wires=2) + params = tf.Variable([0.543, -0.654], dtype=tf.float64) + + with tf.GradientTape() as t: + with qml.tape.JacobianTape() as tape: + qml.RX(params[0], wires=[0]) + qml.RY(params[1], wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.var(qml.PauliZ(0) @ qml.PauliX(1)) + + tape.trainable_params = {0, 1} + tapes, fn = qml.gradients.param_shift(tape) + jac = fn(dev.batch_execute(tapes)) + + x, y = 1.0 * params + + expected = np.array([np.sin(2 * x) * np.sin(y) ** 2, -np.cos(x) ** 2 * np.sin(2 * y)]) + assert np.allclose(jac, expected, atol=tol, rtol=0) + + res = t.jacobian(jac, params) + expected = np.array( + [ + [2 * np.cos(2 * x) * np.sin(y) ** 2, np.sin(2 * x) * np.sin(2 * y)], + [np.sin(2 * x) * np.sin(2 * y), -2 * np.cos(x) ** 2 * np.cos(2 * y)], + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0) + + def test_torch(self, tol): + """Tests that the output of the finite-difference transform + can be differentiated using Torch, yielding second derivatives.""" + torch = pytest.importorskip("torch") + from pennylane.interfaces.torch import TorchInterface + + dev = qml.device("default.qubit", wires=2) + params = torch.tensor([0.543, -0.654], dtype=torch.float64, requires_grad=True) + + with TorchInterface.apply(qml.tape.QubitParamShiftTape()) as tape: + qml.RX(params[0], wires=[0]) + qml.RY(params[1], wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.var(qml.PauliZ(0) @ qml.PauliX(1)) + + tapes, fn = qml.gradients.param_shift(tape) + jac = fn([t.execute(dev) for t in tapes]) + cost = jac[0, 0] + cost.backward() + hess = params.grad + + x, y = params.detach().numpy() + + expected = np.array([np.sin(2 * x) * np.sin(y) ** 2, -np.cos(x) ** 2 * np.sin(2 * y)]) + assert np.allclose(jac.detach().numpy(), expected, atol=tol, rtol=0) + + expected = np.array([2 * np.cos(2 * x) * np.sin(y) ** 2, np.sin(2 * x) * np.sin(2 * y)]) + assert np.allclose(hess.detach().numpy(), expected, atol=0.1, rtol=0) + + def test_jax(self, tol): + """Tests that the output of the finite-difference transform + can be differentiated using JAX, yielding second derivatives.""" + jax = pytest.importorskip("jax") + from jax import numpy as jnp + from pennylane.interfaces.jax import JAXInterface + from jax.config import config + + 
config.update("jax_enable_x64", True) + + dev = qml.device("default.qubit", wires=2) + params = jnp.array([0.543, -0.654]) + + def cost_fn(x): + with JAXInterface.apply(qml.tape.QubitParamShiftTape()) as tape: + qml.RX(x[0], wires=[0]) + qml.RY(x[1], wires=[1]) + qml.CNOT(wires=[0, 1]) + qml.var(qml.PauliZ(0) @ qml.PauliX(1)) + + tape.trainable_params = {0, 1} + tapes, fn = qml.gradients.param_shift(tape) + jac = fn([t.execute(dev) for t in tapes]) + return jac + + res = jax.jacobian(cost_fn)(params) + x, y = params + expected = np.array( + [ + [2 * np.cos(2 * x) * np.sin(y) ** 2, np.sin(2 * x) * np.sin(2 * y)], + [np.sin(2 * x) * np.sin(2 * y), -2 * np.cos(x) ** 2 * np.cos(2 * y)], + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0)
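+
+
+# A minimal usage sketch appended for illustration (not part of the original
+# suite); it relies only on the public ``qml.gradients.param_shift`` API
+# exercised above, and checks the textbook two-term rule result for RX.
+def test_param_shift_rx_usage_sketch():
+    """Sketch: d<Z>/dx for RX(x) should equal -sin(x)."""
+    dev = qml.device("default.qubit", wires=1)
+    x = 0.654
+
+    with qml.tape.JacobianTape() as tape:
+        qml.RX(x, wires=0)
+        qml.expval(qml.PauliZ(0))
+
+    tapes, fn = qml.gradients.param_shift(tape)
+    assert len(tapes) == 2  # two-term rule: one tape per shift
+
+    res = fn(dev.batch_execute(tapes))
+    assert np.allclose(res, -np.sin(x))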