From 21a58474d71443bfb124f450202e8bfe7045f30f Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 02:23:55 +0800 Subject: [PATCH 01/20] Add support for @tf.function on non-TF devices --- pennylane/interfaces/batch/tensorflow.py | 36 ++++++++++++++---------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 7e3633ff504..bd47a2470a3 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -76,29 +76,32 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] + @tf.custom_gradient + def _execute(*parameters): # pylint:disable=unused-argument # store all unwrapped parameters params_unwrapped.append( [i.numpy() if isinstance(i, (tf.Variable, tf.Tensor)) else i for i in params] ) - with qml.tape.Unwrap(*tapes, set_trainable=False): - # Forward pass: execute the tapes - res, jacs = execute_fn(tapes, **gradient_kwargs) + with qml.tape.Unwrap(*tapes, set_trainable=False): + # Forward pass: execute the tapes + res, jacs = execute_fn(tapes, **gradient_kwargs) - for i, tape in enumerate(tapes): - # convert output to TensorFlow tensors + for i, tape in enumerate(tapes): + # convert output to TensorFlow tensors - if isinstance(res[i], np.ndarray): - # For backwards compatibility, we flatten ragged tape outputs - # when there is no sampling - r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] - res[i] = tf.convert_to_tensor(r) + if isinstance(res[i], np.ndarray): + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + res[i] = tf.convert_to_tensor(r) - elif isinstance(res[i], tuple): - res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + elif isinstance(res[i], tuple): + res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + + else: + res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) - @tf.custom_gradient - def _execute(*parameters): # pylint:disable=unused-argument def grad_fn(*dy, **tfkwargs): """Returns the vector-Jacobian product with given parameter values and output gradient dy""" @@ -171,4 +174,7 @@ def grad_fn(*dy, **tfkwargs): return res, grad_fn - return _execute(*parameters) + if tf.executing_eagerly(): + return _execute(*parameters) + + return tf.py_function(func=_execute, inp=parameters, Tout=tf.float64) From e1b166a3b14d244e7276d4f76c6792b218f0ef86 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 16:02:33 +0800 Subject: [PATCH 02/20] fix --- pennylane/interfaces/batch/tensorflow.py | 49 +++++++++++++----------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index bd47a2470a3..778bd9a58b1 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -67,40 +67,30 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d """ parameters = [] - params_unwrapped = [] + tape_shapes = [] for i, tape in enumerate(tapes): # store the trainable parameters params = tape.get_parameters(trainable_only=False) tape.trainable_params = qml.math.get_trainable_indices(params) + tape_shapes.append(len(tape.trainable_params)) parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] @tf.custom_gradient 
def _execute(*parameters): # pylint:disable=unused-argument # store all unwrapped parameters - params_unwrapped.append( - [i.numpy() if isinstance(i, (tf.Variable, tf.Tensor)) else i for i in params] - ) - with qml.tape.Unwrap(*tapes, set_trainable=False): - # Forward pass: execute the tapes - res, jacs = execute_fn(tapes, **gradient_kwargs) - - for i, tape in enumerate(tapes): - # convert output to TensorFlow tensors - - if isinstance(res[i], np.ndarray): - # For backwards compatibility, we flatten ragged tape outputs - # when there is no sampling - r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] - res[i] = tf.convert_to_tensor(r) + nested_params = [] + count = 0 - elif isinstance(res[i], tuple): - res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + for shape in tape_shapes: + nested_params.append(qml.math.unwrap(parameters[count:shape])) + count += shape - else: - res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): + # Forward pass: execute the tapes + res, jacs = execute_fn(tapes, **gradient_kwargs) def grad_fn(*dy, **tfkwargs): """Returns the vector-Jacobian product with given @@ -122,7 +112,7 @@ def grad_fn(*dy, **tfkwargs): # Generate and execute the required gradient tapes if _n == max_diff or not context.executing_eagerly(): - with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): vjp_tapes, processing_fn = qml.gradients.batch_vjp( tapes, dy, @@ -166,12 +156,27 @@ def grad_fn(*dy, **tfkwargs): # - gradient_fn is not differentiable # # so we cannot support higher-order derivatives. - with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) variables = tfkwargs.get("variables", None) return (vjps, variables) if variables is not None else vjps + for i, tape in enumerate(tapes): + # convert output to TensorFlow tensors + + if isinstance(res[i], np.ndarray): + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + res[i] = tf.convert_to_tensor(r) + + elif isinstance(res[i], tuple): + res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + + else: + res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + return res, grad_fn if tf.executing_eagerly(): From fc3be6a36647306357cf9f16fd406f4346c94f77 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 17:02:57 +0800 Subject: [PATCH 03/20] Add support for @tf.function on non-TF devices --- pennylane/interfaces/batch/tensorflow.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 778bd9a58b1..801a1fa9e0c 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -65,30 +65,23 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d list[list[tf.Tensor]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. 
""" - + all_params = [] parameters = [] - tape_shapes = [] for i, tape in enumerate(tapes): # store the trainable parameters params = tape.get_parameters(trainable_only=False) + all_params.append(params) tape.trainable_params = qml.math.get_trainable_indices(params) - tape_shapes.append(len(tape.trainable_params)) - parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] @tf.custom_gradient def _execute(*parameters): # pylint:disable=unused-argument # store all unwrapped parameters + for i, a in enumerate(all_params): + all_params[i] = qml.math.unwrap(a) - nested_params = [] - count = 0 - - for shape in tape_shapes: - nested_params.append(qml.math.unwrap(parameters[count:shape])) - count += shape - - with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): # Forward pass: execute the tapes res, jacs = execute_fn(tapes, **gradient_kwargs) @@ -112,7 +105,7 @@ def grad_fn(*dy, **tfkwargs): # Generate and execute the required gradient tapes if _n == max_diff or not context.executing_eagerly(): - with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): vjp_tapes, processing_fn = qml.gradients.batch_vjp( tapes, dy, @@ -156,7 +149,7 @@ def grad_fn(*dy, **tfkwargs): # - gradient_fn is not differentiable # # so we cannot support higher-order derivatives. - with qml.tape.Unwrap(*tapes, params=nested_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) variables = tfkwargs.get("variables", None) From 29db9d620e67b9e43005bf4d7b2eca9cbb5fc1aa Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 19:41:33 +0800 Subject: [PATCH 04/20] Add support for @tf.function on non-TF devices --- pennylane/gradients/finite_difference.py | 4 +-- pennylane/interfaces/batch/tensorflow.py | 31 +++++++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/pennylane/gradients/finite_difference.py b/pennylane/gradients/finite_difference.py index a6240df2ce6..bf9e5e018ba 100644 --- a/pennylane/gradients/finite_difference.py +++ b/pennylane/gradients/finite_difference.py @@ -172,9 +172,9 @@ def generate_shifted_tapes(tape, idx, shifts, multipliers=None): if multipliers is not None: m = multipliers[i] - new_params[idx] = new_params[idx] * qml.math.convert_like(m, new_params[idx]) + new_params[idx] = new_params[idx] * m - new_params[idx] = new_params[idx] + qml.math.convert_like(s, new_params[idx]) + new_params[idx] = new_params[idx] + s shifted_tape.set_parameters(new_params) tapes.append(shifted_tape) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 801a1fa9e0c..eff9216ee71 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -67,21 +67,31 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d """ all_params = [] parameters = [] + lens = [] + trainable = [] for i, tape in enumerate(tapes): # store the trainable parameters params = tape.get_parameters(trainable_only=False) - all_params.append(params) tape.trainable_params = qml.math.get_trainable_indices(params) + parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] + all_params += params + trainable += (np.array(list(tape.trainable_params)) + sum(lens)).tolist() + + 
lens.append(len(params)) @tf.custom_gradient - def _execute(*parameters): # pylint:disable=unused-argument + def _execute(*all_params): # pylint:disable=unused-argument # store all unwrapped parameters - for i, a in enumerate(all_params): - all_params[i] = qml.math.unwrap(a) + count = 0 + unwrapped_params = [] - with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): + for s in lens: + unwrapped_params.append(qml.math.unwrap(all_params[count : count + s])) + count += s + + with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): # Forward pass: execute the tapes res, jacs = execute_fn(tapes, **gradient_kwargs) @@ -105,7 +115,7 @@ def grad_fn(*dy, **tfkwargs): # Generate and execute the required gradient tapes if _n == max_diff or not context.executing_eagerly(): - with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): vjp_tapes, processing_fn = qml.gradients.batch_vjp( tapes, dy, @@ -149,9 +159,12 @@ def grad_fn(*dy, **tfkwargs): # - gradient_fn is not differentiable # # so we cannot support higher-order derivatives. - with qml.tape.Unwrap(*tapes, params=all_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) + vjps = iter(vjps) + vjps = [next(vjps) if x in trainable else None for x in range(len(all_params))] + variables = tfkwargs.get("variables", None) return (vjps, variables) if variables is not None else vjps @@ -173,6 +186,6 @@ def grad_fn(*dy, **tfkwargs): return res, grad_fn if tf.executing_eagerly(): - return _execute(*parameters) + return _execute(*all_params) - return tf.py_function(func=_execute, inp=parameters, Tout=tf.float64) + return tf.py_function(func=_execute, inp=all_params, Tout=[tf.float64]) From a6b210339a22d275f59ea89063e11b86638160ea Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 19:51:54 +0800 Subject: [PATCH 05/20] Add support for @tf.function on non-TF devices --- pennylane/gradients/finite_difference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pennylane/gradients/finite_difference.py b/pennylane/gradients/finite_difference.py index bf9e5e018ba..a6240df2ce6 100644 --- a/pennylane/gradients/finite_difference.py +++ b/pennylane/gradients/finite_difference.py @@ -172,9 +172,9 @@ def generate_shifted_tapes(tape, idx, shifts, multipliers=None): if multipliers is not None: m = multipliers[i] - new_params[idx] = new_params[idx] * m + new_params[idx] = new_params[idx] * qml.math.convert_like(m, new_params[idx]) - new_params[idx] = new_params[idx] + s + new_params[idx] = new_params[idx] + qml.math.convert_like(s, new_params[idx]) shifted_tape.set_parameters(new_params) tapes.append(shifted_tape) From 66e67daeea5557a0d21ddf79a0f2db07772776e3 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 19:56:57 +0800 Subject: [PATCH 06/20] linting --- pennylane/interfaces/batch/tensorflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index eff9216ee71..98939e0df7c 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -70,7 +70,7 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d lens = [] trainable = [] - for i, tape in enumerate(tapes): + for tape in tapes: # store the trainable parameters 
params = tape.get_parameters(trainable_only=False) tape.trainable_params = qml.math.get_trainable_indices(params) @@ -168,7 +168,7 @@ def grad_fn(*dy, **tfkwargs): variables = tfkwargs.get("variables", None) return (vjps, variables) if variables is not None else vjps - for i, tape in enumerate(tapes): + for i, _ in enumerate(tapes): # convert output to TensorFlow tensors if isinstance(res[i], np.ndarray): From 6b6253d8c067b87432eeb35022e6de2ac1db3d72 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 21:23:11 +0800 Subject: [PATCH 07/20] add tests --- .../interfaces/test_batch_tensorflow_qnode.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/interfaces/test_batch_tensorflow_qnode.py b/tests/interfaces/test_batch_tensorflow_qnode.py index 2401ab4619a..2606f2ca712 100644 --- a/tests/interfaces/test_batch_tensorflow_qnode.py +++ b/tests/interfaces/test_batch_tensorflow_qnode.py @@ -1329,3 +1329,33 @@ def circuit(): assert isinstance(result, tf.Tensor) assert np.array_equal(result.shape, (3, n_sample)) assert result.dtype == tf.int64 + + +class TestAutograph: + """Tests for Autograph mode""" + + def test_autograph_gradients(self, tol): + """Test that a parameter-shift QNode can be compiled + using @tf.function, and differentiated""" + dev = qml.device("default.qubit", wires=2) + x = tf.Variable(0.543, dtype=tf.float64) + y = tf.Variable(-0.654, dtype=tf.float64) + + @tf.function + @qnode(dev, diff_method="parameter-shift", interface="tf") + def circuit(x, y): + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + return qml.probs(wires=[0]), qml.probs(wires=[1]) + + with tf.GradientTape() as tape: + p0, p1 = circuit(x, y) + loss = p0[0] + p1[1] + + expected = tf.cos(x / 2) ** 2 + (1 - tf.cos(x) * tf.cos(y)) / 2 + assert np.allclose(loss, expected, atol=tol, rtol=0) + + grad = tape.gradient(loss, [x, y]) + expected = [-tf.sin(x) * tf.sin(y / 2) ** 2, tf.cos(x) * tf.sin(y) / 2] + assert np.allclose(grad, expected, atol=tol, rtol=0) From 9d874759eb54a024767881818801a2d242e92154 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 21:26:53 +0800 Subject: [PATCH 08/20] fixes --- pennylane/interfaces/batch/tensorflow.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 98939e0df7c..ebddf7f47e5 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -85,13 +85,13 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d def _execute(*all_params): # pylint:disable=unused-argument # store all unwrapped parameters count = 0 - unwrapped_params = [] + params_unwrapped = [] for s in lens: - unwrapped_params.append(qml.math.unwrap(all_params[count : count + s])) + params_unwrapped.append(qml.math.unwrap(all_params[count : count + s])) count += s - with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): # Forward pass: execute the tapes res, jacs = execute_fn(tapes, **gradient_kwargs) @@ -115,7 +115,7 @@ def grad_fn(*dy, **tfkwargs): # Generate and execute the required gradient tapes if _n == max_diff or not context.executing_eagerly(): - with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): vjp_tapes, processing_fn = qml.gradients.batch_vjp( tapes, dy, 
@@ -159,7 +159,7 @@ def grad_fn(*dy, **tfkwargs): # - gradient_fn is not differentiable # # so we cannot support higher-order derivatives. - with qml.tape.Unwrap(*tapes, params=unwrapped_params, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) vjps = iter(vjps) From d8db6ce6690d94a49815aafacaa812b724edabc1 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Thu, 11 Nov 2021 21:27:41 +0800 Subject: [PATCH 09/20] fixes --- pennylane/interfaces/batch/tensorflow.py | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index ebddf7f47e5..619ff759cec 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -95,6 +95,21 @@ def _execute(*all_params): # pylint:disable=unused-argument # Forward pass: execute the tapes res, jacs = execute_fn(tapes, **gradient_kwargs) + for i, _ in enumerate(tapes): + # convert output to TensorFlow tensors + + if isinstance(res[i], np.ndarray): + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + res[i] = tf.convert_to_tensor(r) + + elif isinstance(res[i], tuple): + res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + + else: + res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + def grad_fn(*dy, **tfkwargs): """Returns the vector-Jacobian product with given parameter values and output gradient dy""" @@ -168,21 +183,6 @@ def grad_fn(*dy, **tfkwargs): variables = tfkwargs.get("variables", None) return (vjps, variables) if variables is not None else vjps - for i, _ in enumerate(tapes): - # convert output to TensorFlow tensors - - if isinstance(res[i], np.ndarray): - # For backwards compatibility, we flatten ragged tape outputs - # when there is no sampling - r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] - res[i] = tf.convert_to_tensor(r) - - elif isinstance(res[i], tuple): - res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) - - else: - res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) - return res, grad_fn if tf.executing_eagerly(): From 7535ff90116dded669149a5c474b34448bce4c21 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 00:49:51 +0800 Subject: [PATCH 10/20] update --- pennylane/interfaces/batch/__init__.py | 14 +- pennylane/interfaces/batch/autograd.py | 4 +- pennylane/interfaces/batch/jax.py | 4 +- pennylane/interfaces/batch/tensorflow.py | 63 ++--- .../interfaces/batch/tensorflow_autograph.py | 236 ++++++++++++++++++ pennylane/interfaces/batch/torch.py | 4 +- .../interfaces/test_batch_tensorflow_qnode.py | 106 ++++++++ 7 files changed, 387 insertions(+), 44 deletions(-) create mode 100644 pennylane/interfaces/batch/tensorflow_autograph.py diff --git a/pennylane/interfaces/batch/__init__.py b/pennylane/interfaces/batch/__init__.py index a6b6145d8f0..e4d9ff85ec7 100644 --- a/pennylane/interfaces/batch/__init__.py +++ b/pennylane/interfaces/batch/__init__.py @@ -322,6 +322,7 @@ def cost_fn(params, x): # the default execution function is batch_execute execute_fn = cache_execute(batch_execute, cache, expand_fn=expand_fn) + _mode = "backward" if gradient_fn == "device": # gradient function is a device method @@ -338,6 +339,7 @@ def cost_fn(params, x): # both results and gradients execute_fn = set_shots(device, 
override_shots)(device.execute_and_gradients) gradient_fn = None + _mode = "forward" elif mode == "backward": # disable caching on the forward pass @@ -361,7 +363,13 @@ def cost_fn(params, x): if interface in INTERFACE_NAMES["Autograd"]: from .autograd import execute as _execute elif interface in INTERFACE_NAMES["TensorFlow"]: - from .tensorflow import execute as _execute + import tensorflow as tf + + if tf.executing_eagerly(): + from .tensorflow import execute as _execute + else: + from .tensorflow_autograph import execute as _execute + elif interface in INTERFACE_NAMES["PyTorch"]: from .torch import execute as _execute elif interface in INTERFACE_NAMES["JAX"]: @@ -379,6 +387,8 @@ def cost_fn(params, x): f"version of {interface_name} to enable the '{interface}' interface." ) from e - res = _execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=max_diff) + res = _execute( + tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=max_diff, mode=_mode + ) return res diff --git a/pennylane/interfaces/batch/autograd.py b/pennylane/interfaces/batch/autograd.py index b71bffe8207..8be0d0e8d15 100644 --- a/pennylane/interfaces/batch/autograd.py +++ b/pennylane/interfaces/batch/autograd.py @@ -23,7 +23,9 @@ from pennylane import numpy as np -def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2): +def execute( + tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" +): """Execute a batch of tapes with Autograd parameters on a device. Args: diff --git a/pennylane/interfaces/batch/jax.py b/pennylane/interfaces/batch/jax.py index a3a8462daaa..0479e8ca401 100644 --- a/pennylane/interfaces/batch/jax.py +++ b/pennylane/interfaces/batch/jax.py @@ -27,7 +27,9 @@ dtype = jnp.float64 -def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=1): +def execute( + tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=1, mode="backward" +): """Execute a batch of tapes with JAX parameters on a device. Args: diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 619ff759cec..996c8792c78 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -39,7 +39,9 @@ def _compute_vjp(dy, jacs): return vjps -def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2): +def execute( + tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" +): """Execute a batch of tapes with TensorFlow parameters on a device. Args: @@ -65,51 +67,40 @@ def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_d list[list[tf.Tensor]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. 
""" - all_params = [] + parameters = [] - lens = [] - trainable = [] + params_unwrapped = [] - for tape in tapes: + for i, tape in enumerate(tapes): # store the trainable parameters params = tape.get_parameters(trainable_only=False) tape.trainable_params = qml.math.get_trainable_indices(params) parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] - all_params += params - trainable += (np.array(list(tape.trainable_params)) + sum(lens)).tolist() - - lens.append(len(params)) - @tf.custom_gradient - def _execute(*all_params): # pylint:disable=unused-argument # store all unwrapped parameters - count = 0 - params_unwrapped = [] + params_unwrapped.append( + [i.numpy() if isinstance(i, (tf.Variable, tf.Tensor)) else i for i in params] + ) - for s in lens: - params_unwrapped.append(qml.math.unwrap(all_params[count : count + s])) - count += s + with qml.tape.Unwrap(*tapes, set_trainable=False): + # Forward pass: execute the tapes + res, jacs = execute_fn(tapes, **gradient_kwargs) - with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): - # Forward pass: execute the tapes - res, jacs = execute_fn(tapes, **gradient_kwargs) + for i, tape in enumerate(tapes): + # convert output to TensorFlow tensors - for i, _ in enumerate(tapes): - # convert output to TensorFlow tensors + if isinstance(res[i], np.ndarray): + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + res[i] = tf.convert_to_tensor(r) - if isinstance(res[i], np.ndarray): - # For backwards compatibility, we flatten ragged tape outputs - # when there is no sampling - r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] - res[i] = tf.convert_to_tensor(r) - - elif isinstance(res[i], tuple): - res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) - - else: - res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + elif isinstance(res[i], tuple): + res[i] = tuple(tf.convert_to_tensor(r) for r in res[i]) + @tf.custom_gradient + def _execute(*parameters): # pylint:disable=unused-argument def grad_fn(*dy, **tfkwargs): """Returns the vector-Jacobian product with given parameter values and output gradient dy""" @@ -177,15 +168,9 @@ def grad_fn(*dy, **tfkwargs): with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) - vjps = iter(vjps) - vjps = [next(vjps) if x in trainable else None for x in range(len(all_params))] - variables = tfkwargs.get("variables", None) return (vjps, variables) if variables is not None else vjps return res, grad_fn - if tf.executing_eagerly(): - return _execute(*all_params) - - return tf.py_function(func=_execute, inp=all_params, Tout=[tf.float64]) + return _execute(*parameters) diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py new file mode 100644 index 00000000000..13308bbc9f6 --- /dev/null +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -0,0 +1,236 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This module contains functions for adding the TensorFlow interface
+to a PennyLane Device class.
+"""
+# pylint: disable=too-many-arguments,too-many-branches
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.eager import context
+
+import pennylane as qml
+
+
+from .tensorflow import _compute_vjp
+
+
+def execute(
+    tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward"
+):
+    """Execute a batch of tapes with TensorFlow parameters on a device.
+
+    Args:
+        tapes (Sequence[.QuantumTape]): batch of tapes to execute
+        device (.Device): Device to use to execute the batch of tapes.
+            If the device does not provide a ``batch_execute`` method,
+            by default the tapes will be executed in serial.
+        execute_fn (callable): The execution function used to execute the tapes
+            during the forward pass. This function must return a tuple ``(results, jacobians)``.
+            If ``jacobians`` is an empty list, then ``gradient_fn`` is used to
+            compute the gradients during the backwards pass.
+        gradient_kwargs (dict): dictionary of keyword arguments to pass when
+            determining the gradients of tapes
+        gradient_fn (callable): the gradient function to use to compute quantum gradients
+        _n (int): a positive integer used to track nesting of derivatives, for example
+            if the nth-order derivative is requested.
+        max_diff (int): If ``gradient_fn`` is a gradient transform, this option specifies
+            the maximum number of derivatives to support. Increasing this value allows
+            for higher order derivatives to be extracted, at the cost of additional
+            (classical) computational overhead during the backwards pass.
+
+    Returns:
+        list[list[tf.Tensor]]: A nested list of tape results. Each element in
+        the returned list corresponds in order to the provided tapes.
+ """ + all_params = [] + parameters = [] + lens = [] + trainable = [] + output_types = [] + + for tape in tapes: + # store the trainable parameters + params = tape.get_parameters(trainable_only=False) + tape.trainable_params = qml.math.get_trainable_indices(params) + + parameters += [p for i, p in enumerate(params) if i in tape.trainable_params] + all_params += params + trainable += (np.array(list(tape.trainable_params)) + sum(lens)).tolist() + + lens.append(len(params)) + + if tape.all_sampled: + output_types.append(tf.int64) + elif tape.measurements[0].return_type is qml.operation.State: + output_types.append(tf.complex128) + else: + output_types.append(tf.float64) + + if mode == "forward": + output_types += [tf.float64] * len(tapes) + + def _unwrap_params(all_params): + count = 0 + params_unwrapped = [] + + for s in lens: + params_unwrapped.append(qml.math.unwrap(all_params[count : count + s])) + count += s + + return params_unwrapped + + def _forward(*all_params): + params_unwrapped = _unwrap_params(all_params) + + with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + # Forward pass: execute the tapes + res, jacs = execute_fn(tapes, **gradient_kwargs) + + for i, _ in enumerate(tapes): + # convert output to TensorFlow tensors + + if isinstance(res[i], np.ndarray): + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + res[i] = tf.convert_to_tensor(r) + + elif isinstance(res[i], tuple): + res[i] = tuple(tf.cast(tf.convert_to_tensor(r), tf.float64) for r in res[i]) + else: + res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + + return res + jacs + + @tf.custom_gradient + def _execute(*all_params): # pylint:disable=unused-argument + res = tf.py_function(func=_forward, inp=all_params, Tout=output_types) + + if mode == "forward": + jacs = res[len(tapes) :] + res = res[: len(tapes)] + + def grad_fn(*dy, **tfkwargs): + """Returns the vector-Jacobian product with given + parameter values and output gradient dy""" + + dy = [qml.math.T(d) for d in dy] + + if mode == "forward": + # Jacobians were computed on the forward pass (mode="forward") + # No additional quantum evaluations needed; simply compute the VJPs directly. + len_dy = len(dy) + vjps = tf.py_function( + func=lambda *args: _compute_vjp(args[:len_dy], args[len_dy:]), + inp=dy + jacs, + Tout=[tf.float64] * len(parameters), + ) + + else: + # Need to compute the Jacobians on the backward pass (accumulation="backward") + if isinstance(gradient_fn, qml.gradients.gradient_transform): + # Gradient function is a gradient transform. 
+ + # Generate and execute the required gradient tapes + if _n == max_diff or not context.executing_eagerly(): + + len_all_params = len(all_params) + + def _backward(*all_params): + dy = all_params[len_all_params:] + all_params = all_params[:len_all_params] + params_unwrapped = _unwrap_params(all_params) + + with qml.tape.Unwrap( + *tapes, params=params_unwrapped, set_trainable=False + ): + vjp_tapes, processing_fn = qml.gradients.batch_vjp( + tapes, + dy, + gradient_fn, + reduction=lambda vjps, x: vjps.extend(qml.math.unstack(x)), + gradient_kwargs=gradient_kwargs, + ) + + vjps = processing_fn(execute_fn(vjp_tapes)[0]) + + return vjps + + vjps = tf.py_function( + func=_backward, + inp=list(all_params) + dy, + Tout=[tf.float64] * len(parameters), + ) + + else: + vjp_tapes, processing_fn = qml.gradients.batch_vjp( + tapes, + dy, + gradient_fn, + reduction="extend", + gradient_kwargs=gradient_kwargs, + ) + + # This is where the magic happens. Note that we call ``execute``. + # This recursion, coupled with the fact that the gradient transforms + # are differentiable, allows for arbitrary order differentiation. + vjps = processing_fn( + execute( + vjp_tapes, + device, + execute_fn, + gradient_fn, + gradient_kwargs, + _n=_n + 1, + max_diff=max_diff, + ) + ) + + else: + # Gradient function is not a gradient transform + # (e.g., it might be a device method). + # Note that unlike the previous branch: + # + # - there is no recursion here + # - gradient_fn is not differentiable + # + # so we cannot support higher-order derivatives. + len_all_params = len(all_params) + + def _backward(*all_params): + dy = all_params[len_all_params:] + all_params = all_params[:len_all_params] + params_unwrapped = _unwrap_params(all_params) + + with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) + + return vjps + + vjps = tf.py_function( + func=_backward, + inp=list(all_params) + dy, + Tout=[tf.float64] * len(parameters), + ) + + vjps = iter(vjps) + vjps = [next(vjps) if x in trainable else None for x in range(len(all_params))] + + variables = tfkwargs.get("variables", None) + return (vjps, variables) if variables is not None else vjps + + return res, grad_fn + + return _execute(*all_params) diff --git a/pennylane/interfaces/batch/torch.py b/pennylane/interfaces/batch/torch.py index 9d263d2ad5d..fe10f2c9661 100644 --- a/pennylane/interfaces/batch/torch.py +++ b/pennylane/interfaces/batch/torch.py @@ -191,7 +191,9 @@ def backward(ctx, *dy): return (None,) + tuple(vjps) -def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2): +def execute( + tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" +): """Execute a batch of tapes with Torch parameters on a device. 
This function may be called recursively, if ``gradient_fn`` is a differentiable diff --git a/tests/interfaces/test_batch_tensorflow_qnode.py b/tests/interfaces/test_batch_tensorflow_qnode.py index 2606f2ca712..9cadf2fa894 100644 --- a/tests/interfaces/test_batch_tensorflow_qnode.py +++ b/tests/interfaces/test_batch_tensorflow_qnode.py @@ -1359,3 +1359,109 @@ def circuit(x, y): grad = tape.gradient(loss, [x, y]) expected = [-tf.sin(x) * tf.sin(y / 2) ** 2, tf.cos(x) * tf.sin(y) / 2] assert np.allclose(grad, expected, atol=tol, rtol=0) + + def test_autograph_jacobian(self, tol): + """Test that a parameter-shift vector-valued QNode can be compiled + using @tf.function, and differentiated""" + dev = qml.device("default.qubit", wires=2) + x = tf.Variable(0.543, dtype=tf.float64) + y = tf.Variable(-0.654, dtype=tf.float64) + + @tf.function + @qnode(dev, diff_method="parameter-shift", max_diff=1, interface="tf") + def circuit(x, y): + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + return qml.probs(wires=[0]), qml.probs(wires=[1]) + + with tf.GradientTape() as tape: + res = circuit(x, y) + + expected = np.array( + [ + [tf.cos(x / 2) ** 2, tf.sin(x / 2) ** 2], + [(1 + tf.cos(x) * tf.cos(y)) / 2, (1 - tf.cos(x) * tf.cos(y)) / 2], + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0) + + res = tape.jacobian(res, [x, y]) + expected = np.array( + [ + [ + [-tf.sin(x) / 2, tf.sin(x) / 2], + [-tf.sin(x) * tf.cos(y) / 2, tf.cos(y) * tf.sin(x) / 2], + ], + [ + [0, 0], + [-tf.cos(x) * tf.sin(y) / 2, tf.cos(x) * tf.sin(y) / 2], + ], + ] + ) + assert np.allclose(res, expected, atol=tol, rtol=0) + + @pytest.mark.parametrize("mode", ["forward", "backward"]) + def test_autograph_adjoint(self, mode, tol): + """Test that a parameter-shift vQNode can be compiled + using @tf.function, and differentiated to second order""" + dev = qml.device("default.qubit", wires=1) + x = tf.Variable(0.543, dtype=tf.float64) + y = tf.Variable(-0.654, dtype=tf.float64) + + @tf.function + @qnode(dev, diff_method="adjoint", interface="tf", mode=mode) + def circuit(x): + qml.RY(x[0], wires=0) + qml.RX(x[1], wires=0) + return qml.expval(qml.PauliZ(0)) + + x = tf.Variable([1.0, 2.0], dtype=tf.float64) + + with tf.GradientTape() as tape: + res = circuit(x) + g = tape.gradient(res, x) + a, b = x * 1.0 + + expected_res = tf.cos(a) * tf.cos(b) + assert np.allclose(res, expected_res, atol=tol, rtol=0) + + expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)] + assert np.allclose(g, expected_g, atol=tol, rtol=0) + + @pytest.mark.xfail + def test_autograph_hessian(self, tol): + """Test that a parameter-shift vQNode can be compiled + using @tf.function, and differentiated to second order""" + dev = qml.device("default.qubit", wires=1) + x = tf.Variable(0.543, dtype=tf.float64) + y = tf.Variable(-0.654, dtype=tf.float64) + + @tf.function + @qnode(dev, diff_method="parameter-shift", max_diff=2, interface="tf") + def circuit(x): + qml.RY(x[0], wires=0) + qml.RX(x[1], wires=0) + return qml.expval(qml.PauliZ(0)) + + x = tf.Variable([1.0, 2.0], dtype=tf.float64) + + with tf.GradientTape() as tape1: + with tf.GradientTape() as tape2: + res = circuit(x) + g = tape2.gradient(res, x) + + hess = tape1.gradient(g, x) + a, b = x * 1.0 + + expected_res = tf.cos(a) * tf.cos(b) + assert np.allclose(res, expected_res, atol=tol, rtol=0) + + expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)] + assert np.allclose(g, expected_g, atol=tol, rtol=0) + + expected_hess = [ + [-tf.cos(a) * tf.cos(b) + tf.sin(a) * 
tf.sin(b)], + [tf.sin(a) * tf.sin(b) - tf.cos(a) * tf.cos(b)], + ] + assert np.allclose(hess, expected_hess, atol=tol, rtol=0) From 21784eb962c85c8a5561f18c84a844f1d76d7d3a Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 00:51:49 +0800 Subject: [PATCH 11/20] update --- pennylane/interfaces/batch/autograd.py | 1 + pennylane/interfaces/batch/jax.py | 1 + pennylane/interfaces/batch/tensorflow.py | 1 + pennylane/interfaces/batch/tensorflow_autograph.py | 2 +- pennylane/interfaces/batch/torch.py | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pennylane/interfaces/batch/autograd.py b/pennylane/interfaces/batch/autograd.py index 8be0d0e8d15..241dd8ea97d 100644 --- a/pennylane/interfaces/batch/autograd.py +++ b/pennylane/interfaces/batch/autograd.py @@ -51,6 +51,7 @@ def execute( list[list[float]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. """ + # pylint: disable=unused-argument for tape in tapes: # set the trainable parameters params = tape.get_parameters(trainable_only=False) diff --git a/pennylane/interfaces/batch/jax.py b/pennylane/interfaces/batch/jax.py index 0479e8ca401..8f43e86dc16 100644 --- a/pennylane/interfaces/batch/jax.py +++ b/pennylane/interfaces/batch/jax.py @@ -55,6 +55,7 @@ def execute( list[list[float]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. """ + # pylint: disable=unused-argument if max_diff > 1: raise ValueError("The JAX interface only supports first order derivatives.") diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index 996c8792c78..bc784778867 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -67,6 +67,7 @@ def execute( list[list[tf.Tensor]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. """ + # pylint: disable=unused-argument parameters = [] params_unwrapped = [] diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 13308bbc9f6..6d0b75f7351 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -15,7 +15,7 @@ This module contains functions for adding the TensorFlow interface to a PennyLane Device class. """ -# pylint: disable=too-many-arguments,too-many-branches +# pylint: disable=too-many-arguments,too-many-branches,too-many-statements import numpy as np import tensorflow as tf from tensorflow.python.eager import context diff --git a/pennylane/interfaces/batch/torch.py b/pennylane/interfaces/batch/torch.py index fe10f2c9661..2947e03d0d8 100644 --- a/pennylane/interfaces/batch/torch.py +++ b/pennylane/interfaces/batch/torch.py @@ -222,6 +222,7 @@ def execute( list[list[torch.Tensor]]: A nested list of tape results. Each element in the returned list corresponds in order to the provided tapes. 
""" + # pylint: disable=unused-argument parameters = [] for tape in tapes: # set the trainable parameters From 4092279eb7d28f0db2054a320c2b3b55378ddebd Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 02:49:29 +0800 Subject: [PATCH 12/20] update --- pennylane/gradients/vjp.py | 22 ++++++++++------ .../interfaces/batch/tensorflow_autograph.py | 26 +++++++++++++------ .../interfaces/test_batch_tensorflow_qnode.py | 21 +++++++-------- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/pennylane/gradients/vjp.py b/pennylane/gradients/vjp.py index ee2a40d332e..d37e725405a 100644 --- a/pennylane/gradients/vjp.py +++ b/pennylane/gradients/vjp.py @@ -20,7 +20,7 @@ from pennylane import math -def compute_vjp(dy, jac): +def compute_vjp(dy, jac, num=None): """Convenience function to compute the vector-Jacobian product for a given vector of gradient outputs and a Jacobian. @@ -38,10 +38,13 @@ def compute_vjp(dy, jac): dy_row = math.reshape(dy, [-1]) + if num is None: + num = math.shape(dy_row)[0] + if not isinstance(dy_row, np.ndarray): jac = math.convert_like(jac, dy_row) - jac = math.reshape(jac, [dy_row.shape[0], -1]) + jac = math.reshape(jac, [num, -1]) try: if math.allclose(dy, 0): @@ -156,23 +159,23 @@ def vjp(tape, dy, gradient_fn, gradient_kwargs=None): if num_params == 0: # The tape has no trainable parameters; the VJP # is simply none. - return [], lambda _: None + return [], lambda _, num=None: None try: if math.allclose(dy, 0): # If the dy vector is zero, then the # corresponding element of the VJP will be zero, # and we can avoid a quantum computation. - return [], lambda _: math.convert_like(np.zeros([num_params]), dy) + return [], lambda _, num=None: math.convert_like(np.zeros([num_params]), dy) except (AttributeError, TypeError): pass gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs) - def processing_fn(results): + def processing_fn(results, num=None): # postprocess results to compute the Jacobian jac = fn(results) - return compute_vjp(dy, jac) + return compute_vjp(dy, jac, num=num) return gradient_tapes, processing_fn @@ -304,10 +307,13 @@ def ansatz(x): processing_fns.append(fn) gradient_tapes.extend(g_tapes) - def processing_fn(results): + def processing_fn(results, nums=None): vjps = [] start = 0 + if nums is None: + nums = [None] * len(tapes) + for t_idx in range(len(tapes)): # extract the correct results from the flat list res_len = reshape_info[t_idx] @@ -315,7 +321,7 @@ def processing_fn(results): start += res_len # postprocess results to compute the VJP - vjp_ = processing_fns[t_idx](res_t) + vjp_ = processing_fns[t_idx](res_t, num=nums[t_idx]) if vjp_ is None: if reduction == "append": diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 6d0b75f7351..155ef6cb0b5 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -81,6 +81,8 @@ def execute( if mode == "forward": output_types += [tf.float64] * len(tapes) + output_types += [tf.int32] * len(tapes) + def _unwrap_params(all_params): count = 0 params_unwrapped = [] @@ -93,6 +95,7 @@ def _unwrap_params(all_params): def _forward(*all_params): params_unwrapped = _unwrap_params(all_params) + output_sizes = [] with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): # Forward pass: execute the tapes @@ -106,27 +109,32 @@ def _forward(*all_params): # when there is no sampling r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else 
res[i] res[i] = tf.convert_to_tensor(r) + output_sizes.append(tf.size(res[i])) elif isinstance(res[i], tuple): res[i] = tuple(tf.cast(tf.convert_to_tensor(r), tf.float64) for r in res[i]) else: res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) + output_sizes.append(tf.size(res[i])) - return res + jacs + return res + jacs + output_sizes @tf.custom_gradient def _execute(*all_params): # pylint:disable=unused-argument + res = tf.py_function(func=_forward, inp=all_params, Tout=output_types) + output_sizes = res[-len(tapes) :] if mode == "forward": - jacs = res[len(tapes) :] - res = res[: len(tapes)] + jacs = res[len(tapes) : 2 * len(tapes)] + + res = res[: len(tapes)] def grad_fn(*dy, **tfkwargs): """Returns the vector-Jacobian product with given parameter values and output gradient dy""" - dy = [qml.math.T(d) for d in dy] + dy = [qml.math.T(d) for d in dy[: len(res)]] if mode == "forward": # Jacobians were computed on the forward pass (mode="forward") @@ -144,7 +152,7 @@ def grad_fn(*dy, **tfkwargs): # Gradient function is a gradient transform. # Generate and execute the required gradient tapes - if _n == max_diff or not context.executing_eagerly(): + if _n == max_diff: len_all_params = len(all_params) @@ -165,7 +173,6 @@ def _backward(*all_params): ) vjps = processing_fn(execute_fn(vjp_tapes)[0]) - return vjps vjps = tf.py_function( @@ -179,7 +186,7 @@ def _backward(*all_params): tapes, dy, gradient_fn, - reduction="extend", + reduction="append", gradient_kwargs=gradient_kwargs, ) @@ -195,9 +202,12 @@ def _backward(*all_params): gradient_kwargs, _n=_n + 1, max_diff=max_diff, - ) + ), + nums=output_sizes, ) + vjps = tf.unstack(tf.concat(vjps, 0), num=len(parameters)) + else: # Gradient function is not a gradient transform # (e.g., it might be a device method). 
diff --git a/tests/interfaces/test_batch_tensorflow_qnode.py b/tests/interfaces/test_batch_tensorflow_qnode.py index 9cadf2fa894..84cb9654d7c 100644 --- a/tests/interfaces/test_batch_tensorflow_qnode.py +++ b/tests/interfaces/test_batch_tensorflow_qnode.py @@ -1429,30 +1429,27 @@ def circuit(x): expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)] assert np.allclose(g, expected_g, atol=tol, rtol=0) - @pytest.mark.xfail def test_autograph_hessian(self, tol): """Test that a parameter-shift vQNode can be compiled using @tf.function, and differentiated to second order""" dev = qml.device("default.qubit", wires=1) - x = tf.Variable(0.543, dtype=tf.float64) - y = tf.Variable(-0.654, dtype=tf.float64) + a = tf.Variable(0.543, dtype=tf.float64) + b = tf.Variable(-0.654, dtype=tf.float64) @tf.function @qnode(dev, diff_method="parameter-shift", max_diff=2, interface="tf") - def circuit(x): - qml.RY(x[0], wires=0) - qml.RX(x[1], wires=0) + def circuit(x, y): + qml.RY(x, wires=0) + qml.RX(y, wires=0) return qml.expval(qml.PauliZ(0)) - x = tf.Variable([1.0, 2.0], dtype=tf.float64) - with tf.GradientTape() as tape1: with tf.GradientTape() as tape2: - res = circuit(x) - g = tape2.gradient(res, x) + res = circuit(a, b) + g = tape2.gradient(res, [a, b]) + g = tf.stack(g) - hess = tape1.gradient(g, x) - a, b = x * 1.0 + hess = tf.stack(tape1.gradient(g, [a, b])) expected_res = tf.cos(a) * tf.cos(b) assert np.allclose(res, expected_res, atol=tol, rtol=0) From 5daecfb8d4837f97377600b1bd8e2e6068203c41 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 02:51:23 +0800 Subject: [PATCH 13/20] linting --- pennylane/interfaces/batch/tensorflow_autograph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 155ef6cb0b5..9f20095d664 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -18,7 +18,6 @@ # pylint: disable=too-many-arguments,too-many-branches,too-many-statements import numpy as np import tensorflow as tf -from tensorflow.python.eager import context import pennylane as qml From 9fd7fa88da7026a512cbfae46effb147f7984b10 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 02:52:07 +0800 Subject: [PATCH 14/20] linting --- pennylane/interfaces/batch/tensorflow_autograph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 9f20095d664..3d8ed9cf042 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -This module contains functions for adding the TensorFlow interface +This module contains functions for adding the TensorFlow Autograph interface to a PennyLane Device class. 
""" # pylint: disable=too-many-arguments,too-many-branches,too-many-statements From c6f14bc130e4e193da95163f80feeac6bfb33ef0 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 14:08:24 +0800 Subject: [PATCH 15/20] changelog --- doc/releases/changelog-dev.md | 43 ++++++++++++++++++- pennylane/interfaces/batch/__init__.py | 8 ++-- .../interfaces/batch/tensorflow_autograph.py | 16 +++---- .../interfaces/test_batch_tensorflow_qnode.py | 39 +++++++++++------ 4 files changed, 80 insertions(+), 26 deletions(-) diff --git a/doc/releases/changelog-dev.md b/doc/releases/changelog-dev.md index 78ee1ffd08a..465eea4e742 100644 --- a/doc/releases/changelog-dev.md +++ b/doc/releases/changelog-dev.md @@ -3,6 +3,47 @@ # Release 0.20.0-dev (development release)

New features since last release

+
+* It is now possible to use TensorFlow's [AutoGraph
+  mode](https://www.tensorflow.org/guide/function) with QNodes on all devices and with arbitrary
+  differentiation methods. Previously, AutoGraph mode only supported `diff_method="backprop"`. This
+  will result in significantly more performant model execution, at the cost of a more expensive
+  initial compilation. [(#1886)](https://github.com/PennyLaneAI/pennylane/pull/1886)
+
+  Use AutoGraph to convert your QNodes or cost functions into TensorFlow
+  graphs by decorating them with `@tf.function`:
+
+  ```python
+  dev = qml.device("lightning.qubit", wires=2)
+
+  @qml.beta.qnode(dev, diff_method="adjoint", interface="tf", max_diff=1)
+  def circuit(x):
+      qml.RX(x[0], wires=0)
+      qml.RY(x[1], wires=1)
+      return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)), qml.expval(qml.PauliZ(0))
+
+  @tf.function
+  def cost(x):
+      return tf.reduce_sum(circuit(x))
+
+  x = tf.Variable([0.5, 0.7], dtype=tf.float64)
+
+  with tf.GradientTape() as tape:
+      loss = cost(x)
+
+  grad = tape.gradient(loss, x)
+  ```
+
+  The initial execution may take slightly longer than when executing the circuit in
+  eager mode; this is because TensorFlow is tracing the function to create the graph.
+  Subsequent executions will be much more performant.
+
+  Note that using AutoGraph with backprop-enabled devices, such as `default.qubit`,
+  will yield the best performance.
+
+  For more details, please see the [TensorFlow AutoGraph
+  documentation](https://www.tensorflow.org/guide/function).
+
 * A thermal relaxation channel is added to the Noisy channels. The channel description can be
   found on the supplementary information of
   [Quantum classifier with tailored quantum kernels](https://arxiv.org/abs/1909.02611).
   [(#1766)](https://github.com/PennyLaneAI/pennylane/pull/1766)
@@ -38,4 +79,4 @@
 This release contains contributions from (in alphabetical order):

-Jalani Kanem, Christina Lee, Guillermo Alonso-Linaje, Alejandro Montanez, Jay Soni
+
diff --git a/pennylane/interfaces/batch/__init__.py b/pennylane/interfaces/batch/__init__.py index e4d9ff85ec7..82d8b65b89d 100644 --- a/pennylane/interfaces/batch/__init__.py +++ b/pennylane/interfaces/batch/__init__.py @@ -31,7 +31,7 @@ "Autograd": ("autograd", "numpy"), # for backwards compatibility "JAX": ("jax", "JAX"), "PyTorch": ("torch", "pytorch"), - "TensorFlow": ("tf", "tensorflow"), + "TensorFlow": ("tf", "tensorflow", "tensorflow-autograph", "tf-autograph"), } """dict[str, str]: maps allowed interface strings to the name of the interface""" @@ -365,10 +365,10 @@ def cost_fn(params, x): elif interface in INTERFACE_NAMES["TensorFlow"]: import tensorflow as tf - if tf.executing_eagerly(): - from .tensorflow import execute as _execute - else: + if not tf.executing_eagerly() or "autograph" in interface: from .tensorflow_autograph import execute as _execute + else: + from .tensorflow import execute as _execute elif interface in INTERFACE_NAMES["PyTorch"]: from .torch import execute as _execute diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 3d8ed9cf042..2144c099609 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -82,18 +82,18 @@ def execute( output_types += [tf.int32] * len(tapes) - def _unwrap_params(all_params): + def _nest_params(all_params): count = 0 params_unwrapped = [] for s in lens: - params_unwrapped.append(qml.math.unwrap(all_params[count : count + s])) + params_unwrapped.append(all_params[count : count + s]) count += s return params_unwrapped def _forward(*all_params): - params_unwrapped = _unwrap_params(all_params) + params_unwrapped = _nest_params(all_params) output_sizes = [] with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): @@ -121,7 +121,7 @@ def _forward(*all_params): @tf.custom_gradient def _execute(*all_params): # pylint:disable=unused-argument - res = tf.py_function(func=_forward, inp=all_params, Tout=output_types) + res = tf.numpy_function(func=_forward, inp=all_params, Tout=output_types) output_sizes = res[-len(tapes) :] if mode == "forward": @@ -139,7 +139,7 @@ def grad_fn(*dy, **tfkwargs): # Jacobians were computed on the forward pass (mode="forward") # No additional quantum evaluations needed; simply compute the VJPs directly. 
len_dy = len(dy) - vjps = tf.py_function( + vjps = tf.numpy_function( func=lambda *args: _compute_vjp(args[:len_dy], args[len_dy:]), inp=dy + jacs, Tout=[tf.float64] * len(parameters), @@ -158,7 +158,7 @@ def grad_fn(*dy, **tfkwargs): def _backward(*all_params): dy = all_params[len_all_params:] all_params = all_params[:len_all_params] - params_unwrapped = _unwrap_params(all_params) + params_unwrapped = _nest_params(all_params) with qml.tape.Unwrap( *tapes, params=params_unwrapped, set_trainable=False @@ -221,14 +221,14 @@ def _backward(*all_params): def _backward(*all_params): dy = all_params[len_all_params:] all_params = all_params[:len_all_params] - params_unwrapped = _unwrap_params(all_params) + params_unwrapped = _nest_params(all_params) with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) return vjps - vjps = tf.py_function( + vjps = tf.numpy_function( func=_backward, inp=list(all_params) + dy, Tout=[tf.float64] * len(parameters), diff --git a/tests/interfaces/test_batch_tensorflow_qnode.py b/tests/interfaces/test_batch_tensorflow_qnode.py index 84cb9654d7c..395ca381ec7 100644 --- a/tests/interfaces/test_batch_tensorflow_qnode.py +++ b/tests/interfaces/test_batch_tensorflow_qnode.py @@ -1331,18 +1331,31 @@ def circuit(): assert result.dtype == tf.int64 +@pytest.mark.parametrize( + "decorator, interface", [(tf.function, "tf"), (lambda x: x, "tf-autograph")] +) class TestAutograph: - """Tests for Autograph mode""" + """Tests for Autograph mode. This class is parametrized over the combination: + + 1. interface="tf" with the QNode decoratored with @tf.function, and + 2. interface="tf-autograph" with no QNode decorator. - def test_autograph_gradients(self, tol): + Option (1) checks that if the user enables autograph functionality + in TensorFlow, the new `tf-autograph` interface is automatically applied. + + Option (2) ensures that the tf-autograph interface can be manually applied, + even if in eager execution mode. 
+ """ + + def test_autograph_gradients(self, decorator, interface, tol): """Test that a parameter-shift QNode can be compiled using @tf.function, and differentiated""" dev = qml.device("default.qubit", wires=2) x = tf.Variable(0.543, dtype=tf.float64) y = tf.Variable(-0.654, dtype=tf.float64) - @tf.function - @qnode(dev, diff_method="parameter-shift", interface="tf") + @decorator + @qnode(dev, diff_method="parameter-shift", interface=interface) def circuit(x, y): qml.RX(x, wires=[0]) qml.RY(y, wires=[1]) @@ -1360,15 +1373,15 @@ def circuit(x, y): expected = [-tf.sin(x) * tf.sin(y / 2) ** 2, tf.cos(x) * tf.sin(y) / 2] assert np.allclose(grad, expected, atol=tol, rtol=0) - def test_autograph_jacobian(self, tol): + def test_autograph_jacobian(self, decorator, interface, tol): """Test that a parameter-shift vector-valued QNode can be compiled using @tf.function, and differentiated""" dev = qml.device("default.qubit", wires=2) x = tf.Variable(0.543, dtype=tf.float64) y = tf.Variable(-0.654, dtype=tf.float64) - @tf.function - @qnode(dev, diff_method="parameter-shift", max_diff=1, interface="tf") + @decorator + @qnode(dev, diff_method="parameter-shift", max_diff=1, interface=interface) def circuit(x, y): qml.RX(x, wires=[0]) qml.RY(y, wires=[1]) @@ -1402,15 +1415,15 @@ def circuit(x, y): assert np.allclose(res, expected, atol=tol, rtol=0) @pytest.mark.parametrize("mode", ["forward", "backward"]) - def test_autograph_adjoint(self, mode, tol): + def test_autograph_adjoint(self, mode, decorator, interface, tol): """Test that a parameter-shift vQNode can be compiled using @tf.function, and differentiated to second order""" dev = qml.device("default.qubit", wires=1) x = tf.Variable(0.543, dtype=tf.float64) y = tf.Variable(-0.654, dtype=tf.float64) - @tf.function - @qnode(dev, diff_method="adjoint", interface="tf", mode=mode) + @decorator + @qnode(dev, diff_method="adjoint", interface=interface, mode=mode) def circuit(x): qml.RY(x[0], wires=0) qml.RX(x[1], wires=0) @@ -1429,15 +1442,15 @@ def circuit(x): expected_g = [-tf.sin(a) * tf.cos(b), -tf.cos(a) * tf.sin(b)] assert np.allclose(g, expected_g, atol=tol, rtol=0) - def test_autograph_hessian(self, tol): + def test_autograph_hessian(self, decorator, interface, tol): """Test that a parameter-shift vQNode can be compiled using @tf.function, and differentiated to second order""" dev = qml.device("default.qubit", wires=1) a = tf.Variable(0.543, dtype=tf.float64) b = tf.Variable(-0.654, dtype=tf.float64) - @tf.function - @qnode(dev, diff_method="parameter-shift", max_diff=2, interface="tf") + @decorator + @qnode(dev, diff_method="parameter-shift", max_diff=2, interface=interface) def circuit(x, y): qml.RY(x, wires=0) qml.RX(y, wires=0) From 74b8387cad6f4c960cc3696a15d79da3bb2ec76b Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 14:18:08 +0800 Subject: [PATCH 16/20] more --- tests/beta/test_beta_qnode.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/beta/test_beta_qnode.py b/tests/beta/test_beta_qnode.py index 185ff3a3dfb..8c0fcf0c045 100644 --- a/tests/beta/test_beta_qnode.py +++ b/tests/beta/test_beta_qnode.py @@ -35,10 +35,7 @@ def test_invalid_interface(self): """Test that an exception is raised for an invalid interface""" dev = qml.device("default.qubit", wires=1) test_interface = "something" - expected_error = ( - fr"Unknown interface {test_interface}\. Interface must be " - r"one of \[None, 'autograd', 'numpy', 'jax', 'JAX', 'torch', 'pytorch', 'tf', 'tensorflow'\]\." 
- ) + expected_error = fr"Unknown interface {test_interface}\. Interface must be one of" with pytest.raises(qml.QuantumFunctionError, match=expected_error): QNode(dummyfunc, dev, interface="something") @@ -54,10 +51,7 @@ def circuit(x): qml.RX(wires=0) return qml.probs(wires=0) - expected_error = ( - fr"Unknown interface {test_interface}\. Interface must be " - r"one of \[None, 'autograd', 'numpy', 'jax', 'JAX', 'torch', 'pytorch', 'tf', 'tensorflow'\]\." - ) + expected_error = fr"Unknown interface {test_interface}\. Interface must be one of" with pytest.raises(qml.QuantumFunctionError, match=expected_error): circuit.interface = test_interface From 94f686603a613de94d0c9e7dcdc6b4eb6ee1e64f Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 15:05:36 +0800 Subject: [PATCH 17/20] more tests --- .../interfaces/test_batch_tensorflow_qnode.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/interfaces/test_batch_tensorflow_qnode.py b/tests/interfaces/test_batch_tensorflow_qnode.py index 395ca381ec7..1b70063d9db 100644 --- a/tests/interfaces/test_batch_tensorflow_qnode.py +++ b/tests/interfaces/test_batch_tensorflow_qnode.py @@ -1475,3 +1475,42 @@ def circuit(x, y): [tf.sin(a) * tf.sin(b) - tf.cos(a) * tf.cos(b)], ] assert np.allclose(hess, expected_hess, atol=tol, rtol=0) + + def test_autograph_state(self, decorator, interface, tol): + """Test that a parameter-shift QNode returning a state can be compiled + using @tf.function""" + dev = qml.device("default.qubit", wires=2) + x = tf.Variable(0.543, dtype=tf.float64) + y = tf.Variable(-0.654, dtype=tf.float64) + + @decorator + @qnode(dev, diff_method="parameter-shift", interface=interface) + def circuit(x, y): + qml.RX(x, wires=[0]) + qml.RY(y, wires=[1]) + qml.CNOT(wires=[0, 1]) + return qml.state() + + with tf.GradientTape() as tape: + state = circuit(x, y) + probs = tf.abs(state) ** 2 + loss = probs[0] + + expected = tf.cos(x / 2) ** 2 * tf.cos(y / 2) ** 2 + assert np.allclose(loss, expected, atol=tol, rtol=0) + + def test_autograph_dimension(self, decorator, interface, tol): + """Test sampling works as expected""" + dev = qml.device("default.qubit", wires=2, shots=10) + + @decorator + @qnode(dev, diff_method="parameter-shift", interface=interface) + def circuit(): + qml.Hadamard(wires=[0]) + qml.CNOT(wires=[0, 1]) + return [qml.sample(qml.PauliZ(0)), qml.sample(qml.PauliX(1))] + + res = circuit() + + assert res.shape == (2, 10) + assert isinstance(res, tf.Tensor) From 6536de6fa306de3773262e6475689ebe14438edf Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Fri, 12 Nov 2021 15:48:33 +0800 Subject: [PATCH 18/20] more tests --- .../interfaces/batch/tensorflow_autograph.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index 2144c099609..f75451a098a 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -103,18 +103,12 @@ def _forward(*all_params): for i, _ in enumerate(tapes): # convert output to TensorFlow tensors - if isinstance(res[i], np.ndarray): - # For backwards compatibility, we flatten ragged tape outputs - # when there is no sampling - r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] - res[i] = tf.convert_to_tensor(r) - output_sizes.append(tf.size(res[i])) - - elif isinstance(res[i], tuple): - res[i] = tuple(tf.cast(tf.convert_to_tensor(r), tf.float64) for r in 
res[i]) - else: - res[i] = tf.convert_to_tensor(qml.math.toarray(res[i])) - output_sizes.append(tf.size(res[i])) + # For backwards compatibility, we flatten ragged tape outputs + # when there is no sampling + r = np.hstack(res[i]) if res[i].dtype == np.dtype("object") else res[i] + + res[i] = tf.convert_to_tensor(r) + output_sizes.append(tf.size(res[i])) return res + jacs + output_sizes From 9b226ed3a5937458ed2e6309da7996025c51ae47 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Wed, 17 Nov 2021 21:38:46 +0800 Subject: [PATCH 19/20] suggested changes --- pennylane/gradients/vjp.py | 3 +++ pennylane/interfaces/batch/autograd.py | 6 +++--- pennylane/interfaces/batch/jax.py | 6 +++--- pennylane/interfaces/batch/tensorflow.py | 6 +++--- pennylane/interfaces/batch/tensorflow_autograph.py | 7 ++++--- pennylane/interfaces/batch/torch.py | 6 +++--- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pennylane/gradients/vjp.py b/pennylane/gradients/vjp.py index d37e725405a..33334b14edc 100644 --- a/pennylane/gradients/vjp.py +++ b/pennylane/gradients/vjp.py @@ -29,6 +29,9 @@ def compute_vjp(dy, jac, num=None): jac (tensor_like): Jacobian matrix. For an n-dimensional ``dy`` vector, the first n-dimensions of ``jac`` should match the shape of ``dy``. + num (int): The length of the flattened ``dy`` argument. This is an + optional argument, but can be useful to provide if ``dy`` potentially + has no shape (for example, due to tracing or just-in-time compilation). Returns: tensor_like: the vector-Jacobian product diff --git a/pennylane/interfaces/batch/autograd.py b/pennylane/interfaces/batch/autograd.py index 241dd8ea97d..6e2027e9832 100644 --- a/pennylane/interfaces/batch/autograd.py +++ b/pennylane/interfaces/batch/autograd.py @@ -23,9 +23,7 @@ from pennylane import numpy as np -def execute( - tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" -): +def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode=None): """Execute a batch of tapes with Autograd parameters on a device. Args: @@ -46,6 +44,8 @@ def execute( the maximum order of derivatives to support. Increasing this value allows for higher order derivatives to be extracted, at the cost of additional (classical) computational overhead during the backwards pass. + mode (str): Whether the gradients should be computed on the forward + pass (``forward``) or the backward pass (``backward``). Returns: list[list[float]]: A nested list of tape results. Each element in diff --git a/pennylane/interfaces/batch/jax.py b/pennylane/interfaces/batch/jax.py index 8f43e86dc16..d4e2f1373e1 100644 --- a/pennylane/interfaces/batch/jax.py +++ b/pennylane/interfaces/batch/jax.py @@ -27,9 +27,7 @@ dtype = jnp.float64 -def execute( - tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=1, mode="backward" -): +def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=1, mode=None): """Execute a batch of tapes with JAX parameters on a device. Args: @@ -50,6 +48,8 @@ def execute( the maximum order of derivatives to support. Increasing this value allows for higher order derivatives to be extracted, at the cost of additional (classical) computational overhead during the backwards pass. + mode (str): Whether the gradients should be computed on the forward + pass (``forward``) or the backward pass (``backward``). Returns: list[list[float]]: A nested list of tape results. 
Each element in diff --git a/pennylane/interfaces/batch/tensorflow.py b/pennylane/interfaces/batch/tensorflow.py index bc784778867..9b1cacdef69 100644 --- a/pennylane/interfaces/batch/tensorflow.py +++ b/pennylane/interfaces/batch/tensorflow.py @@ -39,9 +39,7 @@ def _compute_vjp(dy, jacs): return vjps -def execute( - tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" -): +def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode=None): """Execute a batch of tapes with TensorFlow parameters on a device. Args: @@ -62,6 +60,8 @@ def execute( the maximum number of derivatives to support. Increasing this value allows for higher order derivatives to be extracted, at the cost of additional (classical) computational overhead during the backwards pass. + mode (str): Whether the gradients should be computed on the forward + pass (``forward``) or the backward pass (``backward``). Returns: list[list[tf.Tensor]]: A nested list of tape results. Each element in diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index f75451a098a..a0ef3f2faeb 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -25,9 +25,7 @@ from .tensorflow import _compute_vjp -def execute( - tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" -): +def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode=None): """Execute a batch of tapes with TensorFlow parameters on a device. Args: @@ -48,6 +46,8 @@ def execute( the maximum number of derivatives to support. Increasing this value allows for higher order derivatives to be extracted, at the cost of additional (classical) computational overhead during the backwards pass. + mode (str): Whether the gradients should be computed on the forward + pass (``forward``) or the backward pass (``backward``). Returns: list[list[tf.Tensor]]: A nested list of tape results. Each element in @@ -195,6 +195,7 @@ def _backward(*all_params): gradient_kwargs, _n=_n + 1, max_diff=max_diff, + mode=mode, ), nums=output_sizes, ) diff --git a/pennylane/interfaces/batch/torch.py b/pennylane/interfaces/batch/torch.py index 2947e03d0d8..9e54b3de7a5 100644 --- a/pennylane/interfaces/batch/torch.py +++ b/pennylane/interfaces/batch/torch.py @@ -191,9 +191,7 @@ def backward(ctx, *dy): return (None,) + tuple(vjps) -def execute( - tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode="backward" -): +def execute(tapes, device, execute_fn, gradient_fn, gradient_kwargs, _n=1, max_diff=2, mode=None): """Execute a batch of tapes with Torch parameters on a device. This function may be called recursively, if ``gradient_fn`` is a differentiable @@ -217,6 +215,8 @@ def execute( the maximum order of derivatives to support. Increasing this value allows for higher order derivatives to be extracted, at the cost of additional (classical) computational overhead during the backwards pass. + mode (str): Whether the gradients should be computed on the forward + pass (``forward``) or the backward pass (``backward``). Returns: list[list[torch.Tensor]]: A nested list of tape results. 
Each element in From fd14c95da9f711246658d14119abc05e578e1f49 Mon Sep 17 00:00:00 2001 From: Josh Izaac Date: Wed, 17 Nov 2021 21:59:43 +0800 Subject: [PATCH 20/20] update --- pennylane/interfaces/batch/tensorflow_autograph.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pennylane/interfaces/batch/tensorflow_autograph.py b/pennylane/interfaces/batch/tensorflow_autograph.py index a0ef3f2faeb..47bc2151440 100644 --- a/pennylane/interfaces/batch/tensorflow_autograph.py +++ b/pennylane/interfaces/batch/tensorflow_autograph.py @@ -96,7 +96,7 @@ def _forward(*all_params): params_unwrapped = _nest_params(all_params) output_sizes = [] - with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=params_unwrapped): # Forward pass: execute the tapes res, jacs = execute_fn(tapes, **gradient_kwargs) @@ -154,9 +154,7 @@ def _backward(*all_params): all_params = all_params[:len_all_params] params_unwrapped = _nest_params(all_params) - with qml.tape.Unwrap( - *tapes, params=params_unwrapped, set_trainable=False - ): + with qml.tape.Unwrap(*tapes, params=params_unwrapped): vjp_tapes, processing_fn = qml.gradients.batch_vjp( tapes, dy, @@ -218,7 +216,7 @@ def _backward(*all_params): all_params = all_params[:len_all_params] params_unwrapped = _nest_params(all_params) - with qml.tape.Unwrap(*tapes, params=params_unwrapped, set_trainable=False): + with qml.tape.Unwrap(*tapes, params=params_unwrapped): vjps = _compute_vjp(dy, gradient_fn(tapes, **gradient_kwargs)) return vjps
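
A minimal end-to-end sketch of the pattern these patches enable: a QNode bound to a non-TensorFlow device, compiled with @tf.function so that execution is routed through tensorflow_autograph.execute. The qnode import path, device, wire count, and parameter values below are illustrative assumptions and are not taken from the diffs.

import tensorflow as tf
import pennylane as qml

# Assumption: the batch-execution QNode decorator referred to as `qnode` in the
# tests above; the exact import location may differ.
from pennylane.beta import qnode

dev = qml.device("default.qubit", wires=2)  # a non-TF device

@tf.function  # tracing disables eager execution, so the tf-autograph path is selected
@qnode(dev, diff_method="parameter-shift", interface="tf")
def circuit(x, y):
    qml.RX(x, wires=0)
    qml.RY(y, wires=1)
    qml.CNOT(wires=[0, 1])
    return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1))

x = tf.Variable(0.543, dtype=tf.float64)
y = tf.Variable(-0.654, dtype=tf.float64)

with tf.GradientTape() as tape:
    res = circuit(x, y)

grads = tape.gradient(res, [x, y])

# Passing interface="tf-autograph" selects the same execution path explicitly,
# even when the QNode is left undecorated and runs eagerly.

One design note: switching the wrappers from tf.py_function to tf.numpy_function means the _forward and _backward callables receive plain NumPy arrays rather than eager tensors, which is why the qml.math.unwrap step could be dropped and _unwrap_params reduces to the purely structural _nest_params.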