Support autodiff and batching in kernel matrix functions #3742

Merged 6 commits on Feb 9, 2023
37 changes: 37 additions & 0 deletions doc/releases/changelog-dev.md
@@ -398,6 +398,43 @@

<h3>Improvements</h3>

* The kernel matrix utility functions in `qml.kernels` are now autodifferentiation-compatible.
  In addition, they support batching, for example for quantum kernel execution with shot vectors.
  [(#3742)](https://github.com/PennyLaneAI/pennylane/pull/3742)

  In addition to the autodifferentiation support in JAX, Autograd, TensorFlow, and PyTorch,
  optional batching was added, enabling, for example, the following:

```python
import pennylane as qml

dev = qml.device('default.qubit', wires=2, shots=(100, 100))

@qml.qnode(dev)
def circuit(x1, x2):
    qml.templates.AngleEmbedding(x1, wires=dev.wires)
    qml.adjoint(qml.templates.AngleEmbedding)(x2, wires=dev.wires)
    return qml.probs(wires=dev.wires)

kernel = lambda x1, x2: circuit(x1, x2)[:, 0]
```

Note that we extract the first probability-vector entry for each of the two
sets of 100 shots. We can then compute the kernel matrix on a set of 4 (random)
feature vectors ``X``, obtaining one kernel matrix per set of 100 shots:

```pycon
>>> X = np.random.random((4, 2))
>>> qml.kernels.square_kernel_matrix(X, kernel)
tensor([[[1. , 0.86, 0.88, 0.92],
[0.86, 1. , 0.75, 0.97],
[0.88, 0.75, 1. , 0.91],
[0.92, 0.97, 0.91, 1. ]],

[[1. , 0.93, 0.91, 0.92],
[0.93, 1. , 0.8 , 1. ],
[0.91, 0.8 , 1. , 0.91],
[0.92, 1. , 0.91, 1. ]]], requires_grad=True)
```
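
The new autodifferentiation support can be exercised in a similar way. Below is a minimal
sketch, not part of the changelog example above: it assumes an analytic device without a
shot vector, and the names `circuit_exact` and `kernel_exact` are illustrative.

```python
import pennylane as qml
from pennylane import numpy as np

dev_exact = qml.device("default.qubit", wires=2)  # analytic device, no shot vector

@qml.qnode(dev_exact)
def circuit_exact(x1, x2):
    qml.templates.AngleEmbedding(x1, wires=dev_exact.wires)
    qml.adjoint(qml.templates.AngleEmbedding)(x2, wires=dev_exact.wires)
    return qml.probs(wires=dev_exact.wires)

# Without a shot vector, the probability vector is not batched, so entry 0 is a scalar.
kernel_exact = lambda x1, x2: circuit_exact(x1, x2)[0]

X = np.random.random((4, 2))
# Jacobian of the (4, 4) kernel matrix with respect to the (4, 2) features: shape (4, 4, 4, 2).
jac = qml.jacobian(lambda x: qml.kernels.square_kernel_matrix(x, kernel_exact))(X)
```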

* The parameter-shift derivative of variances saves a redundant evaluation of the
  corresponding unshifted expectation value tape, where possible.
  [(#3744)](https://github.com/PennyLaneAI/pennylane/pull/3744)
51 changes: 33 additions & 18 deletions pennylane/kernels/utils.py
@@ -14,7 +14,8 @@
"""
This file contains functionalities that simplify working with kernels.
"""
-from pennylane import numpy as np
+from itertools import product
+import pennylane as qml


def square_kernel_matrix(X, kernel, assume_normalized_kernel=False):
@@ -56,18 +57,32 @@ def circuit(x1, x2):
[0.96864001, 0.99727485, 1. , 0.96605621],
[0.90932897, 0.95685561, 0.96605621, 1. ]], requires_grad=True)
"""
-    N = len(X)
-    matrix = [0] * N**2
+    N = qml.math.shape(X)[0]
+    if assume_normalized_kernel and N == 1:
+        return qml.math.eye(1, like=qml.math.get_interface(X))
+
+    matrix = [None] * N**2
+
+    # Compute all off-diagonal kernel values, using symmetry of the kernel matrix
     for i in range(N):
-        for j in range(i, N):
-            if assume_normalized_kernel and i == j:
-                matrix[N * i + j] = 1.0
-            else:
-                matrix[N * i + j] = kernel(X[i], X[j])
-            matrix[N * j + i] = matrix[N * i + j]
+        for j in range(i + 1, N):
+            matrix[N * i + j] = (kernel_value := kernel(X[i], X[j]))
+            matrix[N * j + i] = kernel_value
+
+    if assume_normalized_kernel:
+        # Create a one-like entry that has the same interface and batching as the kernel output
+        # As we excluded the case N=1 together with assume_normalized_kernel above, matrix[1] exists
+        one = qml.math.ones_like(matrix[1])
+        for i in range(N):
+            matrix[N * i + i] = one
+    else:
+        # Fill the diagonal by computing the corresponding kernel values
+        for i in range(N):
+            matrix[N * i + i] = kernel(X[i], X[i])
+
+    shape = (N, N) if qml.math.ndim(matrix[0]) == 0 else (N, N, qml.math.size(matrix[0]))

-    return np.array(matrix).reshape((N, N))
+    return qml.math.moveaxis(qml.math.reshape(qml.math.stack(matrix), shape), -1, 0)
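To illustrate the final stacking step: with a batched kernel (for example a shot vector of
length ``B``), every kernel evaluation returns a length-``B`` vector, so the stacked entries
have shape ``(N**2, B)``, are reshaped to ``(N, N, B)``, and ``moveaxis`` then brings the
batch axis to the front. A small self-contained illustration, where the stand-in kernel
values below are made up for demonstration:

```python
import numpy as np
import pennylane as qml

N, B = 3, 2  # 3 datapoints, batch dimension of 2 (e.g. a two-entry shot vector)
# Stand-in for the list of batched kernel values: one length-B vector per (i, j) pair.
matrix = [np.full(B, fill_value=k) for k in range(N**2)]

stacked = qml.math.stack(matrix)                 # shape (N**2, B)
reshaped = qml.math.reshape(stacked, (N, N, B))  # shape (N, N, B)
batched = qml.math.moveaxis(reshaped, -1, 0)     # shape (B, N, N): one kernel matrix per batch entry
```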


def kernel_matrix(X1, X2, kernel):
@@ -79,7 +94,7 @@ def kernel_matrix(X1, X2, kernel):
kernel ((datapoint, datapoint) -> float): Kernel function that maps datapoints to kernel value.

Returns:
-        array[float]: The square matrix of kernel values.
+        array[float]: The matrix of kernel values.

**Example:**

@@ -111,12 +126,12 @@ def circuit(x1, x2):
As we can see, for :math:`n` and :math:`m` datapoints in the first and second
dataset respectively, the output matrix has the shape :math:`n\times m`.
"""
-    N = len(X1)
-    M = len(X2)
+    N = qml.math.shape(X1)[0]
+    M = qml.math.shape(X2)[0]

-    matrix = [0] * N * M
-    for i in range(N):
-        for j in range(M):
-            matrix[M * i + j] = kernel(X1[i], X2[j])
+    matrix = qml.math.stack([kernel(x, y) for x, y in product(X1, X2)])
+
+    if qml.math.ndim(matrix[0]) == 0:
+        return qml.math.reshape(matrix, (N, M))

-    return np.array(matrix).reshape((N, M))
+    return qml.math.moveaxis(qml.math.reshape(matrix, (N, M, qml.math.size(matrix[0]))), -1, 0)
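
As a usage sketch of the rewritten function (the classical RBF kernel below is just for
illustration; any scalar- or batch-valued ``kernel(x1, x2)`` works):

```python
import pennylane as qml
from pennylane import numpy as np

# A classical RBF kernel stands in for a quantum kernel here.
rbf = lambda x1, x2: np.exp(-np.sum((x1 - x2) ** 2))

X1 = np.random.random((5, 2))
X2 = np.random.random((3, 2))

K = qml.kernels.kernel_matrix(X1, X2, rbf)
print(qml.math.shape(K))  # (5, 3): one kernel value per pair of datapoints
```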