<a href="https://colab.research.google.com/github/BankNatchapol/Comparison-of-Quantum-Gradient/blob/main/discrete_vs_continuous/gradient_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pennylane-sf

Collecting pennylane-sf
  Downloading PennyLane_SF-0.16.0-py3-none-any.whl (29 kB)
Collecting pennylane>=0.15
  Downloading PennyLane-0.18.0-py3-none-any.whl (631 kB)
[K     |████████████████████████████████| 631 kB 17.4 MB/s 
[?25hCollecting strawberryfields>=0.15
  Downloading StrawberryFields-0.19.0-py3-none-any.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 42.1 MB/s 
Collecting autoray
  Downloading autoray-0.2.5-py3-none-any.whl (16 kB)
Collecting pennylane-lightning>=0.18
  Downloading PennyLane_Lightning-0.18.0-cp37-cp37m-manylinux2010_x86_64.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 41.9 MB/s 
[?25hCollecting semantic-version==2.6
  Downloading semantic_version-2.6.0-py3-none-any.whl (14 kB)
Collecting thewalrus>=0.15.0
  Downloading thewalrus-0.16.2-cp37-cp37m-manylinux2010_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 38.0 MB/s 
[?25hCollecting quantum-blackbird>=0.3.0
  Downloading quantum_blackbird-0.4.0-py3-

In [None]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import GradientDescentOptimizer

import pandas as pd
import time 

import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

# **Gradient methods**

In [None]:
#@title Auxiliary functions
import warnings
warnings.filterwarnings("ignore")

param_gates = ['RX', 'RY', 'RZ']

def padding(params):
    """Spread the trainable parameters over every gate position of ``ansatz_lcu``.

    Walks the module-level ``ansatz_lcu`` gate list and builds an array of the
    same length: a position holding a gate named in ``param_gates`` receives the
    next unused entry of ``params`` (in order); every other position gets ``None``.

    NOTE(review): the gate list is read from the global ``ansatz_lcu`` rather
    than passed in, so that global must exist before the first call.
    """
    next_param = 0  # index of the first entry of `params` not handed out yet
    slots = []
    for gate in ansatz_lcu:
        if gate in param_gates:
            slots.append(params[next_param])
            next_param += 1
        else:
            slots.append(None)
    return np.array(slots)

def projector1(wires):
  """Queue the 2x2 matrix |1><1| on ``wires`` via ``qml.QubitUnitary``.

  Only entry [1, 1] of the matrix is non-zero.

  NOTE(review): this matrix is a projector, not a unitary — confirm that the
  PennyLane version in use accepts it in ``QubitUnitary``.
  """
  projector = np.zeros((2, 2))
  projector[1,1] = 1
  qml.QubitUnitary(projector, wires=wires)

def str2gate(gate):
  """Map a gate name to the callable that queues that gate.

  Supported names: 'H', 'X', 'Y', 'Z', 'RX', 'RY', 'RZ' (PennyLane operations)
  and 'Projector1' (the local ``projector1`` helper).

  Raises:
    ValueError: if ``gate`` is not one of the supported names. (An explicit
      exception replaces the former ``assert 0``, which is removed when Python
      runs with ``-O`` and would then silently return ``None``.)
  """
  gate_table = (
      ('H', qml.Hadamard),
      ('X', qml.PauliX),
      ('Y', qml.PauliY),
      ('Z', qml.PauliZ),
      ('RX', qml.RX),
      ('RY', qml.RY),
      ('RZ', qml.RZ),
      ('Projector1', projector1),
  )
  for name, operation in gate_table:
    # Compare with `==` instead of a dict lookup: callers pass the result of
    # `.numpy()`, which is presumably not hashable.
    if gate == name:
      return operation
  raise ValueError("Gate not supported: {!r}".format(gate))

def controlStr(gate):
  """Return the controlled Pauli operation paired with a rotation gate name.

  'RX' -> ``qml.CNOT``, 'RY' -> ``qml.CY``, 'RZ' -> ``qml.CZ``.

  Raises:
    ValueError: if ``gate`` is not 'RX', 'RY' or 'RZ'. (An explicit exception
      replaces the former ``assert 0``, which is removed when Python runs
      with ``-O``.)
  """
  if gate == 'RX':
    return qml.CNOT
  if gate == 'RY':
    return qml.CY
  if gate == 'RZ':
    return qml.CZ
  raise ValueError("Gate not supported: {!r}".format(gate))
    
def print_ansatz(ansatz, coeffs, wires):
  """Build the circuit described by a gate-name list and print its drawing.

  Args:
    ansatz: gate names; 'Problem' stands for the ``problem()`` gate.
    coeffs: trainable parameters, expanded with ``padding`` so that positions
      without a parameter hold ``None``.
    wires: wire of each gate, aligned with ``ansatz``.

  Fixes relative to the previous version:
    * the QNode is bound to ``dev``; the name ``dev1`` used before is not
      defined anywhere in the visible notebook;
    * ``problem()`` is called without arguments, matching its definition.

  NOTE(review): ``if not coeff`` also treats a coefficient equal to 0 as
  "no parameter" — confirm this is acceptable for the circuits drawn here.
  """
  coeffs = padding(coeffs)
  @qml.qnode(dev)
  def ansatz_construct(ansatz, coeffs, wires):
    ansatz = [a.numpy() for a in ansatz]
    coeffs = [c.numpy() for c in coeffs]
    wires = [w.numpy() for w in wires]

    for i, coeff in enumerate(coeffs):
      if not coeff:
        if ansatz[i] ==  'Problem':
          problem()
        else:
          str2gate(ansatz[i])(wires=wires[i])
      else:
        str2gate(ansatz[i])(coeff, wires=wires[i])

    return qml.state()
  print(qml.draw(ansatz_construct)(ansatz, coeffs, wires))

In [None]:
# Single-wire simulator; the cost, target, prediction and metric-tensor QNodes are bound to it.
num_wires = 1
dev = qml.device("default.qubit", wires=num_wires)

# Two-wire simulator; the `lcu` QNode is bound to it and uses wire 1 next to the data wire 0.
num_wires_dev2 = 2
dev2 = qml.device("default.qubit", wires=num_wires_dev2)

## Discrete-variable

### Parameter shift rule

In [None]:
def parameter_shift_term(qnode, params, i, j):
    """Parameter-shift derivative of ``qnode`` with respect to ``params[i, j]``.

    Evaluates ``qnode`` with the entry moved to +pi/2 and then to -pi/2
    (relative to its current value) and returns half the difference.
    Works on a copy; ``params`` is left untouched.
    """
    probe = params.copy()

    # evaluation at +pi/2
    probe[i, j] += np.pi/2
    plus = qnode(probe)

    # stepping back by a full pi lands on -pi/2 relative to the start value
    probe[i, j] -= np.pi
    minus = qnode(probe)

    return 0.5 * (plus - minus)

def parameter_shift(qnode, params):
    """Gradient of ``qnode`` for every entry of the 2-D array ``params``.

    Returns an array shaped like ``params`` whose entry [i, j] is
    ``parameter_shift_term(qnode, params, i, j)``.
    """
    gradients = np.zeros_like((params))
    for i, row in enumerate(gradients):
        for j in range(len(row)):
            gradients[i, j] += parameter_shift_term(qnode, params, i, j)

    return gradients

### Finite difference

In [None]:
def finite_term(qnode, params, i, j, eps=0.01):
    """Central finite-difference derivative of ``qnode`` w.r.t. ``params[i, j]``.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array; it is copied, never modified.
        i, j: index of the entry to differentiate.
        eps: step size of the central difference (default 0.01, the value
            that was previously hard-coded).

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)`` for the selected entry.

    Raises:
        ValueError: if ``eps`` is zero.
    """
    if eps == 0:
        raise ValueError("eps must be non-zero")

    shifted = params.copy()

    shifted[i, j] += eps
    forward = qnode(shifted)  # forward evaluation

    shifted[i, j] -= 2*eps
    backward = qnode(shifted) # backward evaluation

    return (1/(2*eps)) * (forward - backward)

def finite_difference(qnode, params, eps=0.01):
    """Finite-difference gradient of ``qnode`` for every entry of ``params``.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array.
        eps: step size forwarded to ``finite_term`` (default 0.01).

    Returns:
        Array shaped like ``params`` holding one derivative per entry.
    """
    gradients = np.zeros_like((params))
    for i in range(len(gradients)):
        for j in range(len(gradients[0])):
            gradients[i, j] += finite_term(qnode, params, i, j, eps=eps)

    return gradients

### Linear combination of unitaries (LCU)

In [None]:
def lcu_term(ansatz, params, wires, diff_index):
  """Queue the first half of the LCU circuit.

  Wire 1 receives a Hadamard followed by the adjoint of S. Then the gates of
  ``ansatz`` are queued in order: a gate named in ``param_gates`` is applied
  with ``params[i]`` on ``wires[i]``; at position ``diff_index`` it is preceded
  by the matching controlled gate from ``controlStr`` on wires [1, 0].
  'Problem' queues ``problem()``; any other name is queued without a
  parameter. A final Hadamard is applied on wire 1.
  """
  qml.Hadamard(wires=1)
  qml.adjoint(qml.S)(wires=1)

  for i, gate in enumerate(ansatz):
    if gate in param_gates:
      if i == diff_index:
        # controlled gate inserted just before the gate being differentiated
        controlStr(gate)(wires=[1, 0])
      str2gate(gate)(params[i], wires=wires[i])
    else:
      if gate == 'Problem':
        problem()
      else:
        str2gate(gate)(wires=wires[i])
    
  qml.Hadamard(wires=1)


def lcu_term_adjointed(ansatz, params, wires, diff_index):
  """Queue the mirrored (adjoint) half of the LCU circuit.

  The gate list, the parameters and the wires are all reversed so that
  position ``i`` of the reversed lists refers to the same gate as position
  ``len(ansatz) - i - 1`` in ``lcu_term``. Parameterised gates are applied
  with the negated parameter; the controlled gate follows the gate at the
  mirrored ``diff_index``. 'Problem' queues ``qml.adjoint(problem)``. The
  block starts with a Hadamard on wire 1 and ends with S then Hadamard on
  wire 1.

  Fix: ``wires`` is now reversed together with ``ansatz`` and ``params``.
  Previously it kept its forward order, so each mirrored gate was placed on
  the wire of a different gate whenever the wires were not all equal.
  """
  ansatz = [a for a in reversed(ansatz)]
  params = params[::-1]
  wires = wires[::-1]

  qml.Hadamard(wires=1)
  for i, gate in enumerate(ansatz):
    if gate in param_gates:
      str2gate(gate)(-params[i], wires=wires[i])
      if i == len(ansatz)-diff_index-1:
        # mirrored position of the gate being differentiated
        controlStr(gate)(wires=[1, 0])
    else:
      if gate == 'Problem':
        qml.adjoint(problem)()
      else:
        str2gate(gate)(wires=wires[i])

  qml.S(wires=1)
  qml.Hadamard(wires=1)


@qml.qnode(dev2)
def lcu(ansatz, H, ham_wires, params, wires, diff_index):
  """QNode on ``dev2`` that runs the full LCU circuit and returns its state.

  Sequence: ``lcu_term``, the gate named ``H`` on ``ham_wires``, a Pauli-Z on
  wire 1, then ``lcu_term_adjointed``. ``lcu_gradients`` reads entry [0] of
  the returned state vector.
  """
  # unwrap the arguments with .numpy() before using them as gate names / wire labels
  ansatz = [a.numpy() for a in ansatz]
  wires = [w.numpy() for w in wires]
  ham_wires = ham_wires.numpy()

  lcu_term(ansatz, params, wires, diff_index)
  str2gate(H)(wires=ham_wires)
  str2gate('Z')(wires=1)
  lcu_term_adjointed(ansatz, params, wires, diff_index)

  return qml.state()

def lcu_gradients(ansatz, H, ham_wires, ham_coeffs, params, wires):
  """Gradient of the cost for every parameterised gate, via the ``lcu`` QNode.

  ``params`` is first expanded with ``padding``. For each position of
  ``ansatz`` holding a gate named in ``param_gates``, entry [0] of the state
  returned by ``lcu`` is weighted by ``ham_coeffs[h]`` and summed over all
  terms ``h`` of ``H``; the real part of that sum is the gradient entry.

  Returns:
    A list with one value per parameterised gate, in circuit order.
  """
  padded_params = padding(params)

  gradients = []
  for position, gate in enumerate(ansatz):
    if gate not in param_gates:
      continue
    total = 0
    for term in range(len(H)):
      amplitude = lcu(ansatz, H[term], ham_wires[term], padded_params, wires, position
                      )[0].numpy()
      total = total + amplitude*ham_coeffs[term]
    gradients.append(total.real)

  return gradients

### Quantum natural gradient (QNG)

In [None]:
@qml.qnode(dev)
def metric_tensor_circuit(var, wires=0):
    """QNode handed to ``qml.metric_tensor`` by ``quantum_natural_gradient``.

    Applies ``ansatz`` once per row of ``var``, then ``problem()``, and returns
    the expectation value of Pauli-X on wire 0. The ``wires`` argument is not
    used in the body.
    """
    for v in var: 
      ansatz(v)
    problem() # problem gate 
    return qml.expval(qml.PauliX(wires=0))

def quantum_natural_gradient(var):
  """Natural-gradient direction for ``cost_function`` at ``var``.

  Solves ``metric_tensor @ x = grad`` where ``grad`` is the flattened
  parameter-shift gradient of ``cost_function`` and ``metric_tensor`` comes
  from ``qml.metric_tensor(metric_tensor_circuit)`` evaluated at ``var``.
  The result is returned flattened.
  """
  flat_gradient = parameter_shift(cost_function, var).flatten()
  metric = qml.metric_tensor(metric_tensor_circuit)(var)
  return np.linalg.solve(metric, flat_gradient)

### Quantum Newton gradient

In [None]:
# Kept for backward compatibility: other cells read and rebind the global `s`.
# The Hessian helpers below no longer depend on it.
s = np.pi/2

def hessian_parameter_shift_term(qnode, params, i, j, shift=np.pi/2):
    """Second derivative of ``qnode`` w.r.t. ``params[i, j]`` by parameter shift.

    Evaluates ``qnode`` at the entry shifted by ``+2*shift``, at the centre and
    at ``-2*shift`` and returns
    ``(f(x+2s) - 2 f(x) + f(x-2s)) / (4 sin(s)^2)``.
    With the default ``shift = pi/2`` the denominator is 4, which reproduces
    the previous ``1/4`` prefactor.

    The shift used to be read from the global ``s``; the CV cells rebind that
    name to 0.1, which silently changed this function's result depending on
    the order in which cells were run.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array; it is copied, never modified.
        i, j: index of the entry to differentiate.
        shift: half of the evaluation offset (default pi/2).
    """
    shifted = params.copy()

    shifted[i, j] += 2*shift
    forward = qnode(shifted)  # forward evaluation

    shifted[i, j] -= 2*shift
    center = qnode(shifted) # center evaluation

    shifted[i, j] -= 2*shift
    backward = qnode(shifted) # backward evaluation

    return (forward - 2*center + backward) / (4*np.sin(shift)**2)

def hessian_matrix(qnode, params, shift=np.pi/2):
    """Diagonal Hessian approximation of ``qnode`` at ``params``.

    Computes one second derivative per entry of ``params``, places them on
    the diagonal of a square matrix and replaces every exact zero (including
    the off-diagonal entries) by 1e-17 before returning it.
    """
    hessian_m = np.zeros_like((params))
    for i in range(len(hessian_m)):
        for j in range(len(hessian_m[0])):
            hessian_m[i, j] += hessian_parameter_shift_term(qnode, params, i, j, shift=shift)
    hessian = np.diag(hessian_m.flatten())
    hessian[hessian == 0] = 1e-17
    return hessian

def newton_gradient(qnode, params):
  """Newton direction: solve ``hessian @ x = grad`` for the flattened gradient.

  ``grad`` is the parameter-shift gradient of ``qnode`` and ``hessian`` the
  matrix from ``hessian_matrix``. The result is returned flattened.
  """
  var = params.copy()
  hessian = hessian_matrix(qnode, var)
  grad = parameter_shift(qnode, var).flatten()
  newton_grad = np.linalg.solve(hessian, grad)
  return newton_grad

## Continuous-variable

### Parameter shift rule

In [None]:
gate_list = ['R', 'S1', 'D1']

def parameter_shift_cv_term(qnode, params, i, j):
    """Parameter-shift derivative of a CV ``qnode`` w.r.t. ``params[i, j]``.

    The gate type is taken from ``gate_list`` by column (``j`` modulo its
    length) and selects the shift size and the prefactor:

    * 'R' (also 'S2' / 'D2'): shift ``alpha``, prefactor 1/2
    * 'S1': shift ``s``, prefactor 1/(2*sinh(s))
    * 'D1': shift ``s``, prefactor 1/(2*s)

    ``alpha`` and ``s`` are module-level globals that must be set before the
    call. Returns ``None`` for any other gate label.
    """
    gate = gate_list[j%len(gate_list)]

    # choose shift size and prefactor for this gate type
    if gate in ('R', 'S2', 'D2'):
      step, scale = alpha, (1/2)
    elif gate == 'S1':
      step, scale = s, (1/(2*np.sinh(s)))
    elif gate == 'D1':
      step, scale = s, (1/(2*s))
    else:
      return None

    probe = params.copy()

    probe[i, j] += step
    plus = qnode(probe)   # forward evaluation

    probe[i, j] -= 2*step
    minus = qnode(probe)  # backward evaluation

    return scale * (plus - minus)

def parameter_shift_cv(qnode, params):
    """Gradient of a CV ``qnode`` for every entry of the 2-D array ``params``.

    Returns an array shaped like ``params`` whose entry [i, j] is
    ``parameter_shift_cv_term(qnode, params, i, j)``.
    """
    gradients = np.zeros_like((params))
    for i, row in enumerate(gradients):
        for j in range(len(row)):
            gradients[i, j] += parameter_shift_cv_term(qnode, params, i, j)

    return gradients

### Finite difference

In [None]:
def finite_cv_term(qnode, params, i, j, eps=0.01):
    """Central finite-difference derivative of a CV ``qnode`` w.r.t. ``params[i, j]``.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array; it is copied, never modified.
        i, j: index of the entry to differentiate.
        eps: step size of the central difference (default 0.01, the value
            that was previously hard-coded).

    Returns:
        ``(f(x + eps) - f(x - eps)) / (2 * eps)`` for the selected entry.

    Raises:
        ValueError: if ``eps`` is zero.
    """
    if eps == 0:
        raise ValueError("eps must be non-zero")

    shifted = params.copy()

    shifted[i, j] += eps
    forward = qnode(shifted)  # forward evaluation

    shifted[i, j] -= 2*eps
    backward = qnode(shifted) # backward evaluation

    return (1/(2*eps)) * (forward - backward)

def finite_difference_cv(qnode, params, eps=0.01):
    """Finite-difference gradient of a CV ``qnode`` for every entry of ``params``.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array.
        eps: step size forwarded to ``finite_cv_term`` (default 0.01).

    Returns:
        Array shaped like ``params`` holding one derivative per entry.
    """
    gradients = np.zeros_like((params))
    for i in range(len(gradients)):
        for j in range(len(gradients[0])):
            gradients[i, j] += finite_cv_term(qnode, params, i, j, eps=eps)

    return gradients

# **Problem** : Quantum gate estimation

## Discrete-variable
Using $U3(1.44, 0.8, 2.1)$ as target gate

In [None]:
# problem gate
def problem():
    """Queue the target gate U3(1.44, 0.8, 2.1) on wire 0."""
    qml.U3(1.44, 0.8, 2.1, wires=0)

Ansatz for estimating target gate

In [None]:
# trial (guess) ansatz layer
def ansatz(var):
    """Queue one ansatz layer: H, RX, RY, RZ on every wire.

    Args:
        var: flat parameter vector of one layer holding three angles per wire;
            wire ``w`` uses ``var[3*w]``, ``var[3*w+1]`` and ``var[3*w+2]``.

    Fix: the angles used to be read from ``var[0+wire]``, ``var[1+wire]`` and
    ``var[2+wire]``, which makes neighbouring wires share parameters and leaves
    the tail of a ``3*num_wires`` vector unused. For ``num_wires == 1`` the
    indices are unchanged.
    """
    for wire in range(num_wires):
      offset = 3*wire  # first of the three angles belonging to this wire
      qml.Hadamard(wires=wire)
      qml.RX(var[offset], wires=wire)
      qml.RY(var[offset+1], wires=wire)
      qml.RZ(var[offset+2], wires=wire)

Objective function <br> 
1. initial state is  $|0⟩$
2. apply target gate $U(\theta)|0⟩$ <br> 
the state will be $|\psi⟩ = a|0⟩+b|1⟩$ 
3. apply ansatz $A(\alpha)$  <br> 
the state will be $A(\alpha)|\psi⟩ = |\psi'⟩$
4. if $A(\alpha) = U^\dagger(\theta)$ then  $|\psi'⟩= 1|0⟩ + 0|1⟩ = |0⟩$
5. so, we minimize the probability of measuring $|1⟩$ (driving its amplitude $b$ towards $0$) to make $A(\alpha) = U^\dagger(\theta)$


In [None]:
# objective function
@qml.qnode(dev)
def cost_function(var):
    """Cost QNode on ``dev``: ansatz layers, then the problem gate.

    Applies ``ansatz`` once per row of ``var``, then ``problem()``, and returns
    the expectation value of the projector |1><1| on wire 0.
    """
    for v in var: 
      ansatz(v)

    problem() # problem gate 

    return qml.expval(qml.Projector([1],wires=0)) # expectation of |1><1|, i.e. the probability of measuring |1>

Probability distribution of target gate 

In [None]:
# target result of problem gate
@qml.qnode(dev)
def target():
    """Return the measurement probabilities on wire 0 after applying ``problem()`` alone."""
    problem()
    return qml.probs(wires=[0])  # get target probability

Probability distribution of estimated gate 

In [None]:
# prediction circuit
@qml.qnode(dev)
def prediction(var):
    """Return the probabilities on wire 0 after the adjoint ansatz layers.

    The rows of ``var`` are visited in reverse order and ``qml.adjoint(ansatz)``
    is applied for each of them.
    """
    for v in reversed(var):
      qml.adjoint(ansatz)(v)
    return qml.probs(wires=[0])  # get prediction probability

In [None]:
print("Target state: ", target())

Target state:  [0.56521185 0.43478815]


In [None]:
# Fixed seed so the initial parameters are the same on every run.
np.random.seed(1)
num_layers = 2
# One row per ansatz layer, three angles per wire, drawn from a normal distribution scaled by 0.05.
var_init = 0.05*np.random.randn(num_layers, 3*num_wires)

In [None]:
print("Initial cost: ", cost_function(var_init))

Initial cost:  0.38420647954293236


### Parameter shift rule

In [None]:
# Gradient descent on `cost_function`, with the hand-written parameter-shift
# rule supplied as the gradient function.
parameter_shift_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt = GradientDescentOptimizer(0.01)

var = var_init.copy()
loss_plot = []  # cost returned by each optimisation step

for it in range(1201):# while True:
    var, _cost = opt.step_and_cost(lambda v: cost_function(v), var, 
                                   grad_fn=lambda var: parameter_shift(cost_function, var)) 
    loss_plot.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))

    end = time.time()
    t = end - start
    parameter_shift_time_i.append(t)

Iter:     0 | Cost: 0.38420647954 
Iter:   100 | Cost: 0.07905926083 
Iter:   200 | Cost: 0.01260131677 
Iter:   300 | Cost: 0.00204853691 
Iter:   400 | Cost: 0.00034842518 
Iter:   500 | Cost: 0.00006112723 
Iter:   600 | Cost: 0.00001091990 
Iter:   700 | Cost: 0.00000197059 
Iter:   800 | Cost: 0.00000035761 
Iter:   900 | Cost: 0.00000006510 
Iter:  1000 | Cost: 0.00000001187 
Iter:  1100 | Cost: 0.00000000217 
Iter:  1200 | Cost: 0.00000000040 


### Finite difference

In [None]:
# Gradient descent on `cost_function`, with the central finite-difference
# gradient supplied as the gradient function.
finite_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt = GradientDescentOptimizer(0.01)

var_fd = var_init.copy()
loss_plot_fd = []  # cost returned by each optimisation step

for it in range(1201):# while True:
    var_fd, _cost = opt.step_and_cost(lambda v: cost_function(v), var_fd, 
                                   grad_fn=lambda var_fd: finite_difference(cost_function, var_fd)) 
    loss_plot_fd.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))

    end = time.time()
    t = end - start
    finite_time_i.append(t)

Iter:     0 | Cost: 0.38420647954 
Iter:   100 | Cost: 0.07906163434 
Iter:   200 | Cost: 0.01260209243 
Iter:   300 | Cost: 0.00204872124 
Iter:   400 | Cost: 0.00034846608 
Iter:   500 | Cost: 0.00006113608 
Iter:   600 | Cost: 0.00001092178 
Iter:   700 | Cost: 0.00000197099 
Iter:   800 | Cost: 0.00000035769 
Iter:   900 | Cost: 0.00000006511 
Iter:  1000 | Cost: 0.00000001187 
Iter:  1100 | Cost: 0.00000000217 
Iter:  1200 | Cost: 0.00000000040 


### Linear combination of unitaries (LCU)

In [None]:
def ansatz_circuit_lcu(ansatz, var, wires):
  """Queue the gates named in ``ansatz`` on ``wires``.

  ``var`` is expanded with ``padding`` so that ``var[i]`` lines up with
  ``ansatz[i]``. Gates named in ``param_gates`` are applied with their
  parameter; every other gate is applied without one.

  Fix: whether a gate takes a parameter is now decided from the gate name
  (as ``lcu_term`` does) instead of the truthiness of ``var[i]``. With the
  old test a rotation whose angle was exactly 0 was called without its
  parameter.
  """
  var = padding(var)
  for i, gate in enumerate(ansatz):
    if gate in param_gates:
      str2gate(gate)(var[i], wires=wires[i])
    else:
      str2gate(gate)(wires=wires[i])


@qml.qnode(dev)
def cost_function_lcu(ansatz, params, wires):
  """Cost QNode for the LCU run, driven by a gate-name list.

  Queues every entry of ``ansatz`` except the last one through
  ``ansatz_circuit_lcu``, then ``problem()``, and returns the expectation
  value of the projector |1><1| on wire 0.
  """
  # unwrap the arguments with .numpy() before using them as gate names / wire labels
  ansatz = [a.numpy() for a in ansatz]
  wires = [w.numpy() for w in wires]

  # the last entry is dropped here; problem() is queued explicitly below
  ansatz_circuit_lcu(ansatz[:-1], params, wires[:-1])

  problem()

  return qml.expval(qml.Projector([1], wires=0))

@qml.qnode(dev)
def prediction_lcu(ansatz, params, wires):
  """Return the probabilities on wire 0 after the adjoint of the LCU ansatz.

  ``ansatz`` and ``wires`` are reversed, their first (originally last) entry
  is dropped, and ``qml.adjoint(ansatz_circuit_lcu)`` is applied.

  NOTE(review): ``ansatz_circuit_lcu`` pads ``params`` against the
  forward-order global ``ansatz_lcu`` while the gate list passed here is
  reversed — verify that parameters and gates still line up.
  """
  ansatz = [a.numpy() for a in reversed(ansatz)]
  wires = [w.numpy() for w in reversed(wires)]

  qml.adjoint(ansatz_circuit_lcu)(ansatz[1:], params, wires[1:])
  
  return qml.probs(wires=[0])

@qml.qnode(dev)
def target_lcu(wires):
  """Return the probabilities on wire 0 after ``problem()``; ``wires`` is not used in the body."""
  problem()
  return qml.probs(wires=[0])

In [None]:
# Gradient descent on `cost_function_lcu`, with the LCU gradient supplied as
# the gradient function.
lcu_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

np.random.seed(1)

num_layers = 2

# Gate names and wires of the circuit: `num_layers` blocks of H, RX, RY, RZ followed by the problem gate.
# `padding` reads the global `ansatz_lcu` defined here.
ansatz_lcu = ['H', 'RX', 'RY', 'RZ']*num_layers + ['Problem']
wires_lcu = [0, 0, 0, 0]*num_layers + [0]

# Observable terms passed to `lcu_gradients`: a single 'Projector1' on wire 0 with coefficient 1.
H = ['Projector1']
ham_wires = [0]
ham_coeffs = [1.0]

# flat vector with one angle per parameterised gate
params_init= 0.05*np.random.randn(1, 3*num_wires*num_layers)[0]

opt = GradientDescentOptimizer(0.01)

var_lcu = params_init.copy()
loss_plot_lcu  = []  # cost returned by each optimisation step

for it in range(1201):# while True:
    var_lcu, _cost = opt.step_and_cost(lambda v: cost_function_lcu(ansatz_lcu, v, wires_lcu), var_lcu, 
                                   grad_fn=lambda var_lcu: lcu_gradients(ansatz_lcu, H, ham_wires, ham_coeffs, var_lcu, wires_lcu)) 
    loss_plot_lcu.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
    
    end = time.time()
    t = end - start
    lcu_time_i.append(t)

Iter:     0 | Cost: 0.38420647954 
Iter:   100 | Cost: 0.07905926083 
Iter:   200 | Cost: 0.01260131677 
Iter:   300 | Cost: 0.00204853691 
Iter:   400 | Cost: 0.00034842518 
Iter:   500 | Cost: 0.00006112723 
Iter:   600 | Cost: 0.00001091990 
Iter:   700 | Cost: 0.00000197059 
Iter:   800 | Cost: 0.00000035761 
Iter:   900 | Cost: 0.00000006510 
Iter:  1000 | Cost: 0.00000001187 
Iter:  1100 | Cost: 0.00000000217 
Iter:  1200 | Cost: 0.00000000040 


### Quantum natural gradient (QNG)

In [None]:
# Gradient descent on `cost_function`, with `quantum_natural_gradient`
# supplied as the gradient function.
qng_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt = qml.GradientDescentOptimizer(0.01)

var_qng = var_init.copy()
loss_plot_qng = []  # cost returned by each optimisation step

for it in range(1201):# while True:
    var_qng, _cost = opt.step_and_cost(lambda v: cost_function(v), var_qng, 
                                   grad_fn=quantum_natural_gradient)
    loss_plot_qng.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.15f} ".format(it, _cost))
    
    end = time.time()
    t = end - start
    qng_time_i.append(t)

Iter:     0 | Cost: 0.384206479542932 
Iter:   100 | Cost: 0.000044607430908 
Iter:   200 | Cost: 0.000000003537183 
Iter:   300 | Cost: 0.000000000000398 
Iter:   400 | Cost: 0.000000000000000 
Iter:   500 | Cost: 0.000000000000000 
Iter:   600 | Cost: 0.000000000000000 
Iter:   700 | Cost: 0.000000000000000 
Iter:   800 | Cost: 0.000000000000000 
Iter:   900 | Cost: 0.000000000000000 
Iter:  1000 | Cost: 0.000000000000000 
Iter:  1100 | Cost: 0.000000000000000 
Iter:  1200 | Cost: 0.000000000000000 


### Quantum Newton gradient

In [None]:
# Gradient descent on `cost_function`, with `newton_gradient` supplied as the
# gradient function.
newton_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt = qml.GradientDescentOptimizer(0.01)

var_newton = var_init.copy()
loss_plot_newton = []  # cost returned by each optimisation step

for it in range(1201):# while True:
    var_newton, _cost = opt.step_and_cost(lambda v: cost_function(v), var_newton, 
                                   grad_fn=lambda var: newton_gradient(cost_function, var))
    loss_plot_newton.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.15f} ".format(it, _cost))
    
    end = time.time()
    t = end - start
    newton_time_i.append(t)

Iter:     0 | Cost: 0.384206479542932 
Iter:   100 | Cost: 0.001472385940137 
Iter:   200 | Cost: 0.000010174234567 
Iter:   300 | Cost: 0.000000091684611 
Iter:   400 | Cost: 0.000000000954087 
Iter:   500 | Cost: 0.000000000010492 
Iter:   600 | Cost: 0.000000000000118 
Iter:   700 | Cost: 0.000000000000001 
Iter:   800 | Cost: 0.000000000000000 
Iter:   900 | Cost: 0.000000000000000 
Iter:  1000 | Cost: 0.000000000000000 
Iter:  1100 | Cost: 0.000000000000000 
Iter:  1200 | Cost: 0.000000000000000 


## **Continuous-variable**
Using $S(0.49, 0.2)D(0.395, 0.5)$ as target gate

In [None]:
# Single-mode Strawberry Fields Fock simulator with cutoff_dim=20; the CV QNodes are bound to it.
num_wires_cv = 1
dev_cv = qml.device("strawberryfields.fock", wires=num_wires_cv, cutoff_dim=20)

In [None]:
def problem_cv():
    """Queue the CV target gate on wire 0: Squeezing(0.49, 0.2) followed by Displacement(0.395, 0.5)."""
    qml.Squeezing(0.49, 0.2, wires=0)
    qml.Displacement(0.395, 0.5, wires=0)

Ansatz for estimating target gate

In [None]:
def ansatz_cv(var):
    """Queue one CV ansatz layer: Rotation, Squeezing, Displacement on every wire.

    Args:
        var: flat parameter vector of one layer holding three values per wire;
            wire ``w`` uses ``var[3*w]`` (rotation angle), ``var[3*w+1]``
            (squeezing amount, phase fixed to 0.0) and ``var[3*w+2]``
            (displacement amount, phase fixed to 0.0).

    Fix: the values used to be read from ``var[0+wire]``, ``var[1+wire]`` and
    ``var[2+wire]``, which makes neighbouring wires share parameters. For
    ``num_wires_cv == 1`` the indices are unchanged.
    """
    for wire in range(num_wires_cv):
      offset = 3*wire  # first of the three values belonging to this wire
      qml.Rotation(var[offset], wires=wire)
      qml.Squeezing(var[offset+1], 0.0, wires=wire)
      qml.Displacement(var[offset+2], 0.0, wires=wire)

Objective function <br> 
1. initial state is  $|0⟩$
2. apply target gate $U(r, \theta)|0⟩$ <br> 
photon number expectation value $⟨n⟩$
3. apply ansatz $A(s, \alpha)$  and $U(r, \theta)$<br> 
photon number expectation value $A(s, \alpha)U(r, \theta)|0⟩ = ⟨n'⟩$
4. if $⟨n'⟩ = 0$ then  $A(s, \alpha) = U^\dagger(r, \theta)$
5. so, we will minimize $⟨n'⟩$ to target $0$ to make $A(s, \alpha) = U^\dagger(r, \theta)$


In [None]:
@qml.qnode(dev_cv)
def cost_function_cv(var):
    """CV cost QNode on ``dev_cv``: ansatz layers, then the CV problem gate.

    Applies ``ansatz_cv`` once per row of ``var``, then ``problem_cv()``, and
    returns the expectation value of the number operator on wire 0.
    """
    for v in var:
      ansatz_cv(v)

    problem_cv()

    return qml.expval(qml.NumberOperator(wires=0))

Probability distribution of target gate

In [None]:
@qml.qnode(dev_cv)
def target_cv():
    """Return the probabilities on wire 0 after applying ``problem_cv()`` alone."""
    problem_cv()

    return qml.probs(wires=0) #qml.NumberOperator(wires=0)

Probability distribution of estimated gate

In [None]:
@qml.qnode(dev_cv)
def prediction_cv(var):
    """Return the probabilities on wire 0 after the adjoint CV ansatz layers.

    The rows of ``var`` are visited in reverse order and
    ``qml.adjoint(ansatz_cv)`` is applied for each of them.
    """
    for v in reversed(var):
      qml.adjoint(ansatz_cv)(v)
    
    return qml.probs(wires=0)

In [None]:
# Fixed seed so the initial CV parameters are the same on every run.
np.random.seed(1)
num_layers_cv = 2
# One row per ansatz layer, three values per wire, drawn from a normal distribution scaled by 0.05.
var_init_cv = 0.05*np.random.randn(num_layers_cv, 3*num_wires_cv)

In [None]:
print("Initial cost: ", cost_function_cv(var_init_cv))

Initial cost:  0.38494228126401864


### Parameter shift rule

In [None]:
# Gradient descent on `cost_function_cv`, with the CV parameter-shift rule
# supplied as the gradient function.
parameter_shift_cv_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt_cv = GradientDescentOptimizer(0.01)

# shift sizes read as globals by `parameter_shift_cv_term`
s = 0.1
alpha = np.pi/2

var_cv = var_init_cv.copy()
loss_plot_cv = []  # cost returned by each optimisation step
break_count = 0  # not used in this cell


for it in range(1201):
    # Fix: step with `opt_cv`, the optimizer created in this cell. The loop
    # used to call `opt`, a leftover from an earlier cell, so this cell only
    # ran when that earlier cell had been executed first.
    var_cv, _cost = opt_cv.step_and_cost(lambda v: cost_function_cv(v), var_cv, 
                                   grad_fn=lambda var_cv: parameter_shift_cv(cost_function_cv, var_cv)) 
    
    loss_plot_cv.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
    
    end = time.time()
    t = end - start
    parameter_shift_cv_time_i.append(t)

Iter:     0 | Cost: 0.38494228126 
Iter:   100 | Cost: 0.07176909118 
Iter:   200 | Cost: 0.05033600473 
Iter:   300 | Cost: 0.03014311292 
Iter:   400 | Cost: 0.01913000554 
Iter:   500 | Cost: 0.01183261605 
Iter:   600 | Cost: 0.00480398702 
Iter:   700 | Cost: 0.00152150328 
Iter:   800 | Cost: 0.00046310143 
Iter:   900 | Cost: 0.00014622720 
Iter:  1000 | Cost: 0.00004904791 
Iter:  1100 | Cost: 0.00001839819 
Iter:  1200 | Cost: 0.00000856855 


### Finite difference

In [None]:
# Gradient descent on `cost_function_cv`, with the central finite-difference
# gradient supplied as the gradient function.
finite_cv_time_i = []  # seconds elapsed since `start`, recorded after every iteration
start = time.time()

opt_cv = GradientDescentOptimizer(0.01)

# globals used by the CV parameter-shift rule; re-assigned here as in the previous cell
s = 0.1
alpha = np.pi/2

var_cv_fd = var_init_cv.copy()
loss_plot_cv_fd = []  # cost returned by each optimisation step
break_count = 0  # not used in this cell


for it in range(1201):
    # Fix: step with `opt_cv`, the optimizer created in this cell. The loop
    # used to call `opt`, a leftover from an earlier cell, so this cell only
    # ran when that earlier cell had been executed first.
    var_cv_fd, _cost = opt_cv.step_and_cost(lambda v: cost_function_cv(v), var_cv_fd, 
                                   grad_fn=lambda var_cv_fd: finite_difference_cv(cost_function_cv, var_cv_fd)) 
    
    loss_plot_cv_fd.append(_cost)

    # report progress every 100 iterations
    if it%100==0:
      print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
    
    end = time.time()
    t = end - start
    finite_cv_time_i.append(t)

Iter:     0 | Cost: 0.38494228126 
Iter:   100 | Cost: 0.05206267815 
Iter:   200 | Cost: 0.02924122784 
Iter:   300 | Cost: 0.02053613049 
Iter:   400 | Cost: 0.01528766209 
Iter:   500 | Cost: 0.01120077590 
Iter:   600 | Cost: 0.00796649800 
Iter:   700 | Cost: 0.00548035127 
Iter:   800 | Cost: 0.00364205391 
Iter:   900 | Cost: 0.00233993052 
Iter:  1000 | Cost: 0.00145712365 
Iter:  1100 | Cost: 0.00088301248 
Iter:  1200 | Cost: 0.00052330178 


# **Benchmarking**

In [None]:
# Benchmark: wall-clock time of 101 parameter-shift gradient-descent steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers`, `var_init`, `opt` and `var` set by earlier cells.
parameter_shift_time = []  # total seconds per layer count
loss_plot_ni = []          # cost returned by the last step of each run
for n in range(1, 11, 1):
  np.random.seed(1)
  num_layers = n
  var_init = 0.05*np.random.randn(num_layers, 3*num_wires)
  start = time.time()

  opt = GradientDescentOptimizer(0.01)

  var = var_init.copy()

  for it in range(101):# while True:
      var, _cost = opt.step_and_cost(lambda v: cost_function(v), var, 
                                    grad_fn=lambda var: parameter_shift(cost_function, var)) 
      

      # if it%100==0:
      #   print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
  loss_plot_ni.append(_cost)
  end = time.time()
  t = end - start
  parameter_shift_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 2.510239362716675
#Layers:  2 6.40380072593689
#Layers:  3 11.893859148025513
#Layers:  4 18.951613903045654
#Layers:  5 27.6894211769104
#Layers:  6 37.81098794937134
#Layers:  7 49.80406427383423
#Layers:  8 63.27799916267395
#Layers:  9 77.8781909942627
#Layers:  10 94.12075233459473


In [None]:
# Benchmark: wall-clock time of 101 finite-difference gradient-descent steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers`, `var_init`, `opt` and `var_fd` set by earlier cells.
finite_time = []      # total seconds per layer count
loss_plot_fd_ni = []  # cost returned by the last step of each run
for n in range(1, 11, 1):
  np.random.seed(1)
  num_layers = n
  var_init = 0.05*np.random.randn(num_layers, 3*num_wires)
  start = time.time()

  opt = GradientDescentOptimizer(0.01)

  var_fd = var_init.copy()

  for it in range(101):# while True:
      var_fd, _cost = opt.step_and_cost(lambda v: cost_function(v), var_fd, 
                                    grad_fn=lambda var_fd: finite_difference(cost_function, var_fd)) 
      

      # if it%100==0:
      #   print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
  loss_plot_fd_ni.append(_cost)
  end = time.time()
  t = end - start
  finite_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 2.484086275100708
#Layers:  2 6.372936487197876
#Layers:  3 11.72551941871643
#Layers:  4 18.803268671035767
#Layers:  5 27.374284744262695
#Layers:  6 37.67602729797363
#Layers:  7 49.49640250205994
#Layers:  8 62.594430685043335
#Layers:  9 77.91492509841919
#Layers:  10 94.3630723953247


In [None]:
# Benchmark: wall-clock time of 101 LCU gradient-descent steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers`, `ansatz_lcu`, `wires_lcu`, `params_init`, `opt`
# and `var_lcu` set by earlier cells; unlike the other benchmarks, the timer starts before seeding.
lcu_time = []          # total seconds per layer count
loss_plot_lcu_ni  = [] # cost returned by the last step of each run
for n in range(1, 11, 1):
  start = time.time()

  np.random.seed(1)

  num_layers = n

  # gate names / wires for `n` layers; `padding` reads the global `ansatz_lcu`
  ansatz_lcu = ['H', 'RX', 'RY', 'RZ']*num_layers + ['Problem']
  wires_lcu = [0, 0, 0, 0]*num_layers + [0]

  H = ['Projector1']
  ham_wires = [0]
  ham_coeffs = [1.0]

  params_init= 0.05*np.random.randn(1, 3*num_wires*num_layers)[0]

  opt = GradientDescentOptimizer(0.01)

  var_lcu = params_init.copy()
  

  for it in range(101):# while True:
      var_lcu, _cost = opt.step_and_cost(lambda v: cost_function_lcu(ansatz_lcu, v, wires_lcu), var_lcu, 
                                    grad_fn=lambda var_lcu: lcu_gradients(ansatz_lcu, H, ham_wires, ham_coeffs, var_lcu, wires_lcu)) 
      
      # if it%100==0:
      #   print("Iter: {:5d} | Cost: {:0.11f} ".format(it, _cost))
  loss_plot_lcu_ni.append(_cost)
  end = time.time()
  t = end - start
  lcu_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 3.1815805435180664
#Layers:  2 8.054742097854614
#Layers:  3 14.854975461959839
#Layers:  4 23.38874650001526
#Layers:  5 33.91778540611267
#Layers:  6 46.317798137664795
#Layers:  7 60.62680411338806
#Layers:  8 75.98629307746887
#Layers:  9 94.37583208084106
#Layers:  10 113.77151584625244


In [None]:
# Benchmark: wall-clock time of 101 quantum-natural-gradient steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers`, `var_init`, `opt` and `var_qng` set by earlier cells.
qng_time = []          # total seconds per layer count
loss_plot_qng_ni = []  # cost returned by the last step of each run
for n in range(1, 11, 1):
  np.random.seed(1)
  num_layers = n
  var_init = 0.05*np.random.randn(num_layers, 3*num_wires)
  start = time.time()

  opt = qml.GradientDescentOptimizer(0.01)

  var_qng = var_init.copy()
  

  for it in range(101):# while True:
      var_qng, _cost = opt.step_and_cost(lambda v: cost_function(v), var_qng, 
                                    grad_fn=quantum_natural_gradient)
      

      # if it%100==0:
      #   print("Iter: {:5d} | Cost: {:0.15f} ".format(it, _cost))
  loss_plot_qng_ni.append(_cost)
  end = time.time()
  t = end - start
  qng_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 3.573849678039551
#Layers:  2 9.14212155342102
#Layers:  3 16.307144165039062
#Layers:  4 25.900527477264404
#Layers:  5 37.53437519073486
#Layers:  6 54.91864728927612
#Layers:  7 71.02966380119324
#Layers:  8 88.93674850463867
#Layers:  9 109.63929510116577
#Layers:  10 131.71135187149048


In [None]:
#@title 
# Re-definition of the Newton helpers from the "Quantum newton gradient"
# section. The global `s` is reset here because the CV cells rebind it to 0.1;
# the helpers below no longer read it, they take the shift as a keyword.
s = np.pi/2

def hessian_parameter_shift_term(qnode, params, i, j, shift=np.pi/2):
    """Second derivative of ``qnode`` w.r.t. ``params[i, j]`` by parameter shift.

    Evaluates ``qnode`` at the entry shifted by ``+2*shift``, at the centre and
    at ``-2*shift`` and returns
    ``(f(x+2s) - 2 f(x) + f(x-2s)) / (4 sin(s)^2)``.
    With the default ``shift = pi/2`` the denominator is 4, which reproduces
    the previous ``1/4`` prefactor.

    Args:
        qnode: callable taking a parameter array and returning a scalar.
        params: 2-D parameter array; it is copied, never modified.
        i, j: index of the entry to differentiate.
        shift: half of the evaluation offset (default pi/2).
    """
    shifted = params.copy()

    shifted[i, j] += 2*shift
    forward = qnode(shifted)  # forward evaluation

    shifted[i, j] -= 2*shift
    center = qnode(shifted) # center evaluation

    shifted[i, j] -= 2*shift
    backward = qnode(shifted) # backward evaluation

    return (forward - 2*center + backward) / (4*np.sin(shift)**2)

def hessian_matrix(qnode, params, shift=np.pi/2):
    """Diagonal Hessian approximation of ``qnode`` at ``params``.

    Computes one second derivative per entry of ``params``, places them on
    the diagonal of a square matrix and replaces every exact zero (including
    the off-diagonal entries) by 1e-17 before returning it.
    """
    hessian_m = np.zeros_like((params))
    for i in range(len(hessian_m)):
        for j in range(len(hessian_m[0])):
            hessian_m[i, j] += hessian_parameter_shift_term(qnode, params, i, j, shift=shift)
    hessian = np.diag(hessian_m.flatten())
    hessian[hessian == 0] = 1e-17
    return hessian

def newton_gradient(qnode, params):
  """Newton direction: solve ``hessian @ x = grad`` for the flattened gradient.

  ``grad`` is the parameter-shift gradient of ``qnode`` and ``hessian`` the
  matrix from ``hessian_matrix``. The result is returned flattened.
  """
  var = params.copy()
  hessian = hessian_matrix(qnode, var)
  grad = parameter_shift(qnode, var).flatten()
  newton_grad = np.linalg.solve(hessian, grad)
  return newton_grad

In [None]:
# Benchmark: wall-clock time of 101 Newton-gradient steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers`, `var_init`, `opt` and `var_newton` set by earlier cells.
newton_time = []          # total seconds per layer count
loss_plot_newton_ni = []  # cost returned by the last step of each run
for n in range(1, 11, 1):
  np.random.seed(1)
  num_layers = n
  var_init = 0.05*np.random.randn(num_layers, 3*num_wires)
  start = time.time()

  opt = qml.GradientDescentOptimizer(0.01)

  var_newton = var_init.copy()
  
  for it in range(101):# while True:
      var_newton, _cost = opt.step_and_cost(lambda v: cost_function(v), var_newton, 
                                    grad_fn=lambda var: newton_gradient(cost_function, var))

      # if it%100==0:
      #   print("Iter: {:5d} | Cost: {:0.15f} ".format(it, _cost))
  loss_plot_newton_ni.append(_cost)
  end = time.time()
  t = end - start
  newton_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 5.583561420440674
#Layers:  2 14.89606261253357
#Layers:  3 28.472343683242798
#Layers:  4 45.797685384750366
#Layers:  5 66.85524034500122
#Layers:  6 95.77269196510315
#Layers:  7 126.17850279808044
#Layers:  8 159.08231830596924
#Layers:  9 197.22213768959045
#Layers:  10 237.60688495635986


In [None]:
# Benchmark: wall-clock time of 101 CV parameter-shift gradient-descent steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers_cv`, `var_init_cv`, `opt_cv`, `s`, `alpha` and `var_cv`.
parameter_shift_cv_time = []  # total seconds per layer count
loss_plot_cv_ni = []          # cost returned by the last step of each run
for n in range(1, 11, 1):
  np.random.seed(1)
  num_layers_cv = n
  var_init_cv = 0.05*np.random.randn(num_layers_cv, 3*num_wires_cv)
  start = time.time()

  opt_cv = GradientDescentOptimizer(0.01)

  # shift sizes read as globals by `parameter_shift_cv_term`
  s = 0.1
  alpha = np.pi/2

  var_cv = var_init_cv.copy()

  for it in range(101):
      # Fix: step with `opt_cv`, the optimizer created for this run. The loop
      # used to call `opt`, a leftover from an earlier cell.
      var_cv, _cost = opt_cv.step_and_cost(lambda v: cost_function_cv(v), var_cv, 
                                    grad_fn=lambda var_cv: parameter_shift_cv(cost_function_cv, var_cv)) 

  loss_plot_cv_ni.append(_cost)
  end = time.time()
  t = end - start
  parameter_shift_cv_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 1.578887701034546
#Layers:  2 3.7091281414031982
#Layers:  3 6.710351467132568
#Layers:  4 10.19310998916626
#Layers:  5 14.560569047927856
#Layers:  6 19.6621150970459
#Layers:  7 25.243085622787476
#Layers:  8 31.838932275772095
#Layers:  9 38.62765192985535
#Layers:  10 46.58437776565552


In [None]:
# Benchmark: wall-clock time of 101 CV finite-difference gradient-descent steps for 1..10 ansatz layers.
# NOTE: this cell overwrites `num_layers_cv`, `var_init_cv`, `opt_cv`, `s`, `alpha` and `var_cv_fd`.
finite_cv_time = []      # total seconds per layer count
loss_plot_cv_fd_ni = []  # cost returned by the last step of each run
for n in range(1, 11, 1): 
  np.random.seed(1)
  num_layers_cv = n
  var_init_cv = 0.05*np.random.randn(num_layers_cv, 3*num_wires_cv)
  start = time.time()

  opt_cv = GradientDescentOptimizer(0.01)

  # globals used by the CV parameter-shift rule; re-assigned here as in the previous cell
  s = 0.1
  alpha = np.pi/2

  var_cv_fd = var_init_cv.copy()
  
  break_count = 0  # not used in this cell

  for it in range(101):
      # Fix: step with `opt_cv`, the optimizer created for this run. The loop
      # used to call `opt`, a leftover from an earlier cell.
      var_cv_fd, _cost = opt_cv.step_and_cost(lambda v: cost_function_cv(v), var_cv_fd, 
                                    grad_fn=lambda var_cv_fd: finite_difference_cv(cost_function_cv, var_cv_fd)) 

  loss_plot_cv_fd_ni.append(_cost)
  end = time.time()
  t = end - start
  finite_cv_time.append(t)
  print("#Layers: ", n, t)

#Layers:  1 1.593559741973877
#Layers:  2 3.785745620727539
#Layers:  3 6.485990762710571
#Layers:  4 10.046629667282104
#Layers:  5 14.556264877319336
#Layers:  6 19.44574999809265
#Layers:  7 25.147919178009033
#Layers:  8 31.527738094329834
#Layers:  9 38.7872269153595
#Layers:  10 46.187861919403076


In [None]:
#@title Loss Comparison
# Draw the recorded loss curve of every gradient method on one figure.
# Traces named "Discrete ..." are dashed; traces named "Continuous ..." keep
# plotly's default line style.
dv = pd.DataFrame({"Iteration":range(len(loss_plot)), "Loss":loss_plot})
dv_fd = pd.DataFrame({"Iteration":range(len(loss_plot_fd)), "Loss":loss_plot_fd})
dv_lcu = pd.DataFrame({"Iteration":range(len(loss_plot_lcu)), "Loss":loss_plot_lcu})
dv_qng = pd.DataFrame({"Iteration":range(len(loss_plot_qng)), "Loss":loss_plot_qng})
dv_newton = pd.DataFrame({"Iteration":range(len(loss_plot_newton)), "Loss":loss_plot_newton})

cv = pd.DataFrame({"Iteration":range(len(loss_plot_cv)), "Loss":loss_plot_cv})
cv_fd = pd.DataFrame({"Iteration":range(len(loss_plot_cv_fd)), "Loss":loss_plot_cv_fd})

# (frame, legend name, line style); None keeps the default line.
dashed_line = dict(width=2, dash='dash')
loss_traces = [
    (dv, "Discrete parameter shift", dashed_line),
    (dv_fd, "Discrete finite difference", dashed_line),
    (dv_lcu, "Discrete LCU", dashed_line),
    (dv_qng, "Discrete QNG", dashed_line),
    (dv_newton, "Discrete Newton method", dashed_line),
    (cv, "Continuous parameter shift", None),
    (cv_fd, "Continuous finite difference", None),
]

fig = go.Figure()
for trace_df, trace_name, trace_line in loss_traces:
    # Set mode explicitly on every trace: without it plotly adds markers to
    # traces with fewer than 20 points, so curves could render inconsistently.
    fig.add_trace(go.Scatter(x=trace_df["Iteration"], y=trace_df["Loss"], mode="lines",
                             name=trace_name, line=trace_line))
fig.update_layout(title_text='Gradient descent with different gradient methods: loss per iteration',
                  xaxis_title="Iteration",
                  yaxis_title="Loss")
fig.show()

In [None]:
#@title Time Comparison
# Draw the recorded simulation time of every gradient method against the
# number of layers (entry k of each timing list is plotted at #Layers = k+1).
# Traces named "Discrete ..." are dashed; traces named "Continuous ..." keep
# plotly's default line style.
dv = pd.DataFrame({"#Layers":range(1, len(parameter_shift_time)+1), "Simulation time(second)":parameter_shift_time})
dv_fd = pd.DataFrame({"#Layers":range(1, len(finite_time)+1), "Simulation time(second)":finite_time})
dv_lcu = pd.DataFrame({"#Layers":range(1, len(lcu_time)+1), "Simulation time(second)":lcu_time})
dv_qng = pd.DataFrame({"#Layers":range(1, len(qng_time)+1), "Simulation time(second)":qng_time})
dv_newton = pd.DataFrame({"#Layers":range(1, len(newton_time)+1), "Simulation time(second)":newton_time})

cv = pd.DataFrame({"#Layers":range(1, len(parameter_shift_cv_time)+1), "Simulation time(second)":parameter_shift_cv_time})
cv_fd = pd.DataFrame({"#Layers":range(1, len(finite_cv_time)+1), "Simulation time(second)":finite_cv_time})

# (frame, legend name, line style); None keeps the default line.
dashed_line = dict(width=2, dash='dash')
time_traces = [
    (dv, "Discrete parameter shift", dashed_line),
    (dv_fd, "Discrete finite difference", dashed_line),
    (dv_lcu, "Discrete LCU", dashed_line),
    (dv_qng, "Discrete QNG", dashed_line),
    (dv_newton, "Discrete Newton method", dashed_line),
    (cv, "Continuous parameter shift", None),
    (cv_fd, "Continuous finite difference", None),
]

fig = go.Figure()
for trace_df, trace_name, trace_line in time_traces:
    # Set mode explicitly on every trace: without it plotly adds markers to
    # traces with fewer than 20 points, which would make a short series look
    # different from the traces that do set mode="lines".
    fig.add_trace(go.Scatter(x=trace_df["#Layers"], y=trace_df["Simulation time(second)"], mode="lines",
                             name=trace_name, line=trace_line))
fig.update_layout(title_text='Gradient descent with different gradient methods: simulation time per number of layers',
                  xaxis_title="#Layers",
                  yaxis_title="Simulation time(second)")
fig.show()

In [None]:
#@title 
# Summary table for the discrete methods: for each method, the loss and the
# recorded time value at indices 100, 600 and 1200 of its per-iteration lists.
rows = [
    'Discrete parameter shift',
    'Discrete finite difference',
    'Discrete LCU',
    'Discrete QNG',
    'Discrete Newton method',
]
columns_g = ["Iteration 100", "Iteration 600", "Iteration 1200"]
columns_s = ["Loss", "Simulation time(second)"]

# One table row per method: (loss, time) at each checkpoint, in checkpoint order.
df = pd.DataFrame([
    [series[checkpoint] for checkpoint in (100, 600, 1200) for series in (losses, times)]
    for losses, times in (
        (loss_plot, parameter_shift_time_i),
        (loss_plot_fd, finite_time_i),
        (loss_plot_lcu, lcu_time_i),
        (loss_plot_qng, qng_time_i),
        (loss_plot_newton, newton_time_i),
    )
])

col_ix = pd.MultiIndex.from_product([columns_g, columns_s])
row_ix = pd.MultiIndex.from_product([rows])
df = df.set_index(row_ix)
df.columns = col_ix

# Losses are rendered in scientific notation; the time columns stay numeric
# and are rounded for display by the final expression.
for group in columns_g:
    df[(group, 'Loss')] = df[(group, 'Loss')].map('{:.2e}'.format)
df.round(2)

Unnamed: 0_level_0,Iteration 100,Iteration 100,Iteration 600,Iteration 600,Iteration 1200,Iteration 1200
Unnamed: 0_level_1,Loss,Simulation time(second),Loss,Simulation time(second),Loss,Simulation time(second)
Discrete parameter shift,0.0791,6.71,1.09e-05,38.54,3.96e-10,76.83
Discrete finite difference,0.0791,6.4,1.09e-05,38.01,3.96e-10,75.93
Discrete LCU,0.0791,8.21,1.09e-05,48.78,3.96e-10,97.22
Discrete QNG,4.46e-05,9.2,6.45e-25,54.15,4.47e-31,108.07
Discrete Newton method,0.00147,15.3,1.18e-13,89.93,2.44e-25,178.82


In [None]:
#@title 
# Summary table for the continuous methods: for each method, the loss and the
# recorded time value at indices 100, 600 and 1200 of its per-iteration lists.
rows = [
    'Continuous parameter shift',
    'Continuous finite difference',
]
columns_g = ["Iteration 100", "Iteration 600", "Iteration 1200"]
columns_s = ["Loss", "Simulation time(second)"]

# One table row per method: (loss, time) at each checkpoint, in checkpoint order.
df = pd.DataFrame([
    [series[checkpoint] for checkpoint in (100, 600, 1200) for series in (losses, times)]
    for losses, times in (
        (loss_plot_cv, parameter_shift_cv_time_i),
        (loss_plot_cv_fd, finite_cv_time_i),
    )
])

col_ix = pd.MultiIndex.from_product([columns_g, columns_s])
row_ix = pd.MultiIndex.from_product([rows])
df = df.set_index(row_ix)
df.columns = col_ix

# Losses are rendered in scientific notation; the time columns stay numeric
# and are rounded for display by the final expression.
for group in columns_g:
    df[(group, 'Loss')] = df[(group, 'Loss')].map('{:.2e}'.format)
df.round(2)

Unnamed: 0_level_0,Iteration 100,Iteration 100,Iteration 600,Iteration 600,Iteration 1200,Iteration 1200
Unnamed: 0_level_1,Loss,Simulation time(second),Loss,Simulation time(second),Loss,Simulation time(second)
Continuous parameter shift,0.0718,3.69,0.0048,22.34,8.57e-06,44.63
Continuous finite difference,0.0521,3.7,0.00797,22.23,0.000523,44.54


In [None]:
#@title
# Summary table for the discrete methods: for each method, the recorded loss
# and simulation time at list positions 0, 4 and 9 (labelled #Layers 1, 5, 10).
rows = [
    'Discrete parameter shift',
    'Discrete finite difference',
    'Discrete LCU',
    'Discrete QNG',
    'Discrete Newton method',
]
columns_g = ["#Layers 1", "#Layers 5", "#Layers 10"]
columns_s = ["Loss", "Simulation time(second)"]

# One table row per method: (loss, time) at each selected position, in order.
df = pd.DataFrame([
    [series[position] for position in (0, 4, 9) for series in (losses, times)]
    for losses, times in (
        (loss_plot_ni, parameter_shift_time),
        (loss_plot_fd_ni, finite_time),
        (loss_plot_lcu_ni, lcu_time),
        (loss_plot_qng_ni, qng_time),
        (loss_plot_newton_ni, newton_time),
    )
])

col_ix = pd.MultiIndex.from_product([columns_g, columns_s])
row_ix = pd.MultiIndex.from_product([rows])
df = df.set_index(row_ix)
df.columns = col_ix

# Losses are rendered in scientific notation; the time columns stay numeric
# and are rounded for display by the final expression.
for group in columns_g:
    df[(group, 'Loss')] = df[(group, 'Loss')].map('{:.2e}'.format)
df.round(2)

Unnamed: 0_level_0,#Layers 1,#Layers 1,#Layers 5,#Layers 5,#Layers 10,#Layers 10
Unnamed: 0_level_1,Loss,Simulation time(second),Loss,Simulation time(second),Loss,Simulation time(second)
Discrete parameter shift,0.114,2.51,0.00199,27.69,1.79e-05,94.12
Discrete finite difference,0.114,2.48,0.00199,27.37,1.79e-05,94.36
Discrete LCU,0.114,3.18,0.00199,33.92,1.79e-05,113.77
Discrete QNG,0.00615,3.57,3.75e-11,37.53,1.1500000000000002e-27,131.71
Discrete Newton method,0.0259,5.58,2.49e-06,66.86,1.29e-13,237.61


In [None]:
#@title
# Summary table for the continuous methods: for each method, the recorded loss
# and simulation time at list positions 0, 4 and 9 (labelled #Layers 1, 5, 10).
rows = [
    'Continuous parameter shift',
    'Continuous finite difference',
]
columns_g = ["#Layers 1", "#Layers 5", "#Layers 10"]
columns_s = ["Loss", "Simulation time(second)"]

# One table row per method: (loss, time) at each selected position, in order.
df = pd.DataFrame([
    [series[position] for position in (0, 4, 9) for series in (losses, times)]
    for losses, times in (
        (loss_plot_cv_ni, parameter_shift_cv_time),
        (loss_plot_cv_fd_ni, finite_cv_time),
    )
])

col_ix = pd.MultiIndex.from_product([columns_g, columns_s])
row_ix = pd.MultiIndex.from_product([rows])
df = df.set_index(row_ix)
df.columns = col_ix

# Losses are rendered in scientific notation; the time columns stay numeric
# and are rounded for display by the final expression.
for group in columns_g:
    df[(group, 'Loss')] = df[(group, 'Loss')].map('{:.2e}'.format)
df.round(2)

Unnamed: 0_level_0,#Layers 1,#Layers 1,#Layers 5,#Layers 5,#Layers 10,#Layers 10
Unnamed: 0_level_1,Loss,Simulation time(second),Loss,Simulation time(second),Loss,Simulation time(second)
Continuous parameter shift,0.0919,1.58,0.0155,14.56,0.00379,46.58
Continuous finite difference,0.092,1.59,0.00938,14.56,0.00485,46.19
