In [1]:
using PauliPropagation
using Plots
using Bits
using Random
using Distributions: Uniform

In [2]:
# new imports
using ForwardDiff

In [3]:
# Heisenberg Hamiltonian ready to be used in lossfunctions
# keep topology as main argument to avoid forgetting it
function heisenberg_hamiltonian(::Type{CT}, nq::Int, topology; J=1.0) where CT
    
    psum = PauliSum(CT,nq)
    Jc = convert(CT, J)
    if isnothing(topology)
        topology = bricklayertopology(nq;periodic=false) #chain with obc
    end

    for pair in topology
        add!(psum, [:X,:X], collect(pair), Jc)
        add!(psum, [:Y,:Y], collect(pair), Jc)
        add!(psum, [:Z,:Z], collect(pair), Jc)
    end
    return psum
end
# add type default
heisenberg_hamiltonian(nq::Int, topology; J=1.0) = heisenberg_hamiltonian(Float64, nq, topology; J=J)

heisenberg_hamiltonian (generic function with 2 methods)

In [4]:
nq = 3
topology = bricklayertopology(nq;periodic=true)
heisenberg_hamiltonian(nq, topology; J=1.0)

PauliSum(nqubits: 3, 9 Pauli terms:
 1.0 * XXI
 1.0 * YIY
 1.0 * ZZI
 1.0 * ZIZ
 1.0 * IXX
 1.0 * YYI
 1.0 * IZZ
 1.0 * XIX
 1.0 * IYY
)

In [5]:
function neel_bits(nq::Int; up_on_odd::Bool=true)
    if up_on_odd
        # |0101...> → "1" on even sites
        return collect(2:2:nq)
    else
        # |1010...> → "1" on odd sites
        return collect(1:2:nq)
    end
end

neel_bits (generic function with 1 method)

In [6]:
# customized overlap function
# since we are in the Heisenberg picture, we propagate observables and in the final step, calculate the overlap with the initial state
function overlapwithneel(operator, nq::Int; 
                           up_on_odd::Bool=true, 
                           params=nothing)
                           
    # Create Néel state bit representation: indices of "1" bits
    nb = neel_bits(nq; up_on_odd=up_on_odd)
    
    # Compute overlap with computational basis state
    return overlapwithcomputational(operator, nb)
end

overlapwithneel (generic function with 1 method)

In [7]:
function generate_full_bit_pool(nq::Int)
    # Choose appropriate UInt type based on number of qubits (same as PP does)
    # Work with bit representations directly (easier to manipulate)
    UIntType = if nq <= 4
        UInt8
    elseif nq <= 8
        UInt16
    elseif nq <= 16
        UInt32
    else
        UInt64
    end
    
    # Here, we generate all non-identity bit patterns (therefore -1), which gives the full operator pool.
    pool = UIntType[]
    for i in 1:(4^nq - 1)
        push!(pool, UIntType(i))
    end
    
    return pool
end

generate_full_bit_pool (generic function with 1 method)

In [8]:
function bit_to_paulistring(bit_repr, nq)
    paulis = Symbol[]
    sites = Int[]
    
    for qubit in 1:nq #going from right to left in bitstring
        pauli_val = getpauli(bit_repr, qubit) # get pauli of qubit as 0,1,2,3
        #println("Qubit $qubit: Pauli value = $pauli_val")
        if pauli_val != 0  # Skip identity (0) since Paulis are initialised as identity by default
            pauli_symbol = [:I, :X, :Y, :Z][pauli_val + 1] # julia indexing starts at 1!
            push!(paulis, pauli_symbol)
            push!(sites, qubit)
        end
    end
    
    return PauliString(nq, paulis, sites, 1.0), paulis, sites
end

bit_to_paulistring (generic function with 1 method)

In [9]:
psum = heisenberg_hamiltonian(nq, topology; J=1.0)

PauliSum(nqubits: 3, 9 Pauli terms:
 1.0 * XXI
 1.0 * YIY
 1.0 * ZZI
 1.0 * ZIZ
 1.0 * IXX
 1.0 * YYI
 1.0 * IZZ
 1.0 * XIX
 1.0 * IYY
)

In [10]:
# promote PauliSum coefficients to target numeric type CT
function promote_paulisum_coeffs(psum::PauliSum, ::Type{CT}) where {CT}
    newpsum = PauliSum(CT, psum.nqubits)
    for (pstr, coeff) in psum
        # `pstr` is a PauliStringType; add! accepts pstr directly
        add!(newpsum, pstr, convert(CT, tonumber(coeff)))
    end
    return newpsum
end

promote_paulisum_coeffs (generic function with 1 method)

In [11]:
# Function to convert bit representation to PauliString when needed
function bit_to_paulistring(bit_repr, nq)
    paulis = Symbol[]
    sites = Int[]
    
    for qubit in 1:nq #going from right to left in bitstring
        pauli_val = getpauli(bit_repr, qubit) # get pauli of qubit as 0,1,2,3
        #println("Qubit $qubit: Pauli value = $pauli_val")
        if pauli_val != 0  # Skip identity (0) since Paulis are initialised as identity by default
            pauli_symbol = [:I, :X, :Y, :Z][pauli_val + 1] # julia indexing starts at 1!
            push!(paulis, pauli_symbol)
            push!(sites, qubit)
        end
    end
    
    return PauliString(nq, paulis, sites, 1.0), paulis, sites
end

bit_to_paulistring (generic function with 1 method)

In [12]:
function pauli_rotation_from_bits(bit_repr, nq)
    _, paulis, sites = bit_to_paulistring(bit_repr, nq) 
    return PauliRotation(paulis, sites)
end

pauli_rotation_from_bits (generic function with 1 method)

In [13]:
function append_from_bits!(circuit, thetas, chose_op, nq; theta_init=rand())
    gate = pauli_rotation_from_bits(chose_op, nq)
    push!(circuit, gate) # not pushfirst! since we want to apply new gate last (Schrödinger!!)
    push!(thetas, theta_init) # we warm-start the parameters of the new circuit (reusing thetas)
    return circuit, thetas    # index of the new parameter
end

append_from_bits! (generic function with 1 method)

In [14]:
# this is the physically exact formula (using the commutator from the BCH formula) which is incorrect once we truncate
# it shows us the operators which in the untruncated case would be relevant - but we can get stuck in local minima due to this mismatch.
"""
    calc_gradients(bit_pool, H, nq;
                   circuit=nothing, params=nothing,
                   tol=1e-12, verbose=false, up_on_odd=true)

ADAPT-VQE gradients with in-place propagation:
g_P = ⟨φ₀ | [U† H U, U† P U] i | φ₀⟩, where |φ₀⟩ is the Néel state
selected by `up_on_odd`.
"""
function calc_gradients(bit_pool, H, nq;
                        circuit::Union{Nothing,Any}=nothing,
                        params::Union{Nothing,AbstractVector}=nothing,
                        tol::Float64=1e-12,
                        verbose::Bool=false,
                        up_on_odd::Bool=true)

    grads = Float64[]

    # Pre-propagate H once: H_prop = U† H U (only if a circuit is provided)
    H_prop = H
    if circuit !== nothing
        H_prop = deepcopy(H)                 # avoid mutating H
        propagate!(circuit, H_prop, params)  # in-place propagation
    end

    # For each pool element, pre-propagate P (to P_prop) and form [H_prop, P_prop]
    for (k, bit_repr) in enumerate(bit_pool)
        P= bit_to_paulistring(bit_repr, nq)[1]
        psum = PauliSum(nq)
        P = add!(psum, P) # PP works with PauliSums (see datatypes example notebook)

        P_prop = P
        if circuit !== nothing
            P_prop = deepcopy(P)                 # avoid mutating the pool op
            propagate!(circuit, P_prop, params)  # in-place propagation
        end
        
        C = commutator(H_prop, P_prop) 
        if !(iterate(C) !== nothing)           # if the commutator is empty 
            verbose && println("op[$k]: ", P, "  commutator=0  → grad=0.0")
            push!(grads, 0.0)
            continue
        end
        g = overlapwithneel(im * C, nq; up_on_odd=up_on_odd)
        if abs(imag(g)) > tol
            @warn "Gradient has non-negligible imaginary part" imag=imag(g) op=P
        end

        push!(grads, real(g))
        verbose && println("op[$k]: ", P, "  grad=", real(g))
    end

    return grads
end

calc_gradients

In [15]:
grads_std = calc_gradients(generate_full_bit_pool(nq), heisenberg_hamiltonian(nq, topology), nq; verbose=false, up_on_odd=true)


63-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
  0.0
 -4.0
  0.0
  0.0
  4.0
  0.0
  ⋮
  0.0
  0.0
  4.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0

In [16]:
# an equivalent formulation with the commutator, with slight reordering of terms (time benchmark to do)
"""
    calc_gradients(bit_pool, H, nq;
                   circuit=nothing, params=nothing,
                   tol=1e-12, verbose=false, up_on_odd=true)

ADAPT-VQE gradients with in-place propagation:
g_P = -i * ⟨φ₀ | U_ansatz† [H, P] U_ansatz | φ₀⟩, where |φ₀⟩ is the Néel state
selected by `up_on_odd`.
"""
function calc_gradients_math(bit_pool, H, nq;
                        circuit::Union{Nothing,Any}=nothing,
                        params::Union{Nothing,AbstractVector}=nothing,
                        tol::Float64=1e-12,
                        verbose::Bool=false,
                        up_on_odd::Bool=true)

    grads = Float64[]

    for (k, bit_repr) in enumerate(bit_pool)
        P_op, _, _ = bit_to_paulistring(bit_repr, nq)
        P = PauliSum(nq)
        add!(P, P_op)

        # Calculate the commutator with the original Hamiltonian
        C = commutator(H, P)

        if !(iterate(C) !== nothing) # if the commutator is empty
            verbose && println("op[$k]: ", P, "  commutator=0  → grad=0.0")
            push!(grads, 0.0)
            continue
        end

        # Propagate the commutator through the circuit if it exists
        C_prop = C
        if circuit !== nothing
            C_prop = deepcopy(C)
            propagate!(circuit, C_prop, params)
        end

        # Calculate the gradient as the expectation value of the propagated commutator
        g = overlapwithneel(im * C_prop, nq; up_on_odd=up_on_odd)

        if abs(imag(g)) > tol
            @warn "Gradient has non-negligible imaginary part" imag=imag(g) op=P
        end

        push!(grads, real(g))
        verbose && println("op[$k]: ", P, "  grad=", real(g))
    end

    return grads
end

calc_gradients_math

In [17]:
grads_math = calc_gradients_math(generate_full_bit_pool(nq), heisenberg_hamiltonian(nq, topology), nq; verbose=false, up_on_odd=true)   

63-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
  0.0
 -4.0
  0.0
  0.0
  4.0
  0.0
  ⋮
  0.0
  0.0
  4.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0

In [18]:
grads_math == grads_std

true

### Goal: Replace the commutator by a finite difference gradient calculation
- so far, the results do not match! 

In [19]:
"""
    calc_gradients_FD(bit_pool, H, nq;
                      circuit=nothing, params=nothing,
                      tol=1e-12, verbose=false, up_on_odd=true)

ADAPT-VQE gradients with in-place propagation and ForwardDiff:
g_P = d/dθ ⟨φ₀|U†(θ) H_prop U(θ)|φ₀⟩|_{θ=0}
where U(θ) = exp(-iθP) and H_prop = U_ansatz† H U_ansatz
"""
function calc_gradients_FD(bit_pool, H::PauliSum{CT}, nq;
                        circuit::Union{Nothing,Any}=nothing,
                        params::Union{Nothing,AbstractVector}=nothing,
                        tol::Float64=1e-12,
                        verbose::Bool=false,
                        up_on_odd::Bool=true) where {CT}

    grads = Float64[]

    # Pre-propagate H once: H_prop = U† H U (only if a circuit is provided)
    H_prop = H
    if circuit !== nothing
        H_prop = deepcopy(H)                 # avoid mutating H
        propagate!(circuit, H_prop, params)  # in-place propagation
    end

    # For each pool element, pre-propagate P (to P_prop) and form [H_prop, P_prop]
    for (k, bit_repr) in enumerate(bit_pool)
        
        gate = pauli_rotation_from_bits(bit_repr, nq)
        temp_circuit = [gate]

        # Define the energy function for a single parameter theta
        function energy_func(theta_vec::Vector{T}) where T
            theta = theta_vec[1]
            #println(theta)
            # We need a fresh copy of H_prop for each evaluation inside the gradient calculation
            # and we need to promote the coefficients to the Dual number type
            H_theta = promote_paulisum_coeffs(H_prop, T)
            
            propagate!(temp_circuit, H_theta, [theta])
            
            # The imaginary part should be zero for Hermitian observables, but can have small numerical noise
            return real(overlapwithneel(H_theta, nq; up_on_odd=up_on_odd))
        end

        # Calculate the gradient at theta=0
        # ForwardDiff.gradient expects a vector input
        g = ForwardDiff.gradient(energy_func, [0.0])[1]

        push!(grads, g)
        verbose && println("op[$k]: ", bit_repr, "  grad=", g)
    end

    return grads
end

calc_gradients_FD

In [20]:
grads_fd = calc_gradients_FD(generate_full_bit_pool(nq), heisenberg_hamiltonian(nq, topology), nq; verbose=false, up_on_odd=true)

63-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  ⋮
  0.0
  0.0
 -2.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0

In [21]:
nonzero_indices = findall(x -> abs(x) > 1e-8, grads_fd)

4-element Vector{Int64}:
 27
 39
 54
 57

In [22]:
nonzero_indices = findall(x -> abs(x) > 1e-8, grads_std)

8-element Vector{Int64}:
  6
  9
 24
 27
 36
 39
 54
 57

This cannot be a sign issue since the nonzero indices don't match. For no truncation we would expect this to give (approx) the same result.