In [1]:
using QuantumCollocation
using NamedTrajectories
using TrajectoryIndexingUtils
using Flux
using ReinforcementLearning
using IntervalSets
using LinearAlgebra
using Base
using Distributions
using Statistics
using Printf
using Reexport
using Revise

includet("RL-Copy1.jl")

In [2]:
RZ_traj = load_traj("RZ_pretrained.jld2")

const Units = 1e9
const MHz = 1e6 / Units
const GHz = 1e9 / Units
const ns = 1e-9 * Units
const μs = 1e-6 * Units
;


# Operators
const Paulis = Dict(
    "I" => Matrix{ComplexF64}([1 0; 0 1]),
    "X" => Matrix{ComplexF64}([0 1; 1 0]),
    "Y" => Matrix{ComplexF64}([0 im; -im 0]),
    "Z" => Matrix{ComplexF64}([1 0; 0 -1]),
)

rz_op(theta) = exp(-im/2 * theta[1] * Paulis["Z"]);

RZ = Gate(1,rz_op)

H_drives = [
     Paulis["X"],Paulis["Y"]
]
system = QuantumSystem(H_drives);
t_f = 10* ns
n_steps = 51
times = range(0, t_f, n_steps)  # Alternative: collect(0:Δt:t_f)
n_controls=1
n_qubits=1;
Δt = times[2] - times[1]

N = 11
;

In [3]:
Pretraining_Env = PretrainingGateEnv(
                                    system = system,
                                    Δt=Δt,
                                    T=n_steps,
                                    g=RZ,
                                    N=11,
                                    pretraining_trajectory=RZ_traj;
                                    dda_bound=1.0
                                    )

Training_Env = TrainingGateEnv(
                            system = system,
                            Δt=Δt,
                            T=n_steps,
                            g=RZ;
                            dda_bound=1.0
                            );

policy = GatePolicy(Training_Env)


GatePolicy(Chain(Dense(14 => 16, relu), Dense(16 => 16, relu), Dense(16 => 2, softsign)), Chain(Dense(14 => 16, relu), Dense(16 => 16, relu), Dense(16 => 1, softsign)))

In [4]:
rewards,acts,states = SampleTrajectory(policy,Pretraining_Env)
a=Pretraining_Env.a
da=Pretraining_Env.da
dda=Pretraining_Env.dda
;

In [5]:
(getTrajectoryLoss(Pretraining_Env)-sum(rewards))/getTrajectoryLoss(Pretraining_Env)

0.9938696958458386

In [6]:
sum((euler(dda,n_steps,Δt)-a).^2)

3.3236895057016784e-29

In [7]:
unitary_rollout(operator_to_iso_vec([1+0.0im 0; 0 1]),a,Δt,system)[:,end]-Pretraining_Env.current_op

8-element Vector{Float64}:
  0.0
  8.326672684688674e-17
  5.551115123125783e-17
 -5.551115123125783e-17
 -8.326672684688674e-17
  0.0
 -5.551115123125783e-17
 -5.551115123125783e-17

In [8]:
rewards,acts,states = SampleTrajectory(policy,Training_Env)
a=Training_Env.a
da=Training_Env.da
dda=Training_Env.dda
;

In [9]:
(getTrajectoryLoss(Training_Env)-sum(rewards))/getTrajectoryLoss(Training_Env)

0.9976525330130596

In [10]:
sum((euler(dda,n_steps,Δt)-a).^2)

2.565828725393675e-29

In [11]:
unitary_rollout(operator_to_iso_vec([1+0.0im 0; 0 1]),a,Δt,system)[:,end]-Training_Env.current_op

8-element Vector{Float64}:
  2.7755575615628914e-16
 -2.7755575615628914e-17
  9.020562075079397e-17
  2.220446049250313e-16
  2.7755575615628914e-17
  2.7755575615628914e-16
  2.220446049250313e-16
 -9.020562075079397e-17

In [12]:
ACpolicy = PPO(Pretraining_Env;iterations=250,)

Iterations 1 Complete
Epochs 1 
Mean Rtg: -0.70695
Final KL: 0.46478
Mean Policy Loss: 0.05685
Mean Value Loss: 1.13877
-------------------------
Iterations 2 Complete
Epochs 1 
Mean Rtg: -0.71837
Final KL: 0.90752
Mean Policy Loss: 0.82135
Mean Value Loss: 0.89322
-------------------------
Iterations 3 Complete
Epochs 1 
Mean Rtg: -0.78999
Final KL: 0.09607
Mean Policy Loss: 0.03705
Mean Value Loss: 0.71744
-------------------------
Iterations 4 Complete
Epochs 1 
Mean Rtg: -0.69126
Final KL: 1.81478
Mean Policy Loss: 0.00140
Mean Value Loss: 0.64161
-------------------------
Iterations 5 Complete
Epochs 1 
Mean Rtg: -0.75841
Final KL: 0.43342
Mean Policy Loss: 0.02600
Mean Value Loss: 0.41784
-------------------------
Iterations 6 Complete
Epochs 2 
Mean Rtg: -0.65029
Final KL: 0.08781
Mean Policy Loss: 0.00201
Mean Value Loss: 0.19362
-------------------------
Iterations 7 Complete
Epochs 1 
Mean Rtg: -0.84332
Final KL: 0.12462
Mean Policy Loss: -0.00229
Mean Value Loss: 0.62900
---

ACGatePolicy(Chain(Dense(14 => 64, relu), Dense(64 => 64, relu), Dense(64 => 2, softsign)), Chain(Dense(14 => 64, relu), Dense(64 => 64, relu), Dense(64 => 1, softsign)), Chain(Dense(14 => 64, relu), Dense(64 => 64, relu), Dense(64 => 1)))

In [13]:
x = range(0,2*pi,1000)
y = [ACpolicy.std_network([v])[1] for v in x]
using CairoMakie
lines(x,y)

LoadError: DimensionMismatch: layer Dense(14 => 64, relu) expects size(input, 1) == 14, but got 1-element Vector{Float64}

In [14]:
RLBase.reset!(Pretraining_Env,angle = [range(0,2*pi,11)[5]])

1-element Vector{Float64}:
 2.5132741228718345

In [15]:
rewards,acts,states =SampleTrajectory(ACpolicy,Pretraining_Env;deterministic=true)

LoadError: MethodError: objects of type ACGatePolicy are not callable

In [16]:
RZ_traj[:a5]

2×51 Matrix{Float64}:
 0.0  1.83246e-20  -0.0196885    -0.0507178   …  0.0708192  0.0285495   0.0
 0.0  6.10707e-20   0.000972287   0.00170063     0.0179483  0.00665824  0.0

In [17]:
fig = Figure()
ax = Axis(fig[1,1])
lines!(ax,1:n_steps,Pretraining_Env.a[1,:])
lines!(ax,1:n_steps,RZ_traj[:a5][1,:])
fig

LoadError: UndefVarError: `Figure` not defined