In [7]:
using QuantumCollocation
using NamedTrajectories
using TrajectoryIndexingUtils
using Flux
using ReinforcementLearning
using IntervalSets
using LinearAlgebra
using Base
using Distributions
using Statistics
using Printf
using Reexport
using Revise

includet("RL.jl")

In [8]:
"""
    L2_loss(traj, symb, index, R; value=nothing)

Return `R * traj.timestep^2 / 2` times the sum of squared entries of
`traj[symb][:, index]`, or of its difference from `value` when one is given.

# Arguments
- `traj`: trajectory whose component `symb` is read.
- `symb`: name of the component.
- `index`: column(s) to evaluate; a single integer or a vector/range of integers.
- `R`: weight applied to the loss.

# Keywords
- `value`: optional target with the same shape as `traj[symb][:, index]`.
  A shape mismatch raises `DimensionMismatch` (plain `-`, not broadcasting).
"""
function L2_loss(
    traj::NamedTrajectory{Float64},
    symb::Symbol,
    index::Union{Integer,AbstractVector{<:Integer}},
    R::Real;
    value::Union{AbstractVecOrMat{<:Real},Nothing}=nothing,
)
    selected = traj[symb][:, index]
    residual = isnothing(value) ? selected : selected - value
    return R * traj.timestep^2 / 2 * sum(abs2, residual)
end

L2_loss (generic function with 1 method)

5-element Vector{Float64}:
 0.6938627332305791
 0.006931338713097701
 0.9343036508822421
 1.0
 0.0

In [13]:
# Scratch check: `reduce(hcat, ...)` over a one-element list of 3-vectors gives a 3×1 matrix.
reduce(hcat, [zeros(3)])

3×1 Matrix{Float64}:
 0.0
 0.0
 0.0

LoadError: UndefVarError: `system` not defined

32×3 Matrix{Float64}:
 1.0  1.0  1.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 1.0  1.0  1.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 ⋮         
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 1.0  1.0  1.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0
 0.0  0.0  0.0

In [10]:
"""
    GatePretrainingEnv(; system, T, 𝒢, N, pretraining_trajectory, traj, kwargs...)

Mutable environment state for pretraining a gate-control policy.

# Fields
- `system`: quantum system; its `H_drives` determine the number of controls.
- `T`: number of steps in the episode.
- `𝒢`: target gate; `𝒢.n` sets the length of `ϕ⃗`.
- `N`: number of grid points on `[0, 2π]` from which each entry of `ϕ⃗` is drawn.
- `pretraining_trajectory`: reference trajectory.
  NOTE(review): presumably the target the pretraining reward compares against — confirm.
- `dda_bound`: scale applied to incoming actions (default `1.0`).
- `time_step`: current step expressed as a fraction of `T` (default `1 / T`).
- `traj`: trajectory being rolled out.
- `ϕ⃗`: gate angles; by default `𝒢.n` values drawn uniformly from the `N`-point grid.
"""
Base.@kwdef mutable struct GatePretrainingEnv <: AbstractEnv
    system::AbstractQuantumSystem
    T::Int64
    𝒢::Gate
    N::Int64
    pretraining_trajectory::NamedTrajectory{Float64}

    dda_bound::Float64 = 1.0
    time_step::Float64 = 1 / T

    traj::NamedTrajectory{Float64}
    # The default must refer to the field `𝒢`; there is no field or local named `g`.
    ϕ⃗::Vector{Float64} = [range(0, 2 * pi, N)[i] for i in rand(DiscreteUniform(1, N), 𝒢.n)]
end

"""
    GatePretrainingEnv(system, T, 𝒢, Δt, N, pretraining_trajectory; dda_bound=1.0)

Build a `GatePretrainingEnv` with a fresh trajectory of `T` columns.

The controls `a`, `da` and `dda` start at zero (one row per entry of
`system.H_drives`), and every column of `Ũ⃗` holds the value
`operator_to_iso_vec` returns for the identity operator. `Δt` becomes the
trajectory timestep.
"""
function GatePretrainingEnv(
    system::AbstractQuantumSystem,
    T::Int64,
    𝒢::Gate,
    Δt::Float64,
    N::Int64,
    pretraining_trajectory::NamedTrajectory{Float64};
    dda_bound::Float64=1.0,
)
    n_controls = length(system.H_drives)
    dim = size(system.H_drives[1], 1)
    # Computed once and tiled across all T columns.
    Ũ⃗_identity = operator_to_iso_vec(Matrix{ComplexF64}(I(dim)))

    # Must be a NamedTuple bound to `components`, which NamedTrajectory receives below.
    components = (
        a = zeros(n_controls, T),
        da = zeros(n_controls, T),
        dda = zeros(n_controls, T),
        Ũ⃗ = repeat(Ũ⃗_identity, 1, T),
    )

    traj = NamedTrajectory(components; timestep=Δt, controls=:a)

    return GatePretrainingEnv(;
        system=system,
        T=T,
        𝒢=𝒢,
        N=N,
        pretraining_trajectory=pretraining_trajectory,
        dda_bound=dda_bound,
        traj=traj,
    )
end

"""
    GateTrainingEnv(; system, T, 𝒢, traj, kwargs...)

Mutable environment state for training a gate-control policy.

# Fields
- `system`: quantum system; its `H_drives` determine the number of controls.
- `T`: number of steps in the episode.
- `𝒢`: target gate; `𝒢.n` sets the length of `ϕ⃗`.
- `dda_bound`: scale applied to incoming actions (default `1.0`).
- `time_step`: current step expressed as a fraction of `T` (default `1 / T`).
- `traj`: trajectory being rolled out.
- `ϕ⃗`: gate angles; by default `𝒢.n` values drawn uniformly from `[0, 2π]`.
"""
Base.@kwdef mutable struct GateTrainingEnv <: AbstractEnv
    system::AbstractQuantumSystem
    T::Int
    𝒢::Gate

    dda_bound::Float64 = 1.0
    time_step::Float64 = 1 / T

    traj::NamedTrajectory{Float64}
    # This struct has no grid size `N`, so the default draws continuous angles,
    # the same way `reset!` does for this environment.
    ϕ⃗::Vector{Float64} = rand(Uniform(0, 2 * pi), 𝒢.n)
end

"""
    GateTrainingEnv(system, T, 𝒢, Δt; dda_bound=1.0)

Build a `GateTrainingEnv` with a fresh trajectory of `T` columns.

The controls `a`, `da` and `dda` start at zero (one row per entry of
`system.H_drives`), and every column of `Ũ⃗` holds the value
`operator_to_iso_vec` returns for the identity operator. `Δt` becomes the
trajectory timestep.
"""
function GateTrainingEnv(
    system::AbstractQuantumSystem,
    T::Int64,
    𝒢::Gate,
    Δt::Float64;
    dda_bound::Float64=1.0,
)
    n_controls = length(system.H_drives)
    dim = size(system.H_drives[1], 1)
    # Computed once and tiled across all T columns.
    Ũ⃗_identity = operator_to_iso_vec(Matrix{ComplexF64}(I(dim)))

    # Must be a NamedTuple bound to `components`, which NamedTrajectory receives below.
    components = (
        a = zeros(n_controls, T),
        da = zeros(n_controls, T),
        dda = zeros(n_controls, T),
        Ũ⃗ = repeat(Ũ⃗_identity, 1, T),
    )

    traj = NamedTrajectory(components; timestep=Δt, controls=:a)

    return GateTrainingEnv(;
        system=system,
        T=T,
        𝒢=𝒢,
        dda_bound=dda_bound,
        traj=traj,
    )
end

"""
    RLBase.is_terminated(env)

Return `true` once the environment has reached step `T - 2` or later.

The step index is recovered by rounding `env.time_step * env.T`. Comparing the
accumulated float directly against `(T - 2) / T` can miss the threshold: with
`T = 10`, repeated addition of `0.1` yields `0.7999999999999999 < 0.8`.
"""
RLBase.is_terminated(env::Union{GatePretrainingEnv,GateTrainingEnv}) =
    round(Int, env.time_step * env.T) >= env.T - 2
"""
    RLBase.action_space(env)

Return the product of one `-1..1` interval per entry of `env.system.H_drives`.
"""
function RLBase.action_space(env::Union{GatePretrainingEnv,GateTrainingEnv})
    per_drive = [(-1..1) for _ in 1:length(env.system.H_drives)]
    return reduce(×, per_drive)
end
"""
    RLBase.state_space(env)

Return the product space matching the layout produced by `RLBase.state`:
one `-1..1` interval per row of `env.traj.Ũ⃗`, unbounded intervals for the
`2 * n_controls` entries of `da` and `a`, `1/T..1` for the time fraction, and
`0..2π` for each of the `env.𝒢.n` angles.
"""
function RLBase.state_space(env::Union{GatePretrainingEnv,GateTrainingEnv})
    n_controls = length(env.system.H_drives)
    # The environments have no `current_op` field; the operator part of the
    # state is one column of `Ũ⃗`, so its length is the row count of that component.
    operator_space = reduce(×, [(-1..1) for _ in 1:size(env.traj.Ũ⃗, 1)])
    control_space = reduce(×, [(-Inf..Inf) for _ in 1:2*n_controls])
    angle_space = reduce(×, [(0..2*pi) for _ in 1:env.𝒢.n])
    return operator_space × control_space × (1/env.T..1) × angle_space
end
"""
    RLBase.state(env)

Return the observation vector at the current step: the column of `Ũ⃗`, then
`da`, then `a`, followed by the time fraction `env.time_step` and the angles `env.ϕ⃗`.
"""
function RLBase.state(env::Union{GatePretrainingEnv,GateTrainingEnv})
    # The step counter lives on the environment (`env.time_step`), not on the
    # trajectory; the index is recovered the same way `act!` does.
    t = round(Int, env.time_step * env.T)
    return reduce(
        vcat,
        [env.traj.Ũ⃗[:, t], env.traj.da[:, t], env.traj.a[:, t], [env.time_step], env.ϕ⃗],
    )
end


# Write the acceleration `dda` at the current step, integrate `a`, `da` and `Ũ⃗`
# one step forward, and advance `env.time_step` by `1 / env.T`.
# `dda` is used as given: no scaling by `dda_bound` and no termination handling.
function _apply_dda!(
    env::Union{GatePretrainingEnv,GateTrainingEnv}, dda::AbstractVector{<:Real}
)
    t = round(Int, env.time_step * env.T)
    Δt = env.traj.timestep
    a_t = env.traj.a[:, t]
    da_t = env.traj.da[:, t]

    # NOTE(review): these in-place writes assume `env.traj[:name]` returns a view
    # into the trajectory data — confirm against the installed NamedTrajectories.
    env.traj[:dda][:, t] = dda
    env.traj[:a][:, t + 1] = a_t + da_t * Δt
    env.traj[:da][:, t + 1] = da_t + dda * Δt
    # Roll out a single step under the controls `a_t` and keep the final column.
    env.traj[:Ũ⃗][:, t + 1] = unitary_rollout(
        env.traj[:Ũ⃗][:, t], hcat(a_t, zeros(length(dda))), Δt, env.system
    )[:, end]

    env.time_step += 1 / env.T
    return nothing
end

"""
    RLBase.act!(env, action)

Apply `action`, scaled by `env.dda_bound`, as the acceleration at the current
step and advance the environment by one step.

When the step index reaches `T - 2` (the threshold `is_terminated` tests for),
the last two accelerations are computed in closed form and applied
automatically. Under the update rule used here they give `a = 0` and `da = 0`
in column `T`:

    dda0 = (-a0 - 2 * da0 * Δt) / Δt^2
    dda1 = (-da0 - dda0 * Δt) / Δt

where `a0` and `da0` are the values at step `T - 2`. These closing
accelerations are applied directly, without `dda_bound` scaling and without
re-entering `act!`.
"""
function RLBase.act!(
    env::Union{GatePretrainingEnv,GateTrainingEnv},
    action::Union{Vector{Float32},Vector{Float64}},
)
    _apply_dda!(env, Vector{Float64}(action) * env.dda_bound)

    t = round(Int, env.time_step * env.T)
    # Integer comparison so accumulated float error in `time_step` cannot shift
    # the step at which the closing sequence runs.
    if t == env.T - 2
        Δt = env.traj.timestep
        a0 = env.traj.a[:, t]
        da0 = env.traj.da[:, t]
        dda0 = (-a0 - da0 * 2 * Δt) / Δt^2
        dda1 = (-da0 - dda0 * Δt) / Δt

        _apply_dda!(env, dda0)
        _apply_dda!(env, dda1)
    end
    return nothing
end

"""
    RLBase.reset!(env::GatePretrainingEnv; ϕ⃗=nothing)

Return `env` to the start of an episode: `time_step` goes back to `1 / T`, the
controls `a`, `da`, `dda` are zeroed, and every column of `Ũ⃗` is set to the
value `operator_to_iso_vec` returns for the identity operator.

If `ϕ⃗` is not supplied, `env.𝒢.n` angles are drawn uniformly from the
`env.N`-point grid on `[0, 2π]`.
"""
function RLBase.reset!(env::GatePretrainingEnv; ϕ⃗::Union{Vector{Float64},Nothing}=nothing)
    env.time_step = 1 / env.T

    # Everything is read from `env`; there are no globals `system` or `T` to rely on.
    n_controls = length(env.system.H_drives)
    dim = size(env.system.H_drives[1], 1)
    # NOTE(review): assigning a component through `env.traj[:name] = ...` assumes
    # NamedTrajectory supports symbol `setindex!` — confirm against the installed version.
    env.traj[:a] = zeros(n_controls, env.T)
    env.traj[:da] = zeros(n_controls, env.T)
    env.traj[:dda] = zeros(n_controls, env.T)
    env.traj[:Ũ⃗] = repeat(operator_to_iso_vec(Matrix{ComplexF64}(I(dim))), 1, env.T)

    env.ϕ⃗ = if isnothing(ϕ⃗)
        [range(0, 2 * pi, env.N)[i] for i in rand(DiscreteUniform(1, env.N), env.𝒢.n)]
    else
        ϕ⃗
    end
end

"""
    RLBase.reset!(env::GateTrainingEnv; ϕ⃗=nothing)

Return `env` to the start of an episode: `time_step` goes back to `1 / T`, the
controls `a`, `da`, `dda` are zeroed, and every column of `Ũ⃗` is set to the
value `operator_to_iso_vec` returns for the identity operator.

If `ϕ⃗` is not supplied, `env.𝒢.n` angles are drawn uniformly from `[0, 2π]`.
"""
function RLBase.reset!(env::GateTrainingEnv; ϕ⃗::Union{Vector{Float64},Nothing}=nothing)
    env.time_step = 1 / env.T

    # Everything is read from `env`; there are no globals `system` or `T` to rely on.
    n_controls = length(env.system.H_drives)
    dim = size(env.system.H_drives[1], 1)
    # NOTE(review): assigning a component through `env.traj[:name] = ...` assumes
    # NamedTrajectory supports symbol `setindex!` — confirm against the installed version.
    env.traj[:a] = zeros(n_controls, env.T)
    env.traj[:da] = zeros(n_controls, env.T)
    env.traj[:dda] = zeros(n_controls, env.T)
    env.traj[:Ũ⃗] = repeat(operator_to_iso_vec(Matrix{ComplexF64}(I(dim))), 1, env.T)

    env.ϕ⃗ = isnothing(ϕ⃗) ? rand(Uniform(0, 2 * pi), env.𝒢.n) : ϕ⃗
end

GateTrainingEnv

In [25]:
"""
    ControlPolicy(env; l=[16, 16])

Build a dense `Chain` mapping the environment state to one output per control.

The input width is `length(RLBase.state(env))`, the hidden widths are given by
`l` (each hidden layer uses `relu`), and the output layer has one unit per
entry of `env.system.H_drives` with a `softsign` activation.
"""
function ControlPolicy(env::Union{GatePretrainingEnv,GateTrainingEnv};l::Vector{Int64}=[16,16])
    n_out = length(env.system.H_drives)
    n_in = length(RLBase.state(env))

    # Consecutive pairs of `widths` are the (input, output) sizes of the relu layers.
    widths = vcat(n_in, l)
    hidden = map(k -> Dense(widths[k] => widths[k + 1], relu), 1:length(l))

    return Chain(hidden..., Dense(l[end] => n_out, softsign))
end

# Evaluate a policy network directly on an environment: the current state is
# converted to `Vector{Float32}` and passed through the chain itself.
# A plain `Chain`, as built by `ControlPolicy`, has no `mean_network` property.
(𝒫::Chain)(env::Union{GatePretrainingEnv,GateTrainingEnv}) =
    𝒫(Vector{Float32}(RLBase.state(env)))
    

In [29]:
# Scratch cell: `end` has no meaning outside an indexing expression, so this
# literal fails to parse ("missing last argument in range expression").
[1:end]

LoadError: ParseError:
# Error @ In[29]:1:4
[1:end]
#  └ ── missing last argument in range expression

In [None]:

"""
    RLBase.reward(env::GatePretrainingEnv; action=nothing, S=1.0, S_a=S, S_da=S, S_dda=S)

Return the negative weighted squared deviation of `env.traj` from the reference
stored in `env.pretraining_trajectory`.

The angles `env.ϕ⃗` are mapped back to their positions on the `env.N`-point
grid and flattened into a single index `idx`; the reference components are
then looked up as `a<idx>`, `da<idx>` and `dda<idx>`.

- Before termination, the loss covers `a` and `da` at the current step plus the
  deviation of `action` (scaled by `env.dda_bound`) from the reference `dda`.
- After termination, the loss covers `a`, `da` and `dda` over the last three
  columns `T-2:T`.

# Keywords
- `action`: action for the current step; required before termination.
- `S`: common default weight.
- `S_a`, `S_da`, `S_dda`: per-component weights.

# Throws
- `ArgumentError` if `action` is `nothing` before termination.
"""
function RLBase.reward(env::GatePretrainingEnv;
                action::Union{AbstractVector{<:Real},Nothing}=nothing,
                S::Float64=1.0,
                S_a::Float64=S,
                S_da::Float64=S,
                S_dda::Float64=S)
    n = env.𝒢.n
    # Round instead of converting exactly: the grid position is recovered from
    # floating-point angles and an exact conversion can throw on round-off.
    grid = round.(Int, env.ϕ⃗ .* (env.N - 1) ./ (2 * pi) .+ 1)
    idx = sum(((grid[i] - 1) * env.N^(n - i) for i in 1:n-1); init=0) + grid[end]

    ref_a = env.pretraining_trajectory[Symbol("a" * string(idx))]
    ref_da = env.pretraining_trajectory[Symbol("da" * string(idx))]
    ref_dda = env.pretraining_trajectory[Symbol("dda" * string(idx))]

    if !RLBase.is_terminated(env)
        isnothing(action) &&
            throw(ArgumentError("`action` is required before the episode has terminated"))
        t = round(Int, env.time_step * env.T)
        dda = Vector{Float64}(action) * env.dda_bound
        Δt = env.traj.timestep

        # All terms sit inside one parenthesised expression: a `return` followed
        # by `- term` on the next line would silently drop the later terms.
        return -(
            L2_loss(env.traj, :a, t, S_a; value=ref_a[:, t]) +
            L2_loss(env.traj, :da, t, S_da; value=ref_da[:, t]) +
            sum(abs2, dda - ref_dda[:, t]) * Δt^2 / 2 * S_dda
        )
    end

    # Terminal reward: accumulate the per-column losses over the last three
    # columns, for a, da and dda.
    return -sum(
        L2_loss(env.traj, :a, t, S_a; value=ref_a[:, t]) +
        L2_loss(env.traj, :da, t, S_da; value=ref_da[:, t]) +
        L2_loss(env.traj, :dda, t, S_dda; value=ref_dda[:, t])
        for t in env.T-2:env.T
    )
end