In [1]:
using Flux
using Test
using Statistics
using ReinforcementLearning
using Plots

┌ Info: CUDAdrv.jl failed to initialize, GPU functionality unavailable (set JULIA_CUDA_SILENT or JULIA_CUDA_VERBOSE to silence or expand this message)
└ @ CUDAdrv C:\Users\mclau\.julia\packages\CUDAdrv\b1mvw\src\CUDAdrv.jl:67


In [2]:
#parameters
L=[NNlib.relu, NNlib.σ, NNlib.celu, NNlib.lisht, identity, NNlib.hardsigmoid, NNlib.hardtanh, NNlib.softplus,
   NNlib.swish, NNlib.logcosh];

In [25]:
#AdaNet Part
@testset "generator" begin
    subnetwork1=generateSubnetwork(3,2,1,3)
    subnetwork2=generateSubnetwork(3,2,3,3)
    x=rand(3)
    @test length(subnetwork1)==2
    @test length(subnetwork2)==4
    @test length(subnetwork1[1](x))==3
    @test length(subnetwork1[2](x))==2
    @test length(subnetwork2[3](x))==3
    @test length(subnetwork2[4](x))==2 
    @test typeof(subnetwork1[1](x))==Array{Float32,1}
end

@testset "weakLearner" begin
    subnetwork1=weakLearner(4,2,1,4,[],200)
    subnetwork2=weakLearner(4,2,2,4,[subnetwork1],200)
    subnetwork3=weakLearner(4,2,2,4,[subnetwork1,subnetwork2],200)
    x=rand(4)
    @test length(subnetwork1)==2
    @test length(subnetwork1[1](x))==4
    @test length(subnetwork1[2](x))==2
    @test length(subnetwork2)==3
    @test length(subnetwork2[1](x))==4
    @test length(subnetwork2[3](x))==2
    @test length(subnetwork3)==3
    @test length(subnetwork3[1](x))==4
    @test length(subnetwork3[3](x))==2
    
end

@testset "adaNetDqnObject&Functions" begin
    subnetwork=generateSubnetwork(4,2,1,3)
    network=[ subnetwork]
    envi=CartPoleEnv(;T=Float32, seed=10)
    adaNetDqn = AdaNetDqn(0.99, 0.05,64,10000,
            2,
            4,
            1.0,
            10,
            network,
            1)
    x = rand(4)
    @test typeof(adaNetDqn.model1(x))==Array{Float32,1}
    @test length(adaNetDqn.model1(x))==2
    @test adaNetDqn.model1(x)==createModel(adaNetDqn.network1)(x)
    @test adaNetDqn.epsilon == 1
    updateEpsilon!(adaNetDqn)
    @test adaNetDqn.epsilon == 1*adaNetDqn.decay
    @test adaNetDqn.min == 0.05
    @test adaNetDqn.batchSize== 64
    @test adaNetDqn.memory == []
    @test adaNetDqn.memSize == 10000
    @test adaNetDqn.stateSize==4
    @test adaNetDqn.actionSize==2
    @test adaNetDqn.gamma == 1.0
    @test adaNetDqn.network1[1][1](x) == adaNetDqn.network2[1][1](x)
    @test adaNetDqn.cUpdate == 10
    subnetwork = generateSubnetwork(4,2,2,3)
    last_network=adaNetDqn.network1
    change!(adaNetDqn,subnetwork)
    @test adaNetDqn.network1 == [last_network...,subnetwork]
    @test typeof(adaNetDqn.model1(x))==Array{Float32,1}
    @test length(adaNetDqn.model1(x))==2
    @test adaNetDqn.model1(x)==createModel(adaNetDqn.network1)(x)
    @test adaNetDqn.network1[1][1](x) == adaNetDqn.network2[1][1](x)
    @test adaNetDqn.network1[2][1](x) == adaNetDqn.network2[2][1](x)
    #@test adaNetDqn.ps == Flux.params(subnetwork...)
end

@testset "copyFunction" begin
    subnetwork=generateSubnetwork(4,2,1,3)
    network=[ subnetwork]
    envi=CartPoleEnv(;T=Float32, seed=10)
    adaNetDqn = AdaNetDqn(0.99, 0.05,64,10000,
            2,
            4,
            1.0,
            10,
            network,
            1)
    x = rand(4)
    y=adaNetDqn.model2(x)
    subnetwork2=generateSubnetwork(4,2,1,3)
    network2=[ subnetwork2]
    adaNetDqn.network1=network2
    copy!(1, adaNetDqn)
    y2=adaNetDqn.model2(x)
    copy!(10, adaNetDqn)
    y3=adaNetDqn.model2(x)
    @test y == y2
    @test y != y3
    
end

@testset "actFunctions" begin
    subnetwork=generateSubnetwork(4,2,1,3)
    network=[ subnetwork]
    envi=CartPoleEnv(;T=Float32, seed=10)
    adaNetDqn = AdaNetDqn(0.99, 0.05,64,10000,
            2,
            4,
            1.0,
            10,
            network,
            1)
    obs=observe(envi)
    ini_state=deepcopy(get_state(obs))
    a=action(adaNetDqn,ini_state)
    nextState,reward,done=act(adaNetDqn,envi,a)
    obs=observe(envi)
    @test a !=0
    @test nextState == get_state(obs)
    @test reward == 1
    @test done == false
    @test ini_state != nextState
    adaNetDqn.epsilon = 0
    a = action(adaNetDqn,nextState)
    nextState2,_,__=act(adaNetDqn,envi,a)
    @test a == Flux.argmax(adaNetDqn.model1(nextState))
    @test nextState!=nextState2
    
end

[37m[1mTest Summary: | [22m[39m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
generator     | [32m   7  [39m[36m    7[39m
[37m[1mTest Summary: | [22m[39m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
weakLearner   | [32m   9  [39m[36m    9[39m
[37m[1mTest Summary:             | [22m[39m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
adaNetDqnObject&Functions | [32m  20  [39m[36m   20[39m
[37m[1mTest Summary: | [22m[39m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
copyFunction  | [32m   2  [39m[36m    2[39m
[37m[1mTest Summary: | [22m[39m[32m[1mPass  [22m[39m[36m[1mTotal[22m[39m
actFunctions  | [32m   7  [39m[36m    7[39m


Test.DefaultTestSet("actFunctions", Any[], 7, false)

In [15]:

@testset "usingModelsWithMultipleStreams" begin
    subnetwork1=generateSubnetwork(3,2,1,3)
    subnetwork2=generateSubnetwork(3,2,2,3)
    subnetwork3=generateSubnetwork(3,2,2,3)
    x=rand(3)
    usenetwork=createModel4Test([subnetwork1,subnetwork2,subnetwork3])
    rslt,Y=usenetwork(x)
    @test rslt == (Y[1][2]+Y[2][3]+Y[3][3])/3
    @test length(Y[1][1])==3
    @test Y[1][1]==subnetwork1[1](x)
    @test Y[1][2]==subnetwork1[2](subnetwork1[1](x))
    @test Y[2][1]==subnetwork2[1](x)
    x1=subnetwork1[1](x)+subnetwork2[1](x)
    @test Y[2][2]==subnetwork2[2](x1)
    @test Y[3][1]==subnetwork3[1](x)
    x2=x1+subnetwork3[1](x)
    @test Y[3][2]==subnetwork3[2](x2)
end

[37musingModelsWithMultipleStreams: [39m[91m[1mTest Failed[22m[39m at [39m[1mIn[15]:17[22m
  Expression: (Y[3])[2] == (subnetwork3[2])(x2)
   Evaluated: Float32[1.127554, 1.1497988, 0.21469581] == Float32[1.127554, 1.1497985, 0.21469581]
Stacktrace:
 [1] top-level scope at [1mIn[15]:17[22m
 [2] top-level scope at [1mD:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.3\Test\src\Test.jl:1107[22m
 [3] top-level scope at [1mIn[15]:2[22m
[37m[1mTest Summary:                  | [22m[39m[32m[1mPass  [22m[39m[91m[1mFail  [22m[39m[36m[1mTotal[22m[39m
usingModelsWithMultipleStreams | [32m   7  [39m[91m   1  [39m[36m    8[39m


TestSetException: Some tests did not pass: 7 passed, 1 failed, 0 errored, 0 broken.

In [14]:
#Not working
@testset "remember!" begin
    subnetwork=generateSubnetwork(4,2,1,3)
    network=[ subnetwork]
    envi=CartPoleEnv(;T=Float32, seed=20)
    adaNetDqn = AdaNetDqn(0.99, 0.05,64,10000,
            envi,
            2,
            1.0,
            10,
            network,
            1)
    obs=observe(envi)
    ini_state=deep_copy(get_state(obs))
    action(adaNetDqn)
    act(adaNetDqn,envi)
    obs=observe(envi)
    remember!(adaNetDqn)
    adaNetDqn.state=get_state(envi)
    @test length(adaNetDqn.memory)==1
    println(adaNetDqn.memory[end])
    action(adaNetDqn)
    act(adaNetDqn,envi)
    remember!(adaNetDqn)
    adaNetDqn.state=get_state(envi)
    @test length(adaNetDqn.memory)==2
    println(adaNetDqn.memory[end])
    
end

[37mremember!: [39m[91m[1mError During Test[22m[39m at [39m[1mIn[14]:2[22m
  Got exception outside of a @test
  UndefVarError: stateSize not defined
  Stacktrace:
   [1] AdaNetDqn(::Float64, ::Float64, ::Int64, ::Int64, ::CartPoleEnv{Float32,Random.MersenneTwister}, ::Int64, ::Float64, ::Int64, ::Array{Array{Dense{F,Array{Float32,2},Array{Float32,1}} where F,1},1}, ::Int64) at .\In[7]:37
   [2] top-level scope at In[14]:6
   [3] top-level scope at D:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.3\Test\src\Test.jl:1107
   [4] top-level scope at In[14]:3
   [5] eval at .\boot.jl:330 [inlined]
   [6] softscope_include_string(::Module, ::String, ::String) at C:\Users\mclau\.julia\packages\SoftGlobalScope\cSbw5\src\SoftGlobalScope.jl:218
   [7] execute_request(::ZMQ.Socket, ::IJulia.Msg) at C:\Users\mclau\.julia\packages\IJulia\yLI42\src\execute_request.jl:67
   [8] #invokelatest#1 at .\essentials.jl:709 [inlined]
   [9] invokelatest at .\essentials.jl:708 [inlined

TestSetException: Some tests did not pass: 0 passed, 0 failed, 1 errored, 0 broken.

In [3]:
function generateSubnetwork(Input::Int64,
                               Output::Int64,
                               lt::Int64,
                               B::Int64)
    if lt==1
        return [Dense(Input,B,L[rand(1:10)]),Dense(B,Output,σ)]
    else
        new_subnetwork=[Dense(Input,B,L[rand(1:10)])]
        for i in 1:lt-1
            new_subnetwork=[new_subnetwork...,Dense(B,B,L[rand(1:10)])]
        end
        return [new_subnetwork...,Dense(B,Output,σ)]
    end
end

generateSubnetwork (generic function with 1 method)

In [4]:
function createModel(network)
    function applyNetworks(x)
        N=length(network)
        Ys=[]
        Ys2=[]
        for i in 1:N
            current_subnetwork=network[i]
            current_lt=length(current_subnetwork)-1
            y=current_subnetwork[1](x)
            Y=[deepcopy(y)] 
            for j in 2:current_lt
                for k in 1:(i-1)
                    if length(Ys[k])>=j #>=j
                        y.=y.+Ys[k][j-1]
                    end
                end
                y=current_subnetwork[j](y)
                Y=[Y...,deepcopy(y)]
            end
            y=current_subnetwork[current_lt+1](y)
            Y=[Y...,deepcopy(y)]
            Ys=[Ys...,Y]
        end
        Ys2=Ys
        Ys=[Ys[i][end] for i in 1:length(Ys)]
        return sum(Ys)/N
    end
    return applyNetworks
end

createModel (generic function with 1 method)

In [5]:
function createModel4Test(network)
    function applyNetworks(x)
        N=length(network)
        Ys=[]
        Ys2=[]
        for i in 1:N
            current_subnetwork=network[i]
            current_lt=length(current_subnetwork)-1
            y=current_subnetwork[1](x)
            Y=[deepcopy(y)] 
            for j in 2:current_lt
                for k in 1:(i-1)
                    if length(Ys[k])>=j #>=j
                        y.=y.+Ys[k][j-1]
                    end
                end
                y=current_subnetwork[j](y)
                Y=[Y...,deepcopy(y)]
            end
            y=current_subnetwork[current_lt+1](y)
            Y=[Y...,deepcopy(y)]
            Ys=[Ys...,Y]
        end
        Ys2=Ys
        Ys=[Ys[i][end] for i in 1:length(Ys)]
        return sum(Ys)/N,Ys2
    end
    return applyNetworks
end

createModel4Test (generic function with 1 method)

In [6]:
function weakTest(user,seeds::Int64)
    s=0
    for i in 1:seeds
        env=CartPoleEnv(;T=Float32, seed=i)
        obs=observe(env)
        current_state=get_state(obs)
        current_done=false
        while !current_done
            current_action=argmax(user(current_state))
            env(current_action)
            obs=observe(env)
            current_next_state,current_reward,current_done=get_state(obs),get_reward(obs),get_terminal(obs)
            s+=current_reward
            current_state=current_next_state
        end  
    end
    return s/seeds
end

function weakLearner(Input::Int64,
                     Output::Int64,
                     lt::Int64,
                     B::Int64,
                     Network,
                     seeds::Int64)
    best_value=-Inf
    selected_subnetwork=[]
    for i in 1:10
        applicant_subnetwork=generateSubnetwork(Input,Output,lt,B)
        current_user=createModel([Network...,applicant_subnetwork])
        current_value=weakTest(current_user,seeds)
        if current_value>best_value
            best_value=current_value
            selected_subnetwork=applicant_subnetwork
        end
    end
    return selected_subnetwork
    
end

weakLearner (generic function with 1 method)

In [18]:
mutable struct AdaNetDqn
    train::Bool
    change::Bool
    epsilon::Float64
    decay::Float64
    min::Float64
    batchSize::Int64
    memory
    memSize::Int64
    stateSize::Int64
    actionSize::Int64
    gamma::Float64
    cUpdate::Int64
    network1
    network2
    loss
    model1
    model2
    ps
    opt
    lt
    
    # attention network1 = [network]
    function AdaNetDqn(ϵ_DECAY::Float64, ϵ_MIN::Float64,BATCH_SIZE::Int64,MEM_SIZE::Int64,
            actionSize,
            stateSize,
            γ::Float64,
            C_UPDATE::Int64,
            network1,
            lt::Int64)
        network2=deepcopy(network1)
        model1=createModel(network1)
        model2=createModel(network2)
        loss(x,y)=Flux.mse(model1(x),y)
        #loss(x,y)=Flux.mse(x,y)
        ps=Flux.params(network1[end]...)
        new(true, true, 1.0f0, ϵ_DECAY, ϵ_MIN, BATCH_SIZE, [], MEM_SIZE, stateSize, actionSize,
            
            γ, C_UPDATE, network1, network2, loss, model1, model2, ps,  ADAM(0.0001, (0.9, 0.999)),lt)
    end
    
end

In [8]:
function updateEpsilon!(adaNetDqn::AdaNetDqn)
    x = adaNetDqn.epsilon*adaNetDqn.decay
    if x < adaNetDqn.min && adaNetDqn.change
        adaNetDqn.epsilon=adaNetDqn.min
        adaNetDqn.change=false 
    elseif adaNetDqn.change
        adaNetDqn.epsilon = x 
    end
end

function change!(adaNetDqn::AdaNetDqn,network)
    adaNetDqn.network1=[adaNetDqn.network1...,network]
    adaNetDqn.network2=deepcopy(adaNetDqn.network1)
    adaNetDqn.model1=createModel(adaNetDqn.network1)
    adaNetDqn.model2=createModel(adaNetDqn.network2)
    adaNetDqn.ps=Flux.params(adaNetDqn.network1[end]...)
    adaNetDqn.lt=length(network)-1
    current_loss(x,y)=Flux.mse(adaNetDqn.model1(x),y)
    adaNetDqn.loss=current_loss
end

change! (generic function with 1 method)

In [9]:
function copy!(iter::Int64, adaNetDqn::AdaNetDqn)
    if iter%adaNetDqn.cUpdate==0
        adaNetDqn.network2=deepcopy(adaNetDqn.network1)
        adaNetDqn.model2=createModel(adaNetDqn.network2)
    end
end

copy! (generic function with 1 method)

In [23]:
function action(adaNetDqn::AdaNetDqn,state)
    if rand() <= adaNetDqn.epsilon && adaNetDqn.train  
        a = rand(1:adaNetDqn.actionSize)
    else
        a=Flux.argmax(adaNetDqn.model1(state))
    end
    return a
end

function act(adaNetDqn::AdaNetDqn,env,action)
    env(action)
    obs=observe(env)
    deepcopy(get_state(obs)),get_reward(obs),get_terminal(obs)
end
    

act (generic function with 1 method)

In [11]:
function remember!(adaNetDqn::AdaNetDqn)
  if length(adaNetDqn.memory) == adaNetDqn.memSize
    deleteat!(adaNetDqn.memory, 1)
  end
  push!(adaNetDqn.memory, (state, action, reward, nextState, done))
end

remember! (generic function with 1 method)

In [None]:
subnetwork=generateSubnetwork(4,2,1,3)
    network=[ subnetwork]
    envi=CartPoleEnv(;T=Float32, seed=10)
    adaNetDqn = AdaNetDqn(0.99, 0.05,64,10000,
            envi,
            2,
            1.0,
            10,
            network,
            1)

In [None]:
envi=CartPoleEnv(;T=Float32, seed=20)
obs=observe(envi)

In [None]:
envi(1)

In [None]:
get_state(obs)

In [None]:
envi(1)

In [None]:
get_state(obs)