<img src="../imgs/logo.png" width="20%" align="right" style="margin:0px 20px">


# Evolutionary Computation

## 5.3 Deep Neuroevolution

<a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img alt="Creative Commons License" align="left" src="https://i.creativecommons.org/l/by-sa/4.0/80x15.png" /></a>&nbsp;| Dennis G. Wilson | <a href="https://d9w.github.io/evolution/">https://d9w.github.io/evolution/</a>

In [1]:
using PyCall
using Conda

In [3]:
# Install the Python `gym` package through Conda.jl (runs `conda install` in Conda.jl's
# root environment) so that it can be imported later with PyCall.
Conda.add("gym")

┌ Info: Running `conda install -y gym` in root environment
└ @ Conda C:\Users\alexb\.julia\packages\Conda\3rPhK\src\Conda.jl:113


Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [11]:
import Random
# Seed Julia's default random number generator so that random draws made later in the
# notebook (e.g. `randn`) are repeatable from a fresh session.
Random.seed!(1234);

In [12]:
# Load the companion file cmaes.jl. Presumably it defines the `CMAES` type and `step!`
# used by the optimisation cells below — confirm against that file.
include("cmaes.jl");

In [13]:
"""
    FCLayer(w, b)

Parameters of one fully connected layer.

# Fields
- `w::Matrix{Float64}`: weight matrix with one row per unit of this layer and one column
  per input to the layer.
- `b::Vector{Float64}`: bias vector with one entry per unit of this layer.
"""
struct FCLayer
    # Concrete field types (instead of the dimension-less `Array{Float64}`) let Julia
    # infer the type of `layer.w` / `layer.b` and specialise the code that reads them.
    w::Matrix{Float64}
    b::Vector{Float64}
end

"""
    SimpleANN(l1, l2, out)

Small feed-forward network made of three `FCLayer`s. `compute` applies them in the
order `l1`, `l2`, `out`.
"""
struct SimpleANN
    # First layer: receives the network inputs.
    l1::FCLayer
    # Second layer: receives the output of `l1`.
    l2::FCLayer
    # Output layer: receives the output of `l2`.
    out::FCLayer
end

In [14]:
"""
    SimpleANN(input::Int, N1::Int, N2::Int, output::Int)

Build a `SimpleANN` whose weights and biases are all zero.

# Arguments
- `input`: number of network inputs.
- `N1`: number of units in the first layer.
- `N2`: number of units in the second layer.
- `output`: number of network outputs.
"""
function SimpleANN(input::Int, N1::Int, N2::Int, output::Int)
    # A zero layer has a (units × inputs) weight matrix and one bias per unit.
    zero_layer(fan_out, fan_in) = FCLayer(zeros(fan_out, fan_in), zeros(fan_out))
    return SimpleANN(
        zero_layer(N1, input),
        zero_layer(N2, N1),
        zero_layer(output, N2),
    )
end

SimpleANN

In [15]:
# Example network: 5 inputs, two layers of 64 units each, and 4 outputs (all zeros).
ann = SimpleANN(5, 64, 64, 4);

In [16]:
"""
    compute(inputs, ann::SimpleANN)

Propagate `inputs` through the three layers of `ann` and return the values produced by
the output layer.

Each layer computes `w * x .+ b`. No activation function is applied between layers, so
the result is an affine function of `inputs`.

# Arguments
- `inputs::AbstractArray{<:Real}`: input values; the first dimension must match the
  number of columns of `ann.l1.w`.
- `ann::SimpleANN`: network whose weights and biases are used.

# Returns
The result of the output layer, with one entry per row of `ann.out.w`.
"""
function compute(inputs::AbstractArray{<:Real}, ann::SimpleANN)
    # Any real-valued array is accepted: observations converted from NumPy are not
    # guaranteed to be Float64 (newer gym releases are reported to return Float32).
    hidden1 = ann.l1.w * inputs .+ ann.l1.b
    hidden2 = ann.l2.w * hidden1 .+ ann.l2.b
    return ann.out.w * hidden2 .+ ann.out.b
end

compute (generic function with 1 method)

In [17]:
# Sanity check: the all-zero network maps a zero input of length 5 to four zeros.
compute(zeros(5), ann)

4-element Array{Float64,1}:
 0.0
 0.0
 0.0
 0.0

In [18]:
# Import the Python `gym` module through PyCall; `gym` is used as a global by `play_env`.
gym = pyimport("gym")

PyObject <module 'gym' from 'C:\\Users\\alexb\\.julia\\conda\\3\\lib\\site-packages\\gym\\__init__.py'>

In [19]:
# NOTE(review): this global `env` is not used by `play_env`, which creates (and closes)
# its own environment on every call.
env = gym.make("CartPole-v1")
# Network input and output sizes used for CartPole: `play_env` feeds the observation to
# the network and picks the action with `argmax` over the outputs.
# Presumably 4 is the observation length and 2 the number of actions — confirm against gym.
n_in = 4
n_out = 2;

In [33]:
"""
    play_env(ann::SimpleANN; render=false, env_name="CartPole-v1", seed=1)

Play one episode of a gym environment using `ann` as the policy and return the total
reward collected.

At every step the observation is passed to `compute`, and the index of the largest
output, minus one, is sent to the environment as the action.

# Keywords
- `render`: when `true`, call `env.render()` after every step.
- `env_name`: identifier passed to `gym.make`.
- `seed`: value passed to `env.seed` before the episode starts, so repeated calls with
  the same network replay the same episode.

# Returns
The sum of the rewards returned by `env.step` until the environment reports `done`.
"""
function play_env(ann::SimpleANN; render=false, env_name="CartPole-v1", seed=1)
    env = gym.make(env_name)
    total_reward = 0.0
    try
        env.seed(seed)
        obs = env.reset()
        done = false

        while !done
            # Julia's argmax is 1-based; the environment is assumed to take 0-based
            # discrete actions, hence the `- 1`.
            action = argmax(compute(obs, ann)) - 1
            obs, reward, done, _ = env.step(action)
            if render
                env.render()
            end
            total_reward += reward
        end
    finally
        # Release the environment (and any render window) even if a step or the
        # network evaluation throws.
        env.close()
    end
    # Drop the reference and collect so the Python-side object can be freed promptly.
    env = nothing
    Base.GC.gc()
    return total_reward
end

play_env (generic function with 1 method)

In [34]:
# Build an all-zero network sized for the environment (two layers of 5 units) and play
# one rendered episode with it.
ann = SimpleANN(n_in, 5, 5, n_out)
play_env(ann; render=true)

10.0

In [35]:
# Play the same episode again, this time without rendering.
play_env(ann)

10.0

In [36]:
"""
    genes_to_ann(genes::Array{Float64})

Decode a flat genome into a `SimpleANN` with `n_in` inputs, two layers of 5 units and
`n_out` outputs (`n_in` and `n_out` are read from global scope).

Genes are written, in order, into `l1.w`, `l1.b`, `l2.w`, `l2.b`, `out.w`, `out.b`, each
array being filled in linear-index order. If `genes` has fewer entries than the network
has parameters, the remaining parameters stay at zero; if it has more, indexing past the
last parameter array throws a `BoundsError`.
"""
function genes_to_ann(genes::Array{Float64})
    net = SimpleANN(n_in, 5, 5, n_out)
    params = [net.l1.w, net.l1.b, net.l2.w, net.l2.b, net.out.w, net.out.b]
    which = 1   # parameter array currently being filled
    slot = 1    # next linear index to write inside that array
    for gene in genes
        # Current array is full: move on to the start of the next one.
        if slot > length(params[which])
            which += 1
            slot = 1
        end
        params[which][slot] = gene
        slot += 1
    end
    return net
end

genes_to_ann (generic function with 1 method)

In [37]:
"""
    objective(genes::Array{Float64})

Fitness of a genome: decode `genes` with `genes_to_ann`, play one episode with
`play_env`, and return the negated total reward (so lower values mean longer episodes).
"""
objective(genes::Array{Float64}) = -play_env(genes_to_ann(genes))

objective (generic function with 1 method)

In [38]:
# Genome length = number of network parameters, counted per layer as
# (inputs + 1 bias) * units, for the 5-unit layers built by `genes_to_ann`.
N = (n_in + 1) * 5 + (5 + 1) * 5 + (5 + 1) * n_out

67

In [39]:
# Decode a random genome (N standard-normal draws) into a network.
ann = genes_to_ann(randn(N))

SimpleANN(FCLayer([0.9423036181483883 -0.20124596387351995 0.6484645505242579 2.7312418576603834; 3.189837938527441 0.2247582594669369 0.1364676368982851 1.042894630537453; … ; 0.23713169670340895 -0.09908957118315262 0.4672357363138778 0.009227635350028462; -0.4109229823959607 0.5271522480298193 -0.200206278719715 2.0663425364941896], [1.030239912031381, -0.46571338968363557, 0.4017306540219041, 0.3102364882426762, 0.5130523097648733]), FCLayer([-0.4958875410879679 1.0210926805776652 … 0.5788143302794121 1.928738595278616; -0.10197580837567147 -0.3015832949828389 … 0.49683986978541916 0.2897348025021201; … ; -1.4678454673292265 -0.6067707601690482 … 0.7929841404063193 1.7457637334001614; -1.2712550654192276 1.7682876404048449 … -0.23589522394696727 -1.1910101319980537], [1.2317101689352745, 0.48795796469715086, -1.1605494275014194, -0.3687328404316521, 0.03225798060873117]), FCLayer([-1.1941356437844146 1.205245295308865 … 0.6028913873078863 -1.0106957165615909; 0.17895598711471886 -0

In [40]:
# Evaluate the random network on one episode.
play_env(ann)

13.0

In [41]:
# Evaluate it again: `play_env` seeds the environment with a fixed value on every call,
# and the reward shown is the same as in the previous cell.
play_env(ann)

13.0

In [42]:
# Short trial run: 5 CMA-ES generations, printing the best reward of each generation.
# `c.F_λ` holds objective values (negated rewards), so the best reward of a generation
# is the negation of its smallest entry.
c = CMAES(N=N, µ=10, λ=30, τ=sqrt(N), τ_c=N^2, τ_σ=sqrt(N))
for generation in 1:5
    step!(c, objective)
    top_reward = -minimum(c.F_λ)
    println(generation, " ", top_reward)
end

1 33.0
2 82.0
3 81.0
4 95.0
5 58.0


In [43]:
# Full run: up to 20 CMA-ES generations, remembering the best genome seen so far.
# `best` holds a copy of that genome and `best_fit` its reward.
best = nothing
best_fit = -Inf
c = CMAES(N=N, µ=10, λ=30, τ=sqrt(N), τ_c=N^2, τ_σ=sqrt(N))
for i in 1:20
    # Without this declaration the assignments below only reach the top-level variables
    # under the interactive (REPL / notebook) soft-scope rule; run as a plain script they
    # would create loop-local variables and `maxfit > best_fit` would fail.
    global best, best_fit
    step!(c, objective)
    # Objective values are negated rewards, so the smallest one is the best individual.
    bestind = argmin(c.F_λ)
    maxfit = -c.F_λ[bestind]
    println(i, " ", maxfit)
    if maxfit > best_fit
        # Copy so that later generations cannot overwrite the stored genome.
        best = copy(c.offspring[bestind])
        best_fit = maxfit
    end
    # Stop early once the target reward is reached (presumably the maximum episode
    # reward of CartPole-v1 — confirm). `>=` avoids relying on exact float equality.
    if best_fit >= 500
        break
    end
end

1 379.0
2 206.0
3 214.0
4 401.0
5 132.0
6 239.0
7 208.0
8 491.0
9 500.0


In [44]:
# Rebuild the network from the best genome found and watch it play one rendered episode.
ann = genes_to_ann(best)
play_env(ann; render=true)

500.0