<img src="../imgs/logo.png" width="20%" align="right" style="margin:0px 20px">


# Evolutionary Computation

## 5.3 Deep Neuroevolution

<a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img alt="Creative Commons License" align="left" src="https://i.creativecommons.org/l/by-sa/4.0/80x15.png" /></a>&nbsp;| Dennis G. Wilson | <a href="https://d9w.github.io/evolution/">https://d9w.github.io/evolution/</a>

# Deep Neuroevolution

Artificial neural networks are commonly used today in many applications, from phone apps to automatic piloting systems to search engines. These machine learning models contain many parameters and are usually optimized with stochastic gradient descent. However, evolutionary strategies can also be a great tool for optimizing neural network parameters, especially when there isn't a clear direction the training of the network should take. This is the case for reinforcement learning, so we'll look at a classic RL task in this section.

Because of the success of deep learning, where neural network architectures are "deep" by having many layers, this field is sometimes called deep neuroevolution. However, remember from tutorial 4 that researchers have been evolving neural networks long before the advent of deep learning.

In today's notebook, I'll be using some Python RL environments and using PyCall to interact with them in Julia.

In [1]:
using PyCall
using Conda
using Flux
include("cmaes.jl");
using Plots

We can write a construction method which just uses zeros as all weights and biases. We'll fill these with the genetic information later.

In [2]:
struct convol
    w ::AbstractArray{Float64}
    b ::AbstractArray{Float64}
end

struct my_CNN
    c1 :: convol
    c2 :: convol
    c3 :: convol
end

function my_CNN(f1::Int,c1_in::Int,c1_out::Int,f2::Int,c2_in::Int,c2_out::Int,c3_in::Int,c3_out::Int)
    c1 = convol(zeros(f1,f1,c1_in,c1_out),zeros(c1_out))
    c2 = convol(zeros(f2,f2,c2_in,c2_out),zeros(c2_out))
    c3 = convol(zeros(c3_out,c3_in),zeros(c3_out))
    my_CNN(c1,c2,c3)
end

my_CNN

In [3]:
function compute_cnn(ann,inputs)
    y = Flux.Conv(ann.c1.w,ann.c1.b,σ;stride=3)(inputs)
    y = Flux.MaxPool((2,2),stride=2)(y)
    y = Flux.Conv(ann.c2.w,ann.c2.b,σ;stride=3)(y)
    y = Flux.MeanPool((2,2),stride=4)(y)
    y = flatten(y)
    y = Flux.Dense(ann.c3.w,ann.c3.b,σ)(y)
    y = (y .> 0.5) .* 1 
end

compute_cnn (generic function with 1 method)

In [4]:
retro = pyimport_conda("retro","gym")

PyObject <module 'retro' from 'C:\\Users\\Kinza\\.julia\\conda\\3\\lib\\site-packages\\retro\\__init__.py'>

In [10]:
function play_env(ann; render=false;train=true)
    env = retro.make("SonicTheHedgehog-Genesis","GreenHillZone.Act1")
    ob = env.reset()
    total_reward = 0.0
    done = false
    #inx, iny, inc = env.observation_space.shape
    #inx = floor(Int,inx/8)
    #iny = floor(Int,iny/8)
    
    max_fitness = 0
    fitness = 0
    counter = 0
    xpos = 0
    xpos_max = 0
    frame = 0
    while ~done
        if render
            frame+=1
            env.render()
        end
        
        ob = Flux.unsqueeze(ob,4)
        action = compute_cnn(ann,ob)
        #println("action = ",action)
        
        ob, reward, done, info = env.step(action)
    
        fitness += reward

        xpos = info["x"]
        xpos_end = info["screen_x_end"]


        if xpos > xpos_max
            fitness += 1
            xpos_max = xpos
        end

        if xpos == xpos_end && xpos > 500
            fitness += 100000
            done = True
        end
        
        if train == true 
            if counter>700 && fitness<=2000
                done = true
                println("max steps reached")
            end

            if counter>1400 && fitness>2000
                done = true
                println("Good fitness but max steps reached")
            end
        end

    end
    
    env.close()
    fitness
end

ErrorException: syntax: more than one semicolon in argument list

In [6]:
function my_CNN(genes::Array{Float64})
    ann = my_CNN(12,3,4,8,4,1,12,12)
    layers = [ann.c1.w, ann.c1.b, ann.c2.w, ann.c2.b,ann.c3.w,ann.c3.b]
    L = 1
    j = 1
    for i in eachindex(genes)
        if j > length(layers[L])
            L += 1
            j = 1
        end
        layers[L][j] = genes[i]
        j += 1
    end
    ann
end

function objective(genes::Array{Float64})
    ann = my_CNN(genes)
    -play_env(ann;render=false)
end

objective (generic function with 1 method)

In [7]:
N = 12*12*3*4+4+8*8*4*1+1+12*12

2133

In [8]:
best = nothing
best_fit = -Inf
c = CMAES(N=N, µ=20, λ=20, τ=sqrt(N), τ_c=N^2, τ_σ=sqrt(N))
i=0
fitness = []
while best_fit <2000 
    i+=1
    start = time()
    step!(c, objective)
    bestind = argmin(c.F_λ)
    maxfit = -c.F_λ[bestind]
    print("generation = ",i, ", fitness = ", maxfit)
    fitness = [fitness; maxfit]
    if maxfit > best_fit
        best = copy(c.offspring[bestind])
        best_fit = maxfit
    end
    println(", elapsed time = ",time()-start)
end

i=1:length(fitness)
plot(i,fitness)

generation = 1, fitness = 193.0, elapsed time = 1088.994999885559
generation = 2, fitness = 438.0, elapsed time = 1195.7709999084473
generation = 3, fitness = 501.0, elapsed time = 804.7369999885559
generation = 4, fitness = 254.0, elapsed time = 1622.1989998817444
generation = 5, fitness = 551.0, elapsed time = 969.0030000209808
generation = 6, fitness = 755.0, elapsed time = 701.5469999313354
generation = 7, fitness = 177.0, elapsed time = 469.7220001220703
generation = 8, fitness = 467.0, elapsed time = 609.2249999046326
generation = 9, fitness = 277.0, elapsed time = 503.93499994277954
generation = 10, fitness = 652.0, elapsed time = 688.7960000038147
generation = 11, fitness = 233.0, elapsed time = 500.606999874115
generation = 12, fitness = 609.0, elapsed time = 545.6499998569489
generation = 13, fitness = 877.0, elapsed time = 563.4580001831055
generation = 14, fitness = 389.0, elapsed time = 476.19599986076355
generation = 15, fitness = 213.0, elapsed time = 539.154000043869
ge

InterruptException: InterruptException:

In [13]:
ann = my_CNN(best)
using BSON: @save
@save "mymodel.bson" ann

In [None]:
play_env(ann; render=true,train=false)