# This code is just trying to get the julia wrapper at https://github.com/JuliaML/OpenAIGym.jl to work . I had to make few changes and also the example on that repo doesn't work - the example at the bottom of this file works as expected.

In [5]:
 __precompile__()

module OpenAIGym

using PyCall
using Reexport
@reexport using Reinforce
import Reinforce:
    MouseAction, MouseActionSet,
    KeyboardAction, KeyboardActionSet

abstract type AbstractGymEnv<:AbstractEnvironment end
export
    gym,
    GymEnv

const _py_envs = Dict{String,Any}()

# --------------------------------------------------------------

abstract type AbstractGymEnv<:AbstractEnvironment end


"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework"
type GymEnv <: AbstractGymEnv
    name::String
    pyenv  # the python "env" object
    state
    reward::Float64
    actions::AbstractSet
    done::Bool
    info::Dict
    GymEnv(name,pyenv) = new(name,pyenv)
end
GymEnv(name) = gym(name)

function Reinforce.reset!(env::GymEnv)
    env.state = env.pyenv[:reset]()
    env.reward = 0.0
    env.actions = actions(env, nothing)
    env.done = false
end

"A simple wrapper around the OpenAI gym environments to add to the Reinforce framework"
type UniverseEnv <: AbstractGymEnv
    name::String
    pyenv  # the python "env" object
    state
    reward
    actions::AbstractSet
    done
    info::Dict
    UniverseEnv(name,pyenv) = new(name,pyenv)
end
UniverseEnv(name) = gym(name)

function Reinforce.reset!(env::UniverseEnv)
    env.state = env.pyenv[:reset]()
    env.reward = [0.0]
    env.actions = actions(env, nothing)
    env.done = [false]
end

function gym(name::AbstractString)
    env = if name in ("Soccer-v0", "SoccerEmptyGoal-v0")
        @pyimport gym_soccer
        get!(_py_envs, name) do
            GymEnv(name, pygym[:make](name))
        end
    elseif split(name, ".")[1] in ("flashgames", "wob")
        @pyimport universe
        @pyimport universe.wrappers as wrappers
        if !isdefined(OpenAIGym, :vnc_event)
            global const vnc_event = PyCall.pywrap(PyCall.pyimport("universe.spaces.vnc_event"))
        end
        get!(_py_envs, name) do
            pyenv = wrappers.SafeActionSpace(pygym[:make](name))
            pyenv[:configure](remotes=1)  # automatically creates a local docker container
            # pyenv[:configure](remotes="vnc://localhost:5900+15900")
            o = UniverseEnv(name, pyenv)
            # finalizer(o,  o.pyenv[:close]())
            sleep(2)
            o
        end
    else
        GymEnv(name, pygym[:make](name))
    end
    reset!(env)
    env
end

function actionset(A::PyObject)
    if haskey(A, :n)
        # choose from n actions
        DiscreteSet(0:A[:n]-1)
    elseif haskey(A, :spaces)
        # a tuple of action sets
        sets = [actionset(a) for a in A[:spaces]]
        TupleSet(sets...)
    elseif haskey(A, :high)
        # continuous interval
        IntervalSet{Vector{Float64}}(A[:low], A[:high])
        # if A[:shape] == (1,)  # for now we only support 1-length vectors
        #     IntervalSet{Float64}(A[:low][1], A[:high][1])
        # else
        #     # @show A[:shape]
        #     lo,hi = A[:low], A[:high]
        #     # error("Unsupported shape for IntervalSet: $(A[:shape])")
        #     [IntervalSet{Float64}(lo[i], hi[i]) for i=1:length(lo)]
        # end
    elseif haskey(A, :buttonmasks)
        # assumed VNC actions... keys to press, buttons to mask, and screen position
        # keyboard = DiscreteSet(A[:keys])
        keyboard = KeyboardActionSet(A[:keys])
        buttons = DiscreteSet(Int[bm for bm in A[:buttonmasks]])
        width,height = A[:screen_shape]
        mouse = MouseActionSet(width, height, buttons)
        TupleSet(keyboard, mouse)
    elseif haskey(A, :actions)
        # Hardcoded
        TupleSet(DiscreteSet(A[:actions]))
    else
        @show A
        @show keys(A)
        error("Unknown actionset type: $A")
    end
end


function Reinforce.actions(env::AbstractGymEnv, s′)
    actionset(env.pyenv[:action_space])
end

pyaction(a::Vector) = Any[pyaction(ai) for ai=a]
pyaction(a::KeyboardAction) = Any[a.key]
pyaction(a::MouseAction) = Any[vnc_event.PointerEvent(a.x, a.y, a.button)]
pyaction(a) = a

function Reinforce.step!(env::GymEnv, s, a)
    # info("Going to take action: $a")
    pyact = pyaction(a)
    s′, r, env.done, env.info = env.pyenv[:step](pyact)
    env.reward, env.state = r, s′
end

function Reinforce.step!(env::UniverseEnv, s, a)
    info("Going to take action: $a")
    pyact = Any[pyaction(a)]
    s′, r, env.done, env.info = env.pyenv[:step](pyact)
    env.reward, env.state = r, s′
end

Reinforce.finished(env::GymEnv, s′) = env.done
Reinforce.finished(env::UniverseEnv, s′) = all(env.done)

function main()
    @static if is_linux()
        # due to a ssl library bug, I have to first load the ssl lib here
        condadir = Pkg.dir("Conda","deps","usr","lib")
        Libdl.dlopen(joinpath(condadir, "libssl.so"))
        Libdl.dlopen(joinpath(condadir, "python2.7", "lib-dynload", "_ssl.so"))
    end

    global const pygym = pyimport("gym")
end

main()

end # module





OpenAIGym

In [9]:
using OpenAIGym
ENV["PYTHON"] = "/Users/manvithaponnapati/gymenvaffinity/bin/python"

env = GymEnv("CartPole-v0")
for i=1:30
    R, T = Episode(env, RandomPolicy())
    env.pyenv[:render]()
    info("Episode $i finished after $T steps. Total reward: $R")
end
#Use this close the window if you are on Julia notebooks
env.pyenv[:render](close=true)


[1m[36mINFO: [39m[22m[36mEpisode 1 finished after ([0.0200146, 0.147416, -0.0103349, -0.25302], 1, 1.0, [0.022963, 0.342684, -0.0153953, -0.548945]) steps. Total reward: ([0.020972, -0.0478652, -0.0111984, 0.0431747], 1, 1.0, [0.0200146, 0.147416, -0.0103349, -0.25302])
[39m[1m[36mINFO: [39m[22m[36mEpisode 2 finished after ([0.0386194, 0.170186, -0.0248196, -0.288056], 0, 1.0, [0.0420232, -0.0245738, -0.0305807, -0.00330299]) steps. Total reward: ([0.0391252, -0.0252867, -0.0250682, 0.0124298], 1, 1.0, [0.0386194, 0.170186, -0.0248196, -0.288056])
[39m[1m[36mINFO: [39m[22m[36mEpisode 3 finished after ([0.0329493, 0.161036, -0.0411905, -0.331779], 1, 1.0, [0.03617, 0.356719, -0.0478261, -0.637162]) steps. Total reward: ([0.0336422, -0.0346448, -0.0406595, -0.0265502], 1, 1.0, [0.0329493, 0.161036, -0.0411905, -0.331779])
[39m[1m[36mINFO: [39m[22m[36mEpisode 4 finished after ([-0.0468723, 0.175824, -0.0155989, -0.321003], 0, 1.0, [-0.0433558, -0.0190726, -0.0220189