In [1]:
using Minesweeper
using Parameters: @with_kw

In [2]:
using Flux 

@with_kw mutable struct HyperParams
    episodes::Integer = 10000
    batchsize::Integer = 64 
    replaybuffercapacity::Integer = 10000
    updatefreq::Integer = 1000
    ϵ::Real = 0.95 
    ϵ_min::Real = 0.001
    ϵ_decay::Real = 0.99975
    γ::Real = 0.1
    α::Real = 0.01
    α_min::Real = 0.001
    α_decay::Real = 0.99975
    rewards::Dict{String, Real} = Dict("Loss" => -1.0, "Guess" => -0.3, "Progress" => 0.3, "Win" => 1.0)
end

@with_kw struct GameParams 
    board_dim::Tuple{Integer, Integer} = (16,16)
    mines::Integer = 40
end

struct ModelParams 
    board_dim::Tuple{Integer, Integer}
    droprate::Real     
end

function q_net(mp::ModelParams)

    total_dim = foldl(*, mp.board_dim)

    layers = [
        Conv((3,3), 1 => 32, pad = (1,1), relu),
        Conv((3,3), 32 => 64, pad = (1,1), relu),
        Conv((3,3), 64 => 64, pad = (1,1), relu),
        Flux.flatten,
        Dense((total_dim * 64), 512, relu),
        Dense(512, total_dim)
    ]

    return Chain(layers...)

end

q_net (generic function with 1 method)

In [3]:
hp = HyperParams()
gp = GameParams()

GameParams
  board_dim: Tuple{Int64, Int64}
  mines: Int64 40


In [4]:
mutable struct Experience
    state::AbstractArray
    action::CartesianIndex{2}
    reward::Float64
    next_state::AbstractArray
    done::Bool
end

mutable struct ReplayBuffer
    buffer::Vector{Experience}
    capacity::Int
    ReplayBuffer(capacity) = new(Experience[], capacity)
end

function push!(rb::ReplayBuffer, exp::Experience)
    if length(rb.buffer) < rb.capacity
        push!(rb.buffer, exp)
    else 
        rb.buffer[1:end-1] .= rb.buffer[2:end]
        rb.buffer[end] = exp
    end
end

function sample(rb::ReplayBuffer, batch_size::Int)
    idxs = rand(1:length(rb.buffer), batch_size)
    batch = rb.buffer[idxs]

    return (state = hcat([exp.state for exp ∈ batch]...), 
            action = [exp.action for exp ∈ batch], 
            reward = [exp.reward for exp ∈ batch], 
            next_state = hcat([exp.next_state for exp ∈ batch]...), 
            done = [exp.done for exp ∈ batch])
end

sample (generic function with 1 method)

In [5]:
mp = ModelParams((12,12), 0.1)
qonline = q_net(mp)

Chain(
  Conv((3, 3), 1 => 32, relu, pad=1),   [90m# 320 parameters[39m
  Conv((3, 3), 32 => 64, relu, pad=1),  [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu, pad=1),  [90m# 36_928 parameters[39m
  Flux.flatten,
  Dense(9216 => 512, relu),             [90m# 4_719_104 parameters[39m
  Dense(512 => 144),                    [90m# 73_872 parameters[39m
) [90m                  # Total: 10 arrays, [39m4_848_720 parameters, 18.498 MiB.

In [6]:
opt_state = Flux.setup(ADAM(0.01), qonline)

(layers = ((σ = (), weight = [32mLeaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), [39m([0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; … ;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0], [0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; … ;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0;;;; 0.0 0.0 0.0; 0.0 0.0 0.0; 0.0 0.0 0.0], (0.9, 0.999))[32m)[39m, bias = [32mLeaf(Adam{Float64}(0.01, (0.9, 0.999), 1.0e-8), [39m(Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], Float32[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], (0.9, 0.999))[32m)[39m, stride = ((), ()), pad = ((), ()), dilation = ((), ())

In [7]:
Flux.adjust!(opt_state, 0.001)

In [9]:
minegame=Game(dims=(12,12),n_mines=12)



 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 
 ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■  ■ 



In [10]:
select_cell!(minegame, 6,6)
select_cell!(minegame, 3,6)
select_cell!(minegame, 5,6)

In [11]:
game_over(minegame)

false

In [12]:
revealed = [cell.revealed for cell ∈ minegame.cells]

12×12 Matrix{Bool}:
 0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  0  0  1  0  0  0  0  0  0
 0  0  0  0  0  0  0  0  0  0  0  0
 0  0  0  1  1  1  1  1  0  0  0  0
 0  0  0  1  1  1  1  1  1  1  1  1
 0  0  0  1  1  1  1  1  1  1  1  1
 0  0  0  1  1  1  1  1  1  1  1  1
 0  0  0  0  0  1  1  1  1  1  1  1
 0  0  0  0  0  1  1  1  1  1  1  1
 0  0  0  0  0  1  1  1  1  1  1  1
 0  0  0  0  0  0  1  1  1  1  1  1

In [13]:
argmax(revealed)

CartesianIndex(5, 4)

In [17]:
mine_counts = [cell.mine_count for cell ∈ minegame.cells]

12×12 Matrix{Int64}:
 0  1  0  0  0  0  0  0  0  0  1  1
 1  1  0  0  0  0  0  0  0  0  1  0
 0  0  0  0  0  0  0  0  0  0  1  1
 0  0  0  0  0  0  0  0  1  1  1  0
 0  0  0  0  0  0  0  0  1  0  1  0
 1  2  2  1  1  1  1  0  1  1  1  0
 1  1  1  1  1  0  1  0  1  1  1  0
 1  2  2  1  1  2  2  1  1  0  1  0
 0  0  0  1  1  2  0  1  1  1  1  0
 1  1  1  2  1  2  1  1  0  0  0  0
 0  1  1  1  3  2  1  0  0  0  0  0
 1  1  1  1  2  0  1  0  0  0  0  0

In [18]:
board_state = zeros(size(mine_counts)) .|> Int
for (i,x) ∈ enumerate(revealed)
    board_state[i] = revealed[i] == 1 ? mine_counts[i] : -1
end

In [19]:
board_state

12×12 Matrix{Int64}:
 -1   1   0   0   0   0   0   0   0   0   1  -1
  1   1   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   1   1   1  -1
  0   0   0   0   0   0   0   0   1  -1  -1  -1
  1   2   2   1   1   1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   2   1   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1

In [20]:
board_state = reshape(board_state, (12,12,1,1))

12×12×1×1 Array{Int64, 4}:
[:, :, 1, 1] =
 -1   1   0   0   0   0   0   0   0   0   1  -1
  1   1   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   1   1   1  -1
  0   0   0   0   0   0   0   0   1  -1  -1  -1
  1   2   2   1   1   1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   2   1   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1

In [22]:
unsolved = [i for (i,x) ∈ pairs(board_state[:,:]) if x == -1]

77-element Vector{CartesianIndex{2}}:
 CartesianIndex(1, 1)
 CartesianIndex(7, 1)
 CartesianIndex(8, 1)
 CartesianIndex(9, 1)
 CartesianIndex(10, 1)
 CartesianIndex(11, 1)
 CartesianIndex(12, 1)
 CartesianIndex(7, 2)
 CartesianIndex(8, 2)
 CartesianIndex(9, 2)
 ⋮
 CartesianIndex(4, 12)
 CartesianIndex(5, 12)
 CartesianIndex(6, 12)
 CartesianIndex(7, 12)
 CartesianIndex(8, 12)
 CartesianIndex(9, 12)
 CartesianIndex(10, 12)
 CartesianIndex(11, 12)
 CartesianIndex(12, 12)

In [4]:
function getboardstate(minegame::Game)
    revealed = [cell.revealed for cell ∈ minegame.cells]
    mine_counts = [cell.mine_count for cell ∈ minegame.cells]
    board_state = zeros(size(mine_counts)) .|> Int
    for (i,x) ∈ enumerate(revealed)
        board_state[i] = revealed[i] == 1 ? mine_counts[i] : -1
    end

    return board_state
end

getboardstate (generic function with 1 method)

In [23]:
q_network = q_net(mp)

Chain(
  Conv((3, 3), 1 => 32, relu, pad=1),   [90m# 320 parameters[39m
  Conv((3, 3), 32 => 64, relu, pad=1),  [90m# 18_496 parameters[39m
  Conv((3, 3), 64 => 64, relu, pad=1),  [90m# 36_928 parameters[39m
  Flux.flatten,
  Dense(9216 => 512, relu),             [90m# 4_719_104 parameters[39m
  Dense(512 => 144),                    [90m# 73_872 parameters[39m
) [90m                  # Total: 10 arrays, [39m4_848_720 parameters, 18.498 MiB.

In [25]:
moves = q_network(board_state)

144×1 Matrix{Float32}:
  0.008372147
  0.075169854
  0.06424284
 -0.008652205
 -0.088997014
  0.023659337
  0.053991225
 -0.14578433
  0.113956526
 -0.0029371753
  ⋮
  0.011468982
  0.041167114
 -0.06722162
  0.03771173
  0.14972436
 -0.04913781
  0.060401566
 -0.080417335
  0.07142547

In [29]:
moves = reshape(moves, (12,12))

12×12 Matrix{Float32}:
  0.00837215   0.0711945   -0.00764437  …  -0.00495338    0.0753803
  0.0751699    0.0102093    0.0352961      -0.0295737     0.0803521
  0.0642428   -0.116191    -0.105575       -0.00105573   -0.188054
 -0.0086522    0.0492703    0.0113981       0.0150566     0.011469
 -0.088997     0.0327218    0.0284242      -0.0310161     0.0411671
  0.0236593    0.110753     0.0438063   …  -0.122578     -0.0672216
  0.0539912    0.0293491   -0.0194716       0.000325726   0.0377117
 -0.145784     0.0197853   -0.0433954      -0.152938      0.149724
  0.113957     0.00124221   0.0843015      -0.057344     -0.0491378
 -0.00293718   0.0172967    0.132008       -0.033442      0.0604016
  0.0811763    0.0438806   -0.0156618   …   0.122359     -0.0804173
 -0.033364     0.0931845    0.0240221       0.101141      0.0714255

In [30]:
best_move = argmax(moves)

CartesianIndex(8, 12)

In [None]:
# States: 
#   For each cell look at all neighbors of that cell
#   For each cell look at the next two neighbors of that cell
#       Partition the board as so
#       Pad game board with a non-useful value for the edges 2 deep

# Reward Structure:
#   Loss, post first move = -1.0
#   Loss, first move = 0.0
#   Win = +1.0
#   Progress = +0.3 
#   Guess = -0.3

# Game Loop:
#   1. Initialize board
#   2. Model Selects move 
#   3. 

In [31]:
rand(1:24)

17

In [35]:
CartesianIndex((17,17)) |> Tuple

(17, 17)

In [36]:
board_state

12×12×1×1 Array{Int64, 4}:
[:, :, 1, 1] =
 -1   1   0   0   0   0   0   0   0   0   1  -1
  1   1   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   0   0   1  -1
  0   0   0   0   0   0   0   0   1   1   1  -1
  0   0   0   0   0   0   0   0   1  -1  -1  -1
  1   2   2   1   1   1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   1   0   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1   2   1   1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1
 -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1  -1

In [56]:
indx = CartesianIndex((9,2))
board_state[indx]

-1

In [87]:
i, j = indx |> Tuple
neighbors = CartesianIndices((i-1:i+1, j-1:j+1))
board_state[9,2] = 0
guess_state = board_state[neighbors] .== -1

if guess_state == [1 1 1 ; 1 0 1 ; 1 1 1]
    println("true")
end

true


In [5]:
function checkguess(minegame::Game, action::CartesianIndex)::Bool
    neighbors = get_neighbors(minegame,action)
    return foldl( | , [cell.revealed for cell ∈ neighbors])
end

checkguess (generic function with 1 method)

In [110]:
checkguess(minegame,CartesianIndex(3,2))

true

In [99]:
get_neighbors(minegame,CartesianIndex(9,2))

8-element Vector{Cell}:
 Cell(false, false, false, 1, CartesianIndex(8, 1))
 Cell(false, false, false, 0, CartesianIndex(9, 1))
 Cell(false, false, false, 1, CartesianIndex(10, 1))
 Cell(false, false, false, 2, CartesianIndex(8, 2))
 Cell(false, false, false, 1, CartesianIndex(10, 2))
 Cell(false, false, false, 2, CartesianIndex(8, 3))
 Cell(false, false, false, 0, CartesianIndex(9, 3))
 Cell(false, false, false, 1, CartesianIndex(10, 3))

In [92]:
compute_score!(minegame)

In [94]:
reveal(minegame.cells)

 💣  1  0  0  0  0  0  0  0  0  1  1 
 1  1  0  0  0  0  0  0  0  0  1  💣 
 0  0  0  0  0  0  0  0  0  0  1  1 
 0  0  0  0  0  0  0  0  1  1  1  0 
 0  0  0  0  0  0  0  0  1  💣  1  0 
 1  2  2  1  1  1  1  0  1  1  1  0 
 1  💣  💣  1  1  💣  1  0  1  1  1  0 
 1  2  2  1  1  2  2  1  1  💣  1  0 
 0  0  0  1  1  2  💣  1  1  1  1  0 
 1  1  1  2  💣  2  1  1  0  0  0  0 
 💣  1  1  💣  3  2  1  0  0  0  0  0 
 1  1  1  1  2  💣  1  0  0  0  0  0 



In [19]:
function takeaction(minegame::Game, action::CartesianIndex)
    select_cell!(minegame,(action |> tuple)...)
    
    if game_over(minegame)
        reward = "Loss" 
        done = true
    else
        done = false
        if checkguess(minegame, action)
            reward = "Guess"
        end
        reward = "Progress"
    end
    return reward, getboardstate(minegame), done

end

takeaction (generic function with 1 method)

In [23]:
decayparam = (p,pd,pm) -> p > pm ? p = p*pd : Nothing

function train_loop(mp::ModelParams, hp::HyperParams, gp::GameParams)
    q_online = q_net(mp)
    q_target = q_net(mp)

    num_actions = foldl(*, gp.board_dim)

    opt = ADAM(hp.α)
    lossfn = Flux.mse

    rb = ReplayBuffer(hp.replaybuffercapacity)

    for episode ∈ 1:hp.episodes
        
        minegame = Game(dims=gp.board_dim, n_mines=gp.mines)

        total_reward = 0.0
        steps = 0

        playing = true

        while playing

            #prepare boardstate
            board_state = getboardstate(minegame)

            unsolved = [i for (i,x) ∈ pairs(board_state) if x == -1]
        
            if length(unsolved) == gp.mines

                reward = "Win"
                playing = false
                done = true
                next_state = board_stat

            else

                if rand() < hp.ϵ
                    action = (rand(1:mp.board_dim[1]), rand(1:mp.board_dim[2])) |> CartesianIndex
                else
                    q_values = q_online(reshape(board_state,(mp.board_dim...,1,1)))
                    action = reshape(q_values, mp.board_dim)[unsolved] |> argmax
                end

                reward, next_state, done = takeaction(minegame, action)

            reward_val = hp.rewards[reward]
            exp = Experience(board_state, action, reward_val, next_state, done)

            push!(rp, exp)

            total_reward += reward_val
            steps += 1

            batch = replay_buffer.sample(hp.batch_size)

            states, actions, rewards, next_states, dones = tuple.(zip((b.state, b.action, b.reward, b.next_state, b.done) for b ∈ batch))
            
            next_q_vals = q_target(next_states)
            next_actions = argmax(q_online(next_states), dims=2)
            next_q_vals = next_q_vals[CartesianIndex.((1:batch_size, next_actions))]
            TD_targets = rewards .+ hp.γ .* (1 .- dones) .* next_q_vals

            q_vals = q_online(states)
            q_vals = q_vals[CartesianIndex.((1:batch_size, next_actions))]
            
            #Compute loss & update online Q-network
            loss = lossfn(q_vals, TD_targets)
            gradient = gradient(() -> loss, Flux.params(q_online))
            opt(gradient)

            #update target Q-network
            if steps % hp.updatefrequency == 0
                Flux.loadparams!(q_target, Flux.params(q_online))
            end

            #Decay ϵ and α 
            decayparam(hp.ϵ,hp.ϵ_decay,hp.ϵ_min)
            decayparam(hp.α,hp.α_decay,hp.α_min)
        
            end
        end
        println("Episode $episode : Total Reward $total_reward : Exploration Rate $(hp.ϵ)")
    end
end
   

train_loop (generic function with 1 method)

In [24]:
mp = ModelParams(gp.board_dim, 0.1)

ModelParams((16, 16), 0.1)

In [25]:
train_loop(mp,hp,gp)

MethodError: MethodError: Cannot `convert` an object of type CartesianIndex{2} to an object of type Int64
Closest candidates are:
  convert(::Type{T}, !Matched::Ptr) where T<:Integer at pointer.jl:23
  convert(::Type{<:Integer}, !Matched::CUDA.CUSPARSE.cusparseIndexBase_t) at ~/.julia/packages/CUDA/DfvRa/lib/cusparse/types.jl:42
  convert(::Type{T1}, !Matched::CEnum.Cenum{T2}) where {T1<:Integer, T2<:Integer} at ~/.julia/packages/CEnum/Bqafi/src/operators.jl:24
  ...

Adam(0.1, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())

0.01

Adam(0.01, (0.9, 0.999), 1.0e-8, IdDict{Any, Any}())