In [2]:
import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Default board configuration referenced by later cells
# (e.g. the input shape of the model).
width = height = 9  # 9 x 9 tiles
n_mines = 10        # number of mines hidden on the board

def get_init_stage(width=9, height=9, n_mines=10):
    """Create a fresh random board.

    Returns an int8 array of shape (width, height, 3):
      * layer 0 -- left all zero here (the other helpers use it as the
        "tile is revealed" flag),
      * layer 1 -- 1 where a mine was placed, 0 elsewhere,
      * layer 2 -- for non-mine tiles, the number of mines in the
        surrounding 3x3 window; mine tiles keep 0.
    """
    board = np.zeros((width, height, 3), dtype='int8')

    # Draw n_mines distinct flat indices and mark them as mines.
    n_cells = width * height
    mine_flat = np.zeros(n_cells, dtype='bool')
    mine_flat[np.random.choice(range(0, n_cells), n_mines, replace=False)] = True
    board[:, :, 1] = mine_flat.reshape((width, height))

    # A one-tile frame of zeros lets every tile use a full 3x3 window.
    framed = np.zeros((width + 2, height + 2), dtype='int8')
    framed[1:-1, 1:-1] = board[:, :, 1]

    # Only non-mine tiles receive a neighbour count.
    for row, col in np.argwhere(board[:, :, 1] == 0):
        board[row, col, 2] = framed[row:row + 3, col:col + 3].sum()
    return board

def print_stage(stage):
    """Return the fully revealed board as a 2-D array.

    The result is layer 2 minus layer 1 of `stage`. For boards built by
    get_init_stage that is -1 on mine tiles and the neighbouring-mine
    count everywhere else.
    """
    neighbour_counts = stage[..., 2]
    mine_flags = stage[..., 1]
    return neighbour_counts - mine_flags

def show_visible(stage):
    """Return the board as seen by the player.

    Tiles whose layer-0 flag is 0 are masked to 0; the remaining tiles show
    the value from print_stage. A hidden tile is therefore
    indistinguishable from a revealed tile whose value is 0.
    """
    revealed_mask = stage[..., 0]
    return np.multiply(revealed_mask, print_stage(stage))

def expand(stage, coord):
    """List the direct neighbours of `coord` that may be auto-revealed.

    Only the four edge-adjacent tiles are considered, in the order
    (row - 1, col), (row, col - 1), (row + 1, col), (row, col + 1).
    A neighbour is returned when it lies on the board, is not a mine
    (layer 1 == 0) and is not yet revealed (layer 0 == 0).

    The board size is taken from `stage` itself, so the function works for
    boards of any shape rather than only for the notebook-level
    `width`/`height`.

    Parameters
    ----------
    stage : array of shape (rows, cols, 3)
    coord : sequence whose first two items are the row and column index

    Returns
    -------
    list of (row, col) tuples
    """
    n_rows, n_cols = stage.shape[:2]
    row, col = coord[0], coord[1]
    candidates = ((row - 1, col), (row, col - 1), (row + 1, col), (row, col + 1))
    return [(r, c) for r, c in candidates
            # Bounds are checked first so the array is never indexed off-board.
            if 0 <= r < n_rows and 0 <= c < n_cols
            and stage[r, c, 1] == 0
            and stage[r, c, 0] == 0]

def step_on(stage, coord):
    """Reveal the tile at `coord` and return `(alive, new_stage)`.

    `stage` itself is not modified; all changes are made on a copy.
    `alive` is False when is_dead reports a revealed mine on the new board.
    If the stepped tile has a neighbour count of 0, reveal spreads through
    the tiles returned by expand, continuing from every newly revealed tile
    whose own count is 0.
    """
    row, col = coord[0], coord[1]
    new_stage = stage.copy()
    new_stage[row, col, 0] = 1

    if is_dead(new_stage):
        return False, new_stage
    if stage[row, col, 2] != 0:
        # Numbered tile: nothing to spread.
        return True, new_stage

    # Worklist of tiles to reveal; it grows while it is being consumed.
    frontier = expand(new_stage, coord)
    cursor = 0
    while cursor < len(frontier):
        r, c = frontier[cursor][0], frontier[cursor][1]
        cursor += 1
        new_stage[r, c, 0] = 1
        if new_stage[r, c, 2] == 0:
            frontier += expand(new_stage, (r, c))
    return True, new_stage
    
def is_dead(stage):
    """Count the revealed tiles that show a negative value.

    The result is a NumPy integer, not a bool; callers rely on its
    truthiness (non-zero means at least one revealed tile is negative,
    which for boards from get_init_stage means a mine is exposed).
    """
    exposed = show_visible(stage)
    return (exposed < 0).sum()

def pretty(stage):
    """Render the player's view of the board as a DataFrame of symbols.

    * '*' -- tile not yet revealed (layer 0 == 0),
    * '.' -- revealed tile whose print_stage value is 0,
    * otherwise the integer value from print_stage.

    The symbols are written into an object-dtype array with boolean masks
    before the DataFrame is built. This avoids assigning strings cell by
    cell into integer columns, which depends on implicit dtype upcasting
    that pandas has deprecated, and removes the per-tile Python loop.
    """
    values = print_stage(stage)
    cells = values.astype(object)
    cells[values == 0] = '.'
    # Hidden tiles override everything else, including the '.' marker.
    cells[stage[:, :, 0] == 0] = '*'
    return pd.DataFrame(cells)

def is_won(stage):
    """Return True when exactly the non-mine tiles are revealed.

    The check uses the board's own mine layer instead of the notebook-level
    `width`, `height` and `n_mines`, so it is correct for boards of any
    size or mine count. It compares positions rather than totals, so a
    board on which a mine has been revealed is never reported as won, even
    if the number of revealed tiles happens to equal the number of safe
    tiles.
    """
    revealed = stage[:, :, 0] != 0
    safe = stage[:, :, 1] == 0
    return bool(np.array_equal(revealed, safe))

In [29]:
# test simulator
# Smoke test of the simulator helpers on one random default board.
# NOTE(review): no random seed is set in this cell, so the board (and
# whether the step below hits a mine) presumably differs on every run.
temp_stage = get_init_stage()
# Fully revealed board (mines and neighbour counts).
print(print_stage(temp_stage))
# Player view before any move: every tile is still hidden.
print(pretty(temp_stage))
# Step on tile (2, 2); `alive` is False if that reveals a mine.
alive, temp_after = step_on(temp_stage, (2, 2))
print(alive)
# Player view after the move, first as symbols, then as the numeric array.
print(pretty(temp_after))
print(show_visible(temp_after))

[[-1 -1  1  0  0  1  1  2  1]
 [ 3  3  1  0  0  2 -1  3 -1]
 [-1  1  0  0  1  3 -1  3  1]
 [ 1  1  0  0  1 -1  2  1  0]
 [ 0  0  0  1  2  2  1  0  0]
 [ 1  1  1  1 -1  1  0  0  0]
 [ 1 -1  1  1  1  1  0  0  0]
 [ 1  2  2  1  0  0  0  0  0]
 [ 0  1 -1  1  0  0  0  0  0]]
   0  1  2  3  4  5  6  7  8
0  *  *  *  *  *  *  *  *  *
1  *  *  *  *  *  *  *  *  *
2  *  *  *  *  *  *  *  *  *
3  *  *  *  *  *  *  *  *  *
4  *  *  *  *  *  *  *  *  *
5  *  *  *  *  *  *  *  *  *
6  *  *  *  *  *  *  *  *  *
7  *  *  *  *  *  *  *  *  *
8  *  *  *  *  *  *  *  *  *
True
   0  1  2  3  4  5  6  7  8
0  *  *  1  .  .  1  *  *  *
1  *  *  1  .  .  2  *  *  *
2  *  1  .  .  1  *  *  *  *
3  1  1  .  .  1  *  *  *  *
4  .  .  .  1  *  *  *  *  *
5  1  1  1  *  *  *  *  *  *
6  *  *  *  *  *  *  *  *  *
7  *  *  *  *  *  *  *  *  *
8  *  *  *  *  *  *  *  *  *
[[0 0 1 0 0 1 0 0 0]
 [0 0 1 0 0 2 0 0 0]
 [0 1 0 0 1 0 0 0 0]
 [1 1 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0 0]
 [1 1 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 

In [46]:
def border_padding(stage):
    """Convert a board into a channel-first array with a 2-tile zero border.

    The result has shape (3, rows + 4, cols + 4) and dtype int8:
      * channel 0 -- 1 on board tiles, 0 on the border (an "inside the
        board" mask),
      * channel 1 -- the revealed flag (layer 0 of `stage`),
      * channel 2 -- the player's view from show_visible.

    The board size is read from `stage` rather than from the notebook-level
    `width`/`height`, so boards of any size are padded correctly.
    """
    n_rows, n_cols = stage.shape[:2]
    padded = np.zeros((3, n_rows + 4, n_cols + 4), dtype='int8')
    padded[0, 2:-2, 2:-2] = 1
    padded[1, 2:-2, 2:-2] = stage[:, :, 0]
    padded[2, 2:-2, 2:-2] = show_visible(stage)
    return padded

In [47]:
# Shape check: a raw board is (width, height, 3); after border_padding it is
# channel-first with 2 extra tiles on every side, i.e. (3, width + 4, height + 4).
print(get_init_stage().shape)
border_padding(get_init_stage()).shape

(9, 9, 3)


(3, 13, 13)

In [79]:
# define model for q evaluation
from keras.models import Sequential, Model
from keras.layers import Dense, Activation
from keras.layers.core import Reshape
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers import Input, merge, Convolution2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam, RMSprop, SGD
from rl.agents.dqn import DQNAgent
from rl.agents.ddpg import DDPGAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
import random
from IPython.display import clear_output

In [69]:
# Fully convolutional network mapping a padded board to one value per tile.
# NOTE(review): nb_filter / nb_row / nb_col / border_mode look like Keras 1.x
# argument names -- confirm the installed Keras version accepts them.
# NOTE(review): the channel-first input_shape presumably requires the
# Theano-style dimension ordering to be configured -- confirm.
model = Sequential()
# 5x5 'valid' convolution over the (3, width + 4, height + 4) input: the
# 2-tile border added by border_padding is consumed, leaving a
# width x height map.
model.add(Convolution2D(nb_filter=10, nb_row=5, nb_col=5, border_mode='valid', 
                        input_shape=(3, width + 4, height + 4)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
# NOTE(review): input_shape on a non-first Sequential layer is presumably
# ignored; (3, width, height) also does not match the 10 feature maps
# produced above -- confirm and consider dropping the argument.
model.add(Convolution2D(nb_filter=5, nb_row=5, nb_col=5, border_mode='same', 
                        input_shape=(3, width, height)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())
# 1x1 convolution down to a single map with linear activation: one
# unbounded score per tile (same input_shape caveat as above).
model.add(Convolution2D(nb_filter=1, nb_row=1, nb_col=1, border_mode='same', 
                        input_shape=(3, width, height)))
model.add(Activation('linear'))
model.compile(optimizer='rmsprop', loss='mse')

In [76]:
# Run the untrained model on one random padded board as a sanity check.
example = border_padding(get_init_stage())
print(example.shape)
# Add a leading batch axis of size 1 before predicting.
pred = model.predict(example.reshape(1, 3, width+4, height+4))
print(pred)
# pred[0, 0] is the single output map for the single sample.
print(pred[0, 0, :, :].shape)
# (row, col) index of the highest-scoring tile in that map.
np.unravel_index(pred[0, 0, :, :].argmax(), pred[0, 0, :, :].shape)

(3, 13, 13)
[[[[-0.05792572 -0.0354009   0.03887913  0.04983381  0.06709787  0.05862692
     0.06340855  0.05346444  0.00194048]
   [-0.16814481 -0.17293577 -0.0757693  -0.07170346 -0.04817623 -0.05575689
     0.02541313  0.05484676  0.01230349]
   [-0.23979653 -0.23454504 -0.08065415 -0.03599275 -0.0163417  -0.01638608
     0.0120784   0.05504632 -0.00165451]
   [-0.2451165  -0.24394363 -0.10331909 -0.08143683 -0.03637112  0.00531315
     0.0234071   0.05723965 -0.00228843]
   [-0.22937171 -0.22401801 -0.0568387  -0.05279678 -0.01371426  0.00483536
     0.01314424  0.06000319 -0.0045753 ]
   [-0.16304198 -0.15741536  0.00189133  0.01660383  0.06594341  0.06450769
     0.07253702  0.05887225  0.        ]
   [-0.13550043 -0.13706607 -0.0131674  -0.01248849  0.00496498  0.00683005
     0.02542618  0.03720443 -0.02026121]
   [-0.09134807 -0.13380976 -0.04693288 -0.04904399 -0.02352193 -0.00259179
    -0.05060366  0.02980019 -0.00813962]
   [-0.10364249 -0.14535785 -0.04423444 -0.06144509 

(5, 6)

In [77]:
# Magnitude of the negative reward that perform_action returns when the
# step is not survived.
penalty = 50

In [78]:
def perform_action(stage, coord):
    """Apply one move and return `(alive, reward, next_stage)`.

    The reward is `-penalty` when step_on reports the move as fatal;
    otherwise it is the number of tiles newly revealed by the move (the
    increase in the sum of layer 0).
    """
    survived, next_stage = step_on(stage, coord)
    if survived:
        newly_revealed = np.sum(next_stage[:, :, 0]) - np.sum(stage[:, :, 0])
        return True, newly_revealed, next_stage
    return False, -penalty, next_stage