In [None]:
!git clone https://github.com/HuguesGallier/Sudoku

fatal: destination path 'Sudoku' already exists and is not an empty directory.


In [1]:
from tensorflow.keras import layers
from tensorflow import math, exp

SIZE = 3
# PATH_TO_CSV = 'Sudoku/assets/data.csv'


class SoftmaxMap(layers.Layer):
    def __init__(self, axis=-1, **kwargs):
        self.axis = axis
        super(SoftmaxMap, self).__init__(**kwargs)

    def build(self, input_shape):
        pass

    def call(self, x, mask=None):
        e = exp(x - math.reduce_max(x, axis=self.axis, keepdims=True))
        s = math.reduce_sum(e, axis=self.axis, keepdims=True)
        return e / s

    def get_output_shape_for(self, input_shape):
        return input_shape


class UnsolvableError(ValueError):
    pass


class FillTerminalGrid(ValueError):
    pass

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# from ..utils.utils import PATH_TO_CSV


def custom_encoder(array_of_grids):
    """ transform numpy array of sudoku grids in one hot
    array of dimension (len(arr), 9, 9, 9), with one channel
    for each value from 1 to 9. """

    if len(array_of_grids.shape) == 2:
        array_of_grids = np.array([array_of_grids])
    shape_encoded = (*array_of_grids.shape, 9)
    encoded = np.zeros(shape_encoded, dtype=np.bool)
    for i, grid in enumerate(array_of_grids):
        for value in range(9):
            encoded[i][value] = (grid == value + 1) * 1
    return encoded


def read_transform(NROWS=100, encode=False):
    """ If encode is true, also splits in train test and eval. """

    data = pd.read_csv(PATH_TO_CSV, usecols=["puzzle", "solution"],
                       nrows=NROWS)

    _data_X = data["puzzle"].apply(lambda x: [int(i) if i != '.'
                                              else 0 for i in x])
    _data_Y = data["solution"].apply(lambda x: [int(i) for i in x])

    data_X = np.stack(_data_X.to_numpy()).reshape((len(data), 9, 9))
    data_Y = np.stack(_data_Y.to_numpy()).reshape((len(data), 9, 9))

    if not encode:
        return data_X, data_Y

    data_X_encoded = custom_encoder(data_X)
    data_Y_encoded = custom_encoder(data_Y)

    _X_train, X_test, _Y_train, Y_test = train_test_split(
        data_X_encoded, data_Y_encoded, test_size=0.1, random_state=42)

    X_train, X_val, Y_train, Y_val = train_test_split(
        _X_train, _Y_train, test_size=0.1, random_state=42)

    return X_train, X_val, X_test, Y_train, Y_val, Y_test

In [3]:
import numpy as np
# from .utils import SIZE


class Grid:
    """ Implements a minimalist version of a Grid for
    sudoku. Only a function to fill empty cells and another
    to check validity of the grid are provided. """

    def __init__(self, grid=None):
        if isinstance(grid, str):
            grid = np.array([int(i) if i != '.' else 0
                             for i in grid])
            grid = grid.reshape((SIZE ** 2, SIZE ** 2))
        self.grid = grid if grid is not None \
            else np.zeros((SIZE ** 2, SIZE ** 2))

    def fill_cell(self, i, j, value):
        if self.grid[i, j] == 0:
            self.grid[i, j] = value
        else:
            raise ValueError("The cell you are trying to fill is "
                             "not emply")

    def is_correct(self):
        for value in range(1, SIZE ** 2 + 1):
            for i in range(SIZE ** 2):
                if (self.grid[i, :] == value).sum() > 1:
                    return False
                elif (self.grid[:, i] == value).sum() > 1:
                    return False
            for i in range(SIZE ** 2):
                for j in range(SIZE ** 2):
                    if (self._values_in_box(SIZE * i, SIZE * j) ==
                            value).sum() > 1:
                        return False
        return True

    def is_complete(self):
        """ Check only completeness not correctness. """
        return ((self.grid == 0).sum() == 0)

    def copy(self):
        return Grid(self.grid.copy())

    def _values_in_box(self, i, j):
        """ All values in the square SIZE * SIZE. """
        idx_line, idx_col = SIZE * (i // SIZE), SIZE * (j // SIZE)
        return self.grid[idx_line: idx_line + SIZE,
                         idx_col: idx_col + SIZE]


class SmartGrid(Grid):
    """ More complete class to represent a Sudoku Grid. The main
    difference is the calculation and storage of possibilities
    for each cell, and the possibility to go back. """

    def __init__(self, grid=None):
        super().__init__(grid)
        self.possibilities = {}

    @classmethod
    def from_grid(cls, grid):
        if isinstance(grid, str):
            grid = np.array([int(i) if i != '.' else 0
                             for i in grid])
            grid = grid.reshape((SIZE ** 2, SIZE ** 2))
        obj = cls(grid)
        obj.possibilities = obj._pos()
        return obj

    def fill_cell(self, i, j, value):
        self.grid[i, j] = value
        del self.possibilities[(i, j)]  # raises error if not present
        for index in self._related_indeces(i, j):
            pos_at_index = self.possibilities.get(index, [])
            if value in pos_at_index:
                pos_at_index.remove(value)

    def erase_cell(self, i, j):
        self.grid[i, j] = 0
        self.possibilities = self._pos()

    def index_with_min_pos(self):
        min_pos = min(self.possibilities.values(), key=len)
        return [k for k, v in self.possibilities.items() if v == min_pos]

    def copy(self):
        new_grid = SmartGrid(self.grid.copy())
        new_grid.possibilities = self.possibilities.copy()
        return new_grid

    def _related_indeces(self, i, j):
        """ All indeces that will be impacted by changing (i, j). """
        indices = self._indeces_in_box(i, j)
        for k in range(SIZE ** 2):
            if (i, k) not in indices:
                indices.append((i, k))
            if (k, j) not in indices:
                indices.append((k, j))
        return indices

    def _pos(self):
        """ Return a dictionary with keys being index and
        values being a list of different possibilities. """

        line, col = np.where(self.grid == 0)
        pos = {(line[i], col[i]): self._pos_at(line[i], col[i])
               for i in range(len(line))}
        return pos

    def _pos_at(self, i, j):
        """ Possibilities at index (i, j). """

        local_pos = list(range(1, SIZE ** 2 + 1))
        to_delete = []
        for pos in local_pos:
            if pos in self._values_in_box(i, j):
                to_delete.append(pos)
            elif pos in self.grid[i, :]:
                to_delete.append(pos)
            elif pos in self.grid[:, j]:
                to_delete.append(pos)
        return [pos for pos in local_pos if pos not in to_delete]

    def _indeces_in_box(self, i, j):
        idx_line, idx_col = SIZE * (i // SIZE), SIZE * (j // SIZE)
        indeces = []
        for i in range(SIZE):
            for j in range(SIZE):
                indeces.append((idx_line + i, idx_col + j))
        return indeces

In [4]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, \
    Concatenate
# from utils import SoftmaxMap
# from .data_transform import read_transform


def train_model():
    X_train, X_val, X_test, Y_train, Y_val, Y_test = read_transform()
    model = build_model()
    # --- compile and fit the model
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    model.fit(X_train, Y_train, epochs=3, batch_size=128,
              validation_data=(X_val, Y_val))

    model.save("policy_network")


def build_model():
    # Model definition
    input = Input(shape=(9, 9, 9))

    x1 = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
                activation='tanh')(input)
    x2 = Conv2D(32, kernel_size=(1, 9), strides=(1, 1), padding='same',
                activation='tanh')(input)
    x3 = Conv2D(32, kernel_size=(9, 1), strides=(1, 1), padding='same',
                activation='tanh')(input)
    x4 = Conv2D(32, kernel_size=(9, 9), strides=(1, 1), padding='same',
                activation='tanh')(input)
    x = Concatenate()([x1, x2, x3, x4])
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=(9, 9), strides=(1, 1), padding='same',
               activation='tanh')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=(7, 7), strides=(1, 1), padding='same',
               activation='tanh')(x)
    x = BatchNormalization()(x)
    x = Conv2D(64, kernel_size=(5, 5), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, kernel_size=(5, 5), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = BatchNormalization()(x)
    x = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = BatchNormalization()(x)

    x = Concatenate()([x, input])
    x = Conv2D(9, kernel_size=(1, 1), strides=(1, 1), padding='same')(x)

    outputs_play = SoftmaxMap()(x)

    # Model instantiation
    model = Model(input, outputs_play)
    print(model.summary())

    return model

In [5]:
# from .grid import SmartGrid
import random
import numpy as np


class SudokuGrid:

    def __init__(self, grid):
        if isinstance(grid, np.ndarray):
            grid = SmartGrid.from_grid(grid.copy())
        self.grid = grid

    def find_children(self):
        if self.is_terminal():
            return set()
        else:
            possible_moves = []
            pos = self.grid.possibilities
            min_nb_pos_ind = min([len(v) for v in pos.values()])
            for index in pos:
                # just look at indeces with min pos
                if len(pos[index]) == min_nb_pos_ind:
                    for value in pos[index]:
                        possible_moves.append((index, value))
                    # we just return children on 1 cell --> sufficient
                    return {self.take_action(a[0], a[1])
                            for a in possible_moves}

    def find_random_child(self):
        pos = self.grid.possibilities
        min_nb_pos_ind = min([len(v) for v in pos.values()])
        if len(pos) == 0 or min_nb_pos_ind == 0:
            return None
        pos_considered = []
        for k, v in pos.items():
            # just look at indeces with min pos
            if len(v) == min_nb_pos_ind:
                pos_considered.append(k)
        index = random.choice(pos_considered)
        action = random.choice(self.grid.possibilities[index])
        child = self.take_action(index, action)
        return child

    def take_action(self, index, action):
        new_grid = SudokuGrid(self.grid.grid.copy())
        new_grid.grid.fill_cell(*index, action)
        return new_grid

    def is_terminal(self):
        if len(self.grid.possibilities) == 0:
            return True
        elif self.grid.is_complete() or not self.grid.is_correct() or \
                min([len(v) for v in self.grid.possibilities.values()]) == 0:
            return True
        return False

    def reward(self):
        return np.count_nonzero(self.grid.grid) / 81

    def __hash__(self):
        return hash(str(self.grid.grid))

    def __eq__(self, grid2):
        if np.array_equal(self.grid.grid, grid2.grid.grid):
            return True
        return False

    def __str__(self):
        return str(self.grid.grid)

# BackTrack

In [6]:
import numpy as np
# from ..utils import SudokuGrid


class BacktrackSolver:

    def __init__(self, sudoku_grid):
        if isinstance(sudoku_grid, np.ndarray):
            sudoku_grid = SudokuGrid(sudoku_grid)
        assert isinstance(sudoku_grid, SudokuGrid), \
            "Please enter an numpy array or a SudokuGrid object."
        self.iterations = 0
        self.sudoku_grid = sudoku_grid
        self.history = [sudoku_grid]
        self.children = {}

    def solve(self):
        while not self.sudoku_grid.grid.is_complete():
            self.sudoku_grid = self.choose_action()
            self.iterations += 1
        return self.sudoku_grid

    def choose_action(self):

        if self.sudoku_grid not in self.children:
            self.children[self.sudoku_grid] = self.sudoku_grid.find_children()
            if len(self.children[self.sudoku_grid]) == 0:
                return self.sudoku_grid
            new_grid = self.children[self.sudoku_grid].pop()
            self.history.append(new_grid)
            return new_grid

        if len(self.children[self.sudoku_grid]) == 0:
            if len(self.history) == 0:
                raise RuntimeError("Solver failed")
            return self.history.pop()

        new_grid = self.children[self.sudoku_grid].pop()
        self.history.append(new_grid)
        return new_grid

# MCTS

In [8]:
#from ..utils import SudokuGrid
import numpy as np
from math import log, sqrt

# This code is an adaptation of the code here:
# https://gist.github.com/qpwo/c538c6f73727e254fdc7fab81024f6e1
# in the case of sudoku


class MCTS:

    def __init__(self, sudoku_grid, exploration_weight=1,
                 max_depth_tree=10, max_iterations=10000):
        if isinstance(sudoku_grid, np.ndarray):
            sudoku_grid = SudokuGrid(sudoku_grid)
        self.sudoku_grid = sudoku_grid
        self.exploration_weight = exploration_weight
        self.Q = {}
        self.N = {}
        self.children = {}
        self.max_depth_tree = max_depth_tree
        self.iterations = 0
        self.max_iterations = max_iterations

    def solve(self):
        while not self.sudoku_grid.is_terminal():
            for i in range(self.max_depth_tree):
                self.do_rollout()
            self.sudoku_grid = self.choose_best_action()
            if self.iterations > self.max_iterations:
                print("Solver failed, you might want to increase "
                      "the number of iterations.")
                break
        return self.sudoku_grid

    def choose_best_action(self):
        self.iterations += 1
        if self.sudoku_grid.is_terminal():
            return self.sudoku_grid

        if self.sudoku_grid not in self.children:
            return self.sudoku_grid.find_random_child()

        def score(n):
            if self.N.get(n, 0) == 0:
                return -1
            return self.Q.get(n, 0) / self.N[n]

        if len(self.children[self.sudoku_grid]) == 0:
            if self.sudoku_grid.find_random_child() is not None:
                return self.sudoku_grid.find_random_child()
            else:
                return RuntimeError("Solver failed")

        return max(self.children[self.sudoku_grid],
                   key=score)

    def do_rollout(self):
        path = self._select(self.sudoku_grid)
        leaf = path[-1]
        self._expand(leaf)
        reward = self._simulate(leaf)
        self._backpropagate(path, reward)

    def _select(self, sudoku_grid):
        path = []
        while True:
            path.append(sudoku_grid)
            if sudoku_grid not in self.children \
                    or not self.children[sudoku_grid]:
                self.iterations += 1
                return path
            unexplored = self.children[sudoku_grid] - self.children.keys()
            if unexplored:
                child = unexplored.pop()
                path.append(child)
                self.iterations += 1
                return path
            sudoku_grid = self._action_selection(sudoku_grid)

    def _simulate(self, sudoku_grid):
        while not sudoku_grid.is_terminal():
            self.iterations += 1
            sudoku_grid = sudoku_grid.find_random_child()
        if sudoku_grid.grid.is_complete() and sudoku_grid.grid.is_correct():
            self.sudoku_grid = sudoku_grid
        return sudoku_grid.reward()

    def _expand(self, sudoku_grid):
        if sudoku_grid in self.children:
            return None
        self.iterations += 1
        self.children[sudoku_grid] = sudoku_grid.find_children()

    def _backpropagate(self, path, reward):
        for sudoku_grid in reversed(path):
            self.N[sudoku_grid] = self.N.get(sudoku_grid, 0) + 1
            self.Q[sudoku_grid] = self.Q.get(sudoku_grid, 0) + reward

    def _action_selection(self, sudoku_grid):
        # All children of node should already be expanded:
        assert all(child in self.children
                   for child in self.children[sudoku_grid])

        log_N_parent = log(self.N[sudoku_grid])

        def uct(child):
            "Upper confidence bound for trees"
            return self.Q[child] / self.N[child] + self.exploration_weight * \
                sqrt(log_N_parent / self.N[child])

        return max(self.children[sudoku_grid], key=uct)

In [9]:
# from ..utils import SudokuGrid
import numpy as np
from math import log, sqrt

# This code is an adaptation of the code here:
# https://gist.github.com/qpwo/c538c6f73727e254fdc7fab81024f6e1
# in the case of sudoku


class MCTS:

    def __init__(self, sudoku_grid, exploration_weight=1,
                 max_depth_tree=10, max_iterations=10000):
        if isinstance(sudoku_grid, np.ndarray):
            sudoku_grid = SudokuGrid(sudoku_grid)
        self.sudoku_grid = sudoku_grid
        self.exploration_weight = exploration_weight
        self.Q = {}
        self.N = {}
        self.children = {}
        self.max_depth_tree = max_depth_tree
        self.iterations = 0
        self.max_iterations = max_iterations

    def solve(self):
        while not self.sudoku_grid.is_terminal():
            for i in range(self.max_depth_tree):
                self.do_rollout()
            self.sudoku_grid = self.choose_best_action()
            if self.iterations > self.max_iterations:
                print("Solver failed, you might want to increase "
                      "the number of iterations.")
                break
        return self.sudoku_grid

    def choose_best_action(self):
        self.iterations += 1
        if self.sudoku_grid.is_terminal():
            return self.sudoku_grid

        if self.sudoku_grid not in self.children:
            return self.sudoku_grid.find_random_child()

        def score(n):
            if self.N.get(n, 0) == 0:
                return -1
            return self.Q.get(n, 0) / self.N[n]

        if len(self.children[self.sudoku_grid]) == 0:
            if self.sudoku_grid.find_random_child() is not None:
                return self.sudoku_grid.find_random_child()
            else:
                return RuntimeError("Solver failed")

        return max(self.children[self.sudoku_grid],
                   key=score)

    def do_rollout(self):
        path = self._select(self.sudoku_grid)
        leaf = path[-1]
        self._expand(leaf)
        reward = self._simulate(leaf)
        self._backpropagate(path, reward)

    def _select(self, sudoku_grid):
        path = []
        while True:
            path.append(sudoku_grid)
            if sudoku_grid not in self.children \
                    or not self.children[sudoku_grid]:
                self.iterations += 1
                return path
            unexplored = self.children[sudoku_grid] - self.children.keys()
            if unexplored:
                child = unexplored.pop()
                path.append(child)
                self.iterations += 1
                return path
            sudoku_grid = self._action_selection(sudoku_grid)

    def _simulate(self, sudoku_grid):
        while not sudoku_grid.is_terminal():
            self.iterations += 1
            sudoku_grid = sudoku_grid.find_random_child()
        if sudoku_grid.grid.is_complete() and sudoku_grid.grid.is_correct():
            self.sudoku_grid = sudoku_grid
        return sudoku_grid.reward()

    def _expand(self, sudoku_grid):
        if sudoku_grid in self.children:
            return None
        self.iterations += 1
        self.children[sudoku_grid] = sudoku_grid.find_children()

    def _backpropagate(self, path, reward):
        for sudoku_grid in reversed(path):
            self.N[sudoku_grid] = self.N.get(sudoku_grid, 0) + 1
            self.Q[sudoku_grid] = self.Q.get(sudoku_grid, 0) + reward

    def _action_selection(self, sudoku_grid):
        # All children of node should already be expanded:
        assert all(child in self.children
                   for child in self.children[sudoku_grid])

        log_N_parent = log(self.N[sudoku_grid])

        def uct(child):
            "Upper confidence bound for trees"
            return self.Q[child] / self.N[child] + self.exploration_weight * \
                sqrt(log_N_parent / self.N[child])

        return max(self.children[sudoku_grid], key=uct)

In [16]:
#Hard
data_X = np.array([[6,7,0,5,0,0,8,0,0],
                    [0,0,0,0,0,9,0,2,0],
                    [0,4,9,8,0,0,1,0,3],
                    [0,0,0,0,0,8,0,0,5],
                    [0,0,7,0,0,0,0,0,2],
                    [0,0,4,0,5,3,9,0,0],
                    [0,0,0,9,0,2,0,0,0],
                    [7,6,0,0,0,0,2,4,0],
                    [0,2,8,0,0,0,6,0,0]])

In [15]:
# from .utils import read_transform
# from .backtrack import BacktrackSolver
# from .mcts import MCTS
# from .deep_iterative_solver import DeepIterativeSolver
# from .alpha_sudoku import AlphaSudoku
from tensorflow.keras.models import load_model
import time

NROWS = 2
# data_X, data_Y = read_transform(NROWS=NROWS)

#model = load_model('Sudoku/policy_network')

# for i in range(1, NROWS):
# print('---------', i)
print('--------- Backtrack ')
start_time = time.time()
back_solver = BacktrackSolver(data_X)
back_solver.solve()
print(round(time.time() - start_time, 2))
print(back_solver.iterations)

print('--------- MCTS ')
start_time = time.time()
mcts_solver = MCTS(data_X, max_iterations=1000)
mcts_solver.solve()
print(round(time.time() - start_time, 2))
print(mcts_solver.iterations)

    # print('--------- DeepIterativeSolver ')
    # start_time = time.time()
    # deep_solver = DeepIterativeSolver(data_X[i], model=model)
    # deep_solver.solve()
    # print(round(time.time() - start_time, 2))
    # print(deep_solver.iterations)

    # print('--------- AlphaSudoku ')
    # start_time = time.time()
    # alpha_solver = AlphaSudoku(data_X[i], model=model)
    # alpha_solver.solve()
    # print(round(time.time() - start_time, 2))
    # print(alpha_solver.iterations)

--------- Backtrack 
0.77
115
--------- MCTS 
2.52
263


In [19]:
#Easy
data_X = np.array([[4,3,0,5,0,0,0,0,6],
                    [0,0,8,0,2,0,0,1,9],
                    [0,1,7,0,0,0,4,5,0],
                    [8,6,0,2,0,0,0,7,0],
                    [0,7,4,8,0,0,2,6,0],
                    [1,0,9,0,0,7,8,3,0],
                    [2,0,0,1,7,8,0,4,3],
                    [0,0,1,9,4,0,0,0,7],
                    [0,0,0,6,5,0,0,0,0]])

In [18]:
NROWS = 2
# data_X, data_Y = read_transform(NROWS=NROWS)

#model = load_model('Sudoku/policy_network')

# for i in range(1, NROWS):
# print('---------', i)
print('--------- Backtrack ')
start_time = time.time()
back_solver = BacktrackSolver(data_X)
back_solver.solve()
print(round(time.time() - start_time, 2))
print(back_solver.iterations)

print('--------- MCTS ')
start_time = time.time()
mcts_solver = MCTS(data_X, max_iterations=1000)
mcts_solver.solve()
print(round(time.time() - start_time, 2))
print(mcts_solver.iterations)

    # print('--------- DeepIterativeSolver ')
    # start_time = time.time()
    # deep_solver = DeepIterativeSolver(data_X[i], model=model)
    # deep_solver.solve()
    # print(round(time.time() - start_time, 2))
    # print(deep_solver.iterations)

    # print('--------- AlphaSudoku ')
    # start_time = time.time()
    # alpha_solver = AlphaSudoku(data_X[i], model=model)
    # alpha_solver.solve()
    # print(round(time.time() - start_time, 2))
    # print(alpha_solver.iterations)

--------- Backtrack 
0.38
43
--------- MCTS 
0.41
56


# DQN

In [None]:
import time
import pandas as pd
import numpy as np
import os

# Data Pre-processing
def split(word):
    return [char for char in word]

COLUMNS = ['puzzle','solution']
#PATH = "./sudoku.csv"
PATH = 'Sudoku/assets/data.csv'
# df_train = pd.read_csv(PATH,
#                        skipinitialspace=True,
#                        names = COLUMNS,
#                        index_col=False)
df_train = pd.read_csv(PATH)

for i in range(len(df_train['puzzle'])):
  df_train['puzzle'].iloc[i] = df_train['puzzle'].iloc[i].replace(".", "0")


# Data Frame type
quizzes = df_train['puzzle'].astype(str)
solutions = df_train['solution'].astype(str)

class board(object):
    def __init__(self):
        super(board,self).__init__()
        # Choose Actions
        self.action_space = ['u', 'd', 'l', 'r','1','2','3','4','5','6','7','8','9']
        self.n_actions = len(self.action_space)
        # Features in Neural Network
        self.n_features = 2
        self.mazecount = 1
        self._build_maze()

    def _build_maze(self):
        # Making Environment
        self.currentquiz = quizzes.iloc[self.mazecount]
        self.quizreshaped = np.asarray(self.currentquiz)
        self.quizarray = split(str(self.quizreshaped))
        # Generating binary sudoku array fir making fixed sudoku maze
        self.binaryquiz = []
        for i in self.quizarray:
            if (i == '0'):
                self.binaryquiz.append('0')
            else:
                self.binaryquiz.append('1')
        print(len(self.quizarray))
        self.quizarray = np.array(self.quizarray).reshape(9, 9)
        self.binaryquizarray = np.array(self.binaryquiz).reshape(9, 9)
        self.agent = np.array([0,0])

        # Extract Solution of current question
        self.currentsolution = solutions.iloc[self.mazecount]
        self.solutionreshaped = np.asarray(self.currentsolution)
        self.solutionarray = split(str(self.solutionreshaped))
        self.solutionarray = np.array(self.solutionarray).reshape(9, 9)
        self.mazecount += 1

    def reset(self):
        time.sleep(0.1)
        # setting the agent pointing to 0,0 position on reset
        return (np.array([0,0]))

    def step(self, action):
        s = self.agent
        stemp = s
        if action == 0:   # up
            if (stemp[0] > 0):
                stemp[0] = stemp[0] - 1
        elif action == 1:   # down
            if (stemp[0] < 8):
                stemp[0] = stemp[0] + 1
        elif action == 2:   # right
            if (stemp[1] < 8):
                stemp[1] = stemp[1] + 1
        elif action == 3:   # left
            if (stemp[1] > 0):
                stemp[1] = stemp[1] - 1
        elif action == 4:   # insert 1
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '1'
        elif action == 5:   # insert 2
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '2'
        elif action == 6:   # insert 3
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '3'
        elif action == 7:   # insert 4
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '4'
        elif action == 8:   # insert 5
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '5'
        elif action == 9:   # insert 6
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '6'
        elif action == 10:   # insert 7
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '7'
        elif action == 11:   # insert 8
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '8'
        elif action == 12:   # insert 9
            if (self.binaryquizarray[s[0], s[1]] == '0'):
                self.quizarray[s[0], s[1]] = '9'
        # reward function

        if ((self.quizarray==self.solutionarray).all()):
            reward = 1
            done = True
        else:
            reward = 0
            done = False

        if(action<4):
            s_ = stemp
        else:
            s_ = s

        cls = lambda: os.system('cls')
        cls()
        print(self.quizarray)
        time.sleep(0.5)
        return s_, reward, done

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [None]:
import numpy as np
import tensorflow.compat.v1 as tf

np.random.seed(1)
#tf.set_random_seed(1)

tf.disable_eager_execution()
tf.reset_default_graph()

class DeepQNetwork:
    def __init__(
            self,n_actions,n_features,
            learning_rate=0.01,reward_decay=0.9,e_greedy=0.9,
            replace_target_iter=10,memory_size=500,batch_size=32,
            e_greedy_increment=None,output_graph=False,
    ):
        self.n_actions = n_actions
        self.n_features = n_features
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon_max = e_greedy
        self.replace_target_iter = replace_target_iter
        self.memory_size = memory_size
        self.batch_size = batch_size
        self.epsilon_increment = e_greedy_increment
        self.epsilon = 0 if e_greedy_increment is not None else self.epsilon_max

        self.learn_step_counter = 0

        # initialize zero memory [s, a, r, s_]
        self.memory = np.zeros((self.memory_size, n_features * 2 + 2))

        # consist of [target_net, evaluate_net]
        self._build_net()

        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')

        with tf.variable_scope('hard_replacement'):
            self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]

        self.sess = tf.Session()

        if output_graph:
            #
            tf.summary.FileWriter("logs/", self.sess.graph)

        self.sess.run(tf.global_variables_initializer())
        self.cost_his = []

    def _build_net(self):
        #
        self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input State
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input Next State
        self.r = tf.placeholder(tf.float32, [None, ], name='r')  # input Reward
        self.a = tf.placeholder(tf.int32, [None, ], name='a')  # input Action

        w_initializer, b_initializer = tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)

        with tf.variable_scope('evaluation_net'):
            e1 = tf.layers.dense(self.s, 20, tf.nn.relu, kernel_initializer=w_initializer,
                                 bias_initializer=b_initializer, name='e1')
            self.q_eval = tf.layers.dense(e1, self.n_actions, kernel_initializer=w_initializer,
                                          bias_initializer=b_initializer, name='q')

        with tf.variable_scope('target_net'):
            t1 = tf.layers.dense(self.s_, 20, tf.nn.relu, kernel_initializer=w_initializer,
                                 bias_initializer=b_initializer, name='t1')
            self.q_next = tf.layers.dense(t1, self.n_actions, kernel_initializer=w_initializer,
                                          bias_initializer=b_initializer, name='t2')

        with tf.variable_scope('q_target'):
            q_target = self.r + self.gamma * tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
            self.q_target = tf.stop_gradient(q_target)
        with tf.variable_scope('q_eval'):
            a_indices = tf.stack([tf.range(tf.shape(self.a)[0], dtype=tf.int32), self.a], axis=1)
            self.q_eval_wrt_a = tf.gather_nd(params=self.q_eval, indices=a_indices)
        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.q_target, self.q_eval_wrt_a, name='TD_error'))
        with tf.variable_scope('train'):
            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)

    def store_transition(self, s, a, r, s_):
        if not hasattr(self, 'memory_counter'):
            self.memory_counter = 0
        transition = np.hstack((s, [a, r], s_))

        index = self.memory_counter % self.memory_size
        self.memory[index, :] = transition
        self.memory_counter += 1

    def choose_action(self, observation):
        observation = observation[np.newaxis, :]

        if np.random.uniform() < self.epsilon:
            actions_value = self.sess.run(self.q_eval, feed_dict={self.s: observation})
            action = np.argmax(actions_value)
        else:
            action = np.random.randint(0, self.n_actions)
        return action

    def learn(self):
        if self.learn_step_counter % self.replace_target_iter == 0:
            self.sess.run(self.target_replace_op)
            print('\ntarget_params_replaced\n')

        if self.memory_counter > self.memory_size:
            sample_index = np.random.choice(self.memory_size, size=self.batch_size)
        else:
            sample_index = np.random.choice(self.memory_counter, size=self.batch_size)
        batch_memory = self.memory[sample_index, :]

        _, cost = self.sess.run(
            [self._train_op, self.loss],
            feed_dict={
                self.s: batch_memory[:, :self.n_features],
                self.a: batch_memory[:, self.n_features],
                self.r: batch_memory[:, self.n_features + 1],
                self.s_: batch_memory[:, -self.n_features:],
            })

        self.cost_his.append(cost)

        self.epsilon = self.epsilon + self.epsilon_increment if self.epsilon < self.epsilon_max else self.epsilon_max
        self.learn_step_counter += 1

In [None]:
# from boardenv import board
# from neural import DeepQNetwork
import pandas as pd

# COLUMNS = ['puzzle','solution']
# PATH = 'Sudoku/assets/data.csv'
# df_train = pd.read_csv(PATH,
#                        skipinitialspace=True,
#                        names = COLUMNS,
#                        index_col=False)

quizzescount = df_train['puzzle'].count()

def run_maze():
    step = 0
    episodes = 1
    for episode in range(episodes):
        currentstate = env.reset()
        print("currentstate", currentstate)
        
        while True:
            action = neuralbrain.choose_action(currentstate)
            print(action)
            futurestate_, reward, done = env.step(action)
            print(reward)
            neuralbrain.store_transition(currentstate, action, reward, futurestate_)
            if (step > 200) and (step % 5 == 0):
                neuralbrain.learn()
            currentstate = futurestate_
            if done:
                break
            step += 1

    print('game over')

if __name__ == "__main__":
    env = board()
    neuralbrain = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=10,
                      memory_size=2000)
    # for i in range(1,quizzescount):
    #     run_maze()
    #     env._build_maze()

    for i in range(1,2):
      run_maze()
      env._build_maze()

81


  return layer.apply(inputs)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 ['1' '3' '7' '7' '3' '0' '9' '5' '3']]
0
6
[['1' '7' '5' '6' '6' '3' '3' '6' '6']
 ['7' '8' '8' '7' '8' '8' '5' '4' '9']
 ['4' '8' '1' '3' '8' '4' '1' '9' '6']
 ['1' '9' '7' '6' '1' '1' '5' '1' '4']
 ['8' '1' '6' '2' '1' '0' '0' '8' '3']
 ['9' '7' '2' '4' '6' '8' '3' '0' '1']
 ['4' '2' '9' '7' '3' '3' '7' '8' '6']
 ['3' '7' '3' '4' '1' '0' '8' '2' '3']
 ['1' '3' '7' '7' '3' '0' '9' '5' '3']]
0
6
[['1' '7' '5' '6' '6' '3' '3' '6' '6']
 ['7' '8' '8' '7' '8' '8' '5' '4' '9']
 ['4' '8' '1' '3' '8' '4' '1' '9' '6']
 ['1' '9' '7' '6' '1' '1' '5' '1' '4']
 ['8' '1' '6' '2' '1' '0' '0' '8' '3']
 ['9' '7' '2' '4' '6' '8' '3' '0' '1']
 ['4' '2' '9' '7' '3' '3' '7' '8' '6']
 ['3' '7' '3' '4' '1' '0' '8' '2' '3']
 ['1' '3' '7' '7' '3' '0' '9' '5' '3']]
0
1
[['1' '7' '5' '6' '6' '3' '3' '6' '6']
 ['7' '8' '8' '7' '8' '8' '5' '4' '9']
 ['4' '8' '1' '3' '8' '4' '1' '9' '6']
 ['1' '9' '7' '6' '1' '1' '5' '1' '4']
 ['8' '1' '6' '2' '1' '

In [None]:
# import time
# import pandas as pd
# import numpy as np
# import os

# # Data Pre-processing
# def split(word):
#     return [char for char in word]

# COLUMNS = ['quizzes','solutions']
# #PATH = "./sudoku.csv"
# PATH = 'Sudoku/assets/sudoku.csv'
# df_train = pd.read_csv(PATH,
#                        skipinitialspace=True,
#                        names = COLUMNS,
#                        index_col=False)
# for i in range(len(df_train['puzzle'])):
#   df_train['puzzle'].iloc[i] = df_train['puzzle'].iloc[i].replace(".", "0")
# # Data Frame type
# quizzes = df_train['quizzes'].astype(str)
# solutions = df_train['solutions'].astype(str)


In [None]:
# PATH = 'Sudoku/assets/data.csv'
# df = pd.read_csv(PATH)
# df.head()



In [None]:
# for i in range(len(df['puzzle'])):
#   a = df['puzzle'].iloc[i]
#   print(a)
#   print(len(a))
#   print(type(a))
#   b = a.replace(".", "0")
#   print(b)

#   break

In [None]:
# for i in range(len(df['puzzle'])):
#   df['puzzle'].iloc[i] = df['puzzle'].iloc[i].replace(".", "0")
# df.head()
 

In [None]:
currentstate = env.reset()
print("currentstate", currentstate)
env = board()
env._build_maze()


currentstate [0 0]
81
81
