# Solving Sudoku with Deep Convolutional Neural Network (keras implementation)

In [1]:
import copy
import keras
import numpy as np
from utils import deep_solver as ds
from utils import deep_solver_data_preprocess as data

### Difference between using an algorithm and deep learning

The main difference between the algorithmic solver and deep learning is that the algorithmic solver does not learn a fundamental relationship between puzzles and solutions. It starts fresh with each puzzle and follows its algorithm to efficiently search for the solution. Deep learning, in this case, uses a dataset of over a million puzzles and solutions (from Kaggle) to learn the relationship between a puzzle and its solution and, with this learned relationship, return the solution for a new puzzle.

In [2]:
# read the puzzle/solution CSV and unpack the train/test split returned by get_data
SUDOKU_CSV = 'utils/data/sudoku.csv'
x_train, x_test, y_train, y_test = data.get_data(SUDOKU_CSV)

In [3]:
# instantiate the DCNN
model = ds.suduko_network()
# Adam optimizer with an explicit learning rate; `learning_rate` is the supported
# keyword (`lr` is a deprecated alias that current Keras releases no longer accept)
adam = keras.optimizers.Adam(learning_rate=.001)
# sparse categorical cross-entropy: every cell is its own multi-class problem and the
# targets are integer class indices rather than one-hot vectors
# (test_accuracy adds 1 to a label to get the digit back)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam)
# train the model
model.fit(x_train, y_train, batch_size=32, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fe124c58590>

In [4]:
from utils.helper import norm, denorm

In [5]:
# Once the model is trained, it is used to infer which digit belongs in each blank cell; at every step the blank cell the model is most confident about is filled first, based on what it has learned
def inference_sudoku(sample):
    '''
        Solve one puzzle by committing a single blank cell per model call.

        On every pass the global ``model`` scores the current board and the
        blank cell whose best class has the highest probability (rounded to
        2 decimals) is filled with that class + 1. The loop ends as soon as
        no blank (0) is left, so a complete board costs no model call.

        sample: normalised puzzle (as produced by ``norm``) holding 81 values,
            reshapeable to (1, 9, 9, 1). It is copied, not modified.
        returns: (9, 9) integer array - the board that was actually filled in.
            Given clues are returned untouched, even when the network's last
            raw prediction would disagree with them.
    '''
    feat = copy.copy(sample)

    # working board in digit scale (0 = blank); rounding removes any float
    # noise left by the norm/denorm round trip
    grid = np.rint(denorm(feat)).astype(int).reshape((9, 9))

    while True:
        blank = (grid == 0)
        if not blank.any():
            break

        out = model.predict(feat.reshape((1, 9, 9, 1))).squeeze()

        pred = np.argmax(out, axis=1).reshape((9, 9)) + 1
        prob = np.around(np.max(out, axis=1).reshape((9, 9)), 2)

        # only blank cells may compete; -inf guarantees a filled cell is never
        # picked, so every pass fills one blank and the loop always terminates
        ind = np.argmax(np.where(blank, prob, -np.inf))
        x, y = divmod(ind, 9)

        grid[x, y] = pred[x, y]
        feat = norm(grid.astype(float))

    return grid

In [6]:
# test the accuracy of the inference
# accuracy = number of puzzles solved exactly (every cell matches the label) divided by feats.shape[0], the number of puzzles evaluated
def test_accuracy(feats, labels):
    
    correct = 0
    
    for i,feat in enumerate(feats):
        
        pred = inference_sudoku(feat)
        
        true = labels[i].reshape((9,9))+1
        
        if(abs(true - pred).sum()==0):
            correct += 1
        
    print(correct/feats.shape[0])

In [7]:
# evaluate on the first 100 test puzzles; the printed value is the fraction solved exactly, so 1.0 means every one of them was solved
test_accuracy(feats=x_test[:100], labels=y_test[:100])

1.0


In [8]:
# once we know the inference function is accurate with the trained DCNN, we write our solve function
def solve_sudoku(game):
    '''
        Solve a puzzle given as text.

        game: string of 81 digits in row-major order with 0 marking a blank;
            all whitespace (spaces, tabs, newlines, carriage returns) is ignored.
        returns: the result of inference_sudoku for the normalised (9, 9, 1) board.
        raises: ValueError if, once whitespace is dropped, the text is not
            exactly 81 digits.
    '''
    # split() with no argument cuts on every kind of whitespace, so tabs and
    # Windows line endings are handled as well as spaces and newlines
    digits = ''.join(game.split())
    if len(digits) != 81 or not digits.isdecimal():
        raise ValueError(
            'expected 81 digits after removing whitespace, got %d characters' % len(digits)
        )

    board = np.array([int(ch) for ch in digits]).reshape((9, 9, 1))
    return inference_sudoku(norm(board))

In [9]:
# the grid is written as a string rather than a matrix; solve_sudoku strips the whitespace and reshapes it into a board
puzzle_text = '''
          0 8 0 0 3 2 0 0 1
          7 0 3 0 8 0 0 0 2
          5 0 0 0 0 7 0 3 0
          0 5 0 0 0 1 9 7 0
          6 0 0 7 0 9 0 0 8
          0 4 7 2 0 0 0 5 0
          0 2 0 6 0 0 0 0 9
          8 0 0 0 9 0 3 0 5
          3 0 0 8 2 0 0 1 0
      '''
# pass the puzzle through the solver; the solved grid stays bound to `game` for the check in the next cell
game = solve_sudoku(puzzle_text)

print('solved puzzle:\n', game, sep='\n')

solved puzzle:

[[4 8 9 5 3 2 7 6 1]
 [7 1 3 4 8 6 5 9 2]
 [5 6 2 9 1 7 8 3 4]
 [2 5 8 3 4 1 9 7 6]
 [6 3 1 7 5 9 2 4 8]
 [9 4 7 2 6 8 1 5 3]
 [1 2 5 6 7 3 4 8 9]
 [8 7 6 1 9 4 3 2 5]
 [3 9 4 8 2 5 6 1 7]]


In [10]:
# quick sanity check: axis=1 totals each row, and a row holding the digits 1-9 once each sums to 45
# (nine 45s are necessary for a valid grid but not sufficient - columns and 3x3 boxes are not checked here)
game.sum(axis=1)

array([45, 45, 45, 45, 45, 45, 45, 45, 45])