In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

# Data loading

In [2]:
df = pd.read_csv("sudoku.csv")

In [3]:
df.head()

Unnamed: 0,quizzes,solutions
0,0043002090050090010700600430060020871900074000...,8643712593258497619712658434361925871986574322...
1,0401000501070039605200080000000000170009068008...,3461792581875239645296483719658324174729168358...
2,6001203840084590720000060050002640300700800069...,6951273841384596727248369158512647392739815469...
3,4972000001004000050000160986203000403009000000...,4972583161864397252537164986293815473759641828...
4,0059103080094030600275001000300002010008200070...,4659123781894735623275681497386452919548216372...


# Data processing

In [4]:
number_of_samples = 30000
unsolved = df['quizzes'].iloc[:number_of_samples].values
solved = df['solutions'].iloc[:number_of_samples].values

In [5]:
X = []
y = []

In [6]:
for sudoku in unsolved:
    np_solved = np.array([int(x) for x in sudoku]).reshape(9,9,1)
    X.append(np_solved)
    
for sudoku in solved:
    np_unsolved = np.array([int(y) for y in sudoku]).reshape(9,9,1)
    y.append(np_unsolved)

In [7]:
np_X = np.array(X)
np_y = np.array(y)

In [8]:
type(np_X)

numpy.ndarray

## Train test split

In [9]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [10]:
#default test size = 75%
X_train, X_test, y_train, y_test = train_test_split(np_X, np_y)

In [11]:
#one hot encoding
X_train = to_categorical(X_train).astype('float32')
X_test = to_categorical(X_test).astype('float32')
#-1 because we don't want to predict zeros
y_train = to_categorical(y_train-1).astype('float32')
y_test = to_categorical(y_test-1).astype('float32')

In [12]:
X_train.shape

(22500, 9, 9, 10)

# Training

In [13]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Reshape, BatchNormalization, Activation, Input,Dropout

In [14]:
#rows,cols, possible numbers in cell (0-9)
input_shape = (9,9,10)
batch_size = 1000

In [15]:
model = Sequential()

model.add(Dense(64, activation='relu', input_shape=input_shape))
model.add(Dropout(0.4))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))
model.add(Flatten())

grid = Input(shape=input_shape)
features = model(grid)

top_model = [
    Dense(9, activation='softmax')(features)
    for i in range(81)
]
sudoku_solver = Model(grid, top_model)
sudoku_solver.compile (optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
features.shape

TensorShape([None, 5184])

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 9, 9, 64)          704       
_________________________________________________________________
dropout (Dropout)            (None, 9, 9, 64)          0         
_________________________________________________________________
dense_1 (Dense)              (None, 9, 9, 64)          4160      
_________________________________________________________________
dropout_1 (Dropout)          (None, 9, 9, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 5184)              0         
Total params: 4,864
Trainable params: 4,864
Non-trainable params: 0
_________________________________________________________________


In [18]:
#from tensorflow.keras.callbacks import EarlyStopping

In [19]:
#early_stop = EarlyStopping(monitor='val_loss',patience=2, restore_best_weights=True)

In [20]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [43]:
with tf.device('/GPU:0'):
    sudoku_solver.fit(X_train, [y_train[:, i, j, :] for i in range(9) for j in range(9)], epochs=2, batch_size=1000 )

Train on 22500 samples
Epoch 1/2
Epoch 2/2


In [21]:
from tensorflow.keras.models import load_model

In [22]:
model = load_model("model1.h5")

In [60]:
pred = model.predict(X_test[:1])

In [23]:
pred

NameError: name 'pred' is not defined

In [30]:
np_boards.shape

(2, 9, 9, 1)

In [31]:
np_boards = np_X[:2].copy()
with np.nditer(np_boards, op_flags=['readwrite'])
for np_board in np.nditer(np_boards,flags=['external_loop']):
    print(np_board.shape)

(162,)


In [83]:
solved = []
np_boards = np_X[:1].copy()
for board in np_boards:
    np_board = board.copy()
    blanks_count = 81 - np.count_nonzero(np_board)

    np_board_1d = np_board.reshape(81)
    np_board_4d = np.expand_dims(to_categorical(np_board), axis=0)
    
    for _ in range(blanks_count):
        predictions = np.array(model.predict(np_board_4d)).reshape(81,9)
        propabilities = predictions.max(1)

        values = predictions.argmax(1)
        values = values + 1         #change range of digits from <0,8> to <1,9>

        #get position of highest propability
        zeros = (np_board_1d == 0)
        blanks_indices = np.where(zeros)[0]
        print(len(blanks_indices))
        position_to_fill = blanks_indices[propabilities[zeros].argmax()]
        value_to_fill = values[position_to_fill]

        np_board_1d[position_to_fill] = value_to_fill
        np_board = np_board_1d.reshape(9,9,1)
        np_board_4d = np.expand_dims(to_categorical(np_board), axis=0)

       
    #np_board = np_board_1d.reshape(9,9,1)
    solved.append(np_board.copy())
    
    
    

46
45
44
43
42
41
40
39
38
37
36
35
34
33
32
31
30
29
28
27
26
25
24
23
22
21
20
19
18
17
16
15
14
13
12
11
10
9
8
7
6
5
4
3
2
1


In [85]:
solved[0]

array([[[8],
        [4],
        [4],
        [3],
        [7],
        [1],
        [2],
        [6],
        [9]],

       [[6],
        [6],
        [5],
        [7],
        [4],
        [9],
        [8],
        [1],
        [1]],

       [[9],
        [7],
        [2],
        [6],
        [6],
        [5],
        [5],
        [4],
        [3]],

       [[5],
        [3],
        [6],
        [4],
        [4],
        [2],
        [9],
        [8],
        [7]],

       [[1],
        [9],
        [8],
        [3],
        [6],
        [7],
        [4],
        [2],
        [1]],

       [[4],
        [5],
        [7],
        [1],
        [8],
        [3],
        [9],
        [5],
        [6]],

       [[6],
        [8],
        [9],
        [2],
        [3],
        [4],
        [1],
        [1],
        [5]],

       [[1],
        [2],
        [3],
        [5],
        [1],
        [8],
        [6],
        [9],
        [8]],

       [[4],
        [4],
        [2],
        [

In [45]:
np_board_1d

array([0, 0, 4, 3, 0, 0, 2, 0, 9, 0, 0, 5, 0, 0, 9, 0, 0, 1, 0, 7, 0, 0,
       6, 0, 0, 4, 3, 0, 0, 6, 0, 0, 2, 0, 8, 7, 1, 9, 0, 0, 0, 7, 4, 0,
       0, 0, 5, 0, 0, 8, 3, 0, 0, 0, 6, 0, 0, 0, 0, 0, 1, 0, 5, 0, 0, 3,
       5, 0, 8, 6, 9, 0, 0, 4, 2, 9, 1, 0, 3, 0, 0])

In [109]:
for np_board in X_test[:10]:
    print(type(np_board))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
