In [1]:
import os, json
from glob import glob
import numpy as np
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing import image

from numpy import genfromtxt

from keras import callbacks

import pandas as pd
from sklearn.metrics import mean_squared_error

from tqdm import tqdm

Using Theano backend.
Using gpu device 0: GeForce GTX 1080 (CNMeM is disabled, cuDNN 5110)


In [2]:
# Path to the Sudoku CSV, relative to the notebook's working directory.
path = "../../sudoku/sudoku.csv"

In [3]:
# Load the first 110000 games as a 2-column string array
# (column 0 is used below as the puzzle, column 1 as its solution).
# max_rows stops parsing once enough rows have been read, instead of parsing
# the whole CSV and discarding everything past row 110000 afterwards.
# NOTE(review): dtype='string' looks like a Python-2-era spelling — confirm before porting.
sudoku_games = genfromtxt(path, dtype='string', delimiter=',', skip_header=1, max_rows=110000)
sudoku_games.shape

(110000, 2)

In [4]:
# Column 0 holds the puzzles, one string per game.
train_data = sudoku_games[:, 0]

# Split every puzzle string into characters and fold them into a 9x9 grid
# (the reshape only succeeds for strings of exactly 81 characters).
train_data_list = [np.array(list(data)).reshape((9, 9)) for data in tqdm(train_data)]

# Stack to (N, 9, 9), then insert a singleton axis at position 1 so the
# result is (N, 1, 9, 9), matching the model's input_shape=(1, 9, 9).
train_data = np.stack(train_data_list)[:, np.newaxis]

100%|██████████| 110000/110000 [00:00<00:00, 156600.61it/s]


In [5]:
# Column 1 holds the solutions, one string per game.
train_labels = sudoku_games[:, 1]

# Same per-string conversion as for the puzzles: characters -> 9x9 grid
# (the reshape only succeeds for strings of exactly 81 characters).
train_label_list = [np.array(list(data)).reshape((9, 9)) for data in tqdm(train_labels)]

# Stack to (N, 9, 9), then insert a singleton axis at position 1 -> (N, 1, 9, 9).
train_labels = np.stack(train_label_list)[:, np.newaxis]

100%|██████████| 110000/110000 [00:00<00:00, 151484.44it/s]


In [6]:
# Index at which the games are cut: everything before it is used for training,
# everything from it onwards is held out.
split_at = 100000

# WARNING: this cell overwrites train_data / train_labels with their truncated
# versions, so running it a second time (without re-running the parsing cells)
# produces an empty held-out set.
test_data, test_labels = train_data[split_at:], train_labels[split_at:]
train_data, train_labels = train_data[:split_at], train_labels[:split_at]



In [7]:
# Sanity-check the split: print the shape of each array, training pair first,
# then the held-out pair.
for split_array in (train_data, train_labels, test_data, test_labels):
    print(split_array.shape)

(100000, 1, 9, 9)
(100000, 1, 9, 9)
(10000, 1, 9, 9)
(10000, 1, 9, 9)


In [8]:
# Inspect the first training puzzle. Its '0' entries sit where train_labels[0]
# has a digit, so '0' presumably marks a blank cell.
train_data[0]

array([[['0', '0', '4', '3', '0', '0', '2', '0', '9'],
        ['0', '0', '5', '0', '0', '9', '0', '0', '1'],
        ['0', '7', '0', '0', '6', '0', '0', '4', '3'],
        ['0', '0', '6', '0', '0', '2', '0', '8', '7'],
        ['1', '9', '0', '0', '0', '7', '4', '0', '0'],
        ['0', '5', '0', '0', '8', '3', '0', '0', '0'],
        ['6', '0', '0', '0', '0', '0', '1', '0', '5'],
        ['0', '0', '3', '5', '0', '8', '6', '9', '0'],
        ['0', '4', '2', '9', '1', '0', '3', '0', '0']]], 
      dtype='|S1')

In [9]:
# Inspect the target grid paired with the first training puzzle.
train_labels[0]

array([[['8', '6', '4', '3', '7', '1', '2', '5', '9'],
        ['3', '2', '5', '8', '4', '9', '7', '6', '1'],
        ['9', '7', '1', '2', '6', '5', '8', '4', '3'],
        ['4', '3', '6', '1', '9', '2', '5', '8', '7'],
        ['1', '9', '8', '6', '5', '7', '4', '3', '2'],
        ['2', '5', '7', '4', '8', '3', '9', '1', '6'],
        ['6', '8', '9', '7', '3', '4', '1', '2', '5'],
        ['7', '1', '3', '5', '2', '8', '6', '9', '4'],
        ['5', '4', '2', '9', '1', '6', '3', '7', '8']]], 
      dtype='|S1')

In [41]:
# model = Sequential()
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same', input_shape=(1, 9, 9)))
# model.add(Convolution2D(64, 6, 6, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 9, 9, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 6, 6, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(1, 1, 1))

# model.compile(optimizer=Adam(lr=0.001),
#                 loss='mse')

# model.summary()

In [42]:
# model = Sequential()
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same', input_shape=(1, 9, 9)))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(512, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(1, 1, 1))

# model.compile(optimizer=Adam(lr=0.001),
#                 loss='mse')

# model.summary()

In [62]:
# model = Sequential()
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same', input_shape=(1, 9, 9)))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(64, 3, 3, activation='relu',border_mode='same'))
# model.add(Convolution2D(1, 1, 1))

# model.compile(optimizer=Adam(lr=0.001),
#                 loss='mse')

# model.summary()

In [63]:
# Fully convolutional network: 14 identical 3x3 "same"-padded convolutions with
# 128 filters each, followed by a 1x1 convolution down to a single channel, so
# the output keeps the (1, 9, 9) shape of the input grid.
conv_kwargs = dict(activation='relu', border_mode='same')

model = Sequential()

# Only the first layer declares the input shape.
model.add(Convolution2D(128, 3, 3, input_shape=(1, 9, 9), **conv_kwargs))

# The remaining 13 hidden layers are exact copies of each other.
for layer_idx in range(13):
    model.add(Convolution2D(128, 3, 3, **conv_kwargs))

# 1x1 projection to one value per cell; no activation argument is passed.
model.add(Convolution2D(1, 1, 1))

# Per-cell regression on the digit value: mean squared error, optimized with Adam.
model.compile(loss='mse', optimizer=Adam(lr=0.001))

model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_34 (Convolution2D) (None, 128, 9, 9)     1280        convolution2d_input_5[0][0]      
____________________________________________________________________________________________________
convolution2d_35 (Convolution2D) (None, 128, 9, 9)     147584      convolution2d_34[0][0]           
____________________________________________________________________________________________________
convolution2d_36 (Convolution2D) (None, 128, 9, 9)     147584      convolution2d_35[0][0]           
____________________________________________________________________________________________________
convolution2d_37 (Convolution2D) (None, 128, 9, 9)     147584      convolution2d_36[0][0]           
___________________________________________________________________________________________

In [26]:
# Load previously saved weights into the model.
# NOTE(review): this reads "sudoku-model.h5", while the save cell further down
# writes "sudoku-model-3.h5" — confirm the file exists and matches the
# architecture currently built before relying on it.
model.load_weights("sudoku-model.h5")

In [None]:
# Per-epoch results are written to ./training.log by the CSV logger.
csv_logger = callbacks.CSVLogger("./training.log")

# The held-out games serve as validation data during training.
validation_pair = (test_data, test_labels)

# Train for 50 epochs with mini-batches of 1024 games.
model.fit(
    train_data,
    train_labels,
    batch_size=1024,
    nb_epoch=50,
    validation_data=validation_pair,
    callbacks=[csv_logger],
)

Train on 100000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50

In [61]:
# Write the model's current weights to disk.
model.save_weights("sudoku-model-3.h5")

# Predictions and evaluation

In [None]:
# Run the network over the whole training set; the outputs are kept un-rounded here.
raw_train_preds = model.predict(train_data)
raw_train_preds.shape

In [68]:
# The labels are stored as single-character strings, so cast to float before comparing.
train_targets_flat = train_labels.flatten().astype(float)
raw_preds_flat = raw_train_preds.flatten().astype(float)

# Mean squared error of the un-rounded predictions on the training set.
mean_squared_error(train_targets_flat, raw_preds_flat)

1.6599508481715688

In [69]:
# Snap every predicted value to the nearest whole number (zero decimals).
rounded_train_preds = raw_train_preds.round()

In [70]:
# The labels are stored as single-character strings, so cast to float before comparing.
train_targets_flat = train_labels.flatten().astype(float)
rounded_preds_flat = rounded_train_preds.flatten().astype(float)

# Mean squared error of the rounded predictions on the training set.
mean_squared_error(train_targets_flat, rounded_preds_flat)

1.6968946913580247

In [71]:
# Take the first held-out puzzle and prepend a batch axis of length 1,
# since model.predict expects a batch of inputs.
test_example = test_data[0][np.newaxis]

In [72]:
# Confirm the single example now carries a leading batch axis.
test_example.shape

(1, 1, 9, 9)

In [73]:
# Run the network on the single held-out example.
predicted_data = model.predict(test_example)

In [74]:
# Show the raw (un-rounded) network output for that example.
predicted_data

array([[[[ 3.04801488,  7.77154541,  8.61554146,  2.6496489 ,  5.34771442,
           1.67092514,  4.27748632,  6.03880596,  4.45575523],
         [ 4.277915  ,  6.04037857,  3.09467077,  9.14618206,  3.0374732 ,
           6.96886349,  1.02108228,  4.629632  ,  8.04586887],
         [ 1.33454669,  3.97426677,  5.52914095,  4.80016041,  6.51669931,
           4.89872313,  4.46832991,  2.3653729 ,  9.14165306],
         [ 9.08875561,  6.98149538,  3.93642664,  3.26343513,  1.11207998,
           6.2200675 ,  5.59935284,  6.50753164,  3.70029664],
         [ 5.68225622,  4.51157999,  6.93451309,  8.10488224,  7.2634654 ,
           1.9959414 ,  6.85415411,  2.96953225,  1.14118385],
         [ 3.07481742,  3.67427015,  0.48225975,  5.07349443,  4.26970673,
           9.24265385,  7.1094389 ,  7.47328949,  5.62337875],
         [ 7.68987274,  8.89116573,  4.80247068,  5.43432236,  2.13826799,
           5.09976053,  7.65639257,  0.97299188,  3.33594918],
         [ 4.25982952,  1.03819728

In [75]:
# Target grid for the same held-out example, for comparison with the prediction.
test_labels[0]

array([[['7', '8', '9', '2', '5', '1', '3', '6', '4'],
        ['4', '6', '2', '9', '3', '7', '1', '5', '8'],
        ['1', '3', '5', '4', '6', '8', '7', '2', '9'],
        ['9', '7', '8', '3', '1', '6', '5', '4', '2'],
        ['5', '4', '6', '8', '7', '2', '9', '3', '1'],
        ['3', '2', '1', '5', '4', '9', '8', '7', '6'],
        ['8', '9', '4', '7', '2', '5', '6', '1', '3'],
        ['2', '1', '7', '6', '9', '3', '4', '8', '5'],
        ['6', '5', '3', '1', '8', '4', '2', '9', '7']]], 
      dtype='|S1')

In [76]:
# Round the prediction to whole numbers so it can be compared cell by cell
# with the target grid.
predicted_data.round()

array([[[[ 3.,  8.,  9.,  3.,  5.,  2.,  4.,  6.,  4.],
         [ 4.,  6.,  3.,  9.,  3.,  7.,  1.,  5.,  8.],
         [ 1.,  4.,  6.,  5.,  7.,  5.,  4.,  2.,  9.],
         [ 9.,  7.,  4.,  3.,  1.,  6.,  6.,  7.,  4.],
         [ 6.,  5.,  7.,  8.,  7.,  2.,  7.,  3.,  1.],
         [ 3.,  4.,  0.,  5.,  4.,  9.,  7.,  7.,  6.],
         [ 8.,  9.,  5.,  5.,  2.,  5.,  8.,  1.,  3.],
         [ 4.,  1.,  7.,  4.,  8.,  6.,  4.,  8.,  3.],
         [ 6.,  3.,  3.,  1.,  8.,  4.,  3.,  7.,  7.]]]], dtype=float32)

In [77]:
# The puzzle that was fed to the network for the single-example prediction (test_example).
test_data[0]

array([[['0', '8', '0', '0', '5', '0', '0', '6', '0'],
        ['4', '6', '0', '9', '0', '7', '1', '0', '8'],
        ['0', '0', '5', '0', '0', '0', '0', '2', '9'],
        ['9', '7', '0', '0', '0', '6', '5', '0', '0'],
        ['0', '0', '0', '8', '7', '2', '0', '3', '1'],
        ['3', '0', '0', '0', '4', '9', '0', '0', '0'],
        ['0', '0', '4', '0', '2', '5', '0', '0', '3'],
        ['0', '1', '0', '0', '0', '0', '4', '8', '0'],
        ['6', '0', '3', '1', '0', '0', '0', '0', '7']]], 
      dtype='|S1')