In [141]:
import sqlite3
import numpy as np
import pandas as pd
from IPython.display import display, HTML

# SQLite database file passed to load_sudokus(); being a relative path, it is
# resolved against the kernel's current working directory.
DB = '../data/puzzler.db'

In [227]:
def load_sudokus(path):
    """Load the ``sudoku`` table of a SQLite database into a dataset dict.

    Every row is read with ``SELECT *``. Column 2 of a row is taken as the
    difficulty label and columns 3 onwards as numeric features; columns 0-1
    are ignored.

    Parameters
    ----------
    path : str
        Location of the SQLite database file.

    Returns
    -------
    dict
        ``data``          -- the first 10 feature columns of every row.
        ``data_cell``     -- feature column 0 followed by every column after
                             the first 10 (feature columns 1-9 removed).
        ``data_bool``     -- ``data`` clipped to the range [0, 1].
        ``target``        -- difficulty of every row encoded as an int 0-4.
        ``target_names``  -- difficulty labels ordered by their code.
        ``feature_names`` -- table column names matching ``data``.
        For an empty table the arrays are empty but keep their column count.

    Raises
    ------
    KeyError
        If a row carries a difficulty label outside the five known ones.
    sqlite3.Error
        If the query fails (for example, the ``sudoku`` table is missing).
    """
    # Single source of truth for the label <-> code mapping.
    levels = {'Ultra Easy': 0, 'Easy': 1, 'Medium': 2, 'Hard': 3, 'Diabolical': 4}

    conn = sqlite3.connect(path)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM "sudoku";')
        rows = cursor.fetchall()
        names = [column[0] for column in cursor.description]
    finally:
        # Always release the database handle, even when the query fails.
        conn.close()

    if rows:
        data_raw = np.array([row[3:] for row in rows])
    else:
        # np.array([]) would be 1-D and break the column slicing below, so
        # build an empty matrix with the table's feature-column count instead.
        data_raw = np.empty((0, max(len(names) - 3, 0)), dtype=int)

    return {
        'data': data_raw[:, :10],
        'data_cell': np.delete(data_raw, np.s_[1:10], 1),
        'data_bool': np.clip(data_raw[:, :10], 0, 1),
        'target': np.array([levels[row[2]] for row in rows], dtype=int),
        'target_names': np.array(sorted(levels, key=levels.get)),
        'feature_names': names[3:13],
    }

dataset = load_sudokus(DB)

# Sanity check: first 10 entries of each per-row array ...
for preview_key in ('data', 'data_cell', 'data_bool', 'target'):
    print(dataset[preview_key][:10])

# ... followed by the two name lists in full.
for preview_key in ('target_names', 'feature_names'):
    print(dataset[preview_key])


[[ 29 138  23   1   0   0   0   0   0   0]
 [ 31 124  23   0   0   0   0   0   0   0]
 [ 28 127  27   0   1   0   0   3   0   0]
 [ 27 132  25   4   1   0   0   0   2   0]
 [ 33 126   0   0   0   0   0   0   0   0]
 [ 32 116   0   0   0   0   0   0   0   0]
 [ 32  86  45   0   0   0   0   0   0   0]
 [ 30 117  28   3   0   0   0   0   0   0]
 [ 31  95  54   0   0   0   0   0   0   0]
 [ 28 120  39   0   0   0   0   3   0   0]]
[[29 35 15  2  0  0  0  0  0  0]
 [31 37 13  0  0  0  0  0  0  0]
 [28 35 11  0  3  0  0  4  0  0]
 [27 23 10 11  3  0  0  0  7  0]
 [33 48  0  0  0  0  0  0  0  0]
 [32 49  0  0  0  0  0  0  0  0]
 [32 23 26  0  0  0  0  0  0  0]
 [30 32 13  6  0  0  0  0  0  0]
 [31 23 27  0  0  0  0  0  0  0]
 [28 26 23  0  0  0  0  4  0  0]]
[[1 1 1 1 0 0 0 0 0 0]
 [1 1 1 0 0 0 0 0 0 0]
 [1 1 1 0 1 0 0 1 0 0]
 [1 1 1 1 1 0 0 0 1 0]
 [1 1 0 0 0 0 0 0 0 0]
 [1 1 0 0 0 0 0 0 0 0]
 [1 1 1 0 0 0 0 0 0 0]
 [1 1 1 1 0 0 0 0 0 0]
 [1 1 1 0 0 0 0 0 0 0]
 [1 1 1 0 0 0 0 1 0 0]]
[3 2 3 

In [211]:
from sklearn.model_selection import train_test_split
# Hold out part of the samples for testing (train_test_split's default split
# size); random_state=0 fixes the shuffle so every run yields the same split.
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'], dataset['target'], random_state=0)

In [212]:
# Report the dimensions of the training features and labels.
print("X_train shape: %s" % (X_train.shape,))
print("y_train shape: %s" % (y_train.shape,))

X_train shape: (2077, 10)
y_train shape: (2077,)


In [213]:
# Report the dimensions of the held-out features and labels.
print("X_test shape: %s" % (X_test.shape,))
print("y_test shape: %s" % (y_test.shape,))

X_test shape: (693, 10)
y_test shape: (693,)


In [214]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier using only the single closest training
# sample (n_neighbors=1) to decide each prediction.
knn = KNeighborsClassifier(n_neighbors=1)

In [215]:
# Fit on the training split; as the last expression of the cell, the value
# returned by fit() is echoed as the cell output.
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

In [217]:
# Predict a difficulty code for every held-out sample and show the raw codes.
y_pred = knn.predict(X_test)
print("Test set predictions:\n %s" % (y_pred,))

Test set predictions:
 [3 2 2 4 2 4 1 1 4 2 3 2 3 4 2 0 2 1 3 2 3 0 2 0 3 3 0 2 3 2 4 0 3 0 1 0 2
 1 4 0 2 3 3 4 3 2 4 3 0 2 3 3 2 4 3 2 2 3 0 2 0 0 3 3 0 2 4 0 4 0 3 3 2 2
 0 3 2 2 1 2 3 2 3 3 3 3 3 2 2 3 4 4 3 0 1 3 3 1 2 2 3 2 2 3 1 3 2 3 3 2 0
 3 1 3 2 1 0 0 3 3 4 2 3 2 3 2 2 3 2 2 1 0 1 3 4 2 1 1 2 1 3 2 0 0 2 1 2 1
 3 2 4 2 3 3 3 3 2 3 4 3 2 4 2 2 3 2 3 3 4 1 3 1 3 3 1 2 0 2 3 3 3 4 0 3 3
 2 2 1 3 3 1 3 3 1 4 3 3 0 1 3 2 3 3 2 2 3 1 3 2 3 1 3 3 1 2 3 2 3 0 3 0 2
 3 4 1 4 4 2 3 1 2 1 3 1 3 2 4 4 4 3 2 2 2 3 4 1 2 0 0 2 4 3 4 0 2 1 3 3 0
 1 4 1 0 3 2 2 1 1 3 3 2 3 3 3 1 0 2 3 1 0 4 2 2 1 1 2 1 2 0 0 3 4 1 3 1 0
 1 3 2 4 2 2 3 3 3 3 0 2 3 0 0 1 1 4 3 4 3 1 2 4 3 2 4 1 3 3 3 1 3 2 1 1 0
 1 3 3 3 3 2 3 1 4 1 2 1 4 3 2 0 2 4 0 2 0 2 3 0 1 3 3 4 2 4 2 3 4 2 1 0 1
 3 2 2 3 2 4 3 3 0 0 2 3 3 2 0 3 1 2 3 0 0 1 4 1 2 3 2 2 2 1 3 0 4 1 4 3 3
 2 1 2 3 0 2 3 2 3 0 1 2 2 3 0 1 0 4 2 3 2 2 0 2 4 3 0 2 1 4 0 4 2 2 4 4 1
 2 0 2 3 3 3 3 3 1 2 4 4 1 2 1 3 2 2 4 1 2 4 2 3 3 1 0 4 1 2 2 2 4 4 0 1 1
 1

In [218]:
# Accuracy computed by hand: fraction of predictions equal to the true label.
print("Test set score: %.2f" % np.mean(y_pred == y_test))

Test set score: 0.85


In [219]:
# Same metric obtained through the estimator's own score() method.
print("Test set score: %.2f" % knn.score(X_test, y_test))

Test set score: 0.85


In [233]:
# Second experiment: same split settings, but on the 'data_cell' feature
# matrix and with a 7-neighbour vote. Note that this rebinds X_train, X_test,
# y_train, y_test and knn from the earlier cells.
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data_cell'],
    dataset['target'],
    random_state=0,
)

# fit() returns the estimator itself, so construction and training can chain.
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

print("Test set score: %.2f" % knn.score(X_test, y_test))
print(dataset['data_cell'])

Test set score: 0.92
[[29 35 15 ...,  0  0  0]
 [31 37 13 ...,  0  0  0]
 [28 35 11 ...,  4  0  0]
 ..., 
 [29 27 21 ...,  0  0  0]
 [28 26  4 ...,  0  0  0]
 [35 27 19 ...,  0  0  0]]
