### This is a simple notebook about a classic U-Net model for sudoku puzzle solving.
The model is trained on a 1% sample (about 30,000 puzzles) of a dataset of 3 million sudoku puzzles and their solutions.
The model predicts the correct digit for about 40% of the cells (per-cell accuracy, not the share of fully solved puzzles).
Before running it, you will have to download the dataset from:
https://www.kaggle.com/datasets/radcliffe/3-million-sudoku-puzzles-with-ratings

We first define the U-NET model.

In [24]:
import tensorflow as tf
from tensorflow.keras import layers, models
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


def unet_model(input_size=(9, 9, 1)):
    """Build a small two-level U-Net that outputs per-cell digit probabilities.

    Args:
        input_size: Shape of a single input sample, without the batch
            dimension. Defaults to a 9x9 grid with one channel.

    Returns:
        An uncompiled Keras model whose last layer is a 1x1 convolution with
        10 softmax channels (one per digit class 0-9) for every grid cell.
    """
    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    grid_in = tf.keras.Input(input_size)

    # Encoder: for the default 9x9 input the two poolings give 9 -> 4 -> 2.
    enc1 = double_conv(grid_in, 16)
    down1 = layers.MaxPooling2D((2, 2))(enc1)

    enc2 = double_conv(down1, 32)
    down2 = layers.MaxPooling2D((2, 2))(enc2)

    # Bottleneck at the coarsest resolution.
    bottleneck = double_conv(down2, 64)

    # Decoder, level 2: stride-2 'same' transposed conv doubles the size (2 -> 4)
    # so it can be concatenated with the matching encoder features.
    up2 = layers.Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(bottleneck)
    merged2 = layers.concatenate([up2, enc2])
    dec2 = double_conv(merged2, 32)

    # Decoder, level 1: a 3x3 'valid' transposed conv with stride 2 maps
    # 4 -> (4 - 1) * 2 + 3 = 9, recovering the odd-sized grid that plain
    # doubling could not reach.
    up1 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='valid')(dec2)
    # Zero cropping removes nothing; the layer is kept as a placeholder for
    # input sizes where the skip tensor would need trimming.
    skip1 = layers.Cropping2D(cropping=((0, 0), (0, 0)))(enc1)
    merged1 = layers.concatenate([up1, skip1])
    dec1 = double_conv(merged1, 16)

    # Per-cell classification head over the 10 digit classes.
    probs = layers.Conv2D(10, (1, 1), activation='softmax')(dec1)

    return models.Model(inputs=[grid_in], outputs=[probs])

# Build the network once at module level and print its layer-by-layer summary;
# the listed output shapes are useful for checking the U-Net skip connections.
model = unet_model()
model.summary()



Then we can get the data, and preprocess it.
Once it is done we can train our model.

In [42]:
def preprocess_data(X, y):
    """Convert puzzle/solution strings into integer grids of shape (N, 9, 9, 1).

    Args:
        X: Iterable of puzzle strings; '.' marks an empty cell, every other
            character must be a digit.
        y: Iterable of solution strings made only of digits.

    Returns:
        A tuple ``(X, y)`` of integer arrays, each reshaped to (N, 9, 9, 1).
        Empty puzzle cells are encoded as 0.
    """
    # Puzzles: empty cells ('.') become 0, digits become their integer value.
    puzzle_rows = [[0 if ch == '.' else int(ch) for ch in puzzle] for puzzle in X]
    # Solutions: every character is expected to be a digit.
    solution_rows = [[int(ch) for ch in solution] for solution in y]

    puzzle_grids = np.array(puzzle_rows).reshape(-1, 9, 9)
    solution_grids = np.array(solution_rows).reshape(-1, 9, 9)

    # Append a trailing channel axis: (N, 9, 9) -> (N, 9, 9, 1).
    return puzzle_grids[..., np.newaxis], solution_grids[..., np.newaxis]

# Load 1 % of dataset: the callable marks every row whose index is not a
# multiple of 100 as skipped, so row 0 (the header line) and every 100th row
# after it are kept.
df = pd.read_csv("sudoku-3m.csv", skiprows=lambda i: i % 100 != 0)

# Assuming 'puzzle' and 'solution' columns exist
X = df['puzzle'].values
y = df['solution'].values

# Preprocess data: both arrays come back with shape (N, 9, 9, 1)
X, y = preprocess_data(X, y)

# Split dataset into training and testing (80 % / 20 %, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the inputs. NOTE(review): cell values are digits 0-9, so dividing by
# 255 squeezes them into [0, 9/255]; preprocess_input() uses the same factor,
# so the two must be changed together if the scaling is ever revised.
X_train = X_train / 255.0
X_test = X_test / 255.0  
# Switches on NumPy-style behaviour for TensorFlow tensors (global setting).
# NOTE(review): y_train / y_test are NumPy arrays at this point, so the
# reshapes below may not need it - confirm before removing.
tf.experimental.numpy.experimental_enable_numpy_behavior()
# Flatten each solution grid: (N, 9, 9, 1) -> (N, 81)
y_train = y_train.reshape(-1, 81)
y_test = y_test.reshape(-1, 81)

# One-hot encode the digits over 10 classes: (N, 81) -> (N, 81, 10)
num_classes = 10
y_train = tf.one_hot(y_train, num_classes).numpy()
y_test = tf.one_hot(y_test, num_classes).numpy()

# Restore the grid layout so the targets match the model's
# 10-channel per-cell softmax output: (N, 81, 10) -> (N, 9, 9, 10)
y_train = y_train.reshape(-1, 9, 9, 10) 
y_test = y_test.reshape(-1, 9, 9, 10)

# Per-cell categorical cross-entropy; the 'accuracy' metric is therefore the
# fraction of correctly predicted cells, not of fully solved puzzles.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  
              metrics=['accuracy'])
# validation_split holds out 20 % of the training samples for validation.
model.fit(X_train, y_train, batch_size=36, epochs=50, validation_split=0.2)
# Returns [loss, accuracy] measured on the held-out test set.
model.evaluate(X_test, y_test)

Epoch 1/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 6s 7ms/step - accuracy: 0.1181 - loss: 2.2049 - val_accuracy: 0.1623 - val_loss: 2.0718
Epoch 2/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.2081 - loss: 1.9694 - val_accuracy: 0.3095 - val_loss: 1.8236
Epoch 3/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.3161 - loss: 1.7984 - val_accuracy: 0.3175 - val_loss: 1.7853
Epoch 4/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.3349 - loss: 1.7426 - val_accuracy: 0.3746 - val_loss: 1.6920
Epoch 5/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.3858 - loss: 1.6661 - val_accuracy: 0.3935 - val_loss: 1.6134
Epoch 6/50
534/534 ━━━━━━━━━━━━━━━━━━━━ 3s 6ms/step - accuracy: 0.3987 - loss: 1.6050 - val_accuracy: 0.4024 - val_loss: 1.5760
Epoch 7/50
534/534 ... (output truncated)

[1.4867732524871826, 0.4186376929283142]

Finally we can use a simple program to test our model

In [43]:
def preprocess_input(puzzle_str):
    """Turn one 81-character puzzle string into a model-ready input batch.

    Args:
        puzzle_str: Puzzle as a string of 81 characters; '.' marks an empty
            cell, every other character must be a digit.

    Returns:
        A float array of shape (1, 9, 9, 1) holding the cell values divided
        by 255.0, with empty cells encoded as 0.
    """
    cells = [0 if ch == '.' else int(ch) for ch in puzzle_str]

    # Apply the same 1/255 scaling that the training inputs receive.
    scaled = np.array(cells) / 255.0

    # Single sample, single channel: (81,) -> (1, 9, 9, 1).
    return scaled.reshape(1, 9, 9, 1)

def postprocess_output(y_pred):
    """Collapse per-cell class probabilities into a 9x9 grid of digits.

    Args:
        y_pred: Model output for a single puzzle; the last axis holds the
            class scores of each cell.

    Returns:
        A (9, 9) integer array with the highest-scoring class index per cell.
    """
    # The most likely class along the last axis is the predicted digit.
    best_digit = np.argmax(y_pred, axis=-1)
    return best_digit.reshape(9, 9)


# Run the model trained above (still in memory) on one example puzzle;
# '.' marks an empty cell.
input_puzzle = '..3.2.6..9..3.5..1..18.64....81.29..7.......8..67.82....26.95..8..2.3..9..5.1.3..'
X_input = preprocess_input(input_puzzle)
y_pred = model.predict(X_input)

# Turn the per-cell class probabilities into a 9x9 grid of digits
solution = postprocess_output(y_pred)

# Show the puzzle as a 9x9 grid (empty cells as 0) next to the prediction
puzzle_cells = [0 if c == '.' else int(c) for c in input_puzzle]
print("Input Puzzle:")
print(np.array(puzzle_cells).reshape(9, 9))
print("\nPredicted Solution:")
print(solution)


1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 114ms/step
Input Puzzle:
[[0 0 3 0 2 0 6 0 0]
 [9 0 0 3 0 5 0 0 1]
 [0 0 1 8 0 6 4 0 0]
 [0 0 8 1 0 2 9 0 0]
 [7 0 0 0 0 0 0 0 8]
 [0 0 6 7 0 8 2 0 0]
 [0 0 2 6 0 9 5 0 0]
 [8 0 0 2 0 3 0 0 9]
 [0 0 5 0 1 0 3 0 0]]

Predicted Solution:
[[6 6 3 9 2 9 6 9 9]
 [9 7 6 3 7 5 1 7 1]
 [1 2 1 8 9 6 4 9 1]
 [2 6 8 1 4 2 9 6 2]
 [7 9 1 9 4 2 1 4 8]
 [3 9 6 7 1 8 2 2 1]
 [6 5 2 6 1 9 5 2 1]
 [8 6 9 2 7 3 1 6 9]
 [1 9 5 1 1 1 3 9 1]]


In [44]:
# Save the trained model to 'UNET_model.keras' (path relative to the
# current working directory)
model.save('UNET_model.keras')