# Build a Sudoku Solver using just an array of sudoku

## Import all the necessary libraries

In [3]:
import string
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.image as mpimg
import random

## Define a Sudoku Class

In [19]:
class sudoku():
    def __init__(self, sudoku_array) -> None:
        self.n = len(sudoku_array)
        self.sqrt_n = int(np.sqrt(self.n))
        self.sudoku_original = sudoku_array
        self.sudoku_array = np.copy(sudoku_array)
        self.all_allowed_values = np.arange(1,self.n+1,1)

        self.subgrids_array = self.get_subgrids()
        self.validate_n()
        # Get occupied values in each row and column
        self.rows_occupied = {i: [v for v in self.sudoku_array[i, :] if v > 0] for i in range(self.n)}
        self.cols_occupied = {i: [v for v in self.sudoku_array[:, i] if v > 0] for i in range(self.n)}
        self.rows_available = {i: [v for v in self.all_allowed_values if v not in self.rows_occupied[i]] for i in range(self.n)}
        self.cols_available = {i: [v for v in self.all_allowed_values if v not in self.cols_occupied[i]] for i in range(self.n)}
        # self.number_rows_availability = {i: [v for v in self.sudoku_array[i, :] if v == i] for i in range(self.n)}
        self.available_cells = list(zip(*np.where(self.sudoku_array == 0)))

    def validate_n(self):
        if self.sqrt_n ** 2 != self.n:
            raise ValueError(f"Grid size {self.n} is not a perfect square.")
        if len(self.sudoku_array[:,0]) != self.n:
            raise ValueError(f"Grid size {self.n} is not a perfect square.")
        # for subgrid_num in range(len(self.subgrids_array)):
        #     subgrid_values = self.subgrids_array[subgrid_num].flatten()
        #     if len(set(subgrid_values)) != len([v for v in subgrid_values if v > 0]):
        #         raise ValueError(f"Subgrid {subgrid_num} contains duplicate values.")
        
    def update_availability(self, x, y):
        self.sudoku_array = self.sudoku_array
        self.subgrids_array = self.get_subgrids()
        # Get occupied values in each row and column
        self.rows_occupied[x] = {i: [v for v in self.sudoku_array[i, :] if v > 0] for i in range(self.n)}
        self.cols_occupied[y] = {i: [v for v in self.sudoku_array[:, i] if v > 0] for i in range(self.n)}
        self.rows_available[x] = {i: [v for v in self.all_allowed_values if v not in self.rows_occupied[i]] for i in range(self.n)}
        self.cols_available[y] = {i: [v for v in self.all_allowed_values if v not in self.cols_occupied[i]] for i in range(self.n)}
        self.available_cells = list(zip(*np.where(self.sudoku_array == 0)))
    
    def insert_value(self, row, col, value):
        self.sudoku_array[row][col] = value
        self.update_availability(row, col)
        return print(f"Inserted {value} in row = {row} and column = {col}")

    
    # Function to cut the grid into nxn subgrids
    def get_subgrids(self):
        subgrids = []
        for row in range(0, self.n, self.sqrt_n):    # Loop over rows in steps of n
            for col in range(0, self.n, self.sqrt_n):  # Loop over columns in steps of n
                # Slice the nxn subgrid
                subgrid = self.sudoku_array[row:row+self.sqrt_n, col:col+self.sqrt_n]
                subgrids.append(subgrid)
        return subgrids
    
    def identify_subgrid(self, row, col):
        # Determine the row and column of the subgrid
        subgrid_row = row // 3
        subgrid_col = col // 3
        
        # Return the subgrid number (for visualization, numbering from 1 to 9)
        subgrid_number = subgrid_row * 3 + subgrid_col
        
        return subgrid_number

    def subgrid_features(self, row, col):
        subgrid_num = self.identify_subgrid(row, col)
        occupied_integers = [i for i in self.subgrids_array[subgrid_num].flatten() if i > 0]
        available_integers = [i for i in self.all_allowed_values if i not in occupied_integers]
        return occupied_integers, available_integers
    
    def solve_cell_wise(self, row, col):
        cell_row_available = [i for i in self.rows_available[row]]
        cell_col_available = [i for i in self.cols_available[col]]
        cell_subgrid_available = self.subgrid_features(row, col)[1]
        possible_values = list(set(cell_row_available).intersection(cell_col_available).intersection(cell_subgrid_available))
        if len(possible_values) == 1:
            self.insert_value(row, col, possible_values[0])
        return False
    
    def subgrid_wise(self, grid_iter):
        row_idx = (grid_iter-1) // self.sqrt_n
        col_idx = (grid_iter-1) % self.sqrt_n
        list_rows = np.arange(row_idx*self.sqrt_n, row_idx*self.sqrt_n+self.sqrt_n)
        list_columns = np.arange(col_idx*self.sqrt_n, col_idx*self.sqrt_n+self.sqrt_n)
        list_values_to_be_filled_in = [i for i in self.all_allowed_values if i not in self.subgrids_array[grid_iter].flatten()]
        for num in list_values_to_be_filled_in:
            list_available_moves = []
            for i in list_rows:
                if num in self.rows_available[i]:
                    for j in list_columns:
                        if num in self.rows_available[j]:
                            if self.sudoku_array[i][j] > 0:
                                pass
                            else:
                                list_available_moves.append([i,j])
                        else:
                            pass
                else:
                    pass
            if len(list_available_moves) == 1:
                self.insert_value(i, j, num)
        return print(f"Did iteration for Grid number: {grid_iter}")

        
        # # cell_row_available = [i for i in self.rows_available[row]]
        # # cell_col_available = [i for i in self.cols_available[col]]
        # # cell_subgrid_available = self.subgrid_features(row, col)[1]
        # return list(set(cell_row_available).intersection(cell_col_available).intersection(cell_subgrid_available))
    
    def print_sudoku(self):
        for i in range(self.n):
            if i % self.sqrt_n == 0 and i != 0:
                print("-" * self.sqrt_n*8)  # Print horizontal line after every 3rd row
            
            for j in range(self.n):
                if j % self.sqrt_n == 0 and j != 0:
                    print(" | ", end="")  # Print vertical line after every 3rd column
                if self.sudoku_array[i][j] == 0:
                    cell_value = "."
                else:
                    cell_value = self.sudoku_array[i][j]
                # Print the cell value, or a dot for empty cells (0)
                if j == self.n-1:  # End of the row, print the value and a newline
                    print(cell_value)
                else:
                    print(f"{cell_value} ", end="")

    def solve_sudoku(self):
        progress = True
        while progress:
            progress = False
            for row, col in self.available_cells:
                if self.solve_cell_wise(row, col):
                    progress = True  # If we solve at least one cell, continue solving

        if len(self.available_cells) > 0:
            print("Couldn't fully solve the Sudoku.")
        else:
            print("Sudoku solved!")

## Use an image and load it to generate the Sudoku grid

In [5]:
import cv2

# Read the image
img = cv2.imread("sudoku_1.png")

import cv2
import numpy as np
import pytesseract

# Load the image
# image = cv2.imread("path_to_sudoku_image.jpg")

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Gaussian blur
blur = cv2.GaussianBlur(gray, (7, 7), 0)

# Adaptive thresholding to get a binary image
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Find contours to detect the Sudoku grid
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Sort the contours to find the largest contour which should be the grid
contours = sorted(contours, key=cv2.contourArea, reverse=True)

# Draw a rectangle around the largest contour (Sudoku grid)
sudoku_contour = contours[0]
cv2.drawContours(img, [sudoku_contour], -1, (0, 255, 0), 3)

# Show the grid
cv2.imshow('Detected Sudoku Grid', img)
cv2.waitKey(0)
cv2.destroyAllWindows()


# Assume the grid is 450x450 pixels
grid_size = 450
cell_size = grid_size // 9  # Size of each cell (e.g., 50x50 pixels for a 450x450 grid)

# Resize the image to a fixed size (450x450)
grid = cv2.resize(gray, (grid_size, grid_size))

# Initialize an empty 9x9 Sudoku array
sudoku_array = np.zeros((9, 9), dtype=int)

# Loop over each cell and extract the digit
for i in range(9):
    for j in range(9):
        # Extract the cell
        cell = grid[i*cell_size:(i+1)*cell_size, j*cell_size:(j+1)*cell_size]
        
        # Preprocess the cell for OCR (Thresholding)
        cell = cv2.adaptiveThreshold(cell, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
        
        # Perform OCR using Tesseract
        digit = pytesseract.image_to_string(cell, config='--psm 10 digits')
        
        # If the OCR detects a digit, update the array
        if digit.isdigit():
            sudoku_array[i, j] = int(digit)

# Print the resulting Sudoku array
print(sudoku_array)


[[0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]]


In [6]:
# Loop over each cell and extract the digit
for i in range(9):
    for j in range(9):
        # Extract the cell
        cell = grid[i*cell_size:(i+1)*cell_size, j*cell_size:(j+1)*cell_size]
        
        # Preprocess the cell for OCR
        cell = cv2.resize(cell, (50, 50))  # Resize for better OCR readability
        cell = cv2.GaussianBlur(cell, (3, 3), 0)
        cell = cv2.adaptiveThreshold(cell, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        # Show each cell to make sure they are correctly processed
        cv2.imshow(f'Cell ({i}, {j})', cell)
        cv2.waitKey(500)  # Display for 500ms for debugging
        cv2.destroyAllWindows()


In [7]:
# Loop over each cell and extract the digit
for i in range(9):
    for j in range(9):
        # Extract the cell
        cell = grid[i*cell_size:(i+1)*cell_size, j*cell_size:(j+1)*cell_size]
        
        # Preprocess the cell for OCR (Resize, Blur, Threshold)
        cell = cv2.resize(cell, (50, 50))  # Resize for better OCR readability
        cell = cv2.GaussianBlur(cell, (3, 3), 0)
        cell = cv2.adaptiveThreshold(cell, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

        # Apply OCR to extract the digit
        digit = pytesseract.image_to_string(cell, config='--psm 10 -c tessedit_char_whitelist=0123456789')

        # If the OCR detects a digit, update the array
        if digit.strip().isdigit():
            sudoku_array[i, j] = int(digit.strip())
        else:
            sudoku_array[i, j] = 0  # Assign zero if no digit is detected or OCR fails

# Print the resulting Sudoku array
print(sudoku_array)


[[5 0 0 0 3 0 0 0 0]
 [0 0 0 2 0 0 4 7 5]
 [4 0 0 0 0 0 0 0 0]
 [0 0 0 2 0 0 7 0 3]
 [0 0 0 0 8 0 0 0 0]
 [3 0 0 0 5 9 2 0 6]
 [0 0 0 0 0 0 0 4 7]
 [3 0 0 4 3 0 0 6 0]
 [0 0 0 0 2 8 0 9 1]]


## Model configuration: Give a sudoku grid as an input

In [20]:
sudoku_grid = np.array([
    [5, 3, 0, 0, 7, 0, 0, 0, 0],
    [6, 0, 0, 1, 9, 5, 0, 0, 0],
    [0, 9, 8, 0, 0, 0, 0, 6, 0],
    [8, 0, 0, 0, 6, 0, 0, 0, 3],
    [4, 0, 0, 8, 0, 3, 0, 0, 1],
    [7, 0, 0, 0, 2, 0, 0, 0, 6],
    [0, 6, 0, 0, 0, 0, 2, 8, 0],
    [0, 0, 0, 4, 1, 9, 0, 0, 5],
    [0, 0, 0, 0, 8, 0, 0, 7, 9]
])

In [21]:
n_sudoku = sudoku(sudoku_grid)

In [22]:
n_sudoku.print_sudoku()

5 3 .  | . 7 .  | . . .
6 . .  | 1 9 5  | . . .
. 9 8  | . . .  | . 6 .
------------------------
8 . .  | . 6 .  | . . 3
4 . .  | 8 . 3  | . . 1
7 . .  | . 2 .  | . . 6
------------------------
. 6 .  | . . .  | 2 8 .
. . .  | 4 1 9  | . . 5
. . .  | . 8 .  | . 7 9


In [24]:
n_sudoku.print_sudoku()

5 3 .  | . 7 .  | . . .
6 . .  | 1 9 5  | . . .
. 9 8  | . . .  | . 6 .
------------------------
8 . .  | . 6 .  | . . 3
4 . .  | 8 5 3  | . . 1
7 . .  | 9 2 .  | . . 6
------------------------
. 6 .  | . . 7  | 2 8 4
. . .  | 4 1 9  | . 3 5
. . .  | . 8 .  | . 7 9


In [23]:
n_sudoku.solve_sudoku()

Inserted 5 in row = 4 and column = 4
Inserted 9 in row = 5 and column = 3
Inserted 7 in row = 6 and column = 5
Inserted 4 in row = 6 and column = 8
Inserted 3 in row = 7 and column = 7
Couldn't fully solve the Sudoku.


In [29]:
(8 - 1) // 3

2

In [39]:
n = 9
row_idx = (n-1) // 3
col_idx = (n-1) % 3
get_list_rows = np.arange(row_idx*3, row_idx*3+3)
get_list_columns = np.arange(col_idx*3, col_idx*3+3)

In [40]:
get_list_rows

array([6, 7, 8])

In [41]:
get_list_columns

array([6, 7, 8])