In [1]:
# Interactive
%matplotlib notebook
# Static
# %matplotlib inline

# Environment for this notebook
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import warnings
import IPython

# Set the style for the plots
sns.set()
plt.style.use('ggplot')
sns.set_style("darkgrid")
# Ignore warnings
warnings.filterwarnings('ignore')

In [3]:
def string_to_sudoku_board(puzzle: str, empty_cell: str = '0'):
    """
    Convert a Sudoku string into a 2D 9x9 list of integers (board).

    Args:
        puzzle (str): A string of length 81, containing
            digits 1-9 and one empty cell symbol.
        empty_cell (str): A string representing the empty
            celll symbol.

    Raises:
        TypeError: When `puzzle` or `empty_cell` are not
            strings
        ValueError: When `puzzle` length is not exactly 81,
            or when the unique digits are not exactly 1-9 plus
            the empty cell symbol (hence 10).

    Returns:
        list[list[int]]: An array-like 2D 9x9 list of integers
            representing the sudoku board puzzle.
    """
    # Check the type is correct
    if not isinstance(puzzle, str) or not isinstance(empty_cell, str):
        raise TypeError(
            'Both the puzzle and the empty cell must be strings.'
        )

    # Check if the string is exactly 81 characters (a 9x9 Sudoku board)
    if len(puzzle) != 81:
        raise ValueError(
            'The puzzle string must be exactly 81 characters long.'
        )

    # Find the unique values in the string
    unique_values = set(puzzle)
    # Record the valid values for sudoku
    valid_values = set('123456789')
    valid_values.add(empty_cell)

    # Check if unique set is subset of valid set and length is respected
    if len(list(unique_values)) > 10 or not (unique_values <= valid_values):
        raise ValueError(
            'The puzzle string must contain exactly 10 unique values:' +
            'digits 1-9 and one empty cell symbol. '
        )

    del unique_values, valid_values

    # Construct Board
    board = []
    step = 9
    for row in range(step):
        chunk = []
        for col in range(step):
            cell = puzzle[row * step + col]
            # Append value to row
            if cell == empty_cell:
                chunk.append(0)
            else:
                chunk.append(int(cell))

        board.append(chunk)

    return board


In [4]:
# Working folder, where this file is at
WORKING_DIR = os.getcwd()

In [6]:
# Find the right directory
DATA_DIR = os.path.join(os.path.dirname(WORKING_DIR), 'data')
DATA_DIR

'C:\\Users\\PayThePizzo\\Projects\\SudokuSolver\\data'

In [7]:
# Import the Dataset
df_path = os.path.join(DATA_DIR, 'sudoku_cluewise.csv')
df = pd.read_csv(df_path)

In [8]:
df.describe()

Unnamed: 0,clue_numbers
count,4000000.0
mean,48.5
std,18.47296
min,17.0
25%,32.75
50%,48.5
75%,64.25
max,80.0


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000000 entries, 0 to 3999999
Data columns (total 3 columns):
 #   Column        Dtype 
---  ------        ----- 
 0   quizzes       object
 1   solutions     object
 2   clue_numbers  int64 
dtypes: int64(1), object(2)
memory usage: 91.6+ MB


In [13]:
string_to_sudoku_board(df['quizzes'][86823])


[[6, 4, 7, 1, 3, 2, 5, 8, 9],
 [8, 5, 9, 4, 7, 6, 1, 2, 3],
 [2, 1, 3, 5, 9, 8, 4, 6, 7],
 [9, 6, 5, 2, 4, 7, 8, 3, 1],
 [7, 2, 4, 8, 1, 3, 6, 9, 5],
 [3, 8, 1, 6, 5, 9, 2, 7, 4],
 [5, 7, 6, 3, 2, 4, 9, 1, 8],
 [1, 9, 0, 0, 6, 5, 3, 4, 2],
 [4, 3, 2, 9, 8, 1, 7, 5, 6]]

False

In [35]:
if 0 in first_quiz:
    print(1)

TypeError: 'in <string>' requires string as left operand, not int