Sudoku solver - I will test it on a CSV file of 80k+ examples that I found on Kaggle. I define classes for the overall grid, for each cell, and for each row, column and 3x3 square. The row, column and square classes all inherit from the group class.

In [None]:
import numpy as np
import pandas as pd
import time
from tqdm.notebook import tqdm
from google.colab import output

# grid class
class grid:
  """Square Sudoku board: dim*dim cells plus their row, column and square groups.

  Attributes:
    dim: side length of the board (9 for a standard puzzle).
    sqrt_dim: side length of one square block, int(sqrt(dim)).
    columns, rows, squares: lists holding dim group objects each.
    cells: flat, row-major list of dim**2 cell objects.
    solvable: starts True; written by the cell/group sanity checks.
  """

  def __init__(self,dim):
    self.dim = dim
    self.sqrt_dim = int(np.sqrt(self.dim))
    self.columns = []
    self.rows = []
    self.squares = []
    self.cells = []
    self.solvable = True

    # The (still empty) groups must exist before the cells, because every
    # cell looks up its row, column and square while it is constructed.
    for i in range(self.dim):
      self.columns.append(column(self,i))
      self.rows.append(row(self,i))
      self.squares.append(square(self,i))

    # Cells are numbered row-major from 0 to dim**2 - 1.
    for number in range(self.dim*self.dim):
      self.cells.append(cell(self,number))

    # Now that the cells exist, let every group collect its members.
    for groups in (self.columns, self.rows, self.squares):
      for i,g in enumerate(groups):
        groups[i] = g.find_cells()

  def render(self):
    """Print the board, with rules between square blocks.

    Empty (NaN) cells are printed as blanks. The layout is derived from
    dim and sqrt_dim, so it is not tied to the 9x9 case.
    """
    block = self.sqrt_dim
    width = len(str(self.dim))  # characters needed for the largest value
    rule = '+'.join(['-' * ((width + 1) * block)] * block)

    for i in range(self.dim):
      if i and i % block == 0:
        print(rule)
      for j in range(self.dim):
        if j and j % block == 0:
          print('|',end='')
        val = self.cells[i*self.dim+j].value
        text = ' ' if np.isnan(val) else str(val)
        print(text.rjust(width),end=' ')
      print()

  def to_string(self):
    """Return the cell values concatenated row-major, with '0' for empty cells."""
    return ''.join('0' if np.isnan(c.value) else str(int(c.value)) for c in self.cells)

  def solved(self):
    """Return True when every cell holds a value (no NaN left)."""
    return not any(np.isnan(c.value) for c in self.cells)

# cell class
class cell:
  """A single square of the board.

  Attributes:
    number: position in the grid's flat, row-major cell list (0 .. dim**2 - 1).
    grid: the owning grid.
    value: the assigned digit, or NaN while the cell is still empty.
    row_no, column_no, square_no: indices of the groups this cell belongs to.
    row, column, square: the group objects themselves.
    can_be: bool array, can_be[v-1] is True while v is still permitted.
    cant_be: bool array, cant_be[v-1] is True once v has been ruled out.
  """

  # np.nan is used for the default; the np.NaN alias was removed in NumPy 2.0.
  def __init__(self,grid,number,val=np.nan):

    self.number = number #cells are numbers from 0 to dim**2 - 1
    self.grid = grid
    self.value = val

    # each cell should have a reference to its square, row and column
    self.row_no, self.column_no = divmod(int(self.number), self.grid.dim)
    # squares are numbered row-major over the sqrt_dim x sqrt_dim blocks
    block = self.grid.sqrt_dim
    self.square_no = (self.row_no // block) * block + self.column_no // block

    self.row = self.grid.rows[self.row_no]
    self.column = self.grid.columns[self.column_no]
    self.square = self.grid.squares[self.square_no]

    self.can_be = np.full([self.grid.dim],True) #array of bools - initialize to all true
    self.cant_be = np.full([self.grid.dim],False) #array of bools - initialize to all false

  def set_value(self,value):
    """Assign value to the cell and collapse both masks onto it. Returns self."""
    value = int(value)
    self.value = value
    self.can_be = np.full([self.grid.dim],False)
    self.can_be[value-1] = True
    self.cant_be = np.full([self.grid.dim],True)
    self.cant_be[value-1] = False
    return self

  def _eliminate_seen(self):
    """Rule out every value already listed by this cell's column, row and square."""
    for g in (self.column, self.row, self.square):
      for v in g.contains:
        if not np.isnan(v):
          self.can_be[int(v)-1] = False
          self.cant_be[int(v)-1] = True

  def check(self):
    """Fill in an empty cell when exactly one value remains possible. Returns self."""
    if np.isnan(self.value):
      # Eliminate first, so a candidate that has just become stale is never
      # committed as the cell's value.
      self._eliminate_seen()
      candidates = np.flatnonzero(self.can_be & ~self.cant_be)
      if candidates.size == 1:
        self.set_value(int(candidates[0])+1)

    return self

  def sanity(self):
    """Mark the grid unsolvable when this cell has no permitted value left."""
    if not np.any(self.can_be & ~self.cant_be):
      self.grid.solvable = False

  def refresh(self):
    """Update the candidate masks of an empty cell from its groups. Returns self.

    A cell that already has a value is left alone: its groups list that very
    value, so eliminating would wipe the cell's own entry from the masks.
    """
    if np.isnan(self.value):
      self._eliminate_seen()

    return self

# row, column and square inherit from the group class
class group:
  """Base class for a set of dim cells that must hold distinct values.

  Attributes:
    number: index of this group within its list on the grid.
    grid: the owning grid.
    cells: the member cells (filled in by find_cells).
    contains: the distinct values already assigned within this group.
  """

  def __init__(self,grid,number):
    self.number = number
    self.grid = grid
    self.cells = []
    self.contains = [] # the list of numbers already assigned to this group

  def check(self):
    """Fill the last empty cell when only one number is missing. Returns self."""
    # if there is only one number missing from this group we can fill it in
    if len(self.contains) == self.grid.dim-1:
      empty_cells = [c for c in self.cells if np.isnan(c.value)]
      # With a duplicated value the group can list dim-1 numbers and still have
      # no empty cell; that case is left for sanity() to report.
      if len(empty_cells) == 1:
        present = {int(v) for v in self.contains}
        missing = [v for v in range(1,self.grid.dim+1) if v not in present]
        empty_cells[0].set_value(missing[0])
    return self

  def refresh(self):
    """Add newly assigned cell values to contains. Returns self."""
    # refresh the list of numbers already in this group
    for c in self.cells:
      if not np.isnan(c.value) and c.value not in self.contains:
        self.contains.append(c.value)
    return self

  def sanity(self):
    """Mark the grid unsolvable when a value occurs twice in this group.

    The check looks at the cell values directly because contains is
    de-duplicated. It only ever lowers the flag, so a consistent group does
    not hide a contradiction found elsewhere. An empty group is consistent.
    """
    assigned = [int(c.value) for c in self.cells if not np.isnan(c.value)]
    if len(assigned) != len(set(assigned)):
      self.grid.solvable = False

  def find_cells(self):
    """Collect the member cells; implemented by the subclasses. Returns self."""
    return self

class column(group):
  """Group made of the cells that share one column index."""

  def __init__(self,grid,number):
    super().__init__(grid,number)

  def find_cells(self):
    """Append this column's cells, top to bottom, and return self.

    Meant to be called once: every call appends the cells again.
    """
    stride = self.grid.dim  # cells are stored row-major, so a column steps by dim
    self.cells.extend(self.grid.cells[self.number + k*stride] for k in range(stride))
    return self

class row(group):
  """Group made of the cells that share one row index."""

  def __init__(self,grid,number):
    super().__init__(grid,number)

  def find_cells(self):
    """Append this row's cells, left to right, and return self.

    Meant to be called once: every call appends the cells again.
    """
    width = self.grid.dim
    first = self.number*width  # index of the row's leftmost cell in the flat list
    self.cells.extend(self.grid.cells[first + k] for k in range(width))
    return self

class square(group):
  """Group made of the cells whose square_no equals this group's number."""

  def __init__(self,grid,number):
    super().__init__(grid,number)

  def find_cells(self):
    """Append every grid cell that lies in this square and return self.

    Meant to be called once: every call appends the cells again.
    """
    # each cell already knows its square number, so just filter on it
    self.cells.extend(c for c in self.grid.cells if c.square_no == self.number)
    return self



In [None]:

def assign(grid,values):
  """Load a puzzle into grid and return the grid.

  values is a row-major sequence with one entry per cell: a digit string
  such as '0043...' or a sequence of integers. A zero (either '0' or 0) or
  a '.' marks an empty cell and is skipped; every other entry is passed to
  the matching cell's set_value as an int.
  """
  for i,v in enumerate(values):
    if isinstance(v,str) and v == '.':
      continue
    digit = int(v)
    # an integer 0 must be skipped as well, otherwise set_value(0) would
    # index position -1 of the candidate masks
    if digit != 0:
      grid.cells[i].set_value(digit)

  return grid

def iterate(grid):
  """Run one propagation pass over the grid and return it.

  Rows, then columns, then squares are each refreshed, checked and
  sanity-tested; afterwards every cell is checked, refreshed and
  sanity-tested. The object returned by each member's last call is stored
  back into its list.
  """
  # groups: refresh -> check -> sanity
  for members in (grid.rows, grid.columns, grid.squares):
    for idx, member in enumerate(members):
      member.refresh()
      members[idx] = member.check()
      member.sanity()

  # cells run in the opposite order: check -> refresh -> sanity
  for idx, one_cell in enumerate(grid.cells):
    one_cell.check()
    grid.cells[idx] = one_cell.refresh()
    one_cell.sanity()

  return grid

def solve_simple(grid,render=True):
  """Repeat propagation passes until the grid is solved, flagged unsolvable, or stalls.

  With render=True the board is redrawn (with a one second pause) before the
  first pass and after every later one. The loop keeps going while the
  board's string form still changes between passes or fewer than 20 passes
  have been counted.

  Returns:
    (grid, counter), where counter is the number of passes performed.
  """
  if render:
    output.clear()
    grid.render()
    time.sleep(1)

  grid = iterate(grid)
  counter = 1
  previous = grid.to_string()
  current = ''

  while current != previous or counter < 20:
    grid = iterate(grid)
    if render:
      time.sleep(1)
      output.clear()
      grid.render()

    # shift the window: the latest snapshot becomes the one to compare against
    previous, current = current, grid.to_string()
    counter += 1

    if grid.solved() or not grid.solvable:
      break

  return grid, counter

In [None]:
# Benchmark the simple solver on every puzzle in the CSV file and record,
# per puzzle: whether it was solved, the resulting board, whether it matches
# the reference solution, the number of passes and the elapsed seconds.
a = pd.read_csv('/content/drive/My Drive/Sudoku/sudoku.csv')
results = []
# this will take a while ...
for i in tqdm(range(a.shape[0])):
#for i in tqdm(range(1000)):
  my_grid = grid(9)
  my_grid = assign(my_grid,a['quizzes'][i])
  start_time = time.time()
  # no function named `solve` is defined in this notebook; solve_simple is
  # the one that returns the (grid, counter) pair unpacked here
  my_grid, counter = solve_simple(my_grid,False)
  elapsed = time.time() - start_time  # end minus start, so the value is positive
  solution = my_grid.to_string()
  results.append([i,my_grid.solved(),solution,a['solutions'][i] == solution,counter,elapsed])

# build the frame once instead of growing it row by row inside the loop
output_df = pd.DataFrame(results, columns = ['Number','Solved','Solution','Correct','Counter','Time'])

output_df.to_pickle('/content/drive/My Drive/Sudoku/Output')

np.all(output_df.Solved==True)
#output_df.Number[output_df.Solved==False]
# 3610 is an example that isn't solved

my_grid = grid(9)
my_grid = assign(my_grid,a['quizzes'][3610])
my_grid = iterate(my_grid)
output.clear()
my_grid.render()

# solve_full is defined in a later cell; run that cell first
my_grid = solve_full(my_grid)

for c in my_grid.squares:
  print(c.contains)

  6 8 |7 1   |9 4 3 
    4 |5 6   |8 7 1 
7 3 1 |8 4 9 |2 5 6 
------+------+------
3   5 |  8   |1 6 4 
  4 6 |    5 |3 8 7 
8 1 7 |3 6 4 |5 9 2 
------+------+------
6 8 2 |    1 |7 3 5 
4 7 3 |4 2 8 |6 1 9 
1 5 9 |6 3 7 |4 2 8 


In [None]:
def _grid_conflict(grid):
  """Return True when the grid holds a contradiction.

  A contradiction is an empty cell with no candidate left in can_be, or a
  row, column or square in which some value occurs more than once.
  """
  for c in grid.cells:
    if np.isnan(c.value) and not np.any(c.can_be):
      return True
  for g in grid.rows + grid.columns + grid.squares:
    assigned = [int(c.value) for c in g.cells if not np.isnan(c.value)]
    if len(assigned) != len(set(assigned)):
      return True
  return False


def _snapshot_grid(grid):
  """Copy the mutable solver state: cell values and masks, group contents, flag."""
  cell_state = [(c.value, c.can_be.copy(), c.cant_be.copy()) for c in grid.cells]
  group_state = [list(g.contains) for g in grid.rows + grid.columns + grid.squares]
  return cell_state, group_state, grid.solvable


def _restore_grid(grid, snapshot):
  """Put the grid back into the state captured by _snapshot_grid."""
  cell_state, group_state, solvable = snapshot
  for c, (value, can_be, cant_be) in zip(grid.cells, cell_state):
    c.value = value
    # fresh copies, so the snapshot itself stays reusable for the next guess
    c.can_be = can_be.copy()
    c.cant_be = cant_be.copy()
  for g, contains in zip(grid.rows + grid.columns + grid.squares, group_state):
    g.contains = list(contains)
  grid.solvable = solvable


def solve_full(grid,render=True):
  """Solve the grid in place, guessing with backtracking when needed.

  This version handles grids that the simple algorithm cannot finish. After
  propagating with solve_simple, an empty cell with the fewest candidates is
  chosen and its candidates are tried in random order. A guess that leads to
  a contradiction, or to no solution, is undone by restoring a snapshot
  taken before the guess, so it leaves no trace on the grid.

  Returns:
    The same grid object. It is solved with solvable set to True on success.
    Otherwise it is back in the state reached by propagation, with solvable
    set to False.
  """
  grid, _ = solve_simple(grid,render)

  # Contradictions are detected here directly from the cells, and a filled
  # board is only accepted when no group holds a duplicate.
  if _grid_conflict(grid):
    grid.solvable = False
    return grid
  if grid.solved():
    grid.solvable = True
    return grid

  # Branch on the empty cell with the fewest remaining candidates.
  open_cells = [c for c in grid.cells if np.isnan(c.value)]
  pivot_cell = min(open_cells, key=lambda c: int(np.sum(c.can_be)))
  options = np.flatnonzero(pivot_cell.can_be) + 1

  saved = _snapshot_grid(grid)
  for v in np.random.permutation(options):
    pivot_cell.set_value(int(v))
    solve_full(grid,render)
    if grid.solved() and not _grid_conflict(grid):
      grid.solvable = True
      return grid
    # the guess was wrong: undo everything it caused before trying the next one
    _restore_grid(grid, saved)

  # every candidate of the pivot cell failed, so this position has no solution
  grid.solvable = False
  return grid


    
