In [3]:
%pip install numpy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import numpy as np
import random
import pickle

class SudokuEnv:
    """Minimal 9x9 Sudoku environment with a reset/step interface.

    The puzzle is a 9x9 grid of integers where 0 marks an empty cell.
    An action is a ``(row, col, num)`` triple asking to write ``num`` into
    the cell at ``(row, col)``.

    Rewards handed out by :meth:`step`:
        * ``-1`` when the target cell is already filled or the move is invalid,
        * ``+1`` for a valid placement,
        * an additional ``+10`` when that placement completes the puzzle.

    Note that validity is only checked against the current row, column and
    3x3 box, so a move accepted here is not guaranteed to be part of the
    puzzle's actual solution.
    """

    def __init__(self, puzzle):
        """Store the starting puzzle and build the working grid from it."""
        self.puzzle = puzzle
        self.grid = np.array(puzzle)
        self.done = False

    def reset(self):
        """Restore the grid to the starting puzzle and return a snapshot of it."""
        self.grid = np.array(self.puzzle)
        self.done = False
        # Return a copy: step() mutates self.grid in place, and callers keep
        # the returned array around as "the state before the move".
        return self.grid.copy()

    def step(self, action):
        """Apply ``action`` and return ``(grid_snapshot, reward, done)``.

        The returned grid is a copy of the internal grid, so arrays returned
        by earlier calls are never modified by later steps.
        """
        row, col, num = action

        if self.grid[row, col] != 0:
            reward = -1  # Invalid action: cell already filled
        elif self._is_valid_move(row, col, num):
            self.grid[row, col] = num
            reward = 1  # Valid move
            if self._is_solved():
                reward += 10
                self.done = True
        else:
            reward = -1  # Invalid move

        return self.grid.copy(), reward, self.done

    def _is_valid_move(self, row, col, num):
        """Return True if ``num`` is a digit 1-9 absent from the row, column and box."""
        # Without this guard a value such as 10 would pass the membership
        # checks below and be written into the grid.
        if not 1 <= num <= 9:
            return False
        if num in self.grid[row, :] or num in self.grid[:, col]:
            return False
        box_row, box_col = 3 * (row // 3), 3 * (col // 3)
        if num in self.grid[box_row:box_row + 3, box_col:box_col + 3]:
            return False
        return True

    def _is_solved(self):
        """Return True when no cell is empty and every row/column/box is valid."""
        return bool(np.all(self.grid > 0)) and self._is_valid_grid()

    def _is_valid_grid(self):
        """Return True if every row, column and 3x3 box holds 9 distinct values."""
        def has_nine_distinct(cells):
            return len(set(cells.flatten().tolist())) == 9

        for i in range(9):
            if not has_nine_distinct(self.grid[i, :]):
                return False
            if not has_nine_distinct(self.grid[:, i]):
                return False
        for top in range(0, 9, 3):
            for left in range(0, 9, 3):
                if not has_nine_distinct(self.grid[top:top + 3, left:left + 3]):
                    return False
        return True


class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent for ``(row, col, num)`` actions.

    ``q_table`` maps a state key (the flattened grid as a tuple) to a dict of
    ``{action: q_value}``. Only actions that have been updated at least once
    are stored; an action's value starts at 0 the first time it is updated.
    """

    def __init__(self, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        """Create an agent with an empty Q-table and the given hyperparameters."""
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon

    def get_action(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a random action is returned. Otherwise
        the best known action is returned, unless the state has no recorded
        actions or its best recorded value is negative; in those cases a
        random action is returned instead. Actions that were never tried
        would start at 0, so a known action with a negative value is not
        actually the greedy choice.

        This method only reads the Q-table; it never adds entries to it.
        """
        if random.uniform(0, 1) < self.epsilon:
            return self._random_action()

        action_values = self.q_table.get(self._state_to_key(state))
        if not action_values:
            return self._random_action()

        best_action = max(action_values, key=action_values.get)
        if action_values[best_action] < 0:
            # Every tried action was penalised; prefer exploring over
            # replaying the least-bad known mistake.
            return self._random_action()
        return best_action

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for the transition that was observed.

        The bootstrap term is the largest recorded Q-value of ``next_state``
        (0 when nothing is recorded for it).
        """
        action_values = self.q_table.setdefault(self._state_to_key(state), {})
        current_q = action_values.get(action, 0)

        next_values = self.q_table.get(self._state_to_key(next_state), {})
        max_future_q = max(next_values.values(), default=0)

        td_target = reward + self.discount_factor * max_future_q
        action_values[action] = current_q + self.learning_rate * (td_target - current_q)

    def _state_to_key(self, state):
        """Convert a grid into a hashable tuple of plain Python ints."""
        # tolist() yields built-in ints, which hash and compare equal to the
        # NumPy integers used previously, so existing tables keep working.
        return tuple(np.asarray(state).ravel().tolist())

    def _random_action(self):
        """Draw a uniformly random ``(row, col, num)`` triple.

        Rows and columns are drawn from 0-8 and the digit from 1-9; no check
        is made that the chosen cell is empty.
        """
        row = random.randint(0, 8)
        col = random.randint(0, 8)
        num = random.randint(1, 9)
        return (row, col, num)

    def save(self, filepath):
        """Pickle the Q-table to ``filepath``."""
        with open(filepath, 'wb') as f:
            pickle.dump(self.q_table, f)

    def load(self, filepath):
        """Replace the Q-table with the one pickled at ``filepath``."""
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load Q-tables that were written by this code.
        with open(filepath, 'rb') as f:
            self.q_table = pickle.load(f)


# Main Training Loop
def train_agent(puzzle, episodes, save_path="q_table.pkl", max_steps=5000):
    """Train a fresh Q-learning agent on ``puzzle`` and pickle its Q-table.

    Args:
        puzzle: 9x9 grid (0 = empty cell) used to build the environment.
        episodes: Maximum number of training episodes.
        save_path: File the Q-table is written to after training.
        max_steps: Upper bound on steps per episode; ``None`` disables the
            bound. The environment only checks moves against the current
            row/column/box, so an accepted move can leave the grid in a state
            that can never be completed. Without a bound such an episode
            would loop forever.

    Training stops at the first episode that solves the puzzle. The Q-table
    is saved whether or not the puzzle was solved.
    """
    env = SudokuEnv(puzzle)
    agent = QLearningAgent()

    for episode in range(episodes):
        state = env.reset()
        # A puzzle with no empty cells is already solved; stepping would
        # only ever hit filled cells.
        done = bool(env._is_solved())
        step = 0

        while not done and (max_steps is None or step < max_steps):
            # Snapshot the pre-move grid: the environment may hand back its
            # live grid, which the step below mutates in place.
            state_before = state.copy()
            action = agent.get_action(state_before)
            next_state, reward, done = env.step(action)
            agent.update_q_value(state_before, action, reward, next_state)
            state = next_state
            step += 1

        if env._is_solved():
            print(f"Episode {episode+1}/{episodes} completed in {step} steps. Puzzle solved!")
            print("Solved Puzzle:")
            print(env.grid)
            break
        else:
            print(f"Episode {episode+1}/{episodes} completed. Puzzle not solved.")

    # Save the Q-table after training
    agent.save(save_path)
    print(f"Q-table saved to {save_path}")

def solve_puzzle_with_trained_agent(puzzle, q_table_path="q_table.pkl", max_steps=10000):
    """Load a pickled Q-table and let the agent attempt ``puzzle``.

    Args:
        puzzle: 9x9 grid (0 = empty cell) to solve.
        q_table_path: File containing a Q-table written by
            ``QLearningAgent.save``.
        max_steps: Upper bound on the number of actions tried; ``None``
            disables the bound. Without it the loop never ends once the grid
            reaches a state that cannot be completed.

    Prints the final grid, labelled as solved only when it really is.
    """
    env = SudokuEnv(puzzle)
    agent = QLearningAgent()
    agent.load(q_table_path)
    print(f"Q-table loaded from {q_table_path}")

    state = env.reset()
    # Handle a puzzle that is handed in already complete.
    done = bool(env._is_solved())
    steps = 0

    while not done and (max_steps is None or steps < max_steps):
        action = agent.get_action(state)
        state, _, done = env.step(action)
        steps += 1

    if done:
        print("Solved Puzzle:")
    else:
        print(f"Puzzle not solved within {max_steps} steps. Current grid:")
    print(env.grid)


# Example Sudoku puzzle (0 represents an empty cell). Thirteen cells are empty:
# four scattered singles plus the entire last row.
sample_puzzle = [
    [0, 3, 4, 6, 7, 8, 9, 1, 2],
    [6, 7, 2, 1, 9, 5, 3, 4, 8],
    [1, 9, 8, 3, 4, 2, 5, 6, 7],
    [0, 5, 9, 7, 6, 1, 4, 2, 3],
    [4, 2, 6, 8, 5, 3, 7, 9, 1],
    [7, 1, 3, 9, 2, 0, 8, 5, 6],
    [9, 6, 1, 5, 3, 7, 2, 8, 4],
    [2, 0, 7, 4, 1, 9, 6, 3, 5],
    [0, 0, 0, 0, 0, 0, 0, 0, 0]
]

# The agent draws its exploration moves from the stdlib `random` module, so
# seed it to make a Restart & Run All reproduce the same training run.
RANDOM_SEED = 42
Q_TABLE_PATH = "sudoku_q_table.pkl"
random.seed(RANDOM_SEED)

train_agent(sample_puzzle, episodes=1000, save_path=Q_TABLE_PATH)
solve_puzzle_with_trained_agent(sample_puzzle, q_table_path=Q_TABLE_PATH)
