# Assignment 3: Tree-of-Thoughts



In this assignment, you will write a program to solve Sudoku using GPT and Tree-of-Thoughts.

## What is Sudoku

Sudoku is a logic-based puzzle in which the goal is to fill a grid so that no number repeats within any row, column, or region. In this assignment we focus on 4x4 Sudoku, where every row, every column, and every 2x2 region must contain each of the digits 1–4 exactly once. You will use a Tree-of-Thoughts approach to explore candidate fillings systematically until the grid is completed correctly.

![Sudoku Puzzle](https://www.sudokuweb.org/wp-content/uploads/2013/04/sudoku-kids-4x4-10-150x150.png)

You can try a 4x4 Sudoku game here:
* https://www.sudokuweb.org/

## Environment Setup

In [None]:
# Install langchain
!pip install -qU langchain

In [None]:
# Install langchain-openai
!pip install -qU langchain-openai

### Set API key

In [None]:
# Set API key
# Prefer the OPENAI_API_KEY environment variable so the real secret does not
# have to be saved inside the notebook. When the variable is unset or empty,
# fall back to the placeholder below, which must then be replaced by hand.
import os

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or "your_api_key_here"

## Prepare Language Models

You can choose which language model to use for thought generation.

You can also adjust the temperature of each model.

In [None]:
# Prepare model
from langchain_openai import ChatOpenAI

# Settings shared by all three chat models; only the sampling temperature differs.
_shared_llm_settings = {
    "model": "gpt-4o-mini",
    "api_key": OPENAI_API_KEY,
    "max_tokens": 1024,
}

low_temperature_llm = ChatOpenAI(temperature=0.0, **_shared_llm_settings)
mid_temperature_llm = ChatOpenAI(temperature=0.5, **_shared_llm_settings)
high_temperature_llm = ChatOpenAI(temperature=1.0, **_shared_llm_settings)

## Using Structured Output in LangChain

In [None]:
from pydantic import BaseModel, Field

# Minimal example schema for `with_structured_output`: the reply is returned as
# a Joke instance whose `setup` and `punchline` attributes are printed below.
# NOTE(review): the docstring and the Field descriptions are presumably sent to
# the model as part of the schema - confirm in the LangChain docs before
# rewording them.
class Joke(BaseModel):
    """Joke to tell user."""

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")


structured_llm = mid_temperature_llm.with_structured_output(Joke)

joke = structured_llm.invoke("Tell me a joke about cats")

In [None]:
joke

In [None]:
print(joke.setup)
print(joke.punchline)

## Test GPT-4o-mini without Tree-of-Thoughts

### Define Prompt Templates

In [None]:
from langchain_core.prompts import PromptTemplate

In [None]:
# Baseline prompt: asks the model to solve the whole puzzle in a single step,
# without any tree search.
# `{game_status}` is the only input variable; the doubled braces `{{` / `}}`
# are escapes that render as literal braces in the final prompt.
# NOTE(review): the sample output uses single quotes, which is not strict JSON;
# consider double quotes if the model's raw text is ever parsed directly.
solve_prompt = PromptTemplate(
    input_variables=["game_status"],
    template="""
    You are a Sudoku solver. Your goal is to solve the 4x4 sudoku puzzle.
    The game includes the grid with numbers and empty cells '*'.
    Respond in JSON with `answer` key which value is an answer for the puzzle.

    Game:
    {game_status}

    Solve the puzzle.

    Output JSON:
    {{
      'answer': 'answer for the given puzzle'
    }}

    """
)

### Define Class for the Answer

In [None]:
from pydantic import BaseModel, Field

# Structured-output schema for the baseline (non-ToT) solver: a single string
# field holding the model's answer to the puzzle.
# NOTE(review): the docstring and the Field description are presumably sent to
# the model as part of the schema - confirm before rewording them.
class Answer(BaseModel):
    """Answer for the given puzzle"""

    answer: str = Field(description="answer for the given puzzle")

### Run Answer Chain

In [None]:
answer_chain = solve_prompt | mid_temperature_llm.with_structured_output(Answer)

In [None]:
answer_chain.invoke({"game_status": "[['*', '*', '*', '*'], ['*', '*', 2, '*'], ['*', 1, 3, '*'], [2, 3, '*', 4]]"})

## TODO: Sudoku Solver with Tree-of-Thoughts

### Define propose prompt for the next thought

In [None]:
# TODO(student): fill in the PromptTemplate arguments.
# The cells below invoke the chain with {"game_status": ...}, so the template
# is expected to use a `game_status` input variable (as `solve_prompt` does).
propose_prompt = PromptTemplate(
    ##############################
    #########    TODO   ##########
    ##############################
)

### Define class for the structured output

In [None]:
# TODO(student): declare the fields of the structured output.
# `extract_proposals` reads `response.next_guesses`, and `solve_sudoku_tot`
# parses every proposal with `ast.literal_eval`, so each guess is presumably a
# string holding a Python-literal 4x4 grid - confirm against your prompt.
class Guess(BaseModel):
    ##############################
    #########    TODO   ##########
    ##############################

In [None]:
propose_chain = propose_prompt | mid_temperature_llm.with_structured_output(Guess) # You can change the llm model

### Util function for extracting proposals from the LLM response

Using this helper is optional.

In [None]:
def extract_proposals(response):
  """Extracts proposals from the response of the propose_chain.

  Args:
    response: The structured output of the chain. Either an object exposing a
      `next_guesses` attribute or a dict with a `next_guesses` key.

  Returns:
    The value of `next_guesses`, or an empty list when the response is None,
    lacks `next_guesses`, or carries None there. Failures are reported with a
    printed message instead of being raised.
  """
  try:
    if isinstance(response, dict):
      proposals = response["next_guesses"]
    else:
      # Attribute access raises AttributeError (e.g. when response is None),
      # so it must be caught alongside KeyError/TypeError.
      proposals = response.next_guesses
  except (AttributeError, KeyError, TypeError):
    proposals = None

  if proposals is None:
    print("Failed to extract proposals from response:", response)
    return []
  return proposals

In [None]:
response = propose_chain.invoke({"game_status": "[['*', '*', '*', '*'], [2, '*', '*', '*'], [4, '*', '*', 3], [3, '*', 4, 2]]"})
extract_proposals(response)

### Check whether the Sudoku grid is valid (blanks allowed)

In [None]:
def is_valid_sudoku(grid):
  """Checks if a (possibly partially filled) 4x4 Sudoku grid is valid.

  Args:
    grid: Four rows of four cells each. A cell is either the blank marker '*'
      or a digit 1-4, given as an int or as a one-character string.

  Returns:
    True when the grid is 4x4, every filled cell is a digit 1-4, and no digit
    repeats within any row, column, or 2x2 subgrid. False otherwise, including
    for malformed grids (wrong shape or unexpected cell values).
  """
  size = 4
  box = 2
  allowed_digits = {'1', '2', '3', '4'}

  # Reject wrongly shaped input up front instead of failing with IndexError.
  if not isinstance(grid, (list, tuple)) or len(grid) != size:
    return False
  for row in grid:
    if not isinstance(row, (list, tuple)) or len(row) != size:
      return False

  # Normalise cells to strings so that 2 and '2' count as the same digit.
  cells = [[str(cell) for cell in row] for row in grid]

  # A filled cell must hold one of the four legal digits.
  for row in cells:
    for cell in row:
      if cell != '*' and cell not in allowed_digits:
        return False

  def is_valid_unit(unit):
    digits = [digit for digit in unit if digit != '*']
    return len(set(digits)) == len(digits)

  rows = cells
  cols = [list(col) for col in zip(*cells)]
  subgrids = [
      [cells[r][c] for r in range(row_start, row_start + box)
       for c in range(col_start, col_start + box)]
      for row_start in range(0, size, box)
      for col_start in range(0, size, box)
  ]

  return all(is_valid_unit(unit) for unit in rows + cols + subgrids)

# Example usage: validate a partially filled grid and report the outcome.
sudoku_grid = [
    ['*', '*', '*', '*'],
    ['2', '*', '*', '*'],
    ['4', '*', '*', '3'],
    ['3', '*', '4', '2'],
]

validity_message = (
    "The Sudoku grid is valid."
    if is_valid_sudoku(sudoku_grid)
    else "The Sudoku grid is not valid."
)
print(validity_message)

### Rule-based evaluation function for the guess

Returns a higher score the more blanks the guess has filled in compared with the original puzzle; a guess that violates the Sudoku rules scores 0.

In [None]:
def rule_based_evaluation(initial_game, guess):
  """Score a guess by the share of the 16 cells it fills in beyond the puzzle.

  Args:
    initial_game: The starting puzzle, as rows whose blanks are '*'.
    guess: The candidate grid to score, in the same format.

  Returns:
    0.0 when the guess fails is_valid_sudoku; otherwise the number of blanks
    removed relative to initial_game, divided by 16.
  """
  def count_blanks(board):
    total = 0
    for row in board:
      total += row.count('*')
    return total

  # Count both boards before validating, in that order.
  blanks_before = count_blanks(initial_game)
  blanks_after = count_blanks(guess)

  if not is_valid_sudoku(guess):
    return 0.0
  return (blanks_before - blanks_after) / 16.0

In [None]:
# Starting puzzle: 10 blank cells.
sudoku_grid1 = [
    ['*', '*', '*', '*'],
    ['2', '*', '*', '*'],
    ['4', '*', '*', '3'],
    ['3', '*', '4', '2']
]
# Candidate guess: the same puzzle with two more cells filled (8 blanks left).
sudoku_grid2 = [
    ['1', '*', '*', '*'],
    ['2', '*', '*', '*'],
    ['4', '*', '*', '3'],
    ['3', '1', '4', '2']
]

# The guess is valid and fills 2 of the 16 cells, so the score is 2 / 16 = 0.125.
rule_based_evaluation(sudoku_grid1, sudoku_grid2)

### Run Tree-of-Thought Sudoku solver

In [None]:
# Search settings read by solve_sudoku_tot.
# LOOPS: maximum number of iterations of the outer search loop.
LOOPS = 10
# PROPOSAL_RUNS_PER_STATE: how many times propose_chain is invoked for each current state.
PROPOSAL_RUNS_PER_STATE = 1  # Adjust as needed
# EVAL_RUNS_PER_STATE: how many scoring passes are averaged for each proposal.
EVAL_RUNS_PER_STATE = 1  # Adjust as needed
# BRANCH_FACTOR: how many top-scoring proposals become the next current states.
BRANCH_FACTOR = 1 # 1: Greedy, Adjust as needed

In [None]:
import ast

def _parse_grid(state):
  """Returns `state` as a grid (sequence of rows), or None if it cannot be parsed.

  Strings are parsed with ast.literal_eval, which accepts Python literals only,
  so text produced by the LLM is never executed.
  """
  if isinstance(state, str):
    try:
      state = ast.literal_eval(state)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
      return None
  if isinstance(state, (list, tuple)):
    return state
  return None


def _is_solved(state):
  """Tells whether `state` is a grid with no blanks that passes is_valid_sudoku."""
  grid = _parse_grid(state)
  if grid is None or '*' in str(state):
    return False
  try:
    return bool(is_valid_sudoku(grid))
  except Exception:
    # LLM output may be arbitrarily malformed; treat any failure as "not solved".
    return False


def solve_sudoku_tot(sudoku_grid):
  """Solves a 4x4 Sudoku puzzle using a tree-search approach with LLMs.

  Each iteration asks the LLM for next-step proposals for every current state,
  scores them with rule_based_evaluation, and keeps the BRANCH_FACTOR best
  candidates (including unexpanded ones from earlier iterations) as the next
  current states.

  Args:
    sudoku_grid: The initial Sudoku grid, either as a list of lists or as the
                  string form of one. Empty cells are represented by '*'.

  Returns:
    A tuple (grid, solved, loop):
      grid: The best grid found, or None if the best state cannot be parsed.
      solved: True only when the grid has no blanks and passes is_valid_sudoku.
      loop: Zero-based index of the last iteration that ran (0 if none ran).
  """
  # Score against a parsed grid when possible; keep the raw input otherwise.
  initial_game = _parse_grid(sudoku_grid)
  if initial_game is None:
    initial_game = sudoku_grid
  curr_states = [sudoku_grid] # Stores current guess for generating next thoughts
  proposal_and_score = []
  loop = 0 # Keeps `loop` defined for the final return even when LOOPS is 0

  for loop in range(LOOPS):
    print('Curr states:', curr_states)
    for state in curr_states:
      # Drop the first element: after sorting it is the state being expanded now
      proposal_and_score = proposal_and_score[1:]
      proposals = []
      for _ in range(PROPOSAL_RUNS_PER_STATE): # runs PROPOSAL_RUNS_PER_STATE times
        # generate proposals based on the current intermediate guess
        response = propose_chain.invoke({"game_status": str(state)})
        proposals.extend(extract_proposals(response) or [])
        print("current proposals:", proposals)

      for proposal in proposals:
        # for each generated proposal, evaluate score
        guess = _parse_grid(proposal)
        score = 0
        for _ in range(EVAL_RUNS_PER_STATE): # averaged; one run suffices for rule-based evaluation
          if guess is None:
            continue # unparseable proposals score 0
          try:
            score += rule_based_evaluation(initial_game, guess)
          except Exception:
            # Malformed LLM grids must not abort the search; they score 0.
            score += 0

        proposal_and_score.append((proposal, score/EVAL_RUNS_PER_STATE))
        print("current proposal_and_score: ", proposal_and_score)

    proposal_and_score.sort(key=lambda x: x[1], reverse=True) # sort the proposals by the score
    next_states = [item[0] for item in proposal_and_score[:BRANCH_FACTOR]]
    if not next_states:
      # Nothing to expand: ask again from the same states instead of crashing
      print('No proposals available; retrying from the current states')
      continue
    curr_states = next_states

    # Only a complete grid that also satisfies the Sudoku rules counts as solved
    if _is_solved(curr_states[0]):
      print('Solved! The answer is: ', str(curr_states[0]))
      return _parse_grid(curr_states[0]), True, loop # outputs answer, isSolved, number of loops

  print('Cannot solve this puzzle')
  return _parse_grid(curr_states[0]), False, loop

In [None]:
solve_sudoku_tot("[[2, 3, '*', 4], ['*', 1, 3, '*'], ['*', '*', 2, '*'], ['*', '*', '*', '*']]")

In [None]:
# Ten 4x4 test puzzles, each stored as the string form of a list of rows;
# '*' marks an empty cell. The strings are passed to solve_sudoku_tot as-is.
sudoku_puzzle_test = [
    "[[3, 4, '*', '*'], [1, '*', '*', '*'], ['*', '*', 2, 1], [2, '*', '*', '*']]",
    "[[3, '*', '*', 2], ['*', 1, 4, 3], ['*', 2, 3, 1], [1, '*', 2, 4]]",
    "[['*', '*', '*', 4], ['*', 1, '*', '*'], ['*', '*', '*', 3], ['*', 4, 2, 1]]",
    "[[1, '*', 2, '*'], [2, '*', '*', 3], [3, '*', 4, 2], [4, '*', 3, '*']]",
    "[['*', '*', '*', 4], ['*', 2, '*', '*'], [3, 4, 1, '*'], [2, 1, '*', '*']]",
    "[[2, '*', '*', 4], [1, 4, '*', 3], [3, 1, '*', '*'], ['*', 2, 3, 1]]",
    "[[2, '*', 3, '*'], [3, '*', 2, 1], [1, '*', '*', 2], [4, '*', '*', '*']]",
    "[['*', '*', 1, 3], [1, '*', '*', '*'], ['*', '*', '*', 1], [3, 1, '*', 4]]",
    "[['*', '*', '*', 4], [1, 4, 2, 3], [3, 1, 4, '*'], [4, 2, '*', '*']]",
    "[['*', 1, 2, 4], [2, '*', 1, '*'], ['*', '*', 3, '*'], ['*', 3, '*', '*']]"
]

In [None]:
# Run the solver on every test puzzle, then report the fraction solved and the
# mean loop index at which the solved puzzles finished.

total_count = len(sudoku_puzzle_test)
solve_count = 0
total_loops = 0

for puzzle in sudoku_puzzle_test:
  answer, solved, loops = solve_sudoku_tot(puzzle)
  # Count a puzzle only when the solver claims success and the grid checks out
  if not (solved and is_valid_sudoku(answer)):
    continue
  solve_count += 1
  total_loops += loops

if total_count > 0:
  solve_ratio = solve_count / total_count
else:
  solve_ratio = 0

if solve_count > 0:
  average_loops = total_loops / solve_count
else:
  average_loops = 0

print(f"Solve ratio for the test puzzle set: {solve_ratio}")
print(f"Average loops for solving: {average_loops}")