In [1]:
import os

In [2]:
!pip install qwen-vl-utils[decord]==0.0.8

Collecting qwen-vl-utils==0.0.8 (from qwen-vl-utils[decord]==0.0.8)
  Downloading qwen_vl_utils-0.0.8-py3-none-any.whl.metadata (3.6 kB)
Collecting av (from qwen-vl-utils==0.0.8->qwen-vl-utils[decord]==0.0.8)
  Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting decord (from qwen-vl-utils[decord]==0.0.8)
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl.metadata (422 bytes)
Downloading qwen_vl_utils-0.0.8-py3-none-any.whl (5.9 kB)
Downloading av-15.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (39.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.7/39.7 MB[0m [31m65.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m129.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: decord, av, qwen-vl-utils
Successfully installed av-15.0.0 decord-0.6.0 qwen-vl-utils-0.0.8


In [3]:
# Instruction text sent to the model together with each board image: it
# explains the KenKen rules and asks for the answer as a 2-dimensional list.
prompt = """You will be provided an empty KenKen puzzle board, which is a puzzle similar to Sudoku but with mathematical operations. Like Sudoku, every row and column must contain the numbers 1 through n, where n is the size of the grid. The thick border lines represent cages, which contain a target number and arithmetic operator (+-/*) in the top left cell of each cage. For a given cage, all of the numbers that will make up that cage must arrive at the target number through the arithmetic operator. For example in a cage with two cells and the symbol 5+, it could be filled in with a 2 and a 3 because 2 + 3 = 5. If there is only one cell in the cage, then it can be automatically filled in with the target number.

Your task is to output a correct solution to the puzzle provided. The puzzle could have size 3, 4, 5, 6, or 7. All puzzles have at least one solution. Format your response as a 2 dimensional list representing the solution for the puzzle. An example response for a 3x3 KenKen puzzle is:
    [[1, 2, 3],[3, 1, 2],[2, 3, 1]]"""

In [4]:
import torch
from PIL import Image
from transformers import pipeline
# Use the CUDA device when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:

# Build the image+text generation pipeline for Qwen2.5-VL and place it
# explicitly on the `device` chosen in the setup cell, instead of leaving the
# placement to whatever default the installed transformers version applies.
pipe = pipeline(
    "image-text-to-text",
    model="Qwen/Qwen2.5-VL-7B-Instruct",
    device=device,
)



In [None]:
!pip install z3-solver

Collecting z3-solver
  Downloading z3_solver-4.15.1.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (602 bytes)
Downloading z3_solver-4.15.1.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.5/29.5 MB[0m [31m57.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: z3-solver
Successfully installed z3-solver-4.15.1.0


In [None]:
 from z3 import *
import time

In [None]:
def extract_solution(response):
  """Parse the last 2-D list of integers found in a model response.

  The grid is taken to span from the last ``[[`` in the text up to the first
  ``]]`` that follows it. Inside that span every ``]`` ends a row, and each
  run of consecutive digits is read as ONE integer, so a value such as ``10``
  stays whole instead of being split into ``1`` and ``0``.

  Args:
    response: Raw text generated by the model.

  Returns:
    A list of rows (each a list of ints), or ``None`` when the text contains
    no ``[[ ... ]]`` span. Row lengths are not checked here.
  """
  strt = response.rfind("[[")
  end = response.find("]]", strt)
  if strt == -1 or end == -1:
    return None

  solution = []
  # The closing "]]" is excluded from the slice, so splitting on "]" yields
  # exactly one chunk per row.
  for row_text in response[strt:end].split("]"):
    # Blank out every non-digit character so that whitespace splitting keeps
    # each multi-digit number together as a single token.
    digits_only = "".join(ch if ch.isdigit() else " " for ch in row_text)
    solution.append([int(token) for token in digits_only.split()])
  return solution

In [None]:
def parse_block_constraints(puzzle, cells):
    """Translate every KenKen cage into a constraint over its cell variables.

    Args:
        puzzle: Iterable of cage dicts. Each has an "op" ("", "add", "mul",
            "sub" or "div"), a numeric "target", and "cells", an iterable of
            (row, col) index pairs.
        cells: 2-D grid indexed as ``cells[row][col]`` holding the variable
            for each board position (z3 Int expressions when called from
            validate_solution).

    Returns:
        A list with one constraint per cage, in the order the cages appear.

    Raises:
        ValueError: If the op is unknown, or a "sub"/"div" cage does not have
            exactly two cells.
    """
    constraints = []
    for block in puzzle:
        op = block["op"]
        target = block["target"]
        vars_in_block = [cells[i][j] for i, j in block["cells"]]
        if op == "":
            # Single-cell cage: the cell simply equals the target.
            constraints.append(vars_in_block[0] == target)
        elif op == "add":
            constraints.append(Sum(vars_in_block) == target)
        elif op == "mul":
            product = vars_in_block[0]
            for v in vars_in_block[1:]:
                product *= v
            constraints.append(product == target)
        elif op == "sub" and len(vars_in_block) == 2:
            a, b = vars_in_block
            # Either ordering of the two cells may produce the difference.
            constraints.append(Or(a - b == target, b - a == target))
        elif op == "div" and len(vars_in_block) == 2:
            a, b = vars_in_block
            # Express the quotient through multiplication: `/` on z3 Int
            # expressions is integer division, so `a / b == target` would also
            # accept non-exact pairs such as 5 and 2 for a "2 div" cage.
            constraints.append(Or(a == b * target, b == a * target))
        else:
            raise ValueError(f"Unsupported operation or malformed block: {block}")
    return constraints



In [None]:
def validate_solution(puzzle, size, solution):
  """Check a candidate grid against the KenKen rules with a z3 solver.

  One integer variable is created per cell and constrained to 1..size, with
  distinct values in every row and column plus the cage constraints built by
  parse_block_constraints. Each variable is then pinned to the matching entry
  of ``solution``; the grid is valid when the combined system is satisfiable.

  Args:
    puzzle: Cage descriptions, passed through to parse_block_constraints.
    size: Board side length.
    solution: Candidate grid, indexed as ``solution[row][col]`` for every
      row and column below ``size``.

  Returns:
    True when the solver reports ``sat``, otherwise False.
  """
  grid = [[Int("x_%s_%s" % (r + 1, c + 1)) for c in range(size)]
          for r in range(size)]
  solver = Solver()

  # Structural rules: value range, then row and column uniqueness.
  for r in range(size):
    for c in range(size):
      solver.add(And(1 <= grid[r][c], grid[r][c] <= size))
  for r in range(size):
    solver.add(Distinct(grid[r]))
  for c in range(size):
    solver.add(Distinct([grid[r][c] for r in range(size)]))

  # Cage arithmetic.
  solver.add(parse_block_constraints(puzzle, grid))

  # Pin every cell to the proposed value.
  for r in range(size):
    for c in range(size):
      solver.add(grid[r][c] == solution[r][c])

  return solver.check() == sat

In [None]:
import json

# Read the main puzzle file and the separate 7x7 puzzle file from Drive.
with open("/content/drive/MyDrive/Summer2025Research/KenKenSolver/images/puzzles_all_sizes.json", "r") as f:
    puzzles_ds = json.loads(f.read())
with open("/content/drive/MyDrive/Summer2025Research/KenKenSolver/images/puzzles_7x7.json", "r") as f:
    puzzles_7x7 = json.loads(f.read())

# Store the 7x7 puzzles in the main dataset under the string key '7'.
puzzles_ds.update({'7': puzzles_7x7['7']})

In [None]:
# Result accumulators keyed by board size; the evaluation loop adds to these.
# NOTE(review): the non-zero entries for sizes 3 and 4 look like values carried
# over from earlier runs -- confirm before re-running those sizes.
qwen_accuracy = {
    3: 10,
    4: 0,
    5: 0,
    6: 0,
    7: 0,
}
qwen_avg_time = {
    3: 26.58244123697281,
    4: 35.20106599092483,
    5: 0,
    6: 0,
    7: 0,
}
# One independent list of raw model responses per board size.
qwen_responses = {board_size: [] for board_size in (3, 4, 5, 6, 7)}

In [None]:
# Settings for one evaluation run: the maximum number of boards to try, the
# counter of boards processed so far, and the board size under test.
num_puzzles, total, size = 30, 0, 7

In [None]:
# Run the model on each board image of the selected size, time the generation
# call, and count the responses whose parsed grid passes validation.
for i in range(0, min(num_puzzles, len(puzzles_ds[str(size)]))):
    filepath= "/content/drive/MyDrive/Summer2025Research/KenKenSolver/images/boards_noto_sans/board"+str(size)+"_"+str(i)+".png"
    # Open through a context manager so the file handle is released as soon as
    # the pixels have been copied into the RGB image; a bare Image.open() would
    # leave one handle open per iteration.
    with Image.open(filepath) as board_file:
        image = board_file.convert("RGB")
    messages = [
      {
          "role": "user",
          "content": [
              {
                  "type": "image",
                  "image": image,
              },
              {
                  "type": "text",
                  "text": prompt
              }
          ]
      }
    ]
    # Only the pipeline call itself is timed.
    start = time.time()
    res = pipe(text=messages, max_new_tokens=1024)[0]['generated_text'][-1]['content']
    end = time.time()
    res_time = end-start
    qwen_responses[size].append(res)
    # This accumulates total seconds; the value is not averaged inside the loop.
    qwen_avg_time[size] += res_time
    solution = extract_solution(res)
    # The shape checks run first so validate_solution never indexes outside
    # the parsed grid.
    if solution and len(solution)==size and all(len(row) == size for row in solution) and validate_solution(puzzles_ds[str(size)][i], size, solution):
      qwen_accuracy[size] += 1

    total+=1
    print(str(qwen_accuracy[size])+"/"+str(total))
    time.sleep(5)


0/1
0/2
0/3
0/4
0/5
0/6
0/7
0/8
0/9
0/10
0/11
0/12
0/13
0/14
0/15
0/16
0/17
0/18
0/19
0/20
0/21
0/22
0/23
0/24
0/25
0/26
0/27
0/28
0/29
0/30


In [None]:
# Display the grid parsed from the last response the evaluation loop handled.
solution

[[2, 4, 1, 8], [6], [3], [1, 2]]

In [None]:
# Display the second-to-last raw model response stored under size 4.
qwen_responses[4][-2]

"To solve this KenKen puzzle, we need to fill in the grid such that each row and each column contains the numbers 1 through 7 (since the grid is 7x7). Each cage has a target number and an arithmetic operation, and the numbers within the cage must satisfy that operation to reach the target.\n\nHere's the step-by-step process to solve the puzzle:\n\n1. **Identify the cages and their operations:**\n   - Top-left cage: \\(11+\\)\n   - Top-middle cage: \\(3+\\)\n   - Top-right cage: \\(8+\\)\n   - Middle-left cage: \\(2\\times\\)\n   - Middle-right cage: \\(1-\\)\n   - Bottom-left cage: \\(3\\div\\)\n\n2. **Fill in the cages with the given numbers:**\n   - The top-left cage \\(11+\\) must sum to 11. Possible pairs are (2, 9), (3, 8), (4, 7), (5, 6).\n   - The top-middle cage \\(3+\\) must sum to 3. Possible pairs are (1, 2).\n   - The top-right cage \\(8+\\) must sum to 8. Possible pairs are (1, 7), (2, 6), (3, 5), (4, 4).\n   - The middle-left cage \\(2\\times\\) must multiply to 2. Possib

In [None]:
# Report the stored 3x3 figures; print() joins the pieces with single spaces.
print(
    "Qwen Mini 3x3 Results: \nAccuracy: ",
    qwen_accuracy[3],
    "%\nAverage Time: ",
    qwen_avg_time[3],
    "s",
    sep=" ",
)

Qwen Mini 3x3 Results: 
Accuracy:  10 %
Average Time:  26.58244123697281 s


In [None]:
# Report the stored 4x4 figures; print() joins the pieces with single spaces.
print(
    "Qwen Mini 4x4 Results: \nAccuracy: ",
    qwen_accuracy[4],
    "%\nAverage Time: ",
    qwen_avg_time[4],
    "s",
    sep=" ",
)

Qwen Mini 4x4 Results: 
Accuracy:  0 %
Average Time:  35.20106599092483 s


In [None]:
# Report the stored 5x5 figures; print() joins the pieces with single spaces.
print(
    "Qwen Mini 5x5 Results: \nAccuracy: ",
    qwen_accuracy[5],
    "%\nAverage Time: ",
    qwen_avg_time[5],
    "s",
    sep=" ",
)

Qwen Mini 5x5 Results: 
Accuracy:  0 %
Average Time:  55.86759095191955 s


In [None]:
# Report the stored 6x6 figures; print() joins the pieces with single spaces.
print(
    "Qwen Mini 6x6 Results: \nAccuracy: ",
    qwen_accuracy[6],
    "%\nAverage Time: ",
    qwen_avg_time[6],
    "s",
    sep=" ",
)

Qwen Mini 6x6 Results: 
Accuracy:  0 %
Average Time:  55.0236961889267 s


In [None]:
# Report the stored 7x7 figures; print() joins the pieces with single spaces.
print(
    "Qwen Mini 7x7 Results: \nAccuracy: ",
    qwen_accuracy[7],
    "%\nAverage Time: ",
    qwen_avg_time[7],
    "s",
    sep=" ",
)

Qwen Mini 7x7 Results: 
Accuracy:  0 %
Average Time:  56.894556013743085 s


In [None]:
# Turn the accumulated generation time for the size just evaluated into a
# per-puzzle mean. Dividing by `total` (the number of boards the loop actually
# processed) stays correct when fewer than `num_puzzles` boards exist, and the
# guard avoids a ZeroDivisionError when the loop has not run.
# NOTE(review): running this cell twice divides twice -- run it once per
# evaluation, and before the results are printed or exported.
if total:
    qwen_avg_time[size] = qwen_avg_time[size] / total

In [None]:
import pandas as pd

In [None]:
# Combine the two per-size dicts into one table: board sizes form the index
# and each dict becomes a column.
# NOTE(review): the evaluation loop stores a count of solved puzzles in
# qwen_accuracy, while the column label says "%" -- confirm which is intended.
results = pd.DataFrame(
    data={
        'accuracy (%)': qwen_accuracy,
        'avg_time (s)': qwen_avg_time,
    },
)

In [None]:
# Write the summary table to Drive, keeping the board-size index as the first
# CSV column.
results.to_csv(
    '/content/drive/MyDrive/Summer2025Research/SolverRepo/results/qwen_evaluation.csv',
    index=True,
)