In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import Dataset, load_dataset
import os

# Hyperparameters
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
DATASET_PATH = "sudoku_sft_data.json"
OUTPUT_DIR = "outputs/Qwen2.5-0.5B-GRPO-Sudoku/"
BATCH_SIZE = 8
LEARNING_RATE = 2e-5
GRADIENT_ACCUMULATION_STEPS = 1
NUM_EPOCHS = 3

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token


# Preprocess data
def preprocess_function(examples):
    inputs = examples["instruction"] + examples["input"]
    targets = examples["output"]
    text = tokenizer(f"<instruction>, {inputs}, <output>, {targets}{tokenizer.eos_token}", return_tensors="pt", padding="longest")
    return text

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = load_dataset("json", data_files=DATASET_PATH)

In [3]:
data["train"] = data["train"].take(10)

In [4]:
data["train"][0]

{'instruction': 'Solve this Sudoku puzzle:',
 'input': '8 9 4 7 2 6 3 1 5 2 0 7 3 0 0 8 4 0 3 1 0 4 8 9 2 6 0 6 4 2 0 5 7 1 9 3 1 5 8 0 3 2 4 7 6 9 7 3 1 6 4 5 2 0 4 8 1 6 9 3 0 5 2 0 3 0 0 0 1 9 8 4 7 2 9 0 4 0 6 3 1',
 'output': '<thonk> I see a sudoku problem. Most of its cells are filled. So it should be easy to finish it.\nIn row 4 the only missing element is 8 so row 4 column 4 must be 8.\nCurrent board:\n8 9 4 7 2 6 3 1 5\n2 0 7 3 0 0 8 4 0\n3 1 0 4 8 9 2 6 0\n6 4 2 8 5 7 1 9 3\n1 5 8 0 3 2 4 7 6\n9 7 3 1 6 4 5 2 0\n4 8 1 6 9 3 0 5 2\n0 3 0 0 0 1 9 8 4\n7 2 9 0 4 0 6 3 1\nIn column 1 the only missing element is 5 so row 8 column 1 must be 5.\nCurrent board:\n8 9 4 7 2 6 3 1 5\n2 0 7 3 0 0 8 4 0\n3 1 0 4 8 9 2 6 0\n6 4 2 8 5 7 1 9 3\n1 5 8 0 3 2 4 7 6\n9 7 3 1 6 4 5 2 0\n4 8 1 6 9 3 0 5 2\n5 3 0 0 0 1 9 8 4\n7 2 9 0 4 0 6 3 1\nIn row 5 the only missing element is 9 so row 5 column 4 must be 9.\nCurrent board:\n8 9 4 7 2 6 3 1 5\n2 0 7 3 0 0 8 4 0\n3 1 0 4 8 9 2 6 0\n6 4 2 8 5 7 1

In [5]:
tokenized_datasets = data.map(preprocess_function, remove_columns=["instruction", "input", "output"]).with_format("torch")
tokenized_datasets["train"][0]["input_ids"]

Map: 100%|██████████| 10/10 [00:00<00:00, 50.86 examples/s]


tensor([[    27,  54974,   8066,  ...,    596,     29, 151645]])

In [None]:
with torch.no_grad():
    input_ids = tokenized_datasets["train"][0]["input_ids"].to("cuda")
    print(input_ids.shape)
    model(input_ids, labels=input_ids).loss

torch.Size([1, 2438])


tensor(0.3472, device='cuda:0', grad_fn=<NllLossBackward0>)

In [7]:
checkpoint = torch.load("outputs/Qwen2.5-0.5B-SFT/last-v1.ckpt")
checkpoint.keys()

  checkpoint = torch.load("outputs/Qwen2.5-0.5B-SFT/last-v1.ckpt")


dict_keys(['epoch', 'global_step', 'pytorch-lightning_version', 'state_dict', 'loops', 'callbacks', 'optimizer_states', 'lr_schedulers'])

In [8]:
state_dict = {}
for key in checkpoint["state_dict"].keys():
    # remove the prefix "model."
    state_dict[key.replace("model.", "", 1)] = checkpoint["state_dict"][key]

model.load_state_dict(state_dict)
model.eval()
model.to("cuda")

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 896)
    (layers): ModuleList(
      (0-23): 24 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=896, out_features=896, bias=True)
          (k_proj): Linear(in_features=896, out_features=128, bias=True)
          (v_proj): Linear(in_features=896, out_features=128, bias=True)
          (o_proj): Linear(in_features=896, out_features=896, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=896, out_features=4864, bias=False)
          (up_proj): Linear(in_features=896, out_features=4864, bias=False)
          (down_proj): Linear(in_features=4864, out_features=896, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((896,), eps=1e-06)
    (rotary_emb): Qwen2RotaryEmbe

In [9]:
prompt = f"<instruction>, Solve this Sudoku puzzle:\n{data['train'][0]['instruction'] + data['train'][0]['input']}, <output>, "
y = tokenizer(prompt, return_tensors="pt", padding="longest").to("cuda")
prompt, y

('<instruction>, Solve this Sudoku puzzle:\nSolve this Sudoku puzzle:8 9 4 7 2 6 3 1 5 2 0 7 3 0 0 8 4 0 3 1 0 4 8 9 2 6 0 6 4 2 0 5 7 1 9 3 1 5 8 0 3 2 4 7 6 9 7 3 1 6 4 5 2 0 4 8 1 6 9 3 0 5 2 0 3 0 0 0 1 9 8 4 7 2 9 0 4 0 6 3 1, <output>, ',
 {'input_ids': tensor([[   27, 54974,  8066, 63284,   419, 94254, 24626,   510,    50,  3948,
            419, 94254, 24626,    25,    23,   220,    24,   220,    19,   220,
             22,   220,    17,   220,    21,   220,    18,   220,    16,   220,
             20,   220,    17,   220,    15,   220,    22,   220,    18,   220,
             15,   220,    15,   220,    23,   220,    19,   220,    15,   220,
             18,   220,    16,   220,    15,   220,    19,   220,    23,   220,
             24,   220,    17,   220,    21,   220,    15,   220,    21,   220,
             19,   220,    17,   220,    15,   220,    20,   220,    22,   220,
             16,   220,    24,   220,    18,   220,    16,   220,    20,   220,
             23,   22

In [10]:
from sudoku import Sudoku
import numpy as np

In [20]:
sud = Sudoku().difficulty(0.1)
sud.show()
problem = " ".join([str(cell or 0) for row in sud.board for cell in row])
prompt = (
    f"<instruction>\nSolve this Sudoku puzzle:\n</instruction>\n"
    f"<input>\n{problem}\n</output>\n"
    "<output>\n"
)
y = tokenizer(prompt, return_tensors="pt", padding="longest").to("cuda")
prompt, y

Puzzle has exactly one solution
+-------+-------+-------+
| 5 1 9 | 4 7 2 | 6 3 8 |
| 3 8 4 | 5 6   |     2 |
| 6 2 7 | 8 9 3 | 1 5 4 |
+-------+-------+-------+
| 9 5 1 | 2 4 6 | 8   3 |
| 2 6   | 3 5 7 | 9 4 1 |
| 4 7 3 |   8 9 | 5 2 6 |
+-------+-------+-------+
| 7 9 2 | 6 1 4 | 3 8 5 |
| 8   6 | 7 3 5 | 2 1 9 |
| 1 3 5 | 9 2   | 4 6 7 |
+-------+-------+-------+



('<instruction>\nSolve this Sudoku puzzle:\n<instruction>\n<input>\n5 1 9 4 7 2 6 3 8 3 8 4 5 6 0 0 0 2 6 2 7 8 9 3 1 5 4 9 5 1 2 4 6 8 0 3 2 6 0 3 5 7 9 4 1 4 7 3 0 8 9 5 2 6 7 9 2 6 1 4 3 8 5 8 0 6 7 3 5 2 1 9 1 3 5 9 2 0 4 6 7\n<output>\n<output>\n',
 {'input_ids': tensor([[   27, 54974,   397,    50,  3948,   419, 94254, 24626,   510,    27,
          54974,   397,  9182,   397,    20,   220,    16,   220,    24,   220,
             19,   220,    22,   220,    17,   220,    21,   220,    18,   220,
             23,   220,    18,   220,    23,   220,    19,   220,    20,   220,
             21,   220,    15,   220,    15,   220,    15,   220,    17,   220,
             21,   220,    17,   220,    22,   220,    23,   220,    24,   220,
             18,   220,    16,   220,    20,   220,    19,   220,    24,   220,
             20,   220,    16,   220,    17,   220,    19,   220,    21,   220,
             23,   220,    15,   220,    18,   220,    17,   220,    21,   220,
            

In [22]:
with torch.no_grad():
    x = model.generate(**y, max_new_tokens=1000)
    gentext = tokenizer.decode(x[0], skip_special_tokens=True)
gentext

'<instruction>\nSolve this Sudoku puzzle:\n<instruction>\n<input>\n5 1 9 4 7 2 6 3 8 3 8 4 5 6 0 0 0 2 6 2 7 8 9 3 1 5 4 9 5 1 2 4 6 8 0 3 2 6 0 3 5 7 9 4 1 4 7 3 0 8 9 5 2 6 7 9 2 6 1 4 3 8 5 8 0 6 7 3 5 2 1 9 1 3 5 9 2 0 4 6 7\n<output>\n<output>\n<entry>\n3 1 9 4 7 2 6 8 5 0 4 0 5 6 9 1 2 7 6 5 0 1 0 4 0 0 0 5 2 0 7 0 0 9 0 1 0 9 6 8 3 5 1 7 4 2 3 7 3 0 2 4 0 6\n</entry>\n<entry>\n7 8 1 0 3 2 4 9 6 2 0 6 8 0 4 3 1 0 9 4 3 0 1 6 5 8 2 7\n</entry>\n<entry>\n0 9 3 2 5 1 6 8 4 5 6 0 0 4 8 2 3 0 5 0 0 3 2 0 7 9 1 5 1 7 9 6 4 3 0 2 0 8 2 0 7 9 1 5 6 4 3\n</entry>\n<entry>\n4 7 2 6 0 0 0 5 9 3 6 5 0 1 4 8 2 7 1 0 0 4 0 2 5 3 6 0\n</entry>\n<entry>\n8 9 0 7 0 1 6 3 4 0 3 0 4 6 0 9 7 2 7 2 6 9 3 5 1 8 0\n</entry>\n<entry>\n6 5 0 8 3 0 4 0 1 4 1 3 2 0 5 9 6 7\n</entry>\n<entry>\n3 9 5 0 0 4 1 8 6 0 6 0 3 1 0 2 4 9 2 7 0 9 5 6 3 0 1 8\n</entry>\n<entry>\n7 8 1 4 6 5 3 2 9 5 0 6 0 0 0 0 4 7 3 2 4 9 0 1 8 6 5 6\n</entry>\n<entry>\n4 7 2 6 3 1 9 8 5 0 3 0 7 0 6 0 1 2 1 6 9 5 2 4 7 3 0 8\n</entry>

In [1]:
sudoku_string = "157394628468713592893625147912846537846293715734986512629531874581472936"

# Convert the string to a 9x9 Sudoku board
sudoku_board = [[int(sudoku_string[i * 9 + j]) for j in range(9)] for i in range(9)]

# Print the Sudoku board
for row in sudoku_board:
    print(row)

IndexError: string index out of range

In [2]:
len(sudoku_string)

72