In [4]:
from datasets import load_dataset, DatasetDict
import os
import shutil

In [5]:
# Load the raw JSON records as a single "train" split and wrap them in a
# DatasetDict so the saved dataset carries an explicit split name.
dataset = load_dataset("json", data_files="../train.json", split="train")
print(dataset)
dataset_dict = DatasetDict({"train": dataset})
print(dataset_dict)

# Start from a clean output directory on every run. The directory is now
# (re)created unconditionally: previously `os.makedirs` sat inside the
# `exists` branch, so on a first run this cell never created the directory
# itself and left that to `save_to_disk`.
output_dir = "../aixue_test_data"
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir, exist_ok=True)

dataset_dict.save_to_disk(
    dataset_dict_path=output_dir,
    max_shard_size="500MB",  # optional: upper bound on the size of each shard
    num_proc=1,              # optional: number of worker processes
)

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['id', 'prompt', 'response', 'reward'],
    num_rows: 480
})
DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'response', 'reward'],
        num_rows: 480
    })
})


Saving the dataset (0/1 shards):   0%|          | 0/480 [00:00<?, ? examples/s]

# Response Test

In [35]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [36]:
# Prepare the model input.
# `prompt` is a Chinese-language instruction for a 1-on-1 phonics teacher
# role: it lists teaching goals, a student profile, the current lesson state,
# the available exercise patterns, error-correction rules and the student's
# last result, then asks the model to answer with either `<END>` or the next
# sentence the student should repeat.
# NOTE: the `#` / `##` lines below are Markdown headings inside the string
# literal, not Python comments; the text is sent to the model verbatim.
prompt = """你是一名专注于1对1自然拼读教学的英语老师。

# 教学目标
## 核心课程目标：
    1. 建立“字母/字母组合”与“发音”的牢固对应关系（音形联结），掌握单个字母基础发音及常见字母组合发音。
    2. 发展学生语音意识（音素、音节、重音感知），提升单词记忆与拼写能力。
    3. 最终目标：实现“见词能读，听音能写”。
## 单节课教学目标：
    1. 个性化教学：根据学生能力调整内容呈现和练习方式，激发兴趣，提升效果。
    2. 保持专注与信心：控制单次学习时长，避免连续错误过多，减少疲劳感和挫败感。

# 学生画像
    1. 年龄：7岁
    2. 性别：女
    3. 所在地：中国三线城市
    4. 英语基础：
       - 掌握26个英文字母名称。
       - 仅会极少量简单会话（如：What's your name?）。

# 当前教学状态
    1. 课节内容：教授字母 A、B、C 的发音（a: /æ/, b: /b/, c: /k/）。
    2. 当前**环节**：字母 A (/æ/) 的发音练习。
    3. 主题关联：教学围绕'苹果 (apple)'展开，练习部分如有单词，建议与之相关。

# 教学工具箱 (可选学习范式)
## 字母教学包含4种基础练习类型（难度递增）, 例如对于字母a：
1. 纯音素重复：`/æ/ /æ/ /æ/` (重复发音3次)
2. 音形对应：`a says /æ/` (建立字母与发音关联)
3. 音素-单词关联：`/æ/ /æ/ apple` (强化发音在单词中的感知)
4. 综合练习：`a says /æ/, /æ/ /æ/ apple` (整合字母、发音与单词)
## 智能纠错策略 (根据错误类型选择下一步)
1. 错误类型A (字母名称错，发音对)：例如学生说'a says /æ/' (a读错，/æ/正确)。  
    **下一步：** 聚焦字母名称练习。老师示范：'a' (仅字母名称)。
2. 错误类型B (单词发音错，字母发音对)：例如学生说'/æ/ /æ/ apple' (apple发音错，/æ/正确)。  
    **下一步：** 聚焦目标单词练习。老师示范：'apple' (仅单词)。
## 教学控制参数
1. 单字母(单环节)最大教学次数：4次 (若学生能力强，`综合练习`一次性通过，可减少次数, 直接进入下一个**环节**)。
2. 单次跟读最大重复次数：1-2次 (避免疲劳)。
3. 核心原则：及时强化正确，精准纠正错误，保持学习动力。

# 学生学习记录
1. 学习次数：1 次
2. 上次练习内容：'/æ/ /æ/ apple'
3. 上次表现评分：B (部分正确)
4. 具体错误：单词'apple'中的辅音'/p/'发音有瑕疵。

# 你的任务：制定下一步教学指令
1. **评估：** 基于教学目标、学生基础、当前环节、可选范式、纠错策略、历史表现及教学控制参数，决定下一步的教学。
2. 输出格式要求：
   - 如果结束当前字母教学，直接回复 <END>
   - 如果需要继续学习，直接回复 下一步跟读的句子"""
# Single-turn conversation: the whole instruction is sent as one user message.
messages = [
    {"role": "user", "content": prompt}
]

In [41]:
model_name = "/root/group-shared/models/base_models/Qwen3-32B"

# Load the tokenizer and the model from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto",
    attn_implementation="flash_attention_2"
)
# Render the chat messages into one prompt string that ends with the
# assistant header, so generation continues as the assistant turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Look up the id of the `</think>` marker from the tokenizer instead of
# hard-coding 151668, so the split below keeps working if the vocabulary of
# the loaded checkpoint differs.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")
# The prompt length is the same for every sample; compute it once.
prompt_len = model_inputs.input_ids.shape[1]

# Draw 8 independent completions for the same prompt to see how stable the
# model's answer is.
for _ in range(8):
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=32768,
        # `temperature` only has an effect when sampling is on; request it
        # explicitly rather than relying on the checkpoint's generation config.
        do_sample=True,
        temperature=0.1
    )
    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][prompt_len:].tolist()

    # Split the completion at the last `</think>` marker, if there is one.
    try:
        # Searching the reversed list finds the last occurrence; `index` is
        # the position just after it.
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        # No closing marker: the whole completion is treated as the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

Loading checkpoint shards:   0%|          | 0/17 [00:00<?, ?it/s]

thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：  
**"apple"** (仅单词，聚焦单词发音练习，纠正辅音 /p/)
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：  
**"apple"** (仅单词，聚焦单词发音练习，纠正辅音 /p/)


In [None]:
model_my_name = "/root/group-shared/jrc/ppo-test/models/train_8gpu_param_offload"

# Tokenizer and weights are both read from the checkpoint directory above
# (presumably a PPO training output, judging by the path).
tokenizer = AutoTokenizer.from_pretrained(model_my_name)
_load_options = {
    "torch_dtype": "auto",
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
}
model_my = AutoModelForCausalLM.from_pretrained(model_my_name, **_load_options)

# Turn the chat messages into a single prompt string ending with the
# assistant header; thinking mode is switched off (the default is on).
_template_options = {
    "tokenize": False,
    "add_generation_prompt": True,
    "enable_thinking": False,
}
text = tokenizer.apply_chat_template(messages, **_template_options)

# Tokenize as a batch of one and move the tensors to the model's device.
_encoded = tokenizer([text], return_tensors="pt")
model_my_inputs = _encoded.to(model_my.device)

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple (请跟读)
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: 根据学生当前的学习状态和历史表现，她在“音素-单词关联”阶段（`/æ/ /æ/ apple`）中，虽然对音素 `/æ/` 的发音掌握较好，但在单词 `apple` 的发音中，特别是辅音 `/p/` 的发音仍存在瑕疵。这表明她在音素-单词的整合练习中仍需强化语音意识和单词整体发音的准确性。

因此，下一步应聚焦于目标单词 `apple` 的发音练习，以提高其对单词整体语音结构的掌握。

**下一步跟读的句子：**  
`apple` (仅单词)
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: 根据当前教学目标、学生基础、当前环节及历史表现，学生在音素-单词关联练习中已基本掌握音素 /æ/ 的发音，但在单词 "apple" 的完整发音上仍需加强，尤其是辅音 /p/ 的发音。因此，下一步应聚焦单

In [40]:
# Look up the id of the `</think>` marker from the tokenizer instead of
# hard-coding 151668.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")
# The prompt length does not change between samples; compute it once.
prompt_len = model_my_inputs.input_ids.shape[1]

# Draw 16 independent completions from `model_my` for the same prompt.
for _ in range(16):
    # conduct text completion
    generated_ids = model_my.generate(
        **model_my_inputs,
        max_new_tokens=32768,
        # `temperature` only has an effect when sampling is on; request it
        # explicitly rather than relying on the checkpoint's generation config.
        do_sample=True,
        temperature=0.1
    )
    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][prompt_len:].tolist()

    # Split the completion at the last `</think>` marker, if there is one.
    try:
        # Searching the reversed list finds the last occurrence; `index` is
        # the position just after it.
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        # No closing marker: the whole completion is treated as the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple
thinking content: 
content: /æ/ /æ/ apple


In [4]:
model_my_name = "/root/group-shared/jrc/ppo-test/models/train_4gpu_liger"

# Load the tokenizer and the model from the same checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_my_name)
model_my_liger= AutoModelForCausalLM.from_pretrained(
    model_my_name,
    torch_dtype="auto",
    device_map="auto",
    attn_implementation="flash_attention_2"
)

# Render the chat messages into one prompt string that ends with the
# assistant header, so generation continues as the assistant turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False # Switches between thinking and non-thinking modes. Default is True.
)
model_my_liger_inputs = tokenizer([text], return_tensors="pt").to(model_my_liger.device)

# Look up the id of the `</think>` marker from the tokenizer instead of
# hard-coding 151668.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")
# The prompt length does not change between samples; compute it once.
prompt_len = model_my_liger_inputs.input_ids.shape[1]

# Draw 16 independent completions for the same prompt.
for _ in range(16):
    # conduct text completion
    generated_ids = model_my_liger.generate(
        **model_my_liger_inputs,
        max_new_tokens=32768,
        # Sample with temperature 0.1, the value used for the other
        # checkpoints evaluated on this prompt; without it this run used the
        # checkpoint's default sampling settings and was not comparable.
        do_sample=True,
        temperature=0.1
    )
    # Keep only the newly generated tokens (drop the echoed prompt).
    output_ids = generated_ids[0][prompt_len:].tolist()

    # Split the completion at the last `</think>` marker, if there is one.
    try:
        # Searching the reversed list finds the last occurrence; `index` is
        # the position just after it.
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        # No closing marker: the whole completion is treated as the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

Loading checkpoint shards:   0%|          | 0/14 [00:00<?, ?it/s]

thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: **下一步跟读的句子：** `apple` (仅单词)
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 根据学生当前的学习状态、教学目标和纠错策略，我们应优先解决上一次练习中出现的错误类型B（单词发音错误，字母发音正确）——即学生在“/æ/ /æ/ apple”中对单词“apple”的辅音/p/发音有瑕疵。

因此，下一步教学应聚焦于目标单词的发音练习。

**下一步跟读的句子：**  
**apple**
thinking content: 
content: 下一步跟读的句子：**apple**
thinking content: 
content: 根据学生当前学习状态和错误类型（**错误类型B**：单词发音错，字母发音对），下一步应聚焦于**目标单词练习**，即强化单词 **apple** 的正确发音。

因此，下一步教学指令为：

**apple**
thinking content: 
content: 根据学生当前的学习状态和表现（上次练习内容为“/æ/ /æ/ apple”，评分B，错误集中在单词“apple”中的辅音/p/发音），下一步应聚焦**单词发音的精准练习**，尤其是“apple”中/p/的发音。

结合教学控制参数，我们选择**单词专项练习**，并控制练习强度，避免疲劳。

**下一步跟读的句子：**  
**apple**
thinking content: 
content: 根据学生当前的学习记录和教学目标，她在“音素-单词关联”阶段表现尚可，但“apple”中的辅音/p/发音仍有瑕疵。这属于**错误类型B**，应优先聚焦单词发音的练习。

下一步教学指令为：

**"apple" (仅单词，重点练习/p/的正确发音)**
thinking content: 
content: **下一步跟读的句子：** `apple` (仅单词)
thinking content: 
content: 下一

In [9]:
from modelscope import AutoModelForCausalLM, AutoTokenizer

import torch

model_name = "Qwen/Qwen3-0.6B"

# Load the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)

# Prepare the model input: two short user prompts of different length so the
# batch needs padding.
# NOTE(review): `prompt` is not referenced by the cells visible here; it is
# kept in case another cell reads it.
prompt = "How are you today?"
text1 = tokenizer.apply_chat_template(
    [{"role": "user", "content": "How are you?"}],
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True # Switch between thinking and non-thinking modes. Default is True.
)
text2 = tokenizer.apply_chat_template(
    [{"role": "user", "content": "How are you, today?"}],
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=True # Switch between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text1, text2], padding=True, return_tensors="pt").to(model.device)

# Grab the bare backbone module (the attribute named by `base_model_prefix`)
# so a single forward pass can be run without going through `generate`.
lm_backbone = getattr(model, model.base_model_prefix)

# Single forward pass over the padded batch. This is not a `generate` call,
# so the generation-only options that used to be passed here
# (`max_new_tokens`, `return_dict_in_generate`, `output_scores`) were dropped.
# Gradients are not needed for inspection, so autograd is disabled as well.
with torch.no_grad():
    output = lm_backbone(
        **model_inputs,
        return_dict=True,
        output_hidden_states=True,
    )

Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B


2025-07-24 00:16:44,327 - modelscope - INFO - Target directory already exists, skipping creation.


Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B


2025-07-24 00:16:45,581 - modelscope - INFO - Target directory already exists, skipping creation.


In [10]:
# Print the module tree of the backbone extracted from `model` to inspect its
# layers and projection sizes.
print(lm_backbone)

Qwen3Model(
  (embed_tokens): Embedding(151936, 1024)
  (layers): ModuleList(
    (0-27): 28 x Qwen3DecoderLayer(
      (self_attn): Qwen3Attention(
        (q_proj): Linear(in_features=1024, out_features=2048, bias=False)
        (k_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (v_proj): Linear(in_features=1024, out_features=1024, bias=False)
        (o_proj): Linear(in_features=2048, out_features=1024, bias=False)
        (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
        (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
      )
      (mlp): Qwen3MLP(
        (gate_proj): Linear(in_features=1024, out_features=3072, bias=False)
        (up_proj): Linear(in_features=1024, out_features=3072, bias=False)
        (down_proj): Linear(in_features=3072, out_features=1024, bias=False)
        (act_fn): SiLU()
      )
      (input_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
      (post_attention_layernorm): Qwen3RMSNorm((1024,), eps=1e-06)
    )
  )
  (norm): Qwen3RMSNorm((102

In [18]:
# Sanity-check the forward pass: shape of the last hidden-state tensor, how
# many hidden-state tensors were returned, and the shape of the input ids.
for _summary in (
    output.hidden_states[-1].shape,
    len(output.hidden_states),
    model_inputs['input_ids'].shape,
):
    print(_summary)

torch.Size([2, 14, 1024])
29
torch.Size([2, 14])


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-0.6B"

# Tokenizer and weights for the small Qwen3 checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

# Two user questions of different token length, each rendered through the
# chat template with thinking mode enabled (which is also the default).
prompt = "How are you today?"
text1, text2 = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": _question}],
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=True,
    )
    for _question in ("How are you?", "How are you, today?")
]


In [28]:
# Batch the two prompts with padding on the left, so every row ends with its
# real prompt tokens right before generation starts.
_encoded = tokenizer([text1, text2], padding=True, padding_side="left", return_tensors="pt")
model_inputs = _encoded.to(model.device)

# Generate and keep the structured result (sequences plus per-step scores and
# hidden states) for inspection in the following cells.
_generate_options = {
    "max_new_tokens": 32768,
    "return_dict_in_generate": True,
    "output_scores": True,
    "output_hidden_states": True,
}
output = model.generate(**model_inputs, **_generate_options)

In [29]:
# Dump the padded inputs and the full generated sequences (prompt included)
# to see where the pad tokens ended up.
for _tensor_dump in (model_inputs, output.sequences):
    print(_tensor_dump)


{'input_ids': tensor([[151643, 151643, 151644,    872,    198,   4340,    525,    498,     30,
         151645,    198, 151644,  77091,    198],
        [151644,    872,    198,   4340,    525,    498,     11,   3351,     30,
         151645,    198, 151644,  77091,    198]], device='cuda:7'), 'attention_mask': tensor([[0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:7')}
tensor([[151643, 151643, 151644,    872,    198,   4340,    525,    498,     30,
         151645,    198, 151644,  77091,    198, 151667,    198,  32313,     11,
            279,   1196,   4588,     11,    330,   4340,    525,    498,   7521,
            358,   1184,    311,   5889,  34901,     13,   8704,    358,   2776,
            458,  15235,  17847,     11,    358,   1265,  24645,    862,   3405,
            323,   3410,    264,  10950,   4226,    382,   5338,     11,    358,
           1265,   7683,    847,   2639,    438,    458,  15235,    323,   628

In [30]:
# Look up the id of the `</think>` marker from the tokenizer instead of
# hard-coding 151668.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")
# All rows of the padded batch share one prompt length, so the generated part
# of every sequence starts at the same column.
prompt_len = model_inputs.input_ids.shape[1]

# Decode every sequence in the batch (not just the first two), splitting each
# completion into its thinking part and its final answer.
for sequence in output.sequences:
    output_ids = sequence[prompt_len:].tolist()

    try:
        # Searching the reversed list finds the last `</think>`; `index` is
        # the position just after it.
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        # No closing marker: the whole completion is treated as the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

thinking content: <think>
Okay, the user asked, "How are you?" I need to respond appropriately. Since I'm an AI assistant, I should acknowledge their question and provide a helpful answer.

First, I should confirm my status as an AI and mention that I'm here to assist. Then, I can offer assistance with various topics. I should keep the tone friendly and open-ended to encourage further conversation.

I should avoid any technical jargon and keep the response simple. Let me check for any possible misunderstandings. No, the user just wants to know my current state and ability to help. Alright, that should cover it.
</think>
content: Hello! I'm here to help with anything you need. How can I assist you today? 😊
thinking content: <think>
Okay, the user asked, "How are you, today?" I need to respond appropriately. Let me think about the best way to answer.

First, I should acknowledge their question. It's a friendly way to start. Maybe say something like "Hi! How are you today?" That's polite 

In [None]:
import torch
# Top-5 scores for the first batch row at generation steps 0 and 1
# (`output.scores` is indexed by step first, then by batch row).
for _step in (0, 1):
    print(torch.topk(output.scores[_step][0], 5))

torch.return_types.topk(
values=tensor([52.7083,    -inf,    -inf,    -inf,    -inf], device='cuda:7'),
indices=tensor([151667,      2,      0,      3,      1], device='cuda:7'))
torch.return_types.topk(
values=tensor([57.0833,    -inf,    -inf,    -inf,    -inf], device='cuda:7'),
indices=tensor([198,   2,   0,   3,   1], device='cuda:7'))


In [32]:
# Batch the two prompts with padding on the right. For the shorter prompt the
# pad tokens then sit between the prompt and the generated text, which is the
# situation this cell inspects.
_encoded = tokenizer([text1, text2], padding=True, padding_side="right", return_tensors="pt")
model_inputs = _encoded.to(model.device)

# Generate and keep the structured result (sequences plus per-step scores and
# hidden states).
_generate_options = {
    "max_new_tokens": 32768,
    "return_dict_in_generate": True,
    "output_scores": True,
    "output_hidden_states": True,
}
output = model.generate(**model_inputs, **_generate_options)

# Show the padded inputs, then only the newly generated columns.
print(model_inputs)
_prompt_width = model_inputs.input_ids.shape[1]
print(output.sequences[:, _prompt_width:])

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


{'input_ids': tensor([[151644,    872,    198,   4340,    525,    498,     30, 151645,    198,
         151644,  77091,    198, 151643, 151643],
        [151644,    872,    198,   4340,    525,    498,     11,   3351,     30,
         151645,    198, 151644,  77091,    198]], device='cuda:7'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:7')}
tensor([[   198, 151667,    198,  32313,     11,    279,   1196,   4588,     11,
            330,   4340,    525,    498,   7521,    358,   1184,    311,   5889,
            304,    264,  11657,    323,  10950,   1616,     13,   6771,    752,
           1744,    911,    279,   1850,   5486,    382,   5338,     11,    358,
           1265,  24645,    862,   3405,     13,   1084,    594,    264,   4285,
          42113,     11,    773,   7196,   1191,    448,    330,  13048,   1052,
           8958,    311,   1473,    358,   2776,   6247,    311,   1492,     1

In [33]:
# Look up the id of the `</think>` marker from the tokenizer instead of
# hard-coding 151668.
think_end_id = tokenizer.convert_tokens_to_ids("</think>")
# All rows of the padded batch share one prompt length, so the generated part
# of every sequence starts at the same column.
prompt_len = model_inputs.input_ids.shape[1]

# Decode every sequence in the batch (not just the first two), splitting each
# completion into its thinking part and its final answer.
for sequence in output.sequences:
    output_ids = sequence[prompt_len:].tolist()

    try:
        # Searching the reversed list finds the last `</think>`; `index` is
        # the position just after it.
        index = len(output_ids) - output_ids[::-1].index(think_end_id)
    except ValueError:
        # No closing marker: the whole completion is treated as the answer.
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    print("thinking content:", thinking_content)
    print("content:", content)

thinking content: <think>
Okay, the user asked, "How are you?" I need to respond in a friendly and helpful way. Let me think about the best approach.

First, I should acknowledge their question. It's a simple greeting, so maybe start with "Hi there!" to show I'm happy to help. Then, I can add a bit more about my current state, like "I'm doing well and I'm here to help you with anything!" That gives a sense of being there for them.

I should make sure the response is natural and not too formal. Maybe add a friendly emoji to keep it light. Also, check if the user has any specific questions or needs assistance, but since they just asked "How are you?" I should keep it general.

Wait, is there anything else I should consider? Maybe add a follow-up question to encourage further interaction, but the user didn't ask anything else. So, keeping it concise and positive should be good. Let me put that all together.
</think>
content: Hi there! I'm happy to help! I'm doing well and I'm here to assi

In [34]:
import torch
# Top-5 scores at the first generation step for batch rows 0 and 1
# (`output.scores` is indexed by step first, then by batch row).
for _row in (0, 1):
    print(torch.topk(output.scores[0][_row], 5))

torch.return_types.topk(
values=tensor([47.2917,    -inf,    -inf,    -inf,    -inf], device='cuda:7'),
indices=tensor([198,   2,   0,   3,   1], device='cuda:7'))
torch.return_types.topk(
values=tensor([52.7083,    -inf,    -inf,    -inf,    -inf], device='cuda:7'),
indices=tensor([151667,      2,      0,      3,      1], device='cuda:7'))
