In [1]:
from mlx_lm import load, generate
from datasets import load_dataset

# Load the base model and its tokenizer through MLX-LM.
model, tokenizer = load("Qwen/Qwen3-0.6B")

# Template used to render one training example. Its three positional `{}`
# slots are filled, in order, with the question, the chain-of-thought text
# (placed between the <think> tags) and the final response.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are an expert in Japanese animation with advanced knowledge in anime, manga, Japanese cartoons and animated films.
Please answer the following anime fan question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

# Render raw dataset columns into training texts.
def formatting_prompts_func(examples):
    """Build the ``text`` column for a batch of dataset rows.

    Args:
        examples: Mapping of column name to a list of values. The
            ``"Question"``, ``"Complex_CoT"`` and ``"Response"`` columns
            are read and walked in parallel.

    Returns:
        ``{"text": [...]}`` holding one rendered ``train_prompt_style``
        string per input row.
    """

    def _terminated(answer):
        # Append the EOS token only when the response does not already end with it.
        eos = tokenizer.eos_token
        return answer if answer.endswith(eos) else answer + eos

    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(query, reasoning, _terminated(answer))
            for query, reasoning, answer in rows
        ]
    }

# Read the JSON dataset and add the rendered "text" column in batched mode.
dataset = load_dataset(
    "json", data_files="datasets-anime-sharegpt-2025-05-24.json", split="train"
).map(formatting_prompts_func, batched=True)


# Template used at inference time. It has a single `{}` slot for the question
# and stops right after the opening <think> tag, so the text produced by the
# model continues from that point (presumably with its reasoning first).
inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are an expert in Japanese animation with advanced knowledge in anime, manga, Japanese cartoons and animated films.
Please answer the following anime fan question.

### Question:
{}

### Response:
<think>
"""

# Run inference with the base (not yet fine-tuned) model on one dataset question.
question = dataset[10]['Question']
prompt = inference_prompt_style.format(question)

# A single generate() call is sufficient: repeating it with the same arguments
# only redoes the decoding work and overwrites `outputs`.
outputs = generate(model, tokenizer, prompt, max_tokens=1200)
print(outputs)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 7 files: 100%|██████████| 7/7 [00:00<00:00, 67963.26it/s]


<|im_start|>
Okay, so the user is asking how many members the anime "Peach Girl" has. Let me start by recalling what I know about this series. I remember that "Peach Girl" is a popular Japanese anime, and it's known for its unique characters and story. The main characters are the protagonist, a girl named Peach, and her friends. But wait, I think there are more characters involved. Let me think... Oh right, there's also the character of the girl who is the main character of the story, and then there's the other main character, maybe a friend or a sibling. Wait, I'm getting confused. Let me check my memory again.

I think the main characters are Peach, her friend, and another girl. But I'm not entirely sure. Maybe there are more. Oh, right! The anime has a main protagonist, a main character, and a few other characters. Let me confirm. The original series has the main characters: Peach, her friend, and another girl. So, that's three main characters. But I'm not 100% sure. Maybe there are

In [None]:
import gc
import json
import mlx.optimizers as optim
from pathlib import Path
from mlx_lm.tuner import TrainingArgs, datasets, linear_to_lora_layers, train
from mlx.utils import tree_flatten

# Keep only the rendered "text" column for training. The column list is
# filtered against `dataset.column_names` so that re-running this cell after
# the columns were already dropped is a no-op instead of an error.
_raw_columns = [
    column
    for column in ("Question", "Response", "Complex_CoT")
    if column in dataset.column_names
]
if _raw_columns:
    dataset = dataset.remove_columns(_raw_columns)

# Feature-name settings handed to create_dataset(); prompt masking is disabled.
configs = dict(
    mask_prompt=False,
    prompt_feature="prompt",
    text_feature="text",
    completion_feature="completion",
    chat_feature="messages",
)

# Build the training dataset from the prepared rows and wrap it in CacheDataset.
train_dataset = datasets.CacheDataset(
    datasets.create_dataset(dataset, tokenizer, configs)
)

# Adam optimizer with a fixed learning rate.
opt = optim.Adam(learning_rate=1e-5)

# LoRA settings; both entries are passed to linear_to_lora_layers() further down.
lora_config = {
    "num_layers": 16,
    "lora_parameters": {
        "rank": 32,      # rank of the LoRA update matrices
        "scale": 64,     # scaling factor for LoRA
        "dropout": 0.0,  # dropout for regularization
    },
}

# Ensure the adapter directory exists and store the LoRA settings in it as JSON.
adapter_path = Path("adapters")
adapter_path.mkdir(parents=True, exist_ok=True)
(adapter_path / "adapter_config.json").write_text(json.dumps(lora_config, indent=4))

# Freeze the model's parameters before the LoRA layers are attached below.
_ = model.freeze()

# Training-loop settings. The adapter weights file lives in the same directory
# as the adapter_config.json written above.
training_args = TrainingArgs(
    iters=2930,
    batch_size=1,
    steps_per_eval=50,
    adapter_file=adapter_path / "adapters.safetensors",
)

# Apply LoRA to the model
linear_to_lora_layers(model, lora_config["num_layers"], lora_config["lora_parameters"])

# Sum the element counts of every array in the flattened trainable-parameter tree.
num_train_params = (
    sum(v.size for _, v in tree_flatten(model.trainable_parameters()))
)
print(f"Number of trainable parameters: {num_train_params}")

# Train the model
# Run a garbage-collection pass before the training loop starts.
gc.collect()

_ = model.train() # Switch the model into training mode

# NOTE(review): val_dataset is the same object as train_dataset, so the
# reported "Val loss" is computed on training data — confirm this is intended.
train(
    model=model,
    args=training_args,
    optimizer=opt,
    train_dataset=train_dataset,
    val_dataset=train_dataset
)


Number of trainable parameters: 2621440
Starting training..., iters: 2930
Iter 1: Val loss 2.430, Val took 4.963s
Iter 10: Train loss 1.692, Learning Rate 1.000e-05, It/sec 2.521, Tokens/sec 1190.982, Trained Tokens 4725, Peak mem 2.699 GB
Iter 20: Train loss 1.527, Learning Rate 1.000e-05, It/sec 2.108, Tokens/sec 1221.345, Trained Tokens 10518, Peak mem 4.480 GB
Iter 30: Train loss 1.347, Learning Rate 1.000e-05, It/sec 2.442, Tokens/sec 1258.191, Trained Tokens 15671, Peak mem 4.480 GB
Iter 40: Train loss 1.090, Learning Rate 1.000e-05, It/sec 3.090, Tokens/sec 1254.796, Trained Tokens 19732, Peak mem 4.480 GB
Iter 50: Val loss 1.053, Val took 4.948s
Iter 50: Train loss 1.168, Learning Rate 1.000e-05, It/sec 2.458, Tokens/sec 1260.532, Trained Tokens 24861, Peak mem 4.480 GB
Iter 60: Train loss 1.144, Learning Rate 1.000e-05, It/sec 2.988, Tokens/sec 1264.103, Trained Tokens 29091, Peak mem 4.480 GB
Iter 70: Train loss 1.106, Learning Rate 1.000e-05, It/sec 2.567, Tokens/sec 1234.98

In [None]:
from mlx_lm import load, generate

# Load the base model again, this time with the adapters stored in the
# "adapters" directory. Note that `tokenizer` is rebound here as well.
model_lora, tokenizer = load("Qwen/Qwen3-0.6B", adapter_path="adapters")

question = "Which genres does Nageki no Kenkou Yuuryouji fall under?"
# Alternative question to try: question = "What is anime?"
prompt = inference_prompt_style.format(question) # Use the inference template defined in the first cell

# Generate with the adapter-equipped model using the same token budget as the
# base-model run in the first cell.
response = generate(model_lora, tokenizer, prompt=prompt, max_tokens=1200)

print(response)