In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig, DataCollatorForLanguageModeling
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = load_dataset("wiki_auto", "auto", trust_remote_code=True)
INPUT_COLUMN = "normal"
TARGET_COLUMN = "simple"


In [3]:
model_name = "Qwen/Qwen-7B-Chat"

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)


In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Use eos_token if possible
if tokenizer.pad_token is None:
    if tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token
    else:
        # fallback: use known safe Qwen token
        tokenizer.pad_token = '<|endoftext|>'

print("Using PAD token:", tokenizer.pad_token)


Using PAD token: <|endoftext|>


In [10]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

The model is automatically converting to bf16 for faster inference. If you want to disable the automatic precision, please manually add bf16/fp16/fp32=True to "AutoModelForCausalLM.from_pretrained".
Try importing flash-attention for faster inference...
Loading checkpoint shards: 100%|██████████| 8/8 [00:24<00:00,  3.06s/it]


In [11]:
model.resize_token_embeddings(len(tokenizer))

Embedding(151851, 4096)

In [13]:
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn", "c_proj", "w1", "w2"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


In [14]:
def preprocess(example):
    prompt = f"Simplify this:\n{example[INPUT_COLUMN]}\nSimplified:"
    target = example[TARGET_COLUMN]
    full = f"{prompt} {target}"

    tokenized = tokenizer(
        full,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    labels = tokenized["input_ids"].copy()
    labels = [-100 if token == tokenizer.pad_token_id else token for token in labels]
    tokenized["labels"] = labels
    return tokenized


In [15]:
# 7. Tokenize datasets
tokenized_train = dataset["part_1"].map(preprocess, remove_columns=dataset["part_1"].column_names)
tokenized_eval = dataset["part_2"].map(preprocess, remove_columns=dataset["part_2"].column_names) if "part_2" in dataset else None

# 8. Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


Map: 100%|██████████| 125059/125059 [14:44<00:00, 141.38 examples/s]
Map: 100%|██████████| 13036/13036 [01:05<00:00, 199.31 examples/s]


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model.eval()

# Simple prompt to test generation
prompt = "Simplify this:\nThe proliferation of autonomous vehicles necessitates robust regulatory oversight.\nSimplified:"

# Tokenize
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate
output_ids = model.generate(
    **inputs,
    max_new_tokens=100,  # up from 50
    do_sample=False,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id or tokenizer.pad_token_id
)


# Decode
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
simplified_response = output_text.split("Simplified:")[-1].strip()

print(" Simplified:", simplified_response)



 Model Output:
 Simplify this:
The proliferation of autonomous vehicles necessitates robust regulatory oversight.
Simplified:
't be able to drive without regulations.



In [None]:
# 9. Training args
training_args = TrainingArguments(
    output_dir="./qwen-7b-lora-simplifier",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_dir="./logs",
    logging_steps=50,
    report_to="none"
)


In [None]:

# 10. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    tokenizer=tokenizer,
    data_collator=data_collator
)

In [None]:

# 11. Train & save
trainer.train()
model.save_pretrained("./qwen-7b-lora-simplifier")
tokenizer.save_pretrained("./qwen-7b-lora-simplifier")

print(" Fine-tuning complete!")