# Fine-Tune Qwen3-8B for Chinese Novel Writing

This notebook fine-tunes **Qwen3-8B** on your custom Chinese novel data using QLoRA via Unsloth.

**Why Qwen3-8B for Chinese?**
- Best-in-class Chinese language understanding and generation at 8B scale
- Outperforms Qwen2.5-14B on most benchmarks despite being smaller
- Native Chinese tokenizer (efficient encoding, no wasted tokens on CJK characters)
- Apache 2.0 license

**Requirements:** Google Colab with a T4 GPU (the free tier works) or Kaggle Notebooks

## 1. Install Dependencies

In [None]:
%%capture
# The %%capture cell magic on the first line hides this cell's output,
# so the (long) pip logs are not shown in the notebook.

# Install the released Unsloth package together with its dependencies.
!pip install unsloth
# Then reinstall Unsloth itself from the GitHub repository. --no-deps leaves the
# dependencies installed above untouched; --no-cache-dir avoids reusing a cached build.
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

## 2. Load Model

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading options. max_seq_length is reused by the training cell further down.
max_seq_length = 4096  # Qwen3 supports up to 40960, but 4096 is good for novel chunks
dtype = None           # None lets the loader pick the dtype for the current GPU
load_in_4bit = True    # QLoRA: load the base weights 4-bit quantized

# Fetch the base model and its tokenizer with the options above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen3-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Report how much GPU memory PyTorch has allocated after loading, in GiB.
gpu_gb_used = torch.cuda.memory_allocated() / 1024**3
print(f"Model loaded. GPU memory used: {gpu_gb_used:.1f} GB")

## 3. Configure LoRA Adapters

In [None]:
# Projection layers that receive LoRA adapters: the four attention projections
# followed by the three MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters.
# NOTE(review): lora_alpha (16) is half the rank (32), i.e. a LoRA scaling of
# 0.5 — confirm this is intended rather than a leftover from an r=16 setup.
model = FastLanguageModel.get_peft_model(
    model,
    r=32,                                  # LoRA rank
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # Saves 30% more VRAM
    random_state=3407,
)

# Count trainable vs. total parameters in a single pass over the model.
trainable = 0
total = 0
for param in model.parameters():
    n_elements = param.numel()
    total += n_elements
    if param.requires_grad:
        trainable += n_elements
print(f"Trainable: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")

## 4. Upload and Load Dataset

Upload your `train.jsonl` file (generated by the Novel Writer pipeline).

Each line should have: `{"instruction": "...", "input": "", "output": "..."}`

In [None]:
import json
import os

# Obtain train.jsonl: reuse a copy already in the working directory, otherwise
# ask Colab for an upload.
if os.path.exists("train.jsonl"):
    print("train.jsonl already exists, skipping upload.")
else:
    try:
        # Imported lazily: google.colab only exists on Colab, so importing it
        # unconditionally would break this cell on Kaggle even when the file
        # is already present.
        from google.colab import files
    except ImportError as err:
        raise FileNotFoundError(
            "train.jsonl not found and google.colab is unavailable; "
            "place train.jsonl in the working directory and re-run this cell."
        ) from err

    print("Please upload your train.jsonl file:")
    uploaded = files.upload()
    # The rest of the notebook reads the fixed name "train.jsonl", so fail
    # early with a clear message if the upload used another filename.
    if not os.path.exists("train.jsonl"):
        raise FileNotFoundError(
            f"Expected an upload named train.jsonl, got: {sorted(uploaded)}"
        )

# Inspect the file: count entries and preview the first one.
with open("train.jsonl", "r", encoding="utf-8") as f:
    # Skip blank lines so they are neither counted as entries nor parsed as JSON.
    lines = [line for line in f if line.strip()]
if not lines:
    raise ValueError("train.jsonl contains no entries.")

print(f"Dataset: {len(lines)} entries")
print("Sample entry:")
sample = json.loads(lines[0])
print(f"  instruction: {sample.get('instruction', '')[:80]}...")
print(f"  output: {sample.get('output', '')[:80]}...")

In [None]:
from datasets import load_dataset

# Read the JSONL file as a single "train" split.
dataset = load_dataset("json", data_files="train.jsonl", split="train")

# Hold out 10% of the rows for validation; the fixed seed keeps the split reproducible.
split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset, eval_dataset = split["train"], split["test"]

print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(eval_dataset)}")

# Chinese novel prompt template (ChatML layout). The two "{}" slots take the
# user turn and the assistant reply, in that order.
novel_prompt = """<|im_start|>system
你是一位专业的中文小说作家。请根据指令，以优美流畅的中文续写故事内容。注意保持文风一致，人物性格鲜明，情节引人入胜。<|im_end|>
<|im_start|>user
{}<|im_end|>
<|im_start|>assistant
{}<|im_end|>"""


def formatting_func(examples):
    """Render a batch of records into full training texts.

    Args:
        examples: Batched mapping of column name to list of values. Must
            contain "instruction" and "output"; "input" is optional.

    Returns:
        A dict ``{"text": [...]}`` with one rendered prompt per record.

    A non-empty "input" value is appended to the instruction (separated by a
    blank line) so that extra context is not silently dropped. Records whose
    "input" is empty, None, or absent render exactly as instruction-only.
    """
    instructions = examples["instruction"]
    outputs = examples["output"]
    try:
        extras = examples["input"]
    except KeyError:
        # The "input" column is optional; treat it as empty for every record.
        extras = [""] * len(instructions)

    texts = []
    for instruction, extra, output in zip(instructions, extras, outputs):
        extra = (extra or "").strip()
        user_turn = f"{instruction}\n\n{extra}" if extra else instruction
        texts.append(novel_prompt.format(user_turn, output))
    return {"text": texts}

# Add the rendered "text" column to both splits (train first, then eval).
train_dataset, eval_dataset = (
    subset.map(formatting_func, batched=True)
    for subset in (train_dataset, eval_dataset)
)

# Preview a formatted sample (first 500 characters of the first training row)
print("--- Formatted sample ---")
first_text = train_dataset[0]["text"]
print(first_text[:500])

## 5. Train

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, EarlyStoppingCallback
from unsloth import is_bfloat16_supported

# Choose the mixed-precision mode from what the GPU supports natively.
# torch.cuda.is_bf16_supported() can also report True for emulated bfloat16
# (e.g. on a T4 with recent PyTorch), which would select bf16 on a GPU that
# should train in fp16; Unsloth's helper avoids that.
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",       # column produced by formatting_func
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        output_dir="checkpoints_qwen3_chinese",
        num_train_epochs=3,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,   # effective batch size: 2 * 4 = 8
        warmup_ratio=0.1,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=10,
        # Evaluate and checkpoint on the same 50-step schedule so the best
        # checkpoint (lowest eval_loss) can be restored when training ends.
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",
        save_steps=50,
        save_total_limit=3,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        seed=3407,
    ),
    # Stop after 3 consecutive evaluations without an eval_loss improvement.
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

print("Starting training...")
stats = trainer.train()
print(f"Training complete! Total steps: {stats.global_step}")

## 6. Test Generation

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Same system prompt as the training template, character for character, so the
# model is prompted at inference exactly as it was during fine-tuning.
system_prompt = (
    "你是一位专业的中文小说作家。请根据指令，以优美流畅的中文续写故事内容。"
    "注意保持文风一致，人物性格鲜明，情节引人入胜。"
)

test_prompts = [
    "续写以下故事：李明站在长安城门前，心中百感交集。三年前他离开家乡时还是个少年，如今",
    "描写一个武侠场景：月光下，两位剑客在悬崖边对峙。",
    "请以古风笔触描写一个春日清晨的集市。",
]

for i, prompt in enumerate(test_prompts):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    # return_dict=True yields both input_ids and attention_mask, so generate()
    # receives an explicit mask instead of having to infer one.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    prompt_length = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.8,
        top_p=0.9,
        do_sample=True,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    print(f"\n{'='*60}")
    print(f"Prompt {i+1}: {prompt}")
    print(f"{'='*60}")
    print(response)
    print(f"\n[{len(response)} chars generated]")

## 7. Save Model

In [None]:
# Save LoRA adapters
model.save_pretrained("qwen3_chinese_novel_lora")
tokenizer.save_pretrained("qwen3_chinese_novel_lora")
print("LoRA adapters saved to qwen3_chinese_novel_lora/")

# Download as zip
!zip -r qwen3_chinese_novel_lora.zip qwen3_chinese_novel_lora/
from google.colab import files
files.download("qwen3_chinese_novel_lora.zip")

## 8. (Optional) Save to Google Drive

In [None]:
# Uncomment to save to Google Drive instead
# from google.colab import drive
# drive.mount('/content/drive')
# !cp -r qwen3_chinese_novel_lora /content/drive/MyDrive/
# print("Saved to Google Drive!")