## dependencies
Dependencies on Kaggle are very finicky, so the packages below are installed in a specific order (and `bitsandbytes` is pinned to a specific version).

In [None]:
# Install order matters: the torch stack is removed and reinstalled before unsloth goes in.
!pip install pip3-autoremove
# Remove torch/torchvision/torchaudio so the next line can reinstall them from the cu121 wheel index.
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth
# NOTE(review): bitsandbytes is pinned, presumably to a version known to work with this stack; confirm before bumping.
!pip install bitsandbytes==0.43.2
# NOTE(review): torchao is removed after the installs, presumably because it conflicts with this torch build; confirm.
!pip uninstall -y torchao

from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
from transformers import TextStreamer
from unsloth.chat_templates import train_on_responses_only
from trl import SFTTrainer, SFTConfig
import os, re, datasets, json, torch

## build and download the model, trainer, tokenizer, dataset...

In [None]:
# Load the 4-bit bitsandbytes checkpoint of Qwen3-14B together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen3-14B-unsloth-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Wrap the base model with LoRA adapters on the attention (q/k/v/o) and
# MLP (gate/up/down) projection layers.
# can tweak lora rank r in 2^1 - 2^7
model = FastLanguageModel.get_peft_model(
    model,
    r=32,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Swap in the "qwen3-instruct" chat template; the turn markers passed to
# train_on_responses_only later in this cell must match this template.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="qwen3-instruct",
)

# Read the exported conversations from the attached Kaggle input.
# The context manager closes the file handle as soon as parsing is done instead
# of leaving an open handle around for the garbage collector; the encoding is
# explicit so non-ASCII text is decoded as UTF-8 regardless of the locale.
with open('/kaggle/input/discord-0209/discord.json', encoding='utf-8') as dataset_file:
    loaded_data = json.load(dataset_file)

# NOTE(review): assumes the JSON is a list of row dicts, each with the "text"
# field the trainer is configured to read; verify against the export script.
dataset = datasets.Dataset.from_list(loaded_data)

# Knobs worth tuning: args.learning_rate, args.max_steps, num_train_epochs
# Effective batch per optimizer step = 2 (per device) * 4 (grad accumulation) = 8.
# A positive max_steps takes precedence over num_train_epochs in the
# transformers TrainingArguments, so this run stops after 100 steps.
training_args = SFTConfig(
    max_steps=100,
    num_train_epochs=3,
    learning_rate=5e-5,
    dataset_text_field="text",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.001,
    lr_scheduler_type="linear",
    seed=3407,
    report_to="none",
)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    eval_dataset=None,
    args=training_args,
)

# Restrict the training signal to assistant turns: the two markers tell the
# helper where a user turn and an assistant turn begin in the rendered text.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)

## test the tokenizer (confirms the dataset is good)

In [None]:
# Sanity check on one training row: decode the full token sequence, then decode
# only the label positions that are not masked out (-100) to see what the loss
# will actually be computed on.
print('detokenized whole msg:')
sample = trainer.train_dataset[67]
print(tokenizer.decode(sample["input_ids"]))

print('detokenized just unmasked:')
# Masked positions are swapped for the pad token so decode() accepts them,
# and the decoded pad tokens are then blanked out with spaces.
visible_ids = [
    tokenizer.pad_token_id if label == -100 else label
    for label in sample["labels"]
]
decoded_labels = tokenizer.decode(visible_ids)
print(decoded_labels.replace(tokenizer.pad_token, " "))

# @title Show current memory stats
# Baseline taken before training: device 0's name and total memory, plus the
# peak memory reserved so far, both converted from bytes to GiB.
BYTES_PER_GIB = 1024**3
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / BYTES_PER_GIB, 2)
max_memory = round(gpu_stats.total_memory / BYTES_PER_GIB, 2)
print(f"gpu = {gpu_stats.name} w/ {max_memory}gb, reserved {start_gpu_memory}gb")

## train

In [None]:
# Run the fine-tune; the returned stats object is kept for later inspection.
trainer_stats = trainer.train()

# Report the peak reserved memory after training against the card's total
# capacity, both in GiB.
res_vram = torch.cuda.max_memory_reserved() / 1024**3
total_vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
print(f'used {res_vram:.2f} gb vram ({100 * res_vram / total_vram:.2f}% of total)')

## it works! (does it?)

In [None]:
# Smoke test: a system prompt plus two prior turns, after which the model is
# asked to produce the next assistant message.
messages = [
    {"role": "system", "content": "this is a discord conversation. you are b4444. continue the conversation"},
    {"role": "assistant", "content": "b4444: exclusivity is IMPLIED in common parlance idoit. 'do you want cake or cookies' they mean XOR. if you say both you ARE A GLUTTON."},
    {"role": "user", "content": "championwastaken: cornball"},
]

# Render the conversation to a prompt string ending with the generation prompt.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize onto the GPU and stream the reply; skip_prompt=True keeps the
# streamer from echoing the prompt itself.
prompt_inputs = tokenizer(text, return_tensors="pt").to("cuda")
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    **prompt_inputs,
    max_new_tokens=200,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    streamer=streamer,
)

## backup to hugging face

In [None]:
# Create a scratch directory and make it the working directory before exporting.
# NOTE(review): presumably so the GGUF export's intermediate files land under /tmp; confirm.
!mkdir -p /tmp/qwen
%cd /tmp/qwen
# Local-only alternative to the upload below:
# model.save_pretrained_gguf("/kaggle/qwen", tokenizer, quantization_method="q4_k_m")

# SECURITY: a Hugging Face access token used to be hardcoded on this line.
# Any token that has appeared in a notebook must be treated as leaked: revoke
# it in the Hugging Face account settings and create a new one. The new token
# is read from the HF_TOKEN environment variable (for example, set from a
# Kaggle secret in a cell that is never committed) so it stays out of the file.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set: export a Hugging Face write token in the "
        "HF_TOKEN environment variable before pushing to the hub"
    )

# Convert the model to GGUF with q4_k_m quantization and upload it to the hub repo.
model.push_to_hub_gguf("b44ken/discqwen14b", tokenizer, quantization_method="q4_k_m", token=hf_token)