In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_name = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 2 files: 100%|██████████| 2/2 [3:50:28<00:00, 6914.22s/it]   
Loading checkpoint shards: 100%|██████████| 2/2 [00:24<00:00, 12.46s/it]


In [4]:
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

In [5]:
dataset = load_dataset("json", data_files="mistral_data.jsonl", split="train")

Generating train split: 2692 examples [00:00, 37649.94 examples/s]


In [6]:
def format_example(example):
    text = example["text"]
    try:
        instruction = text.split("[INST]")[1].split("[/INST]")[0].strip()
        response = text.split("[/INST]")[1].strip()
        return {"text": f"[INST] {instruction} [/INST] {response}"}
    except IndexError:
        return {"text": text}

dataset = dataset.map(format_example)

Map: 100%|██████████| 2692/2692 [00:00<00:00, 24577.75 examples/s]


In [7]:
def tokenize(example):
    output = tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    output["labels"] = output["input_ids"].copy()
    return output

tokenized_dataset = dataset.map(tokenize, batched=True)

Map: 100%|██████████| 2692/2692 [00:00<00:00, 4337.74 examples/s]


In [8]:
training_args = TrainingArguments(
    output_dir="./mistral-lora-output",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=10,
    max_steps=100,
    learning_rate=2e-4,
    fp16=True,
    save_strategy="epoch",
    save_total_limit=1,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none"
)

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [11]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,5.183
20,0.3833
30,0.1569
40,0.1163
50,0.1072
60,0.0962
70,0.1019
80,0.1008
90,0.0928
100,0.0902


TrainOutput(global_step=100, training_loss=0.6428703045845032, metrics={'train_runtime': 15946.692, 'train_samples_per_second': 0.05, 'train_steps_per_second': 0.006, 'total_flos': 1.7491908624384e+16, 'train_loss': 0.6428703045845032, 'epoch': 0.2971768202080238})

In [12]:
model.save_pretrained("mistral-lora-adapter")
tokenizer.save_pretrained("mistral-lora-adapter")

('mistral-lora-adapter\\tokenizer_config.json',
 'mistral-lora-adapter\\special_tokens_map.json',
 'mistral-lora-adapter\\tokenizer.model',
 'mistral-lora-adapter\\added_tokens.json',
 'mistral-lora-adapter\\tokenizer.json')

In [2]:
# 1. Nama model dasar dan path adapter LoRA
base_model = "mistralai/Mistral-7B-v0.1"
adapter_path = "mistral-lora-adapter"

# 2. Konfigurasi quantization 4-bit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# 3. Load model dasar
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# 4. Load adapter LoRA
model = PeftModel.from_pretrained(model, adapter_path)

# 5. Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token  # Mistral tidak punya pad_token

# 6. Fungsi deteksi pertanyaan umum
def is_general_question(prompt):
    return prompt.strip().endswith("?") and "[INST]" not in prompt

# 7. Fungsi inference akhir
def generate_response(prompt, max_new_tokens=500):
    if is_general_question(prompt):
        input_text = prompt
    elif "[INST]" in prompt and "[/INST]" in prompt:
        input_text = prompt
    else:
        input_text = f"[INST] {prompt} [/INST]"

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    input_length = inputs["input_ids"].shape[1]
    max_length = input_length + max_new_tokens

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            early_stopping=False,
        )

    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    if "[/INST]" in decoded:
        return decoded.split("[/INST]")[-1].strip()
    else:
        return decoded.strip()

Loading checkpoint shards: 100%|██████████| 2/2 [00:35<00:00, 17.61s/it]


In [5]:
if __name__ == "__main__":
    prompt = "Berapa kandungan nutrisi dalam bakso?"
    response = generate_response(prompt)
    print("\n[RESPON MODEL]")
    print(response)


[RESPON MODEL]
Berapa kandungan nutrisi dalam bakso? Bakso memiliki kandungan nutrisi sebagai berikut: 118.0 kalori, 10.8g protein, 3.4g lemak, dan 2.6g karbohidrat. Makanan ini termasuk dalam kategori Tinggi Protein. Rekomendasi nutrisi untuk anak usia 4-6 tahun adalah 1000 kalori, 15g lemak, 50g protein, dan 150g karbohidrat.

Bakso memiliki kandungan nutrisi sebagai berikut: 118.0 kalori, 10.8g protein, 3.4g lemak, dan 2.6g karbohidrat. Makanan ini termasuk dalam kategori Tinggi Protein. Rekomendasi nutrisi untuk anak usia 4-6 tahun adalah 1000 kalori, 15g lemak, 50g protein, dan 150g karbohidrat.
