In [2]:
import sys
import os

sys.path.append(os.path.abspath('./'))
from config import *

from transformers import (
    TrainingArguments,
    Trainer,
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
)
from peft import (
    LoraConfig,
    TaskType,
    prepare_model_for_kbit_training,
    get_peft_model,
    AutoPeftModelForCausalLM,
    PeftModelForCausalLM,
)
from trl import SFTTrainer
from datasets import load_from_disk
from evaluate import load as load_metric

import nltk

  from .autonotebook import tqdm as notebook_tqdm
  backends.update(_get_backends("networkx.backends"))


In [3]:
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH,
)

tokenizer.add_special_tokens(
    {
        'additional_special_tokens': [
            '<|PER|>',
            '<|PER_1|>',
            '<|PER_2|>',
            '<|PER_3|>',
            '<|PER_4|>',
        ]
    }
)


if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '<|PAD|>'})
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(
        '<|PAD|>'
    )

tokenizer.padding_side = 'left'

In [4]:
model = AutoModelForCausalLM.from_pretrained(f'{MODEL_PATH}')
model.resize_token_embeddings(len(tokenizer))

model = PeftModelForCausalLM.from_pretrained(
    model,
    f'{MODEL_PATH}-qlora',
    low_cpu_mem_usage=True,
    device_map=DEVICE,
)
# model = AutoPeftModelForCausalLM.from_pretrained(
#     f"{MODEL_PATH}-qlora",
#     low_cpu_mem_usage=True,
#     device_map=DEVICE
# )

merged_model = model.merge_and_unload()
merged_model = merged_model.to(DEVICE)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.76it/s]
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [None]:
prompt = tokenizer.apply_chat_template(
    [
        {
            'role': 'system',
            'content': r'Tulis sebuah kisah sosial dari kartu-kartu yang diberikan',
        },
        {
            'role': 'user',
            'content': [
                'Ayah',
                'loncat',
                'kuda',
                'main',
                'dengan',
            ],
        },
    ],
    tokenize=False,
    add_generation_prompt=True,
)

input_ids = tokenizer(
    prompt,
    return_tensors='pt',
    padding=True,
).input_ids.to(DEVICE)


output = merged_model.generate(
    input_ids=input_ids,
    max_new_tokens=1024,
    temperature=0.9,
    do_sample=True,
    top_p=0.9,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)


story = tokenizer.decode(output[0])
print(story)

<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 22 Jul 2025

Tulis sebuah kisah sosial dari kartu-kartu yang diberikan<|eot_id|><|start_header_id|>user<|end_header_id|>

['Ayah', 'loncat', 'kuda','main', 'dengan']<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Ketika masih kecil, <|PER|> suka melihat ayahnya bermain dengan kuda. Dia sering melihat ayahnya berdiri di atas kuda, berlatih teknik berlari dan berayun. Suatu hari, ayahnya meminta <|PER|> untuk mencoba berada di atas kuda. Awalnya, <|PER|> merasa takut, tapi ayahnya memberikan motivasi yang kuat. Ia mengatakan bahwa tidak ada yang salah untuk mencoba, dan bahwa dia akan ada di sampingnya selalu. Dengan tekad yang kuat, <|PER|> akhirnya berhasil naik ke atas kuda. Mereka berdua berlatih bersama, dan <|PER|> mulai menyukai kegiatan tersebut.<|eot_id|>


In [6]:
print(tokenizer.tokenize('<|PER_1|>'))

'"PRESON"'

['<|PER_1|>']


'"PRESON"'