In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorForLanguageModeling
from transformers import TextStreamer
from trl import SFTTrainer

from torch.utils.data import Dataset, DataLoader
from accelerate import Accelerator
import argparse, logging, os
from tqdm import tqdm

# LoRA modules
from peft.mapping import get_peft_model
from peft.tuners.lora import LoraConfig
from peft.utils.peft_types import TaskType
from peft.peft_model import PeftModelForCausalLM
from peft.config import PeftConfig

In [None]:
class Generator:
    """Callable that wraps a question in a fixed chat prompt and generates a reply.

    The instance holds a causal language model, its tokenizer, and a prompt
    template with a single ``{}`` placeholder for the user's question.
    Calling the instance returns the decoded continuation (prompt stripped),
    while the tokens are also streamed to stdout as they are produced.
    """

    def __init__(self, model, tokenizer):
        """Store the model/tokenizer pair and define the prompt template.

        Args:
            model: Object exposing ``device`` and ``generate(...)``.
            tokenizer: Object that is callable on text and exposes
                ``eos_token_id``, ``convert_tokens_to_ids`` and ``decode``.
        """
        # NOTE(review): the template is reproduced byte-for-byte (including the
        # leading indentation on each line and the spelling of the system
        # message) because a fine-tuned adapter may have been trained on this
        # exact text -- confirm against the training prompt before editing it.
        self.instruction = """<|begin_of_text|>
        <|start_header_id|>system<|end_header_id|>
        당신은 한국어를 정확하게 구사하는 AI 어시트턴트입니다. 질문에 대해 한국어로 답변해주세요<|eot_id|>
        <|start_header_id|>user<|end_header_id|>
        AI 어시스턴트님, 아래 질문을 한국어로 전문적으로 답변해주세요. 중요한 키워드는 볼드체로 표기하세요.
        질문: {}<|eot_id|>
        <|start_header_id|>assistant<|end_header_id|>\n\n"""

        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, input):
        """Generate a reply to ``input`` and return it as a string.

        Args:
            input: The question text substituted into the prompt template.

        Returns:
            The decoded text of the newly generated tokens only, with special
            tokens removed.
        """
        template = self.instruction.format(input)
        device = self.model.device

        encoded = self.tokenizer(template, return_tensors="pt")
        input_ids = encoded["input_ids"].to(device)
        # Forward the tokenizer's attention mask explicitly so generate() does
        # not have to infer it from the pad token (and does not warn about it).
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        # Stop on either the tokenizer's EOS token or the end-of-turn marker
        # used by the prompt template.
        terminators = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

        output = self.model.generate(
            input_ids,
            attention_mask=attention_mask,
            streamer=TextStreamer(self.tokenizer),
            do_sample=True,
            max_new_tokens=1024,
            temperature=0.3,
            top_p=0.92,
            top_k=2,
            renormalize_logits=True,
            eos_token_id=terminators,
        )

        # Drop the prompt tokens; keep only what the model appended.
        prompt_length = input_ids.shape[-1]
        response = output[0][prompt_length:]
        return self.tokenizer.decode(response, skip_special_tokens=True)

In [None]:
# model class
class ModelInitiator:
    """Load a PEFT adapter on top of its base causal LM, plus a tokenizer.

    Calling the instance returns ``(model, tokenizer)`` ready for inference.
    """

    def __init__(self, model_checkpoint, tokenizer_checkpoint):
        """Remember where to load the adapter and the tokenizer from.

        Args:
            model_checkpoint: Path or hub id of the PEFT adapter checkpoint.
            tokenizer_checkpoint: Path or hub id of the tokenizer.
        """
        self.model_checkpoint = model_checkpoint
        self.tokenizer_checkpoint = tokenizer_checkpoint

    def __call__(self):
        """Build the model and tokenizer.

        Returns:
            A ``(model, tokenizer)`` tuple. The model is the base model named
            in the adapter config with the adapter applied and switched to
            eval mode; the tokenizer has ``<pad>`` registered as its pad token
            and the model's embeddings are resized to the tokenizer length.
        """
        # The adapter config records which base model it was trained on.
        config = PeftConfig.from_pretrained(self.model_checkpoint)
        model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            device_map="auto",
            # FlashAttention-2 only runs in fp16/bf16; "auto" takes the dtype
            # stored in the checkpoint instead of the float32 default.
            torch_dtype="auto",
            trust_remote_code=True,
            attn_implementation="flash_attention_2",
        )
        model = PeftModelForCausalLM.from_pretrained(model, self.model_checkpoint)
        model.eval()

        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_checkpoint)
        # Register a dedicated pad token, then grow the embedding matrix so
        # the (possibly new) token id has a row.
        tokenizer.add_special_tokens({"pad_token": "<pad>"})
        model.resize_token_embeddings(len(tokenizer))

        return model, tokenizer

In [None]:
def main(args):
    """Load the model and tokenizer, then answer ``args.inputs``.

    Args:
        args: Namespace with ``model`` (adapter checkpoint), ``tokenizer``
            (tokenizer checkpoint, optional) and ``inputs`` (question text).

    Returns:
        The generated response string.
    """
    # Fall back to the model checkpoint when no tokenizer checkpoint is given.
    # Truthiness covers both None (the CLI default) and "" (the notebook
    # placeholder), neither of which is a loadable location.
    tokenizer_checkpoint = args.tokenizer or args.model

    model, tokenizer = ModelInitiator(args.model, tokenizer_checkpoint)()
    generator = Generator(model, tokenizer)

    return generator(args.inputs)

In [None]:
# Selects how the run arguments are obtained: "argumentparser" reads them from
# the command line, "jupyter_inline" uses the values assigned below.
types = "jupyter_inline"
if __name__ == "__main__":

    if types == "jupyter_inline":
        # Fill these in directly when running inside a notebook.
        model_checkpoint = ""
        tokenizer_checkpoint = ""
        inputs = ""

        args = argparse.Namespace(
            model=model_checkpoint,
            tokenizer=tokenizer_checkpoint,
            inputs=inputs,
        )
        main(args)

    elif types == "argumentparser":
        parser = argparse.ArgumentParser()
        parser.add_argument("--model", default=None, type=str, required=True)
        parser.add_argument("--tokenizer", default=None, type=str, required=False)
        parser.add_argument("--inputs", default=None, type=str, required=False)

        args = parser.parse_args()
        main(args)