# chatKoAlpaca

국내 실력자분들이 한국어 LLM을 위해 좋은 오픈소스들을 공유해주셨습니다.

이를 한데 모아 One-Step으로 chatGPT를 흉내 낼 수 있도록 작성하였습니다.

작성한 환경은 A100 80G Single 입니다.

colossalai에서 torch 2.x 이상 버전을 지원하지 않으므로 torch 1.x 버전을 이용해 주세요.

LLM은 polyglot-1.3B를 KoAlpaca_v1.1.json으로 추가 학습한 모델을 사용하였습니다. 더 큰 LLM은 제 환경에서 OOM이 발생하였으므로,
더 좋은 환경을 갖추신 분은 더 큰 모델을 활용하실 수 있을 것 같습니다.
* 베이스 LLM https://github.com/Beomi/KoAlpaca
* 베이스 Code https://github.com/airobotlab/KoChatGPT
* 베이스 Data https://github.com/airobotlab/KoChatGPT/tree/main/data_kochatgpt

오픈소스 공유에 감사인사 올립니다.

# set Env

In [None]:
!pip install transformers accelerate

In [None]:
!wget https://raw.githubusercontent.com/airobotlab/KoChatGPT/main/data_kochatgpt/kochatgpt_1_SFT.jsonl
!wget https://raw.githubusercontent.com/airobotlab/KoChatGPT/main/data_kochatgpt/kochatgpt_2_RM.jsonl
!wget https://raw.githubusercontent.com/airobotlab/KoChatGPT/main/data_kochatgpt/kochatgpt_3_PPO.jsonl

# Step1) SFT(지도학습)

In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import AutoTokenizer, PreTrainedTokenizer, AutoModelForCausalLM, pipeline, TrainingArguments, Trainer

In [None]:
# Path of the KoAlpaca fine-tuned polyglot-1.3b checkpoint (presumably a local
# directory, since the tokenizer is loaded with local_files_only=True).
model_name = 'KoAlpaca/train_v1.1b/polyglot-1.3b-koalpaca-v1.1b'
tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
# Sanity check: show how a short Korean sentence is split into tokens.
print(tokenizer.tokenize("안녕하세요."))

# Load the causal LM with float16 weights and move it to the GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
).to(device="cuda", non_blocking=True)

In [None]:
# Quick smoke test of the base model: continue a short Korean prompt.
text = '근육이 커지기 위해서는'
input_ids = tokenizer.encode(text, return_tensors='pt').to('cuda')
# max_length bounds prompt + generated tokens; repetition_penalty discourages loops.
gen_ids = model.generate(input_ids,
                         max_length=512,
                         repetition_penalty=2.0,
                         use_cache=True)
# Decode the single returned sequence (special tokens are not stripped here).
generated = tokenizer.decode(gen_ids[0])
print(generated)

In [None]:
# Wrap the already-loaded model/tokenizer in a text-generation pipeline on GPU 0.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
# Shared decoding options: beam search (4 beams) combined with top-k sampling,
# stopping at token id 202 and generating at most 64 new tokens.
generation_args = dict(
    num_beams=4,
    repetition_penalty=2.0,
    no_repeat_ngram_size=4,
    eos_token_id=202, # \n
    max_new_tokens=64,
    do_sample=True,
    top_k=50,
    early_stopping=True,
)
# Three dialogue-style prompts ("0 :" / "1 :" speaker turns); each ends with an
# open "1 :" turn for the model to complete.
generator(
    ["0 : **는 게임 좋아하니\n1 :",
    "0 : 어제 강남에서 살인사건 났대 ㅜㅜ 너무 무서워\n1 : 헐 왜? 무슨 일 있었어?\n0 : 사진보니까 막 피흘리는 사람있고 경찰들이 떠서 제압하고 난리도 아니었다던데??\n1 :",
    "0 : 자기야 어제는 나한테 왜 그랬어?\n1 : 뭔 일 있었어?\n0 : 어떻게 나한테 말도 없이 그럴 수 있어? 나 진짜 실망했어\n1 : "],
    **generation_args
)

In [None]:
# data config
# Label value written over positions that must not contribute to the loss
# (prompt tokens in SFT_dataset, label padding in the data collator).
IGNORE_INDEX = -100
# Defined but not referenced by the visible code; the pad token is set to the
# EOS token instead.
DEFAULT_PAD_TOKEN = "[PAD]"
# EOS, BOS and UNK are all mapped to the same "</s>" string.
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"
# Bilingual (English/Korean) Alpaca-style prompt templates.
#   "prompt_input":    needs the keys {prompt} and {input}
#   "prompt_no_input": needs only {prompt}
# Both end with the "### Response(응답):" header that the model should continue.
PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context.\n"
        "아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.\n\n"
        "Write a response that appropriately completes the request.\n요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Input(입력):\n{input}\n\n### Response(응답):"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task.\n"
        "아래는 작업을 설명하는 명령어입니다.\n\n"
        "Write a response that appropriately completes the request.\n명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Response(응답):"
    ),
}

In [None]:
## Prepare the model
# Reload the model for training without a torch_dtype override (the earlier
# demo cell loaded it with float16 weights).
model = AutoModelForCausalLM.from_pretrained(model_name)
# Right padding and a 256-token limit for the SFT examples.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side="right",
    model_max_length=256,
)
# Register "</s>" as the EOS, BOS and UNK special token.
tokenizer.add_special_tokens(
    {
        "eos_token": DEFAULT_EOS_TOKEN,
        "bos_token": DEFAULT_BOS_TOKEN,
        "unk_token": DEFAULT_UNK_TOKEN,
    }
)
# Reuse EOS as the padding token, so pad_token_id == eos_token_id from here on.
tokenizer.pad_token = tokenizer.eos_token
print(tokenizer)

In [None]:
from typing import Optional, Dict, Sequence
import logging
import copy
from dataclasses import dataclass
import json


class SFT_dataset(Dataset):
    """Supervised fine-tuning dataset built from prompt/completion records.

    Each record is rendered through ``PROMPT_DICT`` into a source string and
    paired with its completion followed by the tokenizer's EOS token. The
    stored ``input_ids`` are the tokenized source + target; ``labels`` is a
    copy in which the leading source positions are set to ``IGNORE_INDEX``.
    """

    def __init__(self, data_path_1_SFT: str, tokenizer: PreTrainedTokenizer, verbose=False):
        """Read the JSON file and tokenize every record.

        Args:
            data_path_1_SFT: Path of a file that ``json.load`` parses into a
                list of dicts holding ``prompt`` and ``completion`` keys; an
                ``input`` key is optional and selects the template with input.
            tokenizer: Tokenizer whose ``eos_token``, ``pad_token_id`` and
                ``model_max_length`` are read.
            verbose: When true, print the first record, the first rendered
                source/target and a progress message.
        """
        super(SFT_dataset, self).__init__()
        logging.warning("Loading data...")

        # Record keys; the KoChatGPT SFT data carries no 'input' field.
        input_key = 'input'
        output_key = 'completion'

        # utf-8-sig strips a BOM if the file has one.
        with open(data_path_1_SFT, "r", encoding='utf-8-sig') as json_file:
            records = json.load(json_file)
            if verbose:
                print('## data check ##')
                print((records[0]))

        template_with_input = PROMPT_DICT["prompt_input"]
        template_no_input = PROMPT_DICT["prompt_no_input"]

        # Model input: the prompt template filled in from the record.
        sources = [
            (template_with_input if record.get(input_key, "") != "" else template_no_input).format_map(record)
            for record in records
        ]

        # Expected output: the completion terminated by EOS.
        targets = [f"{record[output_key]}{tokenizer.eos_token}" for record in records]

        if verbose:
            print((sources[0]))
            print((targets[0]))
            print("Tokenizing inputs... This may take some time...")

        # Same scheme as KoAlpaca's preprocess():
        # https://github.com/Beomi/KoAlpaca/blob/04704348d58b8b1c2e2638d6437a04b4e8ba1823/train.py#L124
        examples = [source + target for source, target in zip(sources, targets)]
        sources_tokenized = self._tokenize_fn(sources, tokenizer)  # source only
        examples_tokenized = self._tokenize_fn(examples, tokenizer)  # source + target

        # The model sees source + target, but only target tokens are trained on:
        # the first `source_len` label positions are overwritten with IGNORE_INDEX.
        input_ids = examples_tokenized["input_ids"]
        labels = copy.deepcopy(input_ids)
        for label, source_len in zip(labels, sources_tokenized["input_ids_lens"]):
            label[:source_len] = IGNORE_INDEX

        self.input_ids = input_ids
        self.labels = labels
        logging.warning("Loading data done!!: %d" % len(self.labels))

    def _tokenize_fn(self, strings: Sequence[str], tokenizer: PreTrainedTokenizer) -> Dict:
        """Tokenize each string on its own and report per-string token counts.

        Returns a dict with ``input_ids``/``labels`` (the same list of 1-D id
        tensors) and ``input_ids_lens``/``labels_lens`` (the same list of
        counts of ids that differ from ``tokenizer.pad_token_id``).
        """
        encodings = []
        for text in strings:
            encodings.append(
                tokenizer(
                    text,
                    return_tensors="pt",
                    padding="longest",
                    max_length=tokenizer.model_max_length,
                    truncation=True,
                )
            )
        token_ids = [encoding.input_ids[0] for encoding in encodings]
        lengths = [
            encoding.input_ids.ne(tokenizer.pad_token_id).sum().item() for encoding in encodings
        ]
        return dict(
            input_ids=token_ids,
            labels=token_ids,
            input_ids_lens=lengths,
            labels_lens=lengths,
        )

    def __len__(self):
        """Number of tokenized examples."""
        return len(self.input_ids)

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        """Return the ``input_ids``/``labels`` pair stored at index ``i``."""
        return {"input_ids": self.input_ids[i], "labels": self.labels[i]}


@dataclass
class DataCollatorForSupervisedDataset(object):
    """Pad a list of ``input_ids``/``labels`` examples into one batch.

    ``input_ids`` are right-padded with the tokenizer's pad id and ``labels``
    with ``IGNORE_INDEX``; ``attention_mask`` is true wherever the padded
    ``input_ids`` differ from the pad id.
    """

    # Only ``pad_token_id`` is read from the tokenizer.
    tokenizer: PreTrainedTokenizer

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        id_seqs = [instance["input_ids"] for instance in instances]
        label_seqs = [instance["labels"] for instance in instances]

        pad = torch.nn.utils.rnn.pad_sequence
        batch_ids = pad(id_seqs, batch_first=True, padding_value=self.tokenizer.pad_token_id)
        batch_labels = pad(label_seqs, batch_first=True, padding_value=IGNORE_INDEX)

        return {
            "input_ids": batch_ids,
            "labels": batch_labels,
            "attention_mask": batch_ids.ne(self.tokenizer.pad_token_id),
        }


# Build the SFT training set from the downloaded KoChatGPT step-1 file.
train_dataset = SFT_dataset(data_path_1_SFT='./kochatgpt_1_SFT.jsonl', tokenizer=tokenizer)
eval_dataset = None  # no evaluation is run
data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)

In [None]:
def safe_save_model_for_hf_trainer(trainer: Trainer, output_dir: str):
    """Copy the trainer's model weights to CPU and write them to ``output_dir``.

    The state dict is always collected, but it is only copied and saved when
    ``trainer.args.should_save`` is true.
    """
    weights = trainer.model.state_dict()
    if not trainer.args.should_save:
        return
    cpu_weights = {}
    for name, tensor in weights.items():
        cpu_weights[name] = tensor.cpu()
    # Drop the reference to the original tensors before saving.
    del weights
    trainer._save(output_dir, state_dict=cpu_weights)  # noqa


# training_args can be adjusted; see https://github.com/Beomi/KoAlpaca/blob/main/train.sh
training_args = TrainingArguments(
    output_dir="./test", #The output directory
    overwrite_output_dir=True, #overwrite the content of the output directory
    num_train_epochs=1, # number of training epochs
    per_device_train_batch_size=4, # batch size for training
    per_device_eval_batch_size=4,  # batch size for evaluation
    eval_steps = 3, # Number of update steps between two evaluations.
    save_steps=500, # after # steps model is saved 
    warmup_steps=5,# number of warmup steps for learning rate scheduler
    prediction_loss_only=True,
    optim='adafactor',
    )
# eval_dataset is None in this notebook, so only training data is supplied.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Train, persist the trainer state, then write the weights to ./output_1_SFT
# (this directory is loaded again for inference and as the PPO actor).
trainer.train()
trainer.save_state()
safe_save_model_for_hf_trainer(trainer=trainer, output_dir='./output_1_SFT')

## prediction

In [None]:
## Inference test
# Load the SFT checkpoint written above and wrap it in a generation pipeline.
custom_model = AutoModelForCausalLM.from_pretrained('./output_1_SFT').to('cuda')
generator = pipeline('text-generation', model=custom_model, tokenizer=tokenizer, device=0)

# Same decoding options as the base-model demo: beam search plus top-k
# sampling, stopping at token id 202.
generation_args = dict(
    num_beams=4,
    repetition_penalty=2.0,
    no_repeat_ngram_size=4,
    eos_token_id=202, # \n
    max_new_tokens=64,
    do_sample=True,
    top_k=50,
    early_stopping=True
)

# Raw questions, wrapped below in the same no-input template used for training.
list_prompt = ['불고기용 고기 한우에요?',
               '리처드 닉슨이 43대 부통령직을 수행한 년도는?',
               '시카고 오헤어 국제공항은 어디에 있어',
               '오늘 미세먼지 어때?']
list_prompt = [PROMPT_DICT['prompt_no_input'].format_map({'prompt': tmp}) for tmp in list_prompt]

# One result list per prompt; print the first generated text of each.
list_result = generator(list_prompt, **generation_args)
for prompt, result in zip(list_prompt, list_result):
    print(('#'*70))
    print(('completion: %s' % (result[0]['generated_text'])))

# Step2) RM(보상모델)

세션 재시작 추천!

In [None]:
!pip install colossalai==0.2.7

# setup data
!git clone https://github.com/HaloKim/KoChatGPT.git

%cd KoChatGPT/colossalai_ChatGPT_230319/
!pip install .
%cd ../../

In [None]:
pip install urllib3==1.26

In [None]:
# Constants repeated from the SFT step so this step can run in a fresh session.
# Only the DEFAULT_EOS/BOS/UNK tokens are referenced by the visible
# reward-model cells; the other names are kept for parity with step 1.
IGNORE_INDEX = -100
DEFAULT_PAD_TOKEN = "[PAD]"
# EOS, BOS and UNK are all mapped to the same "</s>" string.
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"
# Bilingual Alpaca-style templates: "prompt_input" needs {prompt} and {input},
# "prompt_no_input" needs only {prompt}.
PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context.\n"
        "아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.\n\n"
        "Write a response that appropriately completes the request.\n요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Input(입력):\n{input}\n\n### Response(응답):"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task.\n"
        "아래는 작업을 설명하는 명령어입니다.\n\n"
        "Write a response that appropriately completes the request.\n명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Response(응답):"
    ),
}

In [None]:
from typing import Optional
from transformers import AutoConfig, AutoModel, AutoConfig
from chatgpt.models.base import RewardModel
from torch import nn

model_name = 'KoAlpaca/train_v1.1b/polyglot-1.3b-koalpaca-v1.1b'
class GPTRM_custom(RewardModel):
    """
    Reward model on top of any backbone that ``AutoModel`` can build.

    A single-output linear value head is attached to the backbone's hidden
    states (``config.hidden_size`` -> 1).

    Args:
        pretrained (str): Pretrained model name or path.
        config (PretrainedConfig): Model config, used only when ``pretrained``
            is not given; the backbone is then built with random weights.
        checkpoint (bool): Enable gradient checkpointing.
        lora_rank (int): Rank of the low-rank approximation.
        lora_train_bias (str): LoRA bias training mode.
        tokenizer: Optional tokenizer. When given together with ``pretrained``,
            the input embeddings are resized to ``len(tokenizer)``.

    Raises:
        ValueError: If neither ``pretrained`` nor ``config`` is provided.
    """

    def __init__(self,
                 pretrained: Optional[str] = None,
                 config: Optional[AutoConfig] = None,
                 checkpoint: bool = False,
                 lora_rank: int = 0,
                 lora_train_bias: str = 'none',
                 tokenizer=None) -> None:
        if pretrained is not None:
            model = AutoModel.from_pretrained(pretrained)
            # Added by wygo: keep the embedding table in sync with the
            # tokenizer. Skipped when no tokenizer is passed (the default),
            # where len(None) would otherwise raise a TypeError.
            if tokenizer is not None:
                model.resize_token_embeddings(len(tokenizer))
        elif config is not None:
            # Auto classes cannot be instantiated directly; from_config is the
            # supported way to build an untrained model from a config.
            model = AutoModel.from_config(config)
        else:
            # There is no generic default config for an Auto class, so fail
            # with a clear message instead of an opaque library error.
            raise ValueError("GPTRM_custom requires either `pretrained` or `config`.")
        if checkpoint:
            model.gradient_checkpointing_enable()

        value_head = nn.Linear(model.config.hidden_size, 1)
        super().__init__(model, value_head, lora_rank, lora_train_bias)

        # Added 230421. Always record `pretrained` so save_pretrained() can
        # test it even for config-built models.
        self.pretrained = pretrained
        if pretrained is not None:
            self.model = model

    # Added 230421 so that a config.json is written next to the weights.
    def save_pretrained(self, dir):
        """Save the backbone in Hugging Face format to ``dir``.

        Does nothing when the model was not built from ``pretrained``.
        """
        if self.pretrained is not None:
            self.model.save_pretrained(dir)

In [None]:
import torch
from transformers import AutoTokenizer
from chatgpt.trainer.strategies import ColossalAIStrategy, DDPStrategy, NaiveStrategy

# NaiveStrategy is used here; the DDP/ColossalAI strategies are imported above
# but not used.
strategy = NaiveStrategy()
lora_rank = 0  # passed to GPTRM_custom and later to trainer.fit(use_lora=...)
pretrain = 'KoAlpaca/train_v1.1b/polyglot-1.3b-koalpaca-v1.1b'

# configure model, tokenizer
with strategy.model_init_context():
    tokenizer = AutoTokenizer.from_pretrained(pretrain, padding_side="right", model_max_length=512)
    # Register "</s>" as the EOS, BOS and UNK special token.
    tokenizer.add_special_tokens(
        {
            "eos_token": DEFAULT_EOS_TOKEN,
            "bos_token": DEFAULT_BOS_TOKEN,
            "unk_token": DEFAULT_UNK_TOKEN,
        }
    )
    # Reuse EOS as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    # The tokenizer is passed in so the model can resize its embeddings to match.
    model = GPTRM_custom(pretrained=pretrain, lora_rank=lora_rank, tokenizer=tokenizer).cuda()

In [None]:
import json

# Turn each 3-way ranking record into pairwise chosen/rejected examples.
with open('./kochatgpt_2_RM.jsonl', "r", encoding='utf-8-sig') as json_file:
    list_data_dict = json.load(json_file)
    print('## data check ##')
    print((list_data_dict[0]))

# Pairs are emitted per record in the order 0 vs 1, 0 vs 2, 1 vs 2.
# The completion with the smaller ranking value becomes 'chosen'; on a tie the
# second completion of the pair is chosen.
total_data_ranking2chosen = []
for record in list_data_dict:
    record_pairs = []
    for first, second in ((0, 1), (0, 2), (1, 2)):
        pair = {'prompt': record['prompt']}
        if record['ranking'][first] < record['ranking'][second]:
            better, worse = first, second
        else:
            better, worse = second, first
        pair['chosen'] = record[f'completion_{better}']
        pair['rejected'] = record[f'completion_{worse}']
        record_pairs.append(pair)
    total_data_ranking2chosen.extend(record_pairs)

print('before data num: %d' % (len(list_data_dict)))
print('after  data num: %d' % (len(total_data_ranking2chosen)))
print('data example: \n%s' % total_data_ranking2chosen[45])

In [None]:
from chatgpt.dataset import RewardDataset
# prepare for data and dataset
import random
# Fixed seed so the shuffle, and therefore the train/eval split, is reproducible.
random.seed(230319)
# list_tmp = list(range(10))
random.shuffle(total_data_ranking2chosen)
print(total_data_ranking2chosen[45])

# Full-data split (disabled): everything but the last 1000 pairs for training,
# the last 1000 for evaluation. The original commented line used `[-1000:0]`,
# which is an empty slice; `[-1000:]` is the intended form.
# train_data = total_data_ranking2chosen[:-1000]
# eval_data = total_data_ranking2chosen[-1000:]

# Small subset actually used here: 100 pairs for training, the next 30 for
# evaluation.
train_data = total_data_ranking2chosen[:100]  # 100 training pairs
eval_data = total_data_ranking2chosen[100:130]  # 30 evaluation pairs


# The third argument (512) is presumably the maximum token length -- TODO
# confirm against RewardDataset.
train_dataset = RewardDataset(train_data, tokenizer, 512)
eval_dataset = RewardDataset(eval_data, tokenizer, 512)

# check: print one training example (prompt / chosen / rejected)
idx = 10
print('#'*70)
print('## prompt ##')
print(train_data[idx]['prompt'])
print('#'*70)
print('## chosen ##')
print(train_data[idx]['chosen'])
print('#'*70)
print('## rejected ##')
print(train_data[idx]['rejected'])

In [None]:
# configure optimizer
from colossalai.nn.optimizer import HybridAdam
from torch.optim import Adam

# ColossalAI's HybridAdam over all reward-model parameters; the plain
# torch.optim.Adam alternative is kept below, commented out.
optim = HybridAdam(model.parameters(), lr=5e-5)
# optim = Adam(model.parameters(), lr=5e-5)

In [None]:
from chatgpt.trainer import RewardModelTrainer

# Reward-model trainer over the pairwise chosen/rejected datasets:
# batch size 1, 3 epochs.
trainer = RewardModelTrainer(model=model,
                             strategy=strategy,
                             optim=optim,
                             train_dataset=train_dataset,
                             eval_dataset=eval_dataset,
                             batch_size=1,
                             max_epochs=3,)

In [None]:
import os

# train!!
# lora_rank is 0 in this notebook, so use_lora receives 0.
trainer.fit(use_lora=lora_rank)


def createDirectory(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Best-effort helper: an ``OSError`` is reported on stdout instead of being
    raised.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True makes this a no-op for an existing directory without a
        # separate exists() check (no check-then-create race), while a path
        # that exists but is not a directory is still reported as an error.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")


createDirectory('./output_2_RM')

## save
# save model checkpoint after fitting on only rank0
strategy.save_model(model, os.path.join('./output_2_RM', 'RM.pt'), only_rank0=True)
# save optimizer checkpoint on all ranks (file name carries the CUDA device index)
strategy.save_optimizer(optim,
                        os.path.join('./output_2_RM', 'RM_optim_checkpoint_%d.pt' % (torch.cuda.current_device())),
                        only_rank0=False)

# Also save the backbone in Hugging Face format so a config.json is written.
model.save_pretrained('./output_2_RM')

In [None]:
# Reward model check
def inference_RM(input_text='인공지능은 인공지능 입니다'):
    """Score ``input_text`` with the global reward ``model`` and print the result.

    Uses the global ``tokenizer`` and ``model`` and the current CUDA device.

    Args:
        input_text: Text to score.

    Returns:
        The first element of the model output, converted to NumPy.
    """
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(
        torch.cuda.current_device())
    # Inference only: skip autograd bookkeeping so no graph or activations are
    # retained for a backward pass that never happens.
    with torch.no_grad():
        output = model(input_ids)
    output_reward = output.cpu().detach().numpy()[0]

    print('input: %s\nreward score: %.1f' % (input_text, output_reward))

    return output_reward


# Score one sample sentence with the trained reward model.
# input_text = '한국은 대한민국 입니다'
input_text = '인공지능은 인공지능 입니다'

output_reward = inference_RM(input_text=input_text)

# Step3) PPO(강화학습)
세션 재시작 추천!

In [None]:
!pip install urllib3==1.26
# !git clone https://github.com/hpcaitech/ColossalAI.git
# %cd ColossalAI
# !CUDA_EXT=1 pip install .
# %cd ../
!git clone https://github.com/HaloKim/KoChatGPT.git
%cd KoChatGPT/colossalai_ChatGPT_230319/
!pip install .
%cd ../../

In [None]:
!pip install colossalai==0.2.7

In [1]:
# data config (repeated so the PPO step can run in a fresh session)
IGNORE_INDEX = -100  # not referenced by the visible PPO cells
# Token limit used for prompt tokenization and as max_length for generation.
MAX_LEN = 256
DEFAULT_PAD_TOKEN = "[PAD]"  # not referenced; the pad token is set to EOS
# EOS, BOS and UNK are all mapped to the same "</s>" string.
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"
# Bilingual Alpaca-style templates: "prompt_input" needs {prompt} and {input},
# "prompt_no_input" needs only {prompt}.
PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context.\n"
        "아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.\n\n"
        "Write a response that appropriately completes the request.\n요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Input(입력):\n{input}\n\n### Response(응답):"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task.\n"
        "아래는 작업을 설명하는 명령어입니다.\n\n"
        "Write a response that appropriately completes the request.\n명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.\n\n"
        "### Instruction(명령어):\n{prompt}\n\n### Response(응답):"
    ),
}

In [2]:
from chatgpt.models.auto import AutoActor, AutoCritic
from copy import deepcopy
import torch
from transformers import AutoTokenizer
from chatgpt.models.base import RewardModel
from chatgpt.trainer.strategies import NaiveStrategy

# configure model, tokenizer
strategy = NaiveStrategy()
pretrain_actor = './output_1_SFT'  # load the SFT model (step 1 output)
pretrain_critic = './output_2_RM'  # load the RM model (step 2 output)
lora_rank = 0
pretrain = 'KoAlpaca/train_v1.1b/polyglot-1.3b-koalpaca-v1.1b'  # used for the tokenizer only

with strategy.model_init_context():
    actor = AutoActor(pretrained=pretrain_actor, lora_rank=lora_rank).to(torch.cuda.current_device())
    # NOTE(review): ./output_2_RM holds the backbone written by save_pretrained()
    # plus a separate RM.pt state dict. Whether AutoCritic restores the trained
    # value head from this directory cannot be seen here -- confirm.
    critic = AutoCritic(pretrained=pretrain_critic, lora_rank=lora_rank).to(torch.cuda.current_device())
    tokenizer = AutoTokenizer.from_pretrained(pretrain, padding_side="right", model_max_length=512)
    # Register "</s>" as the EOS, BOS and UNK special token.
    tokenizer.add_special_tokens(
        {
            "eos_token": DEFAULT_EOS_TOKEN,
            "bos_token": DEFAULT_BOS_TOKEN,
            "unk_token": DEFAULT_UNK_TOKEN,
        }
    )
    # Reuse EOS as the padding token.
    tokenizer.pad_token = tokenizer.eos_token

    # Independent copy of the actor, passed to PPOTrainer as the initial model.
    initial_model = deepcopy(actor)
    # The reward model is built from copies of the critic's backbone and value head.
    reward_model = RewardModel(deepcopy(critic.model), deepcopy(critic.value_head)).to(torch.cuda.current_device())

  operator: aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
    registered at /opt/pytorch/pytorch/build/aten/src/ATen/RegisterSchema.cpp:6
  dispatch key: Meta
  previous kernel: registered at /opt/pytorch/pytorch/aten/src/ATen/functorch/BatchRulesScatterOps.cpp:1053
       new kernel: registered at /dev/null:219 (Triggered internally at /opt/pytorch/pytorch/aten/src/ATen/core/dispatch/OperatorEntry.cpp:150.)
  self.m.impl(name, dispatch_key, fn)


In [3]:
from colossalai.nn.optimizer import HybridAdam
from torch.optim import Adam

# Separate HybridAdam optimizers for actor and critic, both with lr=5e-6;
# the plain torch.optim.Adam alternatives are kept below, commented out.
actor_optim = HybridAdam(actor.parameters(), lr=5e-6)
critic_optim = HybridAdam(critic.parameters(), lr=5e-6)
# actor_optim = Adam(actor.parameters(), lr=5e-6)
# critic_optim = Adam(critic.parameters(), lr=5e-6)

False
[extension] Compiling or loading the JIT-built cpu_adam kernel during runtime now


Emitting ninja build file /home/jovyan/.cache/colossalai/torch_extensions/torch1.13_cu11.8/build.ninja...
Building extension module cpu_adam...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module cpu_adam...


ninja: no work to do.
[extension] Time to compile or load cpu_adam op: 0.36983585357666016 seconds
False
[extension] Compiling or loading the JIT-built fused_optim kernel during runtime now


Detected CUDA files, patching ldflags
Emitting ninja build file /home/jovyan/.cache/colossalai/torch_extensions/torch1.13_cu11.8/build.ninja...
Building extension module fused_optim...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module fused_optim...


ninja: no work to do.
[extension] Time to compile or load fused_optim op: 0.3223903179168701 seconds
False
[extension] Compiling or loading the JIT-built cpu_adam kernel during runtime now
[extension] Time to compile or load cpu_adam op: 0.07686758041381836 seconds


No modifications detected for re-loaded extension module cpu_adam, skipping build step...
Loading extension module cpu_adam...


False
[extension] Compiling or loading the JIT-built fused_optim kernel during runtime now
[extension] Time to compile or load fused_optim op: 0.002943754196166992 seconds


No modifications detected for re-loaded extension module fused_optim, skipping build step...
Loading extension module fused_optim...


In [4]:
# setting the models
# Pass the models and (model, optimizer) pairs through the strategy; the
# results are rebound to the same names in the same order.
(actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare(
    (actor, actor_optim), (critic, critic_optim), reward_model, initial_model)

In [5]:
import json

# Load the PPO prompt file (parsed as one JSON document; utf-8-sig strips a BOM
# if present) and keep only each record's 'prompt' text.
with open('./kochatgpt_3_PPO.jsonl', "r", encoding='utf-8-sig') as json_file:
    list_data_dict = json.load(json_file)
    list_prompt = [tmp['prompt'] for tmp in list_data_dict]


def tokenize_fn(texts):
    """Tokenize ``texts`` with the global tokenizer and move the tensors to CUDA.

    Inputs are padded and truncated to at most ``MAX_LEN`` tokens. Returns a
    plain dict mapping each tokenizer output name to its CUDA tensor.
    """
    encoded = tokenizer(texts, return_tensors='pt', max_length=MAX_LEN, padding=True, truncation=True)
    on_gpu = {}
    for name, tensor in encoded.items():
        on_gpu[name] = tensor.cuda()
    return on_gpu


# print(list_prompt)
# Smoke test: print the tokenizer output for one English sentence.
print('\n\n\n')
print(tokenize_fn('I want you to act as a linux terminal.'))





{'input_ids': tensor([[   44,  9883, 15741, 26661, 13384,   224, 16683,   224,  7245, 12063,
         14445,  2479,    88,    91,  3628,  2423,    80,  2479,  3328,    17]],
       device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}


In [6]:
import os
from chatgpt.trainer import PPOTrainer
# Directory that receives the PPO actor checkpoint and optimizer state.
output_dir = './output_3_PPO'


def createDirectory(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Best-effort helper: an ``OSError`` is reported on stdout instead of being
    raised.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True makes this a no-op for an existing directory without a
        # separate exists() check (no check-then-create race), while a path
        # that exists but is not a directory is still reported as an error.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print("Error: Failed to create the directory.")


createDirectory(output_dir)

# configure trainer
# `tokenizer` receives the tokenize_fn callable (text -> CUDA tensors), not the
# Hugging Face tokenizer object. The keyword arguments from max_length onward
# look like generation settings -- TODO confirm how PPOTrainer consumes them.
trainer = PPOTrainer(strategy,
                     actor,
                     critic,
                     reward_model,
                     initial_model,
                     actor_optim,
                     critic_optim,
                     max_epochs=1,
                     train_batch_size=8,
                     tokenizer=tokenize_fn,
                     max_length=MAX_LEN,
                     do_sample=True,
                     temperature=1,
                     top_k=50,
                     top_p=0.9,
                     pad_token_id=tokenizer.pad_token_id,
                     eos_token_id=tokenizer.eos_token_id)

## train!
# Deliberately tiny run: 1 episode of 3 timesteps, updating every 3 timesteps.
trainer.fit(list_prompt,  # input prompts
            num_episodes=1,
            max_timesteps=3,
            update_timesteps=3)

## save
# save model checkpoint after fitting on only rank0
strategy.save_model(actor, os.path.join(output_dir, 'actor.pt'), only_rank0=True)
# save optimizer checkpoint on all ranks (file name carries the CUDA device index)
strategy.save_optimizer(actor_optim,
                        os.path.join(output_dir, 'actor_optim_checkpoint_%d.pt' % (torch.cuda.current_device())),
                        only_rank0=False)

Episode [1/1]:  67%|██████▋   | 2/3 [00:24<00:12, 12.21s/it]
Train epoch [1/1]:   0%|          | 0/3 [00:00<?, ?it/s][A
Train epoch [1/1]:   0%|          | 0/3 [00:00<?, ?it/s, actor_loss=0, critic_loss=0][A
Train epoch [1/1]:  33%|███▎      | 1/3 [00:00<00:01,  1.78it/s, actor_loss=0, critic_loss=0][A
Train epoch [1/1]:  33%|███▎      | 1/3 [00:01<00:01,  1.78it/s, actor_loss=0, critic_loss=0][A
Train epoch [1/1]:  67%|██████▋   | 2/3 [00:01<00:00,  1.88it/s, actor_loss=0, critic_loss=0][A
Train epoch [1/1]:  67%|██████▋   | 2/3 [00:01<00:00,  1.88it/s, actor_loss=0, critic_loss=0][A
Train epoch [1/1]: 100%|██████████| 3/3 [00:01<00:00,  1.91it/s, actor_loss=0, critic_loss=0][A
Episode [1/1]: 100%|██████████| 3/3 [00:37<00:00, 12.64s/it]


In [16]:
## inference
def generation(input_text):
    """Generate a completion for ``input_text`` with the global actor.

    The decoded text is printed under a row of '#' characters and returned.
    """
    encoded = tokenizer.encode(input_text, return_tensors='pt')
    prompt_ids = encoded.to(torch.cuda.current_device())
    sampling_kwargs = dict(
        max_length=MAX_LEN,
        do_sample=True,
        top_k=20,
        top_p=0.95,
        no_repeat_ngram_size=2,
        repetition_penalty=2.0,
        num_return_sequences=1,
        early_stopping=True,
    )
    outputs = actor.generate(prompt_ids, **sampling_kwargs)
    # outputs[0] is presumably the batch of generated sequences -- the first
    # decoded entry is the text for the single prompt.
    decoded = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
    output = decoded[0]
    print('#' * 70)
    print(output)
    return output


# Raw questions used to eyeball the PPO-trained actor.
list_prompt = [
    '불고기용 고기 한우에요?',
    '리처드 닉슨이 43대 부통령직을 수행한 년도는?',
    '시카고 오헤어 국제공항은 어디에 있어',
    '오늘 미세먼지 어때?']

# Wrap each question in the no-input prompt template.
list_prompt = [PROMPT_DICT['prompt_no_input'].format_map({'prompt': tmp}) for tmp in list_prompt]

# generation() prints each result; the return value is only kept in `output`.
for input_text in list_prompt:
    output = generation(input_text)

######################################################################
Below is an instruction that describes a task.
아래는 작업을 설명하는 명령어입니다.

Write a response that appropriately completes the request.
명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.

### Instruction(명령어):
불고기용 고기 한우에요?

### Response(응답):'저는 인공지능 언어 모델로, 물리적 개념인 소고기와 돼지고기는 다른 종류이며, 불고그용 고기 역시 일반적으로 사용되는 종류가 아닙니다. 따라서, 질문에 대한 정확한 답변은 제공할 수 없습니다. 😊 😊 😊 😊죄송합니다. 😊하지만, 한우와 돼지고기를 사용하여 불고기를 만들 수 있는 방법이 있습니다. 일반적으로 불고기 요리를 할 때는 소고기를 사용하지만, 한우와 돼지고기도 불고기 요리에 사용될 수 있습니다. 이들 고기를 이용해 불고기와 된장
######################################################################
Below is an instruction that describes a task.
아래는 작업을 설명하는 명령어입니다.

Write a response that appropriately completes the request.
명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.

### Instruction(명령어):
리처드 닉슨이 43대 부통령직을 수행한 년도는?

### Response(응답):'리처드 닉슨이 43대 부통령직을 수행한 년도는 1978년입니다.인 1978년 11월에 닉슨은 43대 부통령으로 지명되었습니다.당한 년도는 1978년입니다.은 닉슨의 업적을 인정하지 않는 일부 언론에 대한 탄압과 닉슨의 사생활과 관련된 문제를 제기하면서 부통령직에서 물러났습니다.은 닉슨의 재임기간 중

# Final) Test !

In [4]:
# import
import torch
from chatgpt.models.auto import AutoActor, AutoCritic
from transformers import AutoTokenizer

# data config (repeated so the final test can run in a fresh session)
IGNORE_INDEX = -100  # not referenced by the visible test cells
# Passed as max_length to actor.generate().
MAX_LEN = 256
DEFAULT_PAD_TOKEN = "[PAD]"  # not referenced; the pad token is set to EOS
# EOS, BOS and UNK are all mapped to the same "</s>" string.
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "</s>"
DEFAULT_UNK_TOKEN = "</s>"
# Bilingual Alpaca-style templates: "prompt_input" needs {prompt} and {input},
# "prompt_no_input" needs only {prompt}.
PROMPT_DICT = {
    "prompt_input":
    ("Below is an instruction that describes a task, paired with an input that provides further context.\n"
     "아래는 작업을 설명하는 명령어와 추가적 맥락을 제공하는 입력이 짝을 이루는 예제입니다.\n\n"
     "Write a response that appropriately completes the request.\n요청을 적절히 완료하는 응답을 작성하세요.\n\n"
     "### Instruction(명령어):\n{prompt}\n\n### Input(입력):\n{input}\n\n### Response(응답):"
     ),
    "prompt_no_input":
    ("Below is an instruction that describes a task.\n"
     "아래는 작업을 설명하는 명령어입니다.\n\n"
     "Write a response that appropriately completes the request.\n명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.\n\n"
     "### Instruction(명령어):\n{prompt}\n\n### Response(응답):"),
}

In [3]:
import os

# The base checkpoint provides the architecture and tokenizer; the PPO-trained
# weights are loaded on top of it from actor.pt below.
pretrain = 'KoAlpaca/train_v1.1b/polyglot-1.3b-koalpaca-v1.1b'
model_directory = './output_3_PPO'
model_path = os.path.join(model_directory, 'actor.pt')

# configure model, tokenizer
actor = AutoActor(pretrained=pretrain).to(torch.cuda.current_device())
tokenizer = AutoTokenizer.from_pretrained(pretrain,
                                          padding_side="right",
                                          model_max_length=512)
# Register "</s>" as the EOS, BOS and UNK special token.
tokenizer.add_special_tokens({
    "eos_token": DEFAULT_EOS_TOKEN,
    "bos_token": DEFAULT_BOS_TOKEN,
    "unk_token": DEFAULT_UNK_TOKEN,
})

# Reuse EOS as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Read the checkpoint onto the CPU, then copy it into the actor's inner model.
# NOTE(review): this assumes actor.pt stores the state dict of the wrapped
# model (keys matching actor.model) -- confirm against strategy.save_model.
state_dict = torch.load(model_path, map_location='cpu')
actor.model.load_state_dict(state_dict)

# Switch to evaluation mode; as the last expression of the cell, the returned
# module is also displayed.
actor.eval()

AutoActor(
  (model): GPTNeoXForCausalLM(
    (gpt_neox): GPTNeoXModel(
      (embed_in): Embedding(30080, 2048)
      (layers): ModuleList(
        (0): GPTNeoXLayer(
          (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (attention): GPTNeoXAttention(
            (rotary_emb): RotaryEmbedding()
            (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
            (dense): Linear(in_features=2048, out_features=2048, bias=True)
          )
          (mlp): GPTNeoXMLP(
            (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
            (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
            (act): GELUActivation()
          )
        )
        (1): GPTNeoXLayer(
          (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
          (post_attention_laye

In [5]:
## inference
def generation(input_text):
    """Generate a completion for ``input_text`` with the global actor.

    The decoded text is printed under a row of '#' characters and returned.
    """
    encoded = tokenizer.encode(input_text, return_tensors='pt')
    prompt_ids = encoded.to(torch.cuda.current_device())
    sampling_kwargs = dict(
        max_length=MAX_LEN,
        do_sample=True,
        top_k=20,
        top_p=0.95,
        no_repeat_ngram_size=2,
        repetition_penalty=2.0,
        num_return_sequences=1,
        early_stopping=True,
    )
    outputs = actor.generate(prompt_ids, **sampling_kwargs)
    # outputs[0] is presumably the batch of generated sequences -- the first
    # decoded entry is the text for the single prompt.
    decoded = tokenizer.batch_decode(outputs[0], skip_special_tokens=True)
    output = decoded[0]
    print('#' * 70)
    print(output)
    return output


# Raw questions used to eyeball the reloaded PPO actor.
list_prompt = [
    '불고기용 고기 한우에요?', 
    '리처드 닉슨이 43대 부통령직을 수행한 년도는?', 
    '시카고 오헤어 국제공항은 어디에 있어',
    '오늘 미세먼지 어때?']

# Wrap each question in the no-input prompt template.
list_prompt = [PROMPT_DICT['prompt_no_input'].format_map({'prompt': tmp}) for tmp in list_prompt]

# generation() prints each result; the return value is only kept in `output`.
for input_text in list_prompt:
    output = generation(input_text)

2023-05-26 01:38:20.279311: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-26 01:38:20.319378: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


######################################################################
Below is an instruction that describes a task.
아래는 작업을 설명하는 명령어입니다.

Write a response that appropriately completes the request.
명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.

### Instruction(명령어):
불고기용 고기 한우에요?

### Response(응답):'제가 AI 챗봇으로 프로그래밍되어있기 때문에 물리적인 정보를 제공할 수는 없습니다. 하지만 한우는 한우 중에서도 고급육종으로 유명하며, 그 맛과 영양적 가치가 높은 고급 육류로 인정받고 있습니다.에 따라 가격이 다를 수 있지만 일반적으로 한우는 한우 중에서 가장 비싼 가격대에 속한답니다. 따라서 구매 전에는 가격 정보를 충분히 확인해보시는 것이 좋습니다.!다사오지 마세요.이의 답변을 제공해드리겠습니다. :\n\n'네, 한우 중에서도 고급
######################################################################
Below is an instruction that describes a task.
아래는 작업을 설명하는 명령어입니다.

Write a response that appropriately completes the request.
명령어에 따른 요청을 적절히 완료하는 응답을 작성하세요.

### Instruction(명령어):
리처드 닉슨이 43대 부통령직을 수행한 년도는?

### Response(응답):'리처드 닉슨은 1973년부터 1974년까지 부통령을 역임했습니다.\n\n하지만 리처드 닉슨이 43대 부통령을 수행한 정확한 년도는 알려지지 않았습니다.닉슨이 43대 부통령을 수행한 해는 1944년이었습니다.닉슨이 43대 부통령을 수행한 해는 정확히 알려지지 않았지만, 미국 내에서는 1950년대에 