# ライブラリインストール

In [1]:
# transformerのインストール
!pip install transformers accelerate -q
!pip install sentencepiece -q

# PEFTのインストール
!pip install -Uqq  git+https://github.com/huggingface/peft.git -q
!pip install -Uqq transformers datasets accelerate bitsandbytes -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# 日本語LLMの利用

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "rinna/japanese-gpt-neox-3.6b" # 汎用型モデルを利用

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name)

if torch.cuda.is_available():
    model = model.to("cuda")

text = "質問:Linuxでシェルを終了するコマンドを教えてください。 回答:"

token_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")

with torch.no_grad():
    output_ids = model.generate(
        token_ids.to(model.device),
        max_new_tokens=30, # 最大トークン数を30に設定
        min_new_tokens=30, # 最大トークン数を30に設定
        do_sample=True,
        temperature=0.1, # 確率制御のパラメータ
        pad_token_id=tokenizer.pad_token_id,
        bos_token_id=tokenizer.bos_token_id,
        eos_token_id=tokenizer.eos_token_id
    )

output = tokenizer.decode(output_ids.tolist()[0])
print(output)

質問:Linuxでシェルを終了するコマンドを教えてください。 回答:Linuxでシェルを終了するコマンドは、以下の通りです。 回答:Linuxでシェルを終了するコマンドは、以下の通りです。 回答:


## 日本語LLMのファインチューン

In [4]:
# ディレクトリ
peft_name = "lora-japanese-gpt-neox-3.6b" # peft保存先ディレクトリ
output_dir = "lora-japanese-gpt-neox-3.6b-result" # 出力保存先

In [5]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

CUTOFF_LEN = 256

def tokenize(prompt, tokenizer):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
    )
    return {
        "input_ids": result["input_ids"],
        "attention_mask": result["attention_mask"],
    }

In [6]:
from datasets import load_dataset

# 利用するデータセットの準備: 169個のLinuxコマンド
dataset = "Beluuuuuuga/Japanese-Instruction-Linux-Command-169"
data = load_dataset(dataset)



  0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# プロンプトテンプレート: 今回のデータセット用
def generate_prompt(data_point):
    result = f"""### 指示:
{data_point["instruction"]}

### 回答:
{data_point["output"]}"""
    result = result.replace('\n', '<NL>')  # 改行を<NL>に変換
    return result

In [8]:
# 学習データと評価データの準備
VAL_SET_SIZE = 34 # 評価データを34で設定

train_val = data["train"].train_test_split(
    test_size=VAL_SET_SIZE, shuffle=True, seed=42
)
train_data = train_val["train"]
val_data = train_val["test"]
train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x), tokenizer))
val_data = val_data.shuffle().map(lambda x: tokenize(generate_prompt(x), tokenizer))



Map:   0%|          | 0/135 [00:00<?, ? examples/s]

Map:   0%|          | 0/34 [00:00<?, ? examples/s]

In [9]:
# 学習データと評価データのデータ数を確認
len(train_data), len(val_data)

(135, 34)

In [10]:
# モデルを作成
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map="auto",
)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [11]:
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training, TaskType

# LoRAの設定
lora_config = LoraConfig(
    r= 10, # 低ランク行列の次元数
    lora_alpha=16, # 重み行列のスケーリングファクターを設定
    target_modules=["query_key_value"], # 利用するモデルによって異なる
    lora_dropout=0.05,  # 過学習を防ぐためLoRAレイヤーのドロップアウト確率を設定
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = prepare_model_for_int8_training(model) # 前処理(int8に変換)

model = get_peft_model(model, lora_config) # LoRAの利用

model.print_trainable_parameters() # 学習するパラメータ数の確認



trainable params: 4,055,040 || all params: 3,611,300,352 || trainable%: 0.1122875309375541


In [12]:
import transformers

# 学習時のパラメータ設定
eval_steps = 10
save_steps = 10
logging_steps = 10
epoch_num = 4

# 学習パラメータ
argument=transformers.TrainingArguments(
        num_train_epochs=epoch_num,
        learning_rate=1e-4,
        logging_steps=logging_steps,
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=eval_steps,
        save_steps=save_steps,
        output_dir=output_dir,
        report_to="none",
        save_total_limit=6,
        push_to_hub=False,
        auto_find_batch_size=True
)

# 学習フロー
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=argument,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

In [13]:
# 学習
model.config.use_cache = False
trainer.train()
model.config.use_cache = True

# モデルを保存
trainer.model.save_pretrained(peft_name)

  attn_scores = torch.where(causal_mask, attn_scores, mask_value)


Step,Training Loss,Validation Loss
10,2.631,2.108726
20,1.7401,1.360454
30,1.1488,0.851869
40,0.6897,0.597706
50,0.5814,0.548892
60,0.5415,0.536135




# ファインチューンされた日本語LLMの性能確認

In [14]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# モデルのインスタンス化
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

# LoRAモデルのインスタンス化
model = PeftModel.from_pretrained(
    model,
    peft_name,
    device_map="auto"
)

model.eval() # 評価モード



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPTNeoXForCausalLM(
      (gpt_neox): GPTNeoXModel(
        (embed_in): Embedding(32000, 2816)
        (layers): ModuleList(
          (0-35): 36 x GPTNeoXLayer(
            (input_layernorm): LayerNorm((2816,), eps=1e-05, elementwise_affine=True)
            (post_attention_layernorm): LayerNorm((2816,), eps=1e-05, elementwise_affine=True)
            (attention): GPTNeoXAttention(
              (rotary_emb): RotaryEmbedding()
              (query_key_value): Linear8bitLt(
                in_features=2816, out_features=8448, bias=True
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2816, out_features=10, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=10, out_features=8448, bias=False)
              

In [15]:
# プロンプトテンプレート: 今回のデータセット用
def generate_prompt(data_point):
    result = f"""### 指示:
{data_point["instruction"]}

### 回答:
"""
    result = result.replace('\n', '<NL>') # 改行を<NL>に変換
    return result

In [52]:
# テキスト生成関数の定義
def generate(instruction):

    # 推論
    prompt = generate_prompt({'instruction':instruction,'input':input})

    input_ids = tokenizer(prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False).input_ids.cuda()

    outputs = model.generate(
        input_ids=input_ids,
        max_new_tokens=30,
        do_sample=True,
        temperature=0.5,
        top_p=0.75,
        top_k=40,
        no_repeat_ngram_size=2,
    )

    outputs = outputs[0].tolist()

    print(tokenizer.decode(outputs))

In [56]:
# プロンプト
generate("Linuxコマンドを教えてください")
generate("Linuxでシェルを終了するコマンド教えてください")
generate("lsコマンドは何のために使用されますか？")
generate("wgetコマンドの主な用途と使用方法について説明していただけますか？")
generate("catとmore、lessコマンドの違いは何ですか？")
generate("pwdコマンドの機能とは何ですか？？")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


### 指示:<NL>Linuxコマンドを教えてください<NL><NL>### 回答:<NL>ps</s>


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


### 指示:<NL>Linuxでシェルを終了するコマンド教えてください<NL><NL>### 回答:<NL>exit</s>


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


### 指示:<NL>lsコマンドは何のために使用されますか?<NL><NL>### 回答:<NL>ファイルとディレクトリの情報を表示する</s>


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


### 指示:<NL>wgetコマンドの主な用途と使用方法について説明していただけますか?<NL><NL>### 回答:<NL>get</s>


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:3 for open-end generation.


### 指示:<NL>catとmore、lessコマンドの違いは何ですか?<NL><NL>### 回答:<NL>more</s>
### 指示:<NL>pwdコマンドの機能とは何ですか??<NL><NL>### 回答:<NL>カレントディレクトリの表示</s>
