In [1]:
# Automatically reload imported libraries when their source changes
%load_ext autoreload
%autoreload 2

#!pip install rouge-score
#!pip install peft
#!pip install bitsandbytes
#!pip install accelerate
#!pip install scipy
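# SECURITY: never paste an access token into the notebook. Export HF_TOKEN in the shell instead,
# and revoke any token that was previously committed here.
#!huggingface-cli login --token=$HF_TOKEN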
from scoring import eval_model
from transformers import pipeline
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run-mode selector read by the later cells.
#   "zero"  -> DeepSpeed ZeRO branch
#   "qlora" -> 4-bit quantized base model + LoRA adapter branch
mode = "qlora"

In [3]:

# Hugging Face model id passed to every from_pretrained call in this notebook.
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Restrict this process to GPUs 0 and 1.
os.environ.update(CUDA_VISIBLE_DEVICES="0,1")

In [4]:
# DeepSpeed setup. The imports always run; the ZeRO initialisation only runs when mode == "zero".
import torch
import deepspeed
from transformers.deepspeed import HfDeepSpeedConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
import json

if mode=="zero":
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    os.environ["TOKENIZERS_PARALLELISM"] = "false"  # To avoid warnings about parallelism in tokenizers
    # Fall back to a single-process setup when no launcher has set these variables.
    local_rank = int(os.getenv("LOCAL_RANK",0))
    world_size = int(os.getenv("WORLD_SIZE",1))

    torch.cuda.set_device(local_rank)
    deepspeed.init_distributed()

    # Load the base ZeRO-3 config
    ds_config_file = "zero_infer.json"
    with open(ds_config_file) as f:
        ds_config = json.load(f)

    model_config = AutoConfig.from_pretrained(model_name)
    hidden_size = model_config.hidden_size

    ds_config["train_batch_size"] = 1 * world_size
    ds_config["train_micro_batch_size_per_gpu"] = 1

    # The bucket/threshold settings belong to the "zero_optimization" section of a
    # DeepSpeed config, so they are written there rather than at the top level.
    # The prefetch bucket size is truncated to int because 0.9 * h * h is a non-integer float.
    zero_opt = ds_config.setdefault("zero_optimization", {})
    zero_opt["reduce_bucket_size"] = hidden_size * hidden_size
    zero_opt["stage3_prefetch_bucket_size"] = int(0.9 * hidden_size * hidden_size)
    zero_opt["stage3_param_persistence_threshold"] = 10 * hidden_size

    # Required for ZeRO-3: must be created before the model is loaded, and the
    # object is kept in a variable so it stays alive.
    dschf = HfDeepSpeedConfig(ds_config)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # deepspeed.initialize returns a tuple; the first element is the engine.
    ds_engine = deepspeed.initialize(model=model, config_params=ds_config)[0]
    ds_model = ds_engine.module#.eval()


[2023-09-29 17:53:58,501] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)




In [5]:

# QLoRA branch: load the tokenizer and a 4-bit quantized base model.
if mode == "qlora":
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    # Quantization settings: 4-bit NF4 with double quantization, bfloat16 compute dtype.
    quantization_settings = dict(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    bnb_config = BitsAndBytesConfig(**quantization_settings)

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # device_map="auto" leaves device placement to the library.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
    )


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.31s/it]


In [6]:
# Text-generation pipeline around the loaded model and tokenizer,
# configured with max_new_tokens=100.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
)

In [7]:
import json

# Read the QA dataset (a JSON file) that is later sliced and passed to eval_model.
dataset_path = "../database/output/qa_dataset.json"
with open(dataset_path, mode="r") as dataset_file:
    raw_dataset = json.load(dataset_file)


In [8]:
#eval_model(raw_dataset[:50],pipe,"outputs/0929test/original_model.csv")

# train

In [9]:

import transformers
from datasets import load_dataset

# Load the raw training contexts (a JSON list of strings) and keep the first 10000.
# UTF-8 is given explicitly so the result does not depend on the machine's locale.
context_path="../database/output/context0926.json"
with open(context_path, 'r', encoding="utf-8") as f:
    context_list = json.load(f)

context_list=context_list[:10000]

# Write one context per line to a plain-text file that both dataset loaders read.
# The file is written as UTF-8 because the downstream readers default to UTF-8, and the
# output directory is created first so a fresh checkout does not fail here.
train_text_path="trainset/temp_train.txt"
os.makedirs(os.path.dirname(train_text_path), exist_ok=True)
with open(train_text_path,"w", encoding="utf-8") as f:
    for context in context_list:
        f.write(context+"\n")

if mode=="qlora":
    # Line-per-example dataset, tokenized in batches (no truncation or padding here).
    train_dataset = load_dataset("text", data_files=train_text_path)
    train_dataset=train_dataset.map(lambda samples: tokenizer(samples['text']), batched=True)
    # Reuse EOS as the padding token for the collator used at training time.
    tokenizer.pad_token = tokenizer.eos_token

if mode=="zero":
    from transformers import TextDataset
    # model_name contains "/", so this is a nested directory; create it up front.
    cache_dir = "cache/"+model_name
    os.makedirs(cache_dir, exist_ok=True)
    # NOTE(review): TextDataset appears to reuse a cached feature file when one already
    # exists in cache_dir -- confirm the cache is cleared when the contexts change.
    train_dataset= TextDataset(
        tokenizer=tokenizer,
        file_path=train_text_path,
        block_size=4096, # fixed block length so every example has the same size
        cache_dir=cache_dir,
    )


Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6721.64it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 857.03it/s]
Generating train split: 10000 examples [00:00, 88214.41 examples/s]
Map: 100%|██████████| 10000/10000 [00:01<00:00, 7260.67 examples/s]


In [10]:
per_device_train_batch_size=10
epochs=1

# Training arguments shared by both modes.
training_kwargs = dict(
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=1,
    warmup_steps=100,
    num_train_epochs=epochs,  # number of epochs
    learning_rate=2e-5,
    fp16=True,
    logging_steps=100,
    output_dir='outputs',
)

if mode=="zero":
    # The DeepSpeed config path and gradient checkpointing are passed to the constructor.
    # The previous code assigned a 1-tuple to train_args.deepspeed after construction
    # (stray trailing comma) and set a `gradient_checkpointing` attribute on the Trainer
    # object, which is not a TrainingArguments option.
    # NOTE(review): fp16=True must agree with the fp16 section of zero_infer.json -- confirm.
    training_kwargs["deepspeed"] = './zero_infer.json'  # path to the DeepSpeed config
    training_kwargs["gradient_checkpointing"] = True

train_args=transformers.TrainingArguments(**training_kwargs)

if mode=="qlora":
    from peft import LoraConfig, get_peft_model
    peft_config = LoraConfig(
            task_type="CAUSAL_LM", inference_mode=False, r=8, lora_alpha=32,
            lora_dropout=0.1
        )
    # Rebinds `model` to the PEFT-wrapped model; re-running this cell wraps it again.
    model = get_peft_model(model, peft_config)

# The qlora branch builds its dataset with load_dataset, which is indexed by split name;
# the zero branch builds a TextDataset, which is used directly.
train_split = train_dataset['train'] if mode=="qlora" else train_dataset

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_split,
    args=train_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

#model.config.use_cache = True  # silence the warnings. Please re-enable for inference!
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
100,2.4375
200,2.2584
300,2.1613
400,2.1099
500,2.1093
600,2.0787
700,2.11
800,2.0855
900,2.1055
1000,2.088


TrainOutput(global_step=1000, training_loss=2.1544044189453126, metrics={'train_runtime': 1270.1839, 'train_samples_per_second': 7.873, 'train_steps_per_second': 0.787, 'total_flos': 9.101298772721664e+16, 'train_loss': 2.1544044189453126, 'epoch': 1.0})

In [11]:
# Save trainer.model and the tokenizer side by side in a mode-specific directory.
peft_name = f"model/10000_{mode}"
trained_model = trainer.model
trained_model.save_pretrained(peft_name)
tokenizer.save_pretrained(peft_name)

('model/10000_qlora/tokenizer_config.json',
 'model/10000_qlora/special_tokens_map.json',
 'model/10000_qlora/tokenizer.model',
 'model/10000_qlora/added_tokens.json',
 'model/10000_qlora/tokenizer.json')

In [15]:

# Evaluate the first 200 QA items with the pipeline.
# The third argument is a CSV path -- presumably where eval_model writes its results (verify in scoring.py).
# NOTE(review): `pipe` was built before training around the original `model` object;
# confirm it reflects the trained adapter.
eval_subset = raw_dataset[:200]
result_csv_path = f"res/0929test/10000_{mode}_200.csv"
eval_model(eval_subset, pipe, result_csv_path)

  0%|          | 0/200 [00:00<?, ?it/s]

100%|██████████| 200/200 [14:04<00:00,  4.22s/it]


Unnamed: 0,problem,answer,pred,score,type,context
0,You are a great scientist. Anser the following...,3,2. A glucose biofuel cell (GBFC) consists of t...,0.000000,multi,False
1,You are a great scientist. Anser the following...,1,1. LmrR has been shown to bind the compounds s...,1.000000,multi,True
2,You are a great scientist. Anser the following...,1,2. The purpose of the Japanese phase III clini...,0.000000,multi,True
3,You are a great scientist. Anser the following...,1,1. SmMIT-LAMP is a LAMP (Loop-mediated isother...,1.000000,multi,False
4,You are a great scientist. Anser the following...,The key components of a LEID device structure ...,1.CFs with high strength and good electrical c...,0.357143,gen,True
...,...,...,...,...,...,...
195,You are a great scientist. Anser the following...,2,"2. According to the text, PbO was selected as ...",1.000000,multi,True
196,You are a great scientist. Anser the following...,1,"2. In concentrated DNA solutions, in addition ...",0.000000,multi,False
197,You are a great scientist. Anser the following...,The concept that a polymer's molecular structu...,A major branch of polymer technology relies o...,0.057692,gen,False
198,You are a great scientist. Anser the following...,2,2. According to the Codex Alimentarius Commiss...,1.000000,multi,True
