In [1]:

from scoring import eval_model
from transformers import pipeline
from scoring import eval_model
from peft import LoraConfig, get_peft_model
import argparse
import os


os.environ["HF_HOME"]="/media/hatakeyama/hf"

# for debug, set test_nums=10
test_nums = 10**10



# 変数に格納
model_name = "meta-llama/Llama-2-7b-chat-hf"
base_path = "res/1018ig_change_dataset/"
r = 64
lr = 0.001
total_epochs = 5
train_dataset_path = ""
test_dataset_path = "../smallDB/1018ig/qa.json"
context_path = "../smallDB/1018ig/all.json"
do_original_eval = False
full_lora = True
per_device_train_batch_size = 1
# %%

base_path += f"r_{r}_"
base_path += f"fullLoRA_{full_lora}_"
m_name = model_name.split("/")[-1]
base_path += f"name_{m_name}_"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import transformers
from datasets import load_dataset, Dataset
from scoring import generate_prompt
import random
import json

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


with open(test_dataset_path, "r") as f:
    test_dataset = json.load(f)

# context
with open(context_path, 'r') as f:
    context_list = json.load(f)



In [3]:
context_dict = {}

for i in range(2**7):
    key = format(i, '07b')
    value = [context_list[j] for j in range(7) if key[j] == '1']
    context_dict[key] = value


In [4]:
def prepare_dataset(context_list):
    data_list = [{"text": i} for i in context_list]
    random.shuffle(data_list)

    # tokenize
    dataset = Dataset.from_dict(
        {"text": [item["text"] for item in data_list[:test_nums]]})
    dataset = dataset.map(lambda samples: tokenizer(
        samples['text']), batched=True)

    return dataset

In [5]:
# load base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)



def init_model():
    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                    quantization_config=bnb_config,
                                                    device_map="auto",
                                                    use_flash_attention_2=True,
                                                    )

    # %%
    if full_lora:
        target_modules = [
            "embed_tokens",
            "lm_head",
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ]
    else:
        target_modules = None

    peft_config = LoraConfig(
        task_type="CAUSAL_LM", inference_mode=False, r=r, lora_alpha=r,
        lora_dropout=0.1,
        target_modules=target_modules,
    )
    model = get_peft_model(model, peft_config)
    return model


cond_list=list(context_dict.keys())
random.shuffle(cond_list)
for cond in cond_list:
    if cond=="0000000":
        continue

    eval_path=base_path+f"{cond}.csv"
    if os.path.exists(eval_path):
        print(f"{eval_path} is already exist")
        continue

    print(eval_path)

    dataset = prepare_dataset(context_dict[cond])
    model=init_model()

    train_args = transformers.TrainingArguments(
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=1,
        warmup_steps=0,
        num_train_epochs=1,
        learning_rate=lr,
        fp16=True,
        logging_steps=100,
        output_dir='outputs/'+base_path,
    )



    trainer = transformers.Trainer(
        model=model,
        train_dataset=dataset,
        args=train_args,
        data_collator=transformers.DataCollatorForLanguageModeling(
            tokenizer, mlm=False)
    )

    loss_dict = {}
    epoch = 0


    for i in range(total_epochs):
        epoch += 1
        peft_name = f"model/"+base_path+f"epoch_{epoch}"


        training_result = trainer.train()
        loss_dict[i] = {"loss": training_result.training_loss}
        # log
    with open(base_path+f"loss.json", "a") as f:
        json.dump(loss_dict, f)


    pipe = pipeline("text-generation", model=model,
                    tokenizer=tokenizer, max_new_tokens=100)

    # eval
    eval_model(test_dataset[:test_nums], pipe,
                eval_path)

res/1018ig_change_dataset/r_64_fullLoRA_True_name_Llama-2-7b-chat-hf_0011001.csv


Map: 100%|██████████| 3/3 [00:00<00:00, 180.58 examples/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.74s/it]


ValueError: You can't train a model that has been loaded in 8-bit precision on a different device than the one you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device()}you're training on. Make sure you loaded the model on the correct device using for example `device_map={'':torch.cuda.current_device() or device_map={'':torch.xpu.current_device()}