In [8]:
# Shell escape: print the NVIDIA driver's per-GPU status table (memory use,
# utilization, power draw) so the available devices can be checked.
!nvidia-smi

Thu Aug 24 10:17:25 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-PCI...  On   | 00000000:00:06.0 Off |                    0 |
| N/A   64C    P0   259W / 250W |  33378MiB / 40960MiB |     97%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-PCI...  On   | 00000000:00:07.0 Off |                  Off |
| N/A   55C    P0    69W / 250W |  34702MiB / 40960MiB |      0%      Default |
|       

In [2]:
import transformers
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import AutoPeftModelForCausalLM, get_peft_model, PeftConfig, IA3Config
from datasets import load_dataset

In [3]:
# Hub id of the checkpoint used for both the tokenizer and the base model.
base_checkpoint = 'EleutherAI/polyglot-ko-1.3b'

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

# Base causal LM, requested in 8-bit with fp16 as the torch dtype and
# automatic device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint,
    device_map = 'auto',
    torch_dtype = torch.float16,
    load_in_8bit = True,
)

# IA3 adapter settings: the fused attention projection and the MLP output
# projection are targeted; the latter is additionally flagged as feed-forward.
config = IA3Config(
    peft_type = "IA3",
    task_type = "CAUSAL_LM",
    target_modules = ["query_key_value", "dense_4h_to_h"],
    feedforward_modules = ["dense_4h_to_h"],
)

# Wrap the base model with the IA3 adapter; `model` is what later cells use.
model = get_peft_model(base_model, config)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Put the model into evaluation mode. Because eval() is the last expression in
# the cell, its return value is rendered, which is what prints the
# PeftModelForCausalLM module tree shown in the output.
# NOTE(review): this cell's execution count (5) is repeated by a later cell, so
# the order in which it actually ran relative to the other cells is unclear --
# confirm it is still wanted before training.
model.eval()

PeftModelForCausalLM(
  (base_model): IA3Model(
    (model): GPTNeoXForCausalLM(
      (gpt_neox): GPTNeoXModel(
        (embed_in): Embedding(30080, 2048)
        (emb_dropout): Dropout(p=0.0, inplace=False)
        (layers): ModuleList(
          (0-10): 11 x GPTNeoXLayer(
            (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
            (post_attention_dropout): Dropout(p=0.0, inplace=False)
            (post_mlp_dropout): Dropout(p=0.0, inplace=False)
            (attention): GPTNeoXAttention(
              (rotary_emb): GPTNeoXRotaryEmbedding()
              (query_key_value): Linear8bitLt(
                in_features=2048, out_features=6144, bias=True
                (ia3_l): ParameterDict(  (default): Parameter containing: [torch.cuda.FloatTensor of size 6144x1 (GPU 0)])
              )
              (dense): Linear8bitLt(in_features=2048, out_features=2

In [4]:
def _build_prompt(x):
    """Add a `text` field joining the instruction and output of one record.

    The string ends with the literal `<|endoftext|>` marker.
    """
    return {"text" : f"### 질문:{x['instruction']}\n\n### 답변: {x['output']}<|endoftext|>"}


def _tokenize_batch(x):
    """Run the notebook-level tokenizer over the `text` column of a batch."""
    return tokenizer(x['text'])


# Load KoAlpaca, drop the `url` column, render each record to a single prompt
# string, then tokenize that string in batches.
data = (
    load_dataset('beomi/KoAlpaca-v1.1a')
    .remove_columns(['url'])
    .map(_build_prompt)
    .map(_tokenize_batch, batched = True)
)

Found cached dataset parquet (/home/ubuntu/.cache/huggingface/datasets/beomi___parquet/beomi--KoAlpaca-v1.1a-1465f66eb846fd61/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/beomi___parquet/beomi--KoAlpaca-v1.1a-1465f66eb846fd61/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7/cache-826eb6ee01e5bb3f.arrow
Loading cached processed dataset at /home/ubuntu/.cache/huggingface/datasets/beomi___parquet/beomi--KoAlpaca-v1.1a-1465f66eb846fd61/0.0.0/14a00e99c0d15a23649d0db8944380ac81082d4b021f398733dd84f3a6c569a7/cache-5c063ec87916cebb.arrow


In [5]:
# Pad with the end-of-sequence token (original note: <|endoftext|>).
tokenizer.pad_token = tokenizer.eos_token

# Resize the embedding matrix only when the tokenizer reports more entries
# than the matrix currently has rows.
tokenizer_entries = len(tokenizer)
embedding_size = model.get_input_embeddings().weight.shape[0]
if embedding_size < tokenizer_entries:
    model.resize_token_embeddings(tokenizer_entries)

In [6]:
# Run configuration: 50 optimizer steps, per-device batch of 2 with 2
# accumulation steps, a checkpoint every 25 steps (save_total_limit of 2),
# and logging on every step.
training_args = transformers.TrainingArguments(
    output_dir = "/home/ubuntu/askbiz/finetuning/ia3/ia3_6e2_1.3b",
    max_steps = 50,
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 2,
    learning_rate = 6e-2,
    optim="adamw_torch",
    logging_steps = 1,
    save_steps = 25,
    save_total_limit = 2,
)

# Language-modeling collator with masked-LM mode switched off.
lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm = False)

# Trainer over the `train` split of the tokenized dataset.
trainer = transformers.Trainer(
    model = model,
    args = training_args,
    train_dataset = data['train'],
    data_collator = lm_collator,
)

In [7]:
# Run the training loop inside a CUDA autocast context.
cuda_autocast = torch.autocast(device_type = "cuda")
with cuda_autocast:
    trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdongryeol[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,2.5637
2,2.6668
3,2.5134
4,2.8581
5,2.4897
6,2.5267
7,2.5201
8,2.3181
9,2.5144
10,2.2323


