In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="5"
#os.environ["HF_DATASETS_CACHE"]="/workspace/1195/"

import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    'facebook/xglm-7.5B', 
    load_in_8bit=False, 
    device_map='auto',
    torch_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained('facebook/xglm-7.5B')


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.0
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /usr/local/lib/python3.8/dist-packages/bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


In [2]:
!pwd

/workspace/xglm_ok


In [3]:
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
#  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
#    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  def forward(self, x): return super().forward(x).to(torch.float32)
model.lm_head = CastOutputToFloat(model.lm_head)

In [4]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [5]:
from peft import LoraConfig, get_peft_model 

config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 8388608 || all params: 7501160448 || trainable%: 0.1118308034890337


In [6]:
import transformers
from datasets import load_dataset
data = load_dataset("pythainlp/alpaca_en_sft")

Found cached dataset parquet (/root/.cache/huggingface/datasets/pythainlp___parquet/pythainlp--alpaca_en_sft-8c0a443e01591af7/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
data = data.map(lambda samples: tokenizer(samples['text']), batched=True)

Loading cached processed dataset at /root/.cache/huggingface/datasets/pythainlp___parquet/pythainlp--alpaca_en_sft-8c0a443e01591af7/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-cc06846f1fe22927.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/pythainlp___parquet/pythainlp--alpaca_en_sft-8c0a443e01591af7/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-56b8769e18803a2e.arrow


In [15]:
data['train']["text"][0]

'<human>: Evaluate the following statement " The world is better now compared to 10 years ago." <bot>: This statement is subjective, and depends on a person\'s own opinion and perspective. There are some aspects of the world that are better now than 10 years ago, such as the advancement of technology and the increasing awareness about global climate change and sustainability. However, there are also aspects which are worse, such as the widening economic inequality and increasing social unrest in many countries. It is impossible to definitively say that the world as a whole is better or worse than it was 10 years ago.'

In [9]:
trainer = transformers.Trainer(
    model=model, 
    train_dataset=data['train'],
    eval_dataset=data['test'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=4, 
        gradient_accumulation_steps=32,
        per_device_eval_batch_size=2,
        warmup_steps=100, 
        num_train_epochs=2,
        learning_rate=2e-4,
        fp16=True,
        logging_strategy="epoch", 
        output_dir='outputs'
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mwannaphong[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a XGLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
385,1.7573
770,1.5862


TrainOutput(global_step=770, training_loss=1.6717456371753248, metrics={'train_runtime': 15555.7184, 'train_samples_per_second': 6.347, 'train_steps_per_second': 0.049, 'total_flos': 5.60407984939008e+17, 'train_loss': 1.6717456371753248, 'epoch': 2.0})

In [12]:
model.push_to_hub("pythainlp/alpaca_en_sft-xglm-7.5B-2ep",private=True)

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/pythainlp/alpaca_en_sft-xglm-7.5B-2ep/commit/dfec0ef5ae7142e8cbf4f3c2523e40469fdbc021', commit_message='Upload model', commit_description='', oid='dfec0ef5ae7142e8cbf4f3c2523e40469fdbc021', pr_url=None, pr_revision=None, pr_num=None)

In [13]:
tokenizer.push_to_hub("pythainlp/alpaca_en_sft-xglm-7.5B-2ep",private=True)

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.92M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/pythainlp/alpaca_en_sft-xglm-7.5B-2ep/commit/5b45c8892e609adfb9fa8486105851bde6aff898', commit_message='Upload tokenizer', commit_description='', oid='5b45c8892e609adfb9fa8486105851bde6aff898', pr_url=None, pr_revision=None, pr_num=None)

In [54]:
model.save_pretrained("test_ep2")
tokenizer.save_pretrained("test_ep2")

('test_ep2/tokenizer_config.json',
 'test_ep2/special_tokens_map.json',
 'test_ep2/sentencepiece.bpe.model',
 'test_ep2/added_tokens.json',
 'test_ep2/tokenizer.json')