In [1]:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoConfig, BitsAndBytesConfig, GenerationConfig, TrainingArguments
from peft import LoraConfig, PeftModel
from datasets import Dataset, load_dataset
from trl import SFTTrainer

In [2]:
# import dataset from the hugging face hub from my own account
dataset = load_dataset('TC14/050724_random200gs_llama3_instruct')

Repo card metadata block was not found. Setting CardData to empty.


In [3]:
# model name
model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'

# tokenizer 
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast = True)

# add special token padding for that
tokenizer.add_special_tokens({"pad_token" : "<pad>"})
tokenizer.padding_side = 'left'


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# define compute module
compute_dtype = getattr(torch, 'float16')

# Quantization parameter
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = compute_dtype,
    bnb_4bit_use_double_quant = True,
    
)

# load in the base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = "auto"
)

# uniform the input text length
model.resize_token_embeddings(len(tokenizer))
# Configure the pad token in the model
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False

#model = prepare_model_for_kbit_training(model)

# configure lora 
peft_config = LoraConfig(
    lora_alpha = 32,
    lora_dropout = 0.1,
    r = 8,
    bias = 'none',
    task_type = 'CAUSAL_LM',
    target_modules = ['q_proj' , 'v_proj']
)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
training_arg = TrainingArguments(
    output_dir = './results/050724',
    #evaluation_strategy = "steps",
    #do_eval = False,
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 1,
    #per_device_eval_batch_size = 4,
    log_level = "debug",
    optim = 'paged_adamw_32bit',
    save_steps = 2,
    logging_steps = 1,
    learning_rate = 1e-4,
    #eval_steps = 5,
    fp16 = True,
    #num_train_epochs = 3,
    max_grad_norm = 0.3,
    max_steps = 10,
    warmup_ratio = 0.03,
    lr_scheduler_type = 'constant',
)

In [6]:
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset['train'],
        #eval_dataset=dataset['test'],
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=512,
        tokenizer=tokenizer,
        args=training_arg,
)

trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend
Currently training with a batch size of: 8
***** Running training *****
  Num examples = 1,015
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 10
  Number of trainable parameters = 3,407,872
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Step,Training Loss
1,8.5544
2,8.0152
3,7.9574
4,7.7228
5,7.1522
6,7.0524
7,6.7956
8,6.4212
9,5.9997
10,5.7043


Saving model checkpoint to ./results/050724\checkpoint-2
loading configuration file config.json from cache at C:\Users\tchen\.cache\huggingface\hub\models--meta-llama--Meta-Llama-3-8B-Instruct\snapshots\e5e23bbe8e749ef0efcf16cad411a7d23bd23298\config.json
Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.3",
  "use_cache": true,
  "vocab_size": 128256
}

tokenizer config file saved in ./results/050724\checkpoint-2\tokenize

TrainOutput(global_step=10, training_loss=7.137519836425781, metrics={'train_runtime': 83.338, 'train_samples_per_second': 0.96, 'train_steps_per_second': 0.12, 'total_flos': 199661687930880.0, 'train_loss': 7.137519836425781, 'epoch': 0.08})

In [21]:
model = PeftModel.from_pretrained(model, "./results/checkpoint-1000")


In [27]:
prompt = '### Human : Please classify what kind of comment is this  clear clear clear. ### Assistant : '
text_input = tokenizer(prompt , return_tensors = "pt")

#print(text_input)

model.to("cuda")
model_input = text_input['input_ids'].cuda()

generation_output = model.generate(
    model_input,
    max_new_tokens = 5,
    do_sample = True
)

decoded = tokenizer.batch_decode(generation_output)
print(decoded)

['<s> ### Human : Please classify what kind of comment is this  clear clear clear. ### Assistant :  General Info, Matt,']


In [14]:
del model

torch.cuda.empty_cache()
import gc
gc.collect()

NameError: name 'model' is not defined

In [19]:
torch.cuda.empty_cache()
gc.collect()

0