In [1]:
!nvidia-smi

Fri Dec 15 14:01:02 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        Off | 00000000:01:00.0 Off |                  Off |
|  0%   43C    P8              11W / 450W |     38MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

In [2]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch==2.0.1 --progress-bar off
!pip install -qqq transformers==4.32.1 --progress-bar off
!pip install -qqq datasets==2.14.4 --progress-bar off
!pip install -qqq peft==0.5.0 --progress-bar off
!pip install -qqq bitsandbytes==0.41.1 --progress-bar off
!pip install -qqq trl==0.7.1 --progress-bar off

In [2]:
import json
import re
from pprint import pprint

import pandas as pd
import torch
from datasets import Dataset, load_dataset
from huggingface_hub import notebook_login
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
MODEL_NAME = "meta-llama/Llama-2-7b-hf"

2023-12-15 15:12:41.210344: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-15 15:12:41.210369: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-15 15:12:41.211055: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-15 15:12:41.214966: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
dataset = load_dataset("Jiahuan/teach_edh", cache_dir='/media/Blue2TB3/jpei/cache-huggingface')
dataset

DatasetDict({
    train: Dataset({
        features: ['output', 'input'],
        num_rows: 9742
    })
    validation: Dataset({
        features: ['output', 'input'],
        num_rows: 1137
    })
    test: Dataset({
        features: ['output', 'input'],
        num_rows: 3855
    })
})

In [4]:
DEFAULT_SYSTEM_PROMPT = """
Below is a conversation between a human and an AI agent. Reply a response given the context.
""".strip()


def generate_training_prompt(
    context: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    return f"""### Instruction: {system_prompt}

### Context:
{context.strip()}

### Response:
{response}
""".strip()

In [5]:
def clean_text(text):
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@[^\s]+", "", text)
    text = re.sub(r"\s+", " ", text)
    return re.sub(r"\^[^ ]+", "", text)

In [6]:
def generate_text(data_point):
    context = data_point['input']
    response = data_point['output']
    return {
        "context": context,
        "response": response,
        "text": generate_training_prompt(context, response),
    }

In [7]:
example = generate_text(dataset["train"][0])

In [8]:
print(example)

{'context': "Driver: What tasks do I today? Commander: grab the mug from the coffee maker. take it to the sink. clear the sink first. then place and run water Driver: I have grabbed the mug from the coffee maker.. I have cleared the sink.. I have placed the mug in the sink. Commander: run water Driver: I have turned the tap on. What next? Commander: turn off then remove the mug Driver: turned* Commander: take back to the coffee maker Driver: I have removed the mug. Commander: make sure it's empty Driver: But the mug still has water Commander: dump the water Driver: Okay Commander: go back to the coffee maker. place then start it Driver: I have poured the water. Now going to the coffee maker. Commander: start Driver: I have placed the mug on the coffee maker. I have started it. Commander: go find a knife Driver: What next? Commander: in the drawer under the sink Driver: Okay. going to find knife. Commander: the right side cabinet. grab that. you had it right. open then grab. yes Driver:

In [9]:
print(example["text"])

### Instruction: Below is a conversation between a human and an AI agent. Reply a response given the context.

### Context:
Driver: What tasks do I today? Commander: grab the mug from the coffee maker. take it to the sink. clear the sink first. then place and run water Driver: I have grabbed the mug from the coffee maker.. I have cleared the sink.. I have placed the mug in the sink. Commander: run water Driver: I have turned the tap on. What next? Commander: turn off then remove the mug Driver: turned* Commander: take back to the coffee maker Driver: I have removed the mug. Commander: make sure it's empty Driver: But the mug still has water Commander: dump the water Driver: Okay Commander: go back to the coffee maker. place then start it Driver: I have poured the water. Now going to the coffee maker. Commander: start Driver: I have placed the mug on the coffee maker. I have started it. Commander: go find a knife Driver: What next? Commander: in the drawer under the sink Driver: Okay. g

In [10]:
def process_dataset(data: Dataset):
    return (
        data.shuffle(seed=42)
        .map(generate_text)
        # .remove_columns(
        #     [
        #         "original dialog id",
        #         "new dialog id",
        #         "dialog index",
        #         "original dialog info",
        #         "log",
        #         "prompt",
        #     ]
        # )
    )

In [11]:
dataset["train"] = process_dataset(dataset["train"])
dataset["validation"] = process_dataset(dataset["validation"])
dataset["test"] = process_dataset(dataset["test"])

In [12]:
dataset

DatasetDict({
    train: Dataset({
        features: ['output', 'input', 'context', 'response', 'text'],
        num_rows: 9742
    })
    validation: Dataset({
        features: ['output', 'input', 'context', 'response', 'text'],
        num_rows: 1137
    })
    test: Dataset({
        features: ['output', 'input', 'context', 'response', 'text'],
        num_rows: 3855
    })
})

In [13]:
# notebook_login()
from huggingface_hub import interpreter_login
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your 

In [14]:
def create_model_and_tokenizer():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        # bnb_4bit_compute_dtype=torch.float16,
    )

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        use_safetensors=True,
        quantization_config=bnb_config,
        trust_remote_code=True,
        device_map="auto",
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    return model, tokenizer

In [15]:
model, tokenizer = create_model_and_tokenizer()
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
model.config.quantization_config.to_dict()

{'quant_method': <QuantizationMethod.BITS_AND_BYTES: 'bitsandbytes'>,
 'load_in_8bit': False,
 'load_in_4bit': True,
 'llm_int8_threshold': 6.0,
 'llm_int8_skip_modules': None,
 'llm_int8_enable_fp32_cpu_offload': False,
 'llm_int8_has_fp16_weight': False,
 'bnb_4bit_quant_type': 'nf4',
 'bnb_4bit_use_double_quant': False,
 'bnb_4bit_compute_dtype': 'bfloat16'}

In [17]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
)
print_trainable_parameters(model)

trainable params: 262410240 || all params: 3500412928 || trainable%: 7.496550989769399


In [25]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [27]:
from peft import LoraConfig, get_peft_model

lora_r = 64 # 16
lora_alpha = 64
lora_dropout = 0.1
lora_target_modules = [
    "q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj",
]


peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, peft_config)
print_trainable_parameters(model)

# Apply the accelerator. You can comment this out to remove the accelerator.
model = accelerator.prepare_model(model)

trainable params: 159907840 || all params: 3660320768 || trainable%: 4.368683788535114


In [28]:
OUTPUT_DIR = '/media/Blue2TB3/jpei/vox-finetune'
DATASET_NAME = 'teach'
from datetime import datetime
training_arguments = TrainingArguments(
    per_device_train_batch_size=4, # 4, 8
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit", # paged_adamw_32bit
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=2,
    do_train=True,
    do_eval=True,
    evaluation_strategy="steps",
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="epoch",
    group_by_length=True,
    output_dir=OUTPUT_DIR,
    # report_to="tensorboard",
    report_to="wandb",  # Comment this out if you don't want to use weights & baises
    run_name=f"{MODEL_NAME}-{DATASET_NAME}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",  # Name of the W&B run (optional)，
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)

In [30]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/1137 [00:00<?, ? examples/s]

In [None]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mppsunrise[0m ([33mn-dim-brain[0m). Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss


In [None]:
trainer.save_model()

In [None]:
trainer.model

In [None]:
from peft import AutoPeftModelForCausalLM

trained_model = AutoPeftModelForCausalLM.from_pretrained(
    OUTPUT_DIR,
    low_cpu_mem_usage=True,
)

merged_model = model.merge_and_unload()
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

## Inference

In [None]:
def generate_inference_prompt(
    context: str, response: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    return f"""### Instruction: {system_prompt}

### Context:
{context.strip()}

### Response:
""".strip()