In [None]:
# install some necessary packages
!pip install wandb -qU
!pip install bitsandbytes -qU
!pip install peft -qU
!pip install datasets -qU
!pip install transformers -qU
!pip install loguru -qU
!pip install tokenizers -qU
!pip install sentencepiece -qU


In [2]:
# connect to google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# wandb login
import os
import wandb

# Set the WANDB_API_KEY environment variable
os.environ['WANDB_API_KEY'] = '87246aa35c0741ef13381fe3bead47e6d1eeebfb'  # Replace with your actual key

# Login to wandb
wandb.login()


[34m[1mwandb[0m: Currently logged in as: [33mbaigzcu123[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
# import os and sys

OUTPUT_DIR = '/content/drive/MyDrive/Hallucination/Llama2_7b_Finance_FT_3'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

import sys

In [3]:
from typing import List, Dict, Optional
from loguru import logger

import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import (
    TaskType,
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
    get_peft_model_state_dict,
)

from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

In [32]:
mmlu_dataset_management = load_dataset("cais/mmlu", name = 'management')

In [33]:
mmlu_dataset_management['test']

Dataset({
    features: ['question', 'subject', 'choices', 'answer'],
    num_rows: 103
})

In [34]:
mmlu_dataset_marketing = load_dataset("cais/mmlu", name = 'marketing')

Generating auxiliary_train split:   0%|          | 0/99842 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/234 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/25 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/5 [00:00<?, ? examples/s]

In [35]:
type(mmlu_dataset_marketing)

datasets.dataset_dict.DatasetDict

# Set up training hyperparameters

In [None]:
# prepare steps

# Set random seed for reproducibility
RANDOM_SEED = 3407
transformers.set_seed(RANDOM_SEED)

# Set up the training hyperparameter
MICRO_BATCH_SIZE = 64
BATCH_SIZE = 256
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 1  # One epoch takes ~6 hours, and 2 epochs may exceed 12-hour.
LEARNING_RATE = 2e-5  # Following stanford_alpaca
CUTOFF_LEN = 256  # 256 accounts for about 96% of the data. Shorter input, faster training/less VRAM

LORA_R = 8  # Some LoRA parameters
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
VAL_SET_SIZE = 2000
TARGET_MODULES = [
    'q_proj',
    'v_prol',
]


In [None]:
# DDP setting
device_map = 'auto'
world_size = int(os.environ.get('WORLD_SIZE', 1))
ddp = (world_size != 1)  # If more than one GPU, then DDP
if ddp:
    device_map = {'': int(os.environ.get('LOCAL_RANK') or 0)}
    GRADIENT_ACCUMULATION_STEPS = GRADIENT_ACCUMULATION_STEPS // world_size

# Helper functions

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
def generate_prompt(data_point):
    """Gen. input text based on a prompt, task instruction, (context info.), and answer

    :param data_point: dict: Data point
    :return: str: Input text
    """
    # Samples with additional context into.
    if data_point['input']:
        text = 'Below is an instruction that describes a task, paired with an input that provides' \
               ' further context. Write a response that appropriately completes the request.\n\n'
        text += f'### Instruction:\n{data_point["instruction"]}\n\n'
        text += f'### Input:\n{data_point["input"]}\n\n'
        text += f'### Response:\n{data_point["output"]}'
        return text

    # Without
    else:
        text = 'Below is an instruction that describes a task. Write a response that ' \
               'appropriately completes the request.\n\n'
        text += f'### Instruction:\n{data_point["instruction"]}\n\n'
        text += f'### Response:\n{data_point["output"]}'
        return text

In [None]:
def tokenize(prompt):
    """Tokenize the input

    :param prompt: str: Input text
    :return: dict: {'tokenised input text': list, 'mask': list}
    """
    result = tokenizer(prompt, truncation=True, max_length=CUTOFF_LEN + 1, padding='max_length')
    return {
        'input_ids': result['input_ids'][:-1],
        'attention_mask': result['attention_mask'][:-1],
    }


In [None]:
def generate_and_tokenize_prompt(data_point):
    """This function masks out the labels for the input, so that our loss is computed only on the
    response."""
    if data_point['input']:
        user_prompt = 'Below is an instruction that describes a task, paired with an input that ' \
                      'provides further context. Write a response that appropriately completes ' \
                      'the request.\n\n'
        user_prompt += f'### Instruction:\n{data_point["instruction"]}\n\n'
        user_prompt += f'### Input:\n{data_point["input"]}\n\n'
        user_prompt += f'### Response:\n'
    else:
        user_prompt = 'Below is an instruction that describes a task. Write a response that ' \
                      'appropriately completes the request.'
        user_prompt += f'### Instruction:\n{data_point["instruction"]}\n\n'
        user_prompt += f'### Response:\n'

    # Count the length of prompt tokens
    len_user_prompt_tokens = len(tokenizer(user_prompt,
                                           truncation=True,
                                           max_length=CUTOFF_LEN + 1,
                                           padding='max_length')['input_ids'])
    len_user_prompt_tokens -= 1  # Minus 1 (one) for eos token

    # Tokenise the input, both prompt and output
    full_tokens = tokenizer(
        user_prompt + data_point['output'],
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding='max_length',
    )['input_ids'][:-1]
    return {
        'input_ids': full_tokens,
        'labels': [-100] * len_user_prompt_tokens + full_tokens[len_user_prompt_tokens:],
        'attention_mask': [1] * (len(full_tokens)),
    }


# Load Model&Tokenizer

In [None]:
#from transformers import LlamaForCausalLM, LlamaTokenizer

model_name = "NousResearch/Llama-2-7b-hf"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    add_eos_token=True
)
#tokenizer.pad_token_id = 0 # unk. we want this to be different from the eos token

# Read Llama2-7b
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    trust_remote_code=True,
    device_map= 'auto',
)

model = prepare_model_for_int8_training(model, use_gradient_checkpointing=True)

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



# LoRA fine-tune

In [None]:
# LoRA fine-tuning

# Set up LoRA config
lora_config = LoraConfig(
    task_type= 'CAUSAL_LM',
    r=LORA_R, # LORA_R
    lora_alpha= LORA_ALPHA, # LORA_ALPHA
    lora_dropout= LORA_DROPOUT, # LORA_DROPOUT
    target_modules= TARGET_MODULES, # TARGET_MODULES
    bias='none',
)
model = get_peft_model(model, lora_config)
print_trainable_parameters(model)


trainable params: 2097152 || all params: 6740512768 || trainable%: 0.03111264783824826


In [None]:
# this snippet helps us retry and continue training on some checkpoints
resume_from_checkpoint = None
if resume_from_checkpoint is not None:
    checkpoint_name = os.path.join(resume_from_checkpoint, 'pytorch_model.bin')
    if not os.path.exists(checkpoint_name):
        checkpoint_name = os.path.join(
            resume_from_checkpoint, 'adapter_model.bin'
        )
        resume_from_checkpoint = False
    if os.path.exists(checkpoint_name):
        logger.info(f'Restarting from {checkpoint_name}')
        adapters_weights = torch.load(checkpoint_name)
        set_peft_model_state_dict(model, adapters_weights)
    else:
        logger.info(f'Checkpoint {checkpoint_name} not found')

In [None]:
print_trainable_parameters(model)

trainable params: 2097152 || all params: 6740512768 || trainable%: 0.03111264783824826


# Load Datasets

In [None]:
# Load data
data = load_dataset('gbharti/wealth-alpaca_lora')
data = data.shuffle(seed=RANDOM_SEED)  # Shuffle dataset here

Downloading readme:   0%|          | 0.00/372 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/31.3M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Train/val split
if VAL_SET_SIZE > 0:
    train_val = data['train'].train_test_split(
        test_size=VAL_SET_SIZE, shuffle=False, seed=RANDOM_SEED
    )
    train_data = train_val['train'].map(generate_and_tokenize_prompt)
    val_data = train_val['test'].map(generate_and_tokenize_prompt)
else:
    train_data = data['train'].map(generate_and_tokenize_prompt)
    val_data = None

Map:   0%|          | 0/42341 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

# Training

In [None]:
# HuggingFace Trainer
# Fine-tune the model

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    # trainer argument config
    args=transformers.TrainingArguments(
        seed=RANDOM_SEED,  # Reproducibility
        data_seed=RANDOM_SEED,
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=5,
        evaluation_strategy='steps' if VAL_SET_SIZE > 0 else 'no',
        save_strategy='steps',
        save_steps=10,
        eval_steps=5 if VAL_SET_SIZE > 0 else None,
        output_dir=OUTPUT_DIR,
        save_total_limit=3,
        load_best_model_at_end=True if VAL_SET_SIZE > 0 else False,
        ddp_find_unused_parameters=False if ddp else None,
    ),

    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False

# PEFT setup
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))

# Use the latest PyTorch 2.0 if possible
if torch.__version__ >= '2' and sys.platform != 'win32':
    model = torch.compile(model)

# Train
trainer.train()
wandb.finish()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
5,4.0833,4.059131
10,4.0642,4.059364
15,4.0126,4.059549
20,4.092,4.058634
25,4.0354,4.056879
30,4.0539,4.055991
35,4.0018,4.054271
40,3.8923,4.049274
45,3.996,4.04387
50,3.9417,4.036085




FailedPreconditionError: ignored

In [None]:
### Merge the LoRA adaptor to Base model

# Step 1: Extract LoRA Weights from the Trained Model
lora_weights = get_peft_model_state_dict(model)

# Step 2: Load the Original Llama2-7b Model
original_model = AutoModelForCausalLM.from_pretrained(model_name)

# Step 3: Update the Original Model with LoRA Weights
for key, value in lora_weights.items():
    if key in original_model.state_dict():
        original_model.state_dict()[key].data.copy_(value.data)

# Step 4: Save the Updated Model
# Specify your desired path to save the model
updated_model_path = os.path.join(OUTPUT_DIR, "Llama2-7b_Finance_FT_3_with_lora")
original_model.save_pretrained(updated_model_path)

print("LoRA parameters have been successfully merged into the original Llama2-7b model.")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


Thrown during validation:
`do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.


LoRA parameters have been successfully merged into the original Llama2-7b model.


In [None]:
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizerFast   # 4.30.2
from peft import PeftModel  # 0.5.0
base_model = "NousResearch/Llama-2-13b-hf"
peft_model = "finetuned_model"
tokenizer = LlamaTokenizerFast.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
model = LlamaForCausalLM.from_pretrained(base_model, trust_remote_code=True, device_map = "cuda:0", load_in_8bit = True,)
model = PeftModel.from_pretrained(model, peft_model)

# push to hub

In [None]:
!git init "/content/drive/MyDrive/Hallucination/Llama2-7b_Finance_FT_3/git"
!git remote add origin YOUR_REPO_URL


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/drive/MyDrive/Hallucination/Llama2-7b_Finance_FT_3/git/.git/
fatal: not a git repository (or any of the parent directories): .git


In [None]:
from huggingface_hub import notebook_login

notebook_login()

In [None]:
from huggingface_hub import HfApi, Repository

model_name = "Llama2-7b_Finance_lora_1"
model_path = OUTPUT_DIR
repo_path = "/content/drive/MyDrive/Hallucination/Llama2-7b_Finance_FT_1/git"
repo_name = "Llama2-7b_Finance_lora_1"  # Replace with your desired repository name

# Create a repository on the Hugging Face Hub
api = HfApi()
# Pass the repo_id argument with the name of the repository
repo_url = api.create_repo(token="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", repo_id= repo_name, exist_ok=True)

# Clone the repository using the Hugging Face Hub library
repo = Repository(local_dir= repo_path, clone_from=repo_url)
repo.git_push()


In [None]:
!rm -rf repo_path
!mkdir repo_path

In [None]:

repo = Repository(local_dir=repo_path, clone_from="https://huggingface.co/Gason/Llama2-7b_Finance_lora_1")