In [1]:
# Loading the dataset

from datasets import load_dataset, Dataset, load_from_disk
import pandas as pd

dataset = load_from_disk("/home/jaswant/Documents/llm_trial/data/")

In [2]:
df = pd.DataFrame(dataset["train"])
df.head()


Unnamed: 0,output,input,instruction
0,Here are the main ideas of Jeff Walker's Produ...,,Summarize the main ideas of Jeff Walker's Prod...
1,1. Homogeneity: The segment should consist of ...,,How to tell if a customer segment is well segm...
2,You can use the `String.format()` method in Ja...,,"In Java, I want to replace string like ""This i..."
3,Metaphorical language has been summoned to des...,,Metaphorical language is also used to describe...
4,The `dummy` variable in the given C++ function...,,I have the following C++ function: \nvoid add\...


In [3]:
# Modifying into QLORA format


df['text'] = """

You are assistant that behaves very professionally. 
You will only provide the answer if you know the answer. If you do not know the answer, you will say I dont know. 

###Human: {instruction},
###Assistant: {output}

""".format(instruction=df['instruction'],
           output= df['output'])

data = Dataset.from_pandas(df)

In [4]:
## Model Loading

import torch
import transformers
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
torch.cuda.empty_cache()

In [5]:
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# device = 'cuda:0'

bnb_config = transformers.BitsAndBytesConfig(load_in_4bit=True, 
                                             bnb_4bit_quant_type='nf4',
                                             bnb_4bit_use_double_quant=True,
                                             bnb_4bit_compute_dtype=torch.bfloat16)

hf_auth = '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'
model_config =  transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    load_in_8bit=True,
    device_map='auto',
    use_auth_token=hf_auth
)

[2023-10-28 08:49:17,408] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Loading the tokenizer

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth,
    trust_remote_code=True
)
tokenizer.pad_token = tokenizer.eos_token



In [7]:
# Configuring LORA

from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [8]:
## Creating the trainer arguments

from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 2
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 100
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 400
warmup_ratio = 0.03
lr_scheduler_type = "cosine"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    # ddp_backend='nccl'
)

In [9]:
# Starting the trainer

from trl import SFTTrainer

max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)



Map:   0%|          | 0/58674 [00:00<?, ? examples/s]

In [10]:
## For stability

for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [11]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjaswanth04[0m ([33mflyers[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/400 [00:00<?, ?it/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


{'loss': 2.4935, 'learning_rate': 0.0001666666666666667, 'epoch': 0.0}
{'loss': 1.7687, 'learning_rate': 0.00019979028262377118, 'epoch': 0.0}
{'loss': 0.7586, 'learning_rate': 0.00019893981312363562, 'epoch': 0.0}
{'loss': 0.117, 'learning_rate': 0.00019744105246469263, 'epoch': 0.01}
{'loss': 0.013, 'learning_rate': 0.0001953038210948861, 'epoch': 0.01}
{'loss': 0.004, 'learning_rate': 0.00019254212296427044, 'epoch': 0.01}
{'loss': 0.0008, 'learning_rate': 0.00018917405376582145, 'epoch': 0.01}
{'loss': 0.0003, 'learning_rate': 0.00018522168236559695, 'epoch': 0.01}
{'loss': 0.0002, 'learning_rate': 0.00018071090619916093, 'epoch': 0.01}
{'loss': 0.0002, 'learning_rate': 0.00017567128158176953, 'epoch': 0.01}
{'loss': 0.0002, 'learning_rate': 0.00017013583004418993, 'epoch': 0.01}
{'loss': 0.0002, 'learning_rate': 0.000164140821963114, 'epoch': 0.02}
{'loss': 0.0001, 'learning_rate': 0.00015772553890390197, 'epoch': 0.02}
{'loss': 0.0001, 'learning_rate': 0.00015093201623287631, 'ep

TrainOutput(global_step=400, training_loss=0.12897837026146589, metrics={'train_runtime': 2403.4962, 'train_samples_per_second': 1.331, 'train_steps_per_second': 0.166, 'train_loss': 0.12897837026146589, 'epoch': 0.05})

In [12]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model  # Take care of distributed/parallel training
model_to_save.save_pretrained("outputs_7b")

In [13]:
from huggingface_hub import notebook_login, logout
logout()
notebook_login(new_session=True, write_permission=True)

Not logged in!


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
model_to_save.push_to_hub("llama2-7b-chat-sharegpt")

adapter_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/jaswanth04/llama2-7b-chat-sharegpt/commit/70eed6c8477710cf60cd2d26b293ea853798d484', commit_message='Upload model', commit_description='', oid='70eed6c8477710cf60cd2d26b293ea853798d484', pr_url=None, pr_revision=None, pr_num=None)

Inferencing the model