## Importing Dependencies

In [None]:
!pip install torch
!pip install transformers
!pip install datasets
!pip install peft
!pip install bitsandbytes
!pip install trl
!pip install xformers



In [None]:
import json
from pprint import pprint
import pandas as pd
import torch
from huggingface_hub import notebook_login
from peft import LoraConfig, PeftModel, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset



## Downloading the Model and Dataset

In [None]:
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
MODEL_NAME = 'meta-llama/Llama-2-7b-chat-hf'

In [None]:
dataset = load_dataset('ssbuild/alpaca_finance_en')

Downloading readme:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/23.0M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'instruction', 'input', 'output'],
        num_rows: 68912
    })
})

In [None]:
dataset["train"] = dataset["train"].remove_columns("id")

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output'],
        num_rows: 68912
    })
})

In [None]:
from transformers.utils import quantization_config
def get_the_model():
  bnb_config = BitsAndBytesConfig(
      load_in_4bit = True,
      bnb_4bit_quant_type='nf4',
      bnb_4bit_compute_dtype=torch.float16
  )
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      use_safetensors = True,
      quantization_config = bnb_config,
      trust_remote_code = True,
      device_map = DEVICE
  )

  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
  tokenizer.pad_token = tokenizer.eos_token
  tokenizer.add_special_tokens({'pad_token': '[PAD]'})

  return model, tokenizer

In [None]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
model, tokenizer = get_the_model()
model.config.use_cache = False

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm

## Testing the Base Model

In [None]:
##Creating the prompt

In [None]:
from transformers import pipeline

In [None]:
device = "cuda:0"
def get_prompt(instruction, input):
  if input:
    prompt_template = f"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Output:\n"
  else:
    prompt_template = f"### Instruction:\n{instruction}\n\n### Output:\n"
  return prompt_template

In [None]:
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=150,
    repetition_penalty=1.15,
    top_p=0.95
    )
result = pipe(get_prompt("Who can truly afford luxury cars?", input=None))
print(result[0]['generated_text'])

### Instruction:
Who can truly afford luxury cars?

### Output:
Luxury cars are typically associated with wealth and exclusivity, but the cost of these vehicles varies widely depending on factors such as make, model, and features. Here are some general price ranges for new luxury cars in the United States:

* Entry-level luxury cars (e.g., Audi A4, BMW 3 Series): $30,000 - $50,000
* Mid-range luxury cars (e.g., Mercedes-Benz E-Class, Lexus GS): $50,000 - $100,


## Setting up Lora Config

In [None]:
lora_alpha = 32
lora_dropout = 0.05
lora_r = 8
peft_config = LoraConfig(
    lora_alpha = lora_alpha,
    target_modules = ["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout = lora_dropout,
    r = lora_r,
    bias = 'none',
    task_type= 'CAUSAL_LM'
)

## Merging the base model and the Config

In [None]:
model = get_peft_model(model, peft_config)

### Setting up Training Arguments (Hyperparameters)

In [None]:
adam_bits = 8

training_arguments = TrainingArguments(
    output_dir = "Trainer_output",
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 4,
    run_name=f"deb-v2-xl-{adam_bits}bitAdam",
    logging_steps = 20,
    learning_rate = 2e-4,
    fp16=True,
    max_grad_norm = 0.3,
    max_steps = 1200,
    warmup_ratio = 0.03,
    group_by_length=True,
    lr_scheduler_type = "constant",
)

In [None]:
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset["train"],
    ##peft_config = peft_config,
    dataset_text_field="instruction",
    args = training_arguments,
    max_seq_length = 512,
    tokenizer = tokenizer
)

Map:   0%|          | 0/68912 [00:00<?, ? examples/s]

In [None]:
trainer

<trl.trainer.sft_trainer.SFTTrainer at 0x7df6928dcca0>

### Training (Finetuning) the Model

In [None]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
20,2.7872
40,2.6697
60,2.7347
80,2.495
100,2.6475
120,2.2622
140,2.4027
160,2.7305
180,2.2332
200,2.6557


TrainOutput(global_step=1200, training_loss=2.4333551343282065, metrics={'train_runtime': 2003.3639, 'train_samples_per_second': 2.396, 'train_steps_per_second': 0.599, 'total_flos': 2827550721073152.0, 'train_loss': 2.4333551343282065, 'epoch': 0.07})

In [None]:
trainer.push_to_hub()  ##Pushing the Adapters to HuggingFace

In [None]:
trainer.save_model("Finetuned_adapter")  ##Saving the Finetuned Adapter
adapter_model = model

print("Lora Adapter saved")

Lora Adapter saved


In [2]:
##Downloading the base model again and loading the model with the finetuned adapters we just saved

In [None]:
model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      use_safetensors = True,
      load_in_8bit = True,
      trust_remote_code = True,
      device_map = DEVICE
  )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load Lora adapter
model = PeftModel.from_pretrained(
    model,
    "/content/Finetuned_adapter",
    )
merged_model = model.merge_and_unload()

merged_model.save_pretrained("/content/Merged_model")
tokenizer.save_pretrained("/content/Merged_model")

In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
tokenizer.save_pretrained("/content/Merged_model")  ##Saving the Finetuned Model

('/content/Merged_model/tokenizer_config.json',
 '/content/Merged_model/special_tokens_map.json',
 '/content/Merged_model/tokenizer.json')

In [None]:
merged_model.push_to_hub("PiyushLavaniya/Llama2_Banker")  ##Pushing the model to HuggingFace

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.02G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/PiyushLavaniya/Llama2_Banker/commit/20324454f38946c3295d823882da50ef7f31bfc4', commit_message='Upload LlamaForCausalLM', commit_description='', oid='20324454f38946c3295d823882da50ef7f31bfc4', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
tokenizer.push_to_hub("PiyushLavaniya/Llama2_Banker")  ##Pushing the Tokenizer to HuggingFace

CommitInfo(commit_url='https://huggingface.co/PiyushLavaniya/Llama2_Banker/commit/5bc3dc91e58f07d90bd57770119853be2e0e81a7', commit_message='Upload tokenizer', commit_description='', oid='5bc3dc91e58f07d90bd57770119853be2e0e81a7', pr_url=None, pr_revision=None, pr_num=None)