##Install Packages

In [19]:
import torch

# Display a summary of GPU memory usage
torch.cuda.memory_summary(device=None, abbreviated=False)

# Get the peak GPU memory usage in bytes
peak_memory_bytes = torch.cuda.max_memory_allocated()

# Get the current GPU memory usage in bytes
current_memory_bytes = torch.cuda.memory_allocated()

# Convert bytes to megabytes for easier interpretation
peak_memory_megabytes = peak_memory_bytes / (1024 ** 2)
current_memory_megabytes = current_memory_bytes / (1024 ** 2)

print(f"Peak GPU memory usage: {peak_memory_megabytes:.2f} MB")
print(f"Current GPU memory usage: {current_memory_megabytes:.2f} MB")


Peak GPU memory usage: 4976.08 MB
Current GPU memory usage: 4430.66 MB


In [2]:
!pip install -Uqqq pip
!pip install bitsandbytes
!pip install -qqq torch==2.0.1
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Downloading bitsandbytes-0.42.0-py3-none-any.whl (105.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.42.0
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m84.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m849.3/849.3 kB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m103.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━

##Load and import packages

In [2]:
import json
import os
from pprint import pprint
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

##Notebook Login

In [3]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

##Load original model in 4 byte format

In [4]:
MODEL_NAME = "vilsonrodrigues/falcon-7b-instruct-sharded"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]

Some weights of FalconForCausalLM were not initialized from the model checkpoint at vilsonrodrigues/falcon-7b-instruct-sharded and are newly initialized: ['lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


###Function to print trainable params

In [5]:
def print_trainable_parameters(model):
  """
  Prints the number of trainable parameters in the model.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    all_param += param.numel()
    if param.requires_grad:
      trainable_params += param.numel()
  print(
      f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {100 * trainable_params / all_param}"
  )

In [6]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

###Define qLoRA config

In [7]:
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 4718592 || all params: 3613463424 || trainables%: 0.13058363808693696


##Load and process Dataset

In [8]:
data = load_dataset("csv", data_files="Book1.csv")



  0%|          | 0/1 [00:00<?, ?it/s]

In [9]:
data

DatasetDict({
    train: Dataset({
        features: ['chat'],
        num_rows: 49
    })
})

In [10]:
data["train"][0]

{'chat': 'Human: Help a 27-year-old man in Delhi, a mesomorph at 175 cm, choose an outfit for a festival. He prefers athletic-fit clothes with a broad shoulder shape and flat shoes.\n\nAI:\nOutfit 1:\nTop: Shirt: Navy blue polo shirt - This athletic-fit polo shirt with short sleeves and a broad shoulder shape will complement your mesomorph physique. Its dark navy blue color will add a touch of sophistication to your festival look.\n\nBottom: Chinos: Beige slim-fit chinos - Pair your polo shirt with these slim-fit chinos in a versatile beige color. They offer a tailored look that will accentuate your athletic silhouette.\n\nFootwear: Sneakers: White leather sneakers - Opt for a pair of white leather sneakers to keep the overall outfit casual yet stylish. The flat shoes will provide you with comfort while walking around during the festival.\n\nAccessories: Sunglasses: Black aviator sunglasses - Complete your outfit with a classic pair of black aviator sunglasses. They will add some edge 

###Get chat from dataset

In [11]:
def generate_prompt(data_point):
  return f"""{data_point["chat"]}""".strip()

def generate_and_tokenize_prompt(data_point):
  full_prompt = generate_prompt(data_point)
  tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
  return tokenized_full_prompt

In [12]:
data = data["train"].shuffle().map(generate_and_tokenize_prompt)

Map:   0%|          | 0/49 [00:00<?, ? examples/s]

In [13]:
model = model.to('cuda')

##Training config define. Start traininig

In [14]:
training_args = transformers.TrainingArguments(
      per_device_train_batch_size=1,
      gradient_accumulation_steps=4,
      num_train_epochs=1,
      learning_rate=2e-4,
      fp16=True,
      save_total_limit=3,
      logging_steps=1,
      output_dir="experiments",
      optim="paged_adamw_8bit",
      lr_scheduler_type="cosine",
      warmup_ratio=0.05,
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False
trainer.train()
new_model = "f6"

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,1.7879
2,1.6123
3,1.6364
4,1.633
5,1.4552
6,1.4968
7,1.7007
8,1.643
9,1.7076
10,1.5217


In [15]:
model.save_pretrained("trained-model")

In [22]:
model.push_to_hub("Jeera")
tokenizer.push_to_hub("Jeera")



Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/18.9M [00:00<?, ?B/s]



CommitInfo(commit_url='https://huggingface.co/Shxck69/Jeera/commit/1d140470ef0afdbfb85dff78266e2247bd921ddc', commit_message='Upload tokenizer', commit_description='', oid='1d140470ef0afdbfb85dff78266e2247bd921ddc', pr_url=None, pr_revision=None, pr_num=None)

In [21]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

###Push weights of fine tuned model to HuggingFace

In [23]:
PEFT_MODEL = "Jeera"

model.push_to_hub(
    PEFT_MODEL, use_auth_token=True
)

CommitInfo(commit_url='https://huggingface.co/Shxck69/Jeera/commit/67343c9c4616fdd87920135cffe9ae236cce1d33', commit_message='Upload model', commit_description='', oid='67343c9c4616fdd87920135cffe9ae236cce1d33', pr_url=None, pr_revision=None, pr_num=None)