# First we will setup the env

In [8]:
# !pwd

In [1]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [2]:
# !pip install transformers

In [3]:
# !pip install datasets

In [4]:
# !pip install peft accelerate

In [5]:
# !pip install trl

In [6]:
# !pip install scipy

In [7]:
# !pip install bitsandbytes

# Code Starts Here

## Import packages

In [9]:
import torch
import transformers
print(torch.__version__)
print(transformers.__version__)
print(torch.cuda.is_available())

2.3.0+cu118
4.41.2
True


In [10]:
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig
)
from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login, login

## Import local python files

In [14]:
from common import (
    create_prompt_formats,
    get_max_length,
    preprocess_batch,
    preprocess_dataset
)

## Login to huggingface hub with key

In [13]:
interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): ··········
Add token as git credential? (Y/n) n
Token is valid (permissi

In [15]:
import os
os.environ['WANDB_DISABLED']="true"

## Downloading a opensource dataset for text summarization

In [20]:
huggingface_dataset_name = "neil-code/dialogsum-test"
dataset = load_dataset(huggingface_dataset_name)

Downloading readme:   0%|          | 0.00/4.56k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.81M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/441k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/447k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1999 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/499 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/499 [00:00<?, ? examples/s]

In [16]:
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
    )

In [17]:
model_name= 'microsoft/phi-2' #'HuggingFaceH4/zephyr-7b-alpha'
device_map = {"": 0}
original_model = AutoModelForCausalLM.from_pretrained(model_name,
                                                      device_map=device_map,
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)



config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [18]:
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True,padding_side="left",add_eos_token=True,add_bos_token=True,use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## Test with prompt engineering( 0 shot learning)

In [21]:
%%time
from transformers import set_seed
seed = 42
set_seed(seed)

index = 10

prompt = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

formatted_prompt = f"Instruct: Summarize the following conversation.\n{prompt}\nOutput:\n"


inputs = tokenizer(formatted_prompt, return_tensors="pt", return_attention_mask=False)

output = original_model.generate(**inputs, max_length=500)
output = tokenizer.batch_decode(output)[0]
output = output.split('Output:\n')[1]

print(output)

# res = gen(original_model,formatted_prompt,100,)
# #print(res[0])
# output = res[0].split('Output:\n')[1]

seperator = '-'.join('' for x in range(100))
print(seperator)
print(f'INPUT :::\n{formatted_prompt}')
print(seperator)
print(f'GROUND TRUTH SUMMARY :::\n{summary}\n')
print(seperator)
print(f'MODEL GENERATION SUMMARY- ZERO SHOT :::\n{output}')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Person1 and Person2 are at a party, and Person1 asks if they can have a dance. Person2 agrees and compliments Person1 on their appearance. Person1 thanks them and expresses their enjoyment of the party. Person2 agrees that it is a great party and suggests having a drink to celebrate.
<|endoftext|>
---------------------------------------------------------------------------------------------------
INPUT :::
Instruct: Summarize the following conversation.
#Person1#: Happy Birthday, this is for you, Brian.
#Person2#: I'm so happy you remember, please come in and enjoy the party. Everyone's here, I'm sure you have a good time.
#Person1#: Brian, may I have a pleasure to have a dance with you?
#Person2#: Ok.
#Person1#: This is really wonderful party.
#Person2#: Yes, you are always popular with everyone. and you look very pretty today.
#Person1#: Thanks, that's very kind of you to say. I hope my necklace goes with my dress, and they both make me look good I feel.
#Person2#: You look great, you

## Preprocessing dataset

In [22]:
max_length = get_max_length(original_model)
print(max_length)

train_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['validation'])

Found max lenth: 2048
2048
Preprocessing dataset...


Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Map:   0%|          | 0/1999 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1999 [00:00<?, ? examples/s]

Preprocessing dataset...


Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Map:   0%|          | 0/499 [00:00<?, ? examples/s]

Filter:   0%|          | 0/499 [00:00<?, ? examples/s]

## Finetuning process

In [23]:
import peft
original_model = peft.prepare_model_for_kbit_training(original_model)

In [25]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

config = LoraConfig(
    r=16, #Rank
    lora_alpha=16,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense'
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
original_model.gradient_checkpointing_enable()

peft_model = get_peft_model(original_model, config)

In [26]:
print(peft_model.print_trainable_parameters())

trainable params: 10,485,760 || all params: 2,790,169,600 || trainable%: 0.3758
None


In [33]:
import transformers
import time
output_dir = f'./peft-dialogue-summary-training'

peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    max_steps=3,
    num_train_epochs=1,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=1,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=1,
    evaluation_strategy="steps",
    eval_steps=1,
    do_eval=True,
    gradient_checkpointing=True,
    report_to="none",
    overwrite_output_dir = 'True',
    group_by_length=True,
)

peft_model.config.use_cache = False

peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

max_steps is given, it will override any value given in num_train_epochs


In [34]:
%%time
# train
peft_trainer.train()



Step,Training Loss,Validation Loss
1,1.9879,2.032983
2,1.7146,2.025158
3,2.3462,2.021262




CPU times: user 4min 36s, sys: 1min 45s, total: 6min 21s
Wall time: 6min 43s


TrainOutput(global_step=3, training_loss=2.0162100791931152, metrics={'train_runtime': 401.9725, 'train_samples_per_second': 0.007, 'train_steps_per_second': 0.007, 'total_flos': 35179861248000.0, 'train_loss': 2.0162100791931152, 'epoch': 0.0015007503751875938})

In [35]:
# !zip -r /content/peft-dialogue-summary-training.zip /content/peft-dialogue-summary-training

  adding: content/peft-dialogue-summary-training/ (stored 0%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/ (stored 0%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/training_args.bin (deflated 52%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/scheduler.pt (deflated 56%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/rng_state.pth (deflated 25%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/adapter_model.safetensors (deflated 8%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/trainer_state.json (deflated 64%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/README.md (deflated 66%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/adapter_config.json (deflated 53%)
  adding: content/peft-dialogue-summary-training/checkpoint-2/optimizer.pt (deflated 56%)
  adding: content/peft-dialogue-summary-training/checkpoint-1/ (stored 0%)
  adding: content/peft-dialogue-summary-

### We can save the adapter (peft-lora model) separately.
### However it gets automatically saved duting training also inside 'output_dir' as checkpoints

In [49]:
# save adapter
peft_trainer.save_model('./adapter')
# save the training config
peft_trainer.model.config.save_pretrained('./adapter')

In [51]:
# !zip -r /content/adapter.zip /content/adapter

updating: content/adapter/ (stored 0%)
updating: content/adapter/adapter_model.safetensors (deflated 13%)
updating: content/adapter/README.md (deflated 66%)
updating: content/adapter/adapter_config.json (deflated 53%)
  adding: content/adapter/training_args.bin (deflated 52%)
  adding: content/adapter/config.json (deflated 55%)


### We can merge the adapter and the original model and use it 

In [47]:
# merge with original model and save
merged = peft_trainer.model.merge_and_unload()
merged.save_pretrained("./merged",safe_serialization=True)
tokenizer.save_pretrained("./merged")

('./merged/tokenizer_config.json',
 './merged/special_tokens_map.json',
 './merged/vocab.json',
 './merged/merges.txt',
 './merged/added_tokens.json')

In [52]:
# !zip -r /content/merged.zip /content/merged

  adding: content/merged/ (stored 0%)
  adding: content/merged/model.safetensors (deflated 22%)
  adding: content/merged/merges.txt (deflated 53%)
  adding: content/merged/generation_config.json (deflated 24%)
  adding: content/merged/config.json (deflated 55%)
  adding: content/merged/special_tokens_map.json (deflated 75%)
  adding: content/merged/added_tokens.json (deflated 84%)
  adding: content/merged/tokenizer_config.json (deflated 94%)
  adding: content/merged/vocab.json (deflated 68%)
