In [None]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [None]:
pip install flash-attn --no-build-isolation



In [None]:
import os
import torch
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer, BitsAndBytesConfig,pipeline, logging
from transformers import (
    AutoModelForCausalLM,
    HfArgumentParser,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer


In [None]:
from datasets import load_dataset

# Fetch the BBC news summary dataset from the Hugging Face Hub by its repository id.
dataset = load_dataset(path="gopalkalpande/bbc-news-summary")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
from datasets import DatasetDict

# Hold out 20% of the original 'train' split as a test set (fixed seed), then drop the
# 'File_path' column from both halves.
dataset = dataset['train'].train_test_split(test_size=0.2, seed=42)
for split_name in ('train', 'test'):
    dataset[split_name] = dataset[split_name].remove_columns('File_path')
dataset

DatasetDict({
    train: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 1779
    })
    test: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 445
    })
})

In [None]:
import re

def clean_text(all):
    """Strip noise characters from the 'Articles' and 'Summaries' fields of one row.

    Removes every backslash, double backtick, double dash, double quote and
    newline from both fields (each match is replaced with the empty string).

    Args:
        all: Mapping with string values under the keys 'Articles' and 'Summaries'.
            It is modified in place.

    Returns:
        The same mapping object, with the two fields cleaned.
    """
    noise = re.compile(r'\\|``|--|"|\n')
    for column in ("Articles", "Summaries"):
        all[column] = noise.sub('', all[column])
    return all

# Run clean_text over `dataset` via `map`, keeping the result under a new name so the
# uncleaned `dataset` stays available.
clean_dataset = dataset.map(clean_text)

In [None]:
# Display the structure of the cleaned dataset.
clean_dataset

DatasetDict({
    train: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 1779
    })
    test: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 445
    })
})

In [None]:
# Default instruction placed after "### Instruction:" in every training prompt.
DEFAULT_SYSTEM_PROMPT = """ Write a summary of the Article. """


def generate_training_prompt(
    conversation: str, summary: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT
) -> str:
    """Assemble the instruction / input / summary prompt used as training text.

    Args:
        conversation: Source text to summarise; surrounding whitespace is stripped.
        summary: Reference summary placed under the "### Summary:" header.
        system_prompt: Instruction text inserted after "### Instruction:".

    Returns:
        The assembled prompt with leading and trailing whitespace removed.
    """
    sections = (
        f"### Instruction: {system_prompt}",
        "",
        "### Input:",
        f"{conversation.strip()}",
        "",
        "### Summary:",
        f"{summary}",
        "",
    )
    return "\n".join(sections).strip()


In [None]:
def generate_text(data_point):
    """Turn one dataset row into the columns used for supervised fine-tuning.

    Args:
        data_point: Mapping with the keys 'Articles' and 'Summaries'.

    Returns:
        A dict with 'conversation' (the article), 'summary' (the reference summary)
        and 'text' (the prompt built by generate_training_prompt from the two).
    """
    reference = data_point["Summaries"]
    article = data_point["Articles"]
    prompt = generate_training_prompt(article, reference)
    return {"conversation": article, "summary": reference, "text": prompt}

# Smoke-test the prompt template with an empty article/summary pair.
example_data_point = dict(Articles="", Summaries="")

example = generate_text(example_data_point)
print(example["text"])


### Instruction:  Write a summary of the Article. 

### Input:


### Summary:


In [None]:
from datasets import Dataset
def process_dataset(data: Dataset):
    """Return the result of mapping generate_text over `data`."""
    return data.map(generate_text)

In [None]:
# Display the cleaned dataset again before building the prompt columns.
clean_dataset

DatasetDict({
    train: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 1779
    })
    test: Dataset({
        features: ['Articles', 'Summaries'],
        num_rows: 445
    })
})

In [None]:
# Build the prompt columns for both splits.
clean_dataset_train = process_dataset(clean_dataset["train"])
clean_dataset_test = process_dataset(clean_dataset["test"])

# Keep a seeded random subset: the first 1000 / 100 rows after shuffling.
train_data = clean_dataset_train.shuffle(seed=42).select(range(1000))
test_data = clean_dataset_test.shuffle(seed=42).select(range(100))

In [None]:
# The raw columns are duplicated in 'conversation' / 'summary', so drop them from both subsets.
raw_columns = ['Articles', 'Summaries']
train_data = train_data.remove_columns(raw_columns)
test_data = test_data.remove_columns(raw_columns)


In [None]:
# Display the training subset's columns and row count.
train_data

Dataset({
    features: ['conversation', 'summary', 'text'],
    num_rows: 1000
})

In [None]:
# --- LoRA adapter settings (read by the LoraConfig cell) ---
lora_r = 8
lora_alpha = 32
lora_dropout = 0.05

# --- Quantization config passed to from_pretrained when the base model is loaded ---
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16"
)
# --- Values forwarded to TrainingArguments in the training cell ---
output_dir = './results'
num_train_epochs = 1

fp16=False
bf16 = False

per_device_train_batch_size = 4
# NOTE(review): per_device_eval_batch_size is not passed to TrainingArguments in the visible cells.
per_device_eval_batch_size = 4

gradient_accumulation_steps = 1
# NOTE(review): gradient_checkpointing is not read anywhere visible; the notebook calls
# model.gradient_checkpointing_enable() directly instead.
gradient_checkpointing = True
max_grad_norm = 0.3

learning_rate= 2e-4
weight_decay = 0.001
optim="paged_adamw_32bit"
lr_scheduler_type="cosine"
max_steps = -1
warmup_ratio = 0.03
group_by_length = True

save_steps = 0
logging_steps = 2
# NOTE(review): max_seq_length is not used by the training cell, which passes max_seq_length=4096 literally.
max_seq_length = None

# NOTE(review): packing and device_map are not read by the visible cells; the model is
# loaded with device_map="auto".
packing = False
device_map = {"": 0}


In [None]:
import torch
from peft import prepare_model_for_kbit_training

model_id = "NousResearch/Llama-2-7b-hf"

# Tokenizer: reuse the EOS token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Base model, quantized on load according to bnb_config.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
)
model.config.use_cache = False
model.config.pretraining_tp = 1

print(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )


In [None]:
def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Iterates over model.named_parameters(), sums numel() over all parameters and
    over those with requires_grad set, and prints both counts plus the
    trainable percentage.

    Args:
        model: Object exposing named_parameters() whose parameters provide
            numel() and requires_grad.
    """
    total = 0
    trainable = 0
    for _name, tensor in model.named_parameters():
        count = tensor.numel()
        total += count
        if tensor.requires_grad:
            trainable += count
    percent = 100 * trainable / total
    print(f"trainable params: {trainable} || all params: {total} || trainable%: {percent}")
# Report the trainable share of the freshly loaded quantized model.
print_trainable_parameters(model)

trainable params: 262410240 || all params: 3500412928 || trainable%: 7.496550989769399


In [None]:
# Turn on gradient checkpointing, then run PEFT's k-bit training preparation on the
# quantized model; the returned object replaces `model`.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)


In [None]:
# Re-check the trainable share after prepare_model_for_kbit_training. This reuses the
# print_trainable_parameters helper defined in an earlier cell instead of redefining it.
print_trainable_parameters(model)

trainable params: 8388608 || all params: 3508801536 || trainable%: 0.23907331075678143


In [None]:
# Name for the fine-tuned model.
# NOTE(review): not referenced by any other visible cell — confirm it is still needed.
new_model = "Llama-2-7b-summarisation-finetune"

Zero-shot baseline: summarisation with the base Llama 2 model, before fine-tuning

In [None]:
# Zero-shot baseline: summarise one held-out article with the quantized base model,
# before the LoRA adapters are attached.
conversation = test_data['conversation'][2]
summary = test_data['summary'][2]


prompt = f"""
Summarize the following conversation.

### Input:
{conversation}

### Summary:
"""

# Move the whole encoding (input_ids and attention_mask) to the model's device and pass
# both to generate(), so generation does not run with CPU inputs or an implicit mask.
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)[0]
# The decoded text contains the prompt followed by the newly generated tokens.
generated_summary = tokenizer.decode(output_ids, skip_special_tokens=True)

dash_line = '-' * 100
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{generated_summary}')



----------------------------------------------------------------------------------------------------
INPUT PROMPT:

Summarize the following conversation.

### Input:
Global blogger action day called..The global web blog community is being called into action to lend support to two imprisoned Iranian bloggers...The month-old Committee to Protect Bloggers' is asking those with blogs to dedicate their sites on 22 February to the Free Mojtaba and Arash Day. Arash Sigarchi and Mojtaba Saminejad are both in prison in Iran. Blogs are free sites through which people publish thoughts and opinions. Iranian authorities have been clamping down on prominent sites for some time. I hope this day will focus people, Curt Hopkins, director of the Committee, told the BBC News website...The group has a list of actions which it says bloggers can take, including writing to local Iranian embassies. The Committee has deemed Tuesday Free Mojtaba and Arash Day as part of its first campaign. It is calling on the 

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings; rank, scaling and dropout come from the hyperparameter cell.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
)

# Wrap the prepared model with the adapters and report the trainable share.
model = get_peft_model(model, peft_config)
print_trainable_parameters(model)

trainable params: 4194304 || all params: 3504607232 || trainable%: 0.11967971650867153


In [None]:
from trl import SFTTrainer

# Training hyperparameters come from the configuration cell.
# load_best_model_at_end is deliberately not set: no evaluation dataset or evaluation
# strategy is configured here, and TrainingArguments rejects that flag unless the save
# and evaluation strategies match.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Supervised fine-tuning on the 'text' column (the full instruction/input/summary prompt).
# NOTE(review): `model` is already wrapped by get_peft_model in an earlier cell while
# peft_config is passed again here — confirm the installed trl version accepts both.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    dataset_text_field="text",
    peft_config=peft_config,
    max_seq_length=4096,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Step,Training Loss
2,1.9043
4,1.9434
6,1.7281
8,1.8383
10,1.8616
12,1.4456
14,1.3859
16,1.4738
18,1.3534
20,1.2666


TrainOutput(global_step=250, training_loss=1.3289914045333862, metrics={'train_runtime': 9008.8461, 'train_samples_per_second': 0.111, 'train_steps_per_second': 0.028, 'total_flos': 2.96575078785024e+16, 'train_loss': 1.3289914045333862, 'epoch': 1.0})

In [None]:
# Persist the trained model and the tokenizer side by side so they can be reloaded
# from a single directory.
path = "/content/llama2_finetuned/summary"
trainer.model.save_pretrained(path)
tokenizer.save_pretrained(path)

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Directory written by the save cell. Named adapter_dir so the builtin `dir` is not shadowed.
adapter_dir = "/content/llama2_finetuned/summary"

# Reload the fine-tuned model from disk in 4-bit, along with the tokenizer saved next to it.
trained_model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_dir,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
tokenizer = AutoTokenizer.from_pretrained(adapter_dir)

In [None]:
# Summarise the same held-out article as the zero-shot cell, this time with the
# fine-tuned model reloaded from disk (`trained_model`).
conversation = test_data['conversation'][2]
summary = test_data['summary'][2]


# NOTE(review): this prompt differs from the "### Instruction:" template produced by
# generate_training_prompt, which the training text uses — consider reusing that
# template. It is kept as-is so the output stays comparable with the zero-shot cell.
prompt = f"""
Summarize the following conversation.

### Input:
{conversation}

### Summary:
"""

# Keep the full encoding (input_ids and attention_mask) and move it to the model's
# device; generate() receives both as keyword arguments.
inputs = tokenizer(prompt, return_tensors='pt', truncation=True).to(trained_model.device)
output_ids = trained_model.generate(**inputs, max_new_tokens=64)[0]
# The decoded text contains the prompt followed by the newly generated tokens.
generated_summary = tokenizer.decode(output_ids, skip_special_tokens=True)

dash_line = '-' * 100
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - FINE-TUNED:\n{generated_summary}')