In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git
!pip install -q scipy

In [None]:
import torch
# Sanity check displayed as the cell output: later cells run under
# torch.autocast("cuda") and move inputs to the 'cuda' device, so this should be True.
torch.cuda.is_available()

In [None]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

In [None]:
# Cell-local import, matching how other cells in this notebook import what they
# need; BitsAndBytesConfig carries the quantization settings for from_pretrained.
from transformers import BitsAndBytesConfig

# Single definition of the checkpoint id so model and tokenizer cannot drift apart.
BASE_MODEL_ID = "NousResearch/Llama-2-7b-chat-hf"

# Load the base model with 8-bit weights and let accelerate place it on the
# available device(s). The quantization request goes through
# `quantization_config` because passing `load_in_8bit=True` straight to
# `from_pretrained` is deprecated in recent transformers releases, and this
# notebook installs transformers from git main.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)

# Tokenizer that matches the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

In [None]:
# Freeze every parameter currently in the model. One-dimensional parameters are
# additionally stored as float32 (presumably norm weights/biases — TODO confirm
# which tensors of this architecture are 1-D).
for weight in model.parameters():
  weight.requires_grad_(False)
  if weight.ndim != 1:
    continue
  weight.data = weight.data.float()

# Turn on gradient checkpointing, then make the embedding outputs require grad
# (NOTE(review): presumably needed so gradients still flow through the frozen,
# checkpointed base model — confirm against the transformers docs).
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Sequential container whose forward result is converted to float32."""

  def forward(self, x):
    # Run the wrapped modules as nn.Sequential normally would, then cast.
    result = super().forward(x)
    return result.to(torch.float32)
# Wrap the existing output head so whatever it produces is returned as float32.
# NOTE(review): re-running this cell wraps the already-wrapped head again.
model.lm_head = CastOutputToFloat(model.lm_head)

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Prints one line with the trainable count, the total count and the
    trainable percentage. A model without any parameters reports 0.0 percent
    instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the division: an empty model has all_param == 0.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [None]:
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for a causal-LM task: r=16, alpha=32, 5% adapter
# dropout, and no bias terms handled by the adapter (bias="none").
config = LoraConfig(
    r=16,
    lora_alpha=32,
    # target_modules is intentionally not passed, so PEFT falls back to whatever
    # it selects for this model. NOTE(review): the value previously commented out
    # here, ["query_key_value"], looks like a leftover from a different
    # architecture — confirm which modules PEFT actually adapts for this model.
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the frozen base model with the adapters and report how much is trainable.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
import pandas as pd
from datasets import load_dataset, Dataset
# Read the IVR questions CSV into a DataFrame and convert it to a Hugging Face Dataset.
# NOTE(review): the absolute /kaggle/input/... path presumably only resolves
# inside a Kaggle session with the "ivr-hedis" dataset attached — make it
# configurable before running elsewhere.
df = pd.read_csv("/kaggle/input/ivr-hedis/IVR_Questions.csv")
dataset = Dataset.from_pandas(df)

In [None]:
# Print the dataset summary to confirm the load; the prompt-building cell reads
# the 'Hedis Measures' and 'IVR' columns, so both should be listed here.
print(dataset)

In [None]:
import transformers

def generate_prompt(hedis_measure: str, question: str) -> str:
  """Assemble the training prompt for one example.

  Args:
    hedis_measure: Text inserted under the "### Hedis Measure:" heading.
    question: Text inserted under the "### SMS:" heading.

  Returns:
    The instruction header followed by the two filled-in sections.
  """
  return (
    "### INSTRUCTION\n"
    "Below is the Hedis Measure and IVR survey questions for a customer. "
    "Please write an IVR message for informing customer about their hedis measure.\n\n"
    f"### Hedis Measure:\n{hedis_measure}\n"
    f"### SMS:\n{question}"
  )

def _tokenize_example(samples):
  """Build the training prompt for one dataset row and tokenize it."""
  prompt_text = generate_prompt(samples['Hedis Measures'], samples['IVR'])
  return tokenizer(prompt_text)

# Apply prompt construction + tokenization to every row of the dataset.
mapped_dataset = dataset.map(_tokenize_example)

In [None]:
# Hyper-parameters for the fine-tuning run. max_steps=-1 leaves the run length
# to num_train_epochs; metrics are logged every step to TensorBoard.
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=20,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    max_steps=-1,
    num_train_epochs=10,
    learning_rate=1e-3,
    fp16=True,
    logging_steps=1,
    output_dir='outputs',
    report_to='tensorboard'
)

# Language-modeling collator with masked-LM disabled (mlm=False).
collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_dataset,
    args=training_args,
    data_collator=collator,
)

# Disable the generation cache on the model config before training.
model.config.use_cache = False

# Run the training loop inside a CUDA autocast region.
with torch.autocast("cuda"):
    trainer.train()

In [None]:
# Write the trained model to ./ivr_model_llma_final; the reload cell reads the
# same directory name as its PEFT model id.
# NOTE(review): trainer.model is the PEFT-wrapped model, so this presumably
# stores only the adapter weights and config — confirm.
trainer.model.save_pretrained('./ivr_model_llma_final')

In [None]:
# Directory the fine-tuned adapter was saved to by the training section.
model_name = "ivr_model_llma_final"

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Derived from model_name so the directory is spelled in exactly one place
# (evaluates to "ivr_model_llma_final/", the same value as before).
peft_model_id = f"{model_name}/"

# The saved adapter config records which base checkpoint it was trained on.
config = PeftConfig.from_pretrained(peft_model_id)

# Reload that base checkpoint with 8-bit weights. The request is expressed via
# `quantization_config` because passing `load_in_8bit=True` directly to
# `from_pretrained` is deprecated in recent transformers releases.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
from IPython.display import display, Markdown

def make_inference(hedis_measure, max_new_tokens=250):
    """Generate text for a Hedis measure and render it as Markdown.

    Args:
        hedis_measure: Measure name inserted into the prompt.
        max_new_tokens: Upper bound on generated tokens (defaults to 250, the
            value that was previously hard-coded).

    Returns:
        None. The decoded output (prompt included, special tokens stripped) is
        shown in the notebook via ``display(Markdown(...))``.
    """
    # NOTE(review): this prompt is worded differently from the template built by
    # generate_prompt for training (no "### INSTRUCTION" header or "### SMS:"
    # section) — confirm the mismatch is intentional.
    prompt = f"### Below is the Hedis Measure of a customer. Please generate three questions for the customer.\n\n### Hedis Measure:\n{hedis_measure}\n"
    batch = tokenizer(prompt, return_tensors='pt')
    batch = batch.to(torch.device('cuda'))

    # torch.autocast("cuda") is the non-deprecated spelling of
    # torch.cuda.amp.autocast() and matches the context used for training.
    with torch.autocast("cuda"):
        output_tokens = model.generate(**batch, max_new_tokens=max_new_tokens)
    display(Markdown(tokenizer.decode(output_tokens[0], skip_special_tokens=True)))

In [None]:
# Ask PyTorch to release its cached CUDA memory before generating.
torch.cuda.empty_cache()
# Another measure to try: "Cardiac Rehabilitation"
hedis_measure = "Controlling High Blood Pressure"
make_inference(hedis_measure)