In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
base_model = "NousResearch/Llama-2-13b-chat-hf"
new_model = "llama-2-13b-chat-hallucination-samsum"


In [5]:
train_dataset = load_dataset("csv", data_files="/home/bgarg/custom_new/Hallucination-Detection-and-Interpretability-for-Summarization/annotated_capstone_data_train.csv")
val_dataset = load_dataset("csv", data_files="/home/bgarg/custom_new/Hallucination-Detection-and-Interpretability-for-Summarization/annotated_capstone_data_val.csv")

Downloading data files: 100%|██████████| 1/1 [00:00<00:00, 6278.90it/s]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 875.82it/s]
  if _pandas_api.is_sparse(col):
Generating train split: 100 examples [00:00, 9921.24 examples/s]


In [6]:
train_dataset['train']


Dataset({
    features: ['ID', 'Dialogue', 'Reference Summary', 'Generated Summary', 'Annotations', 'Verified Tags', 'Missing Information', 'Redundant Information', 'Circumstance', 'Wrong Reference', 'Negation', 'Object', 'Tense', 'Modality', 'Score', 'Model Name', 'Final'],
    num_rows: 400
})

In [7]:
compute_dtype = getattr(torch, "float16")

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)

In [8]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=quant_config,
    device_map={"": 0}
)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards: 100%|██████████| 3/3 [00:22<00:00,  7.59s/it]


In [14]:
special_tokens = ["<START_C>", "<END_C>", "<START_S>", "<END_S>", "<START_A>", "<END_A>" ]
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.is_split_into_words=True
tokenizer.add_tokens(special_tokens, special_tokens=True)

6

In [15]:
peft_params = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

In [16]:
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=5,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=1,
    learning_rate=1e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
)

In [17]:
val_dataset['train'][0]

{'ID': 499,
 'Dialogue': 'Ray: u in ur room?\nMax: no whats up\nRay: someone locked the door from outside -_-\nMax: wtf xD\nRay: yeah dude cmon u gotta let me out\nMax: but im out\nRay: are u kidding me\nMax: sorry man XD\nRay: dude i have to pee\nMax: HAHAHAHHAHAHA XD\nRay: thats not funny >.<\nMax: it actually is xD\nRay: can u ask someone else to do it\nMax: yea let me see if my roommates there\nRay: HURRY\nMax: hes coming\nRay: tell him to HURRYYY\nMax: hes on his way \nRay: he opened it, thanks\nMax: enjoy XD',
 'Reference Summary': "Ray is locked in the room from the outside and he has to pee. Max's roommate will come and let him out.\n",
 'Generated Summary': "Max's roommate opened the door for Ray.",
 'Annotations': 'O O O O O O O M',
 'Verified Tags': None,
 'Missing Information': 'x',
 'Redundant Information': None,
 'Circumstance': None,
 'Wrong Reference': None,
 'Negation': None,
 'Object': None,
 'Tense': None,
 'Modality': None,
 'Score': 3,
 'Model Name': 'T5-Cons',
 'F

In [19]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset['train'],
    peft_config=peft_params,
    dataset_text_field='Final',
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
    eval_dataset = val_dataset['train'],
)

Map: 100%|██████████| 400/400 [00:00<00:00, 6647.84 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 9497.33 examples/s]


In [22]:
import os 

os.system('CUDA_LAUNCH_BLOCKING=1')
os.system('export TORCH_USE_CUDA_DSA=1')
trainer.train()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

('llama-2-7b-chat-hallucination-samsum/tokenizer_config.json',
 'llama-2-7b-chat-hallucination-samsum/special_tokens_map.json',
 'llama-2-7b-chat-hallucination-samsum/tokenizer.json')

In [None]:
fn_model = PeftModel.from_pretrained(model, '/content/llama-2-7b-chat-hallucination-samsum')
# fn_model = fn_model.merge_and_unload()
pipe = pipeline(task="text-generation", model=fn_model, tokenizer=tokenizer, max_length=200)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'O

In [None]:
prompt_template = """### Instruction:
Given a set of dialogues, the task is to generate a summary of 10-15 words by considering all the dialogues, and do token-level classification on the summary based on whether it is hallucinated or not. Use the following tag classes to label each token of the summary.
O = Not Hallucinated,
W =  Wrong person reference,
C = Circumstantial error,
OB = Object error,
N = uncommon errors like tense errors
M = Missing information
The tag M should only be added at the end of the sequence incase the summary is missing any information and not as a tag specific to a word in the summary.

Dialogue- "Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 馃檪
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye"

Summary- "Amanda can't find Betty's number. Larry called her last time they were at the park together. Amanda will text Larry."

Tags- "O O O O O O O O O O O O O O O O O O W O O O O"

Explanation - Let's think step by step. The dialogue is about Hannah asking for Betty's number to Amanda, who couldn't find it and suggests to ask Larry for it since he had called her(Betty) the last time they were in the park together. Hannah doesn't know him(Larry) well and is shy to text him, but Amanda asks her to do it anyway.  So according to the summary, "Amanda will text Larry" is incorrect. The way to correct this information is the token Amanda can be changed to Hannah. This is Wrong Reference (W) from the tokens described above. All other tokens are correct and are thus Not Hallucinated (O).

Similarly, for the next dialogue, generate summary of all the dialogues and tags for the summary. Think step by step to explain it.

### Input:
Dialogue- "Harry: and? have you listened to it?
Jacob: listened to what?
Harry: to the song i sent you 3 days ago -.-
Jacob: oh shit, i completely forgot...
Harry: ofc again
Jacob: don't be like this :* i'll do that later tonight
Harry: heh, okay
Harry: i'm really curious what you'll think about it
Jacob: i'll let you know, a bit busy right now, speak to you later!
Harry: okay"

### Response:

Summary-
Tags-
Explanation-
"""

input_sentence = prompt_template

result = pipe(input_sentence)
print(result[0]['generated_text'])

Input length of input_ids is 739, but `max_length` is set to 200. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.


### Instruction:
Given a set of dialogues, the task is to generate a summary of 10-15 words by considering all the dialogues, and do token-level classification on the summary based on whether it is hallucinated or not. Use the following tag classes to label each token of the summary. 
O = Not Hallucinated,
W =  Wrong person reference,
C = Circumstantial error,
OB = Object error,
N = uncommon errors like tense errors 
M = Missing information
The tag M should only be added at the end of the sequence incase the summary is missing any information and not as a tag specific to a word in the summary. 

Dialogue- "Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 馃檪
Hannah: Urgh.. Alright
Hanna

In [None]:
logging.set_verbosity(logging.CRITICAL)

prompt = '''
Given a set of dialogues, the task is to generate a summary of 10-15 words by considering all the dialogues, and do token-level classification on the summary based on whether it is hallucinated or not. Use the following tag classes to label each token of the summary.
O = Not Hallucinated,
W =  Wrong person reference,
C = Circumstantial error,
OB = Object error,
N = uncommon errors like tense errors
M = Missing information
The tag M should only be added at the end of the sequence incase the summary is missing any information and not as a tag specific to a word in the summary.

Dialogue- "Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 馃檪
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye"

Summary- "Amanda can't find Betty's number. Larry called her last time they were at the park together. Amanda will text Larry."

Tags- "O O O O O O O O O O O O O O O O O O W O O O O"

Explanation - Let's think step by step. The dialogue is about Hannah asking for Betty's number to Amanda, who couldn't find it and suggests to ask Larry for it since he had called her(Betty) the last time they were in the park together. Hannah doesn't know him(Larry) well and is shy to text him, but Amanda asks her to do it anyway.  So according to the summary, "Amanda will text Larry" is incorrect. The way to correct this information is the token Amanda can be changed to Hannah. This is Wrong Reference (W) from the tokens described above. All other tokens are correct and are thus Not Hallucinated (O).

Similarly, for the next dialogue, generate summary of all the dialogues and tags for the summary. Think step by step to explain it.

Dialogue- "Harry: and? have you listened to it?
Jacob: listened to what?
Harry: to the song i sent you 3 days ago -.-
Jacob: oh shit, i completely forgot...
Harry: ofc again
Jacob: don't be like this :* i'll do that later tonight
Harry: heh, okay
Harry: i'm really curious what you'll think about it
Jacob: i'll let you know, a bit busy right now, speak to you later!
Harry: okay"

Summary-
Tags-
Explanation-
'''

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] 
Given a set of dialogues, the task is to generate a summary of 10-15 words by considering all the dialogues, and do token-level classification on the summary based on whether it is hallucinated or not. Use the following tag classes to label each token of the summary. 
O = Not Hallucinated,
W =  Wrong person reference,
C = Circumstantial error,
OB = Object error,
N = uncommon errors like tense errors 
M = Missing information
The tag M should only be added at the end of the sequence incase the summary is missing any information and not as a tag specific to a word in the summary. 

Dialogue- "Harry: and? have you listened to it?
Jacob: listened to what?
Harry: to the song i sent you 3 days ago -.-
Jacob: oh shit, i completely forgot...
Harry: ofc again
Jacob: don't be like this :* i'll do that later tonight
Harry: heh, okay
Harry: i'm really curious what you'll think about it
Jacob: i'll let you know, a bit busy right now, speak to you later!
Harry: okay"

GENERATE SUMMARY, TAGS, 

In [None]:
from tensorboard import notebook
log_dir = "results/runs"
notebook.start("--logdir {} --port 4000".format(log_dir))

In [None]:
!kill 4371