In [1]:
import pandas as pd

In [10]:
REPEAT_CONSISTENCY = 4

In [None]:
!pip install -q -U bitsandbytes transformers accelerate torch
!pip install -q -U safetensors xformers

In [None]:
!pip install gdown==v4.6.3

In [4]:
def gdrive_download(file_id, file_name):
    !gdown $file_id --output $file_name

!mkdir SE2024_Task06_Group_01

In [5]:
gdrive_download('1bBPcfXxPIL31-ApqccOShyNi_1A2d9fh','./SE2024_Task06_Group_01/test_GPT_data_MT.json')
gdrive_download('1Qme_usldDMvuUv9vIdWFbesCGJvVAX2_','./SE2024_Task06_Group_01/test_GPT_data_DM.json')
gdrive_download('19iLZ_rAPQjQ6anIbrYAbmOidumCdXFmj','./SE2024_Task06_Group_01/test_GPT_data_PG.json')


Downloading...
From: https://drive.google.com/uc?id=1bBPcfXxPIL31-ApqccOShyNi_1A2d9fh
To: /content/SE2024_Task06_Group_01/test_GPT_data_MT.json
100% 4.50k/4.50k [00:00<00:00, 17.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Qme_usldDMvuUv9vIdWFbesCGJvVAX2_
To: /content/SE2024_Task06_Group_01/test_GPT_data_DM.json
100% 2.89k/2.89k [00:00<00:00, 15.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=19iLZ_rAPQjQ6anIbrYAbmOidumCdXFmj
To: /content/SE2024_Task06_Group_01/test_GPT_data_PG.json
100% 2.29k/2.29k [00:00<00:00, 11.0MB/s]


In [6]:
# Read JSON file
import json
with open('./SE2024_Task06_Group_01/test_GPT_data_MT.json') as data_file:
  myconfig = json.load(data_file)
print(myconfig[0].keys())

dict_keys(['hyp', 'ref', 'src', 'tgt', 'model', 'task', 'labels', 'label', 'p(Hallucination)'])


01 : Running With LangChain

In [7]:
import torch
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [8]:
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

In [28]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
model_4bit = AutoModelForCausalLM.from_pretrained( model_id, device_map="auto",quantization_config=quantization_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [29]:
pipeline = pipeline(
        "text-generation",
        model=model_4bit,
        tokenizer=tokenizer,
        use_cache=True,
        device_map="auto",
        max_length=2000,
        do_sample=True,
        top_k=max(REPEAT_CONSISTENCY, 5),
        num_return_sequences=REPEAT_CONSISTENCY,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        truncation=True,
)

print(REPEAT_CONSISTENCY)

4


In [13]:
prompt=["","",""]
labels = {"+": "HALLUCINATED", "-": "NOT-HALLUCINATED"}

# prompt for MT model

prompt[0]="""
I used a language model to translate an input text. Here is the input, output and target which is desired output.
output: ``` <hyp> ```
target: ``` <tgt> ```
input: ``` <src> ```
Your task is to determine whether the language model has hallucinated or not.
Lets think step by step and determining whether it is hallucinated or not and at the end after complete reasoning, say 'HALLUCINATED' if it is hallucinated, or say 'NOT-HALLUCINATED' if it is correct.

Make sure to state 'HALLUCINATED' or 'NOT-HALLUCINATED' at the end. You should use one of these exact terms at the end!

Start solve it from here:
"""

# prompt for DM model

prompt[1]="""
I used a language model for a Definition Modeling task where model should give us definition of the word which is specified between <define> tags. I'm going to give you output, input and desired output which is target.
output: ``` <hyp> ```
input: ``` <src> ```
target: ``` <tgt> ```
Your task is to determine whether the language model has hallucinated or not.
Lets think step by step and determining whether it is hallucinated or not and at the end after complete reasoning, say 'HALLUCINATED' if it is hallucinated, or say 'NOT-HALLUCINATED' if it is correct.

Make sure to state 'HALLUCINATED' or 'NOT-HALLUCINATED' at the end. You should use one of these exact terms at the end!

Start solve it from here:
"""

# prompt for PG model

prompt[2]="""
Here is the output of a model in paraphrase generating task and its input.
output: ``` <hyp> ```
input: ``` <src> ```
Your task is to determine whether the language model has hallucinated or not.
Lets think step by step and determining whether it is hallucinated or not and at the end after complete reasoning, say 'HALLUCINATED' if it is hallucinated, or say 'NOT-HALLUCINATED' if it is correct.

Make sure to state 'HALLUCINATED' or 'NOT-HALLUCINATED' at the end. You should use one of these exact terms at the end!

Start solve it from here:
"""


In [44]:
def chat_with_llm(question_p):
    template = """<s>[INST] You are a helpful, respectful and honest assistant. Answer exactly in few words from the context
    Answer the question below from context below :
    {question} [/INST] </s>
    """
    prompt = template.format(question=question_p)
    response = pipeline(prompt)
    output = [gen['generated_text'].split('[/INST] </s>')[1].strip() for gen in response]

    return output


In [15]:
def MT_model_req(data, prompt):
    gpt_prompt = prompt[0].replace("<hyp>",data['hyp'])
    gpt_prompt = gpt_prompt.replace("<tgt>",data['tgt'])
    gpt_prompt = gpt_prompt.replace("<src>",data['src'])

    return chat_with_llm(gpt_prompt)

In [16]:
def DM_model_req(data, prompt):

    gpt_prompt = prompt[1].replace("<hyp>",data['hyp'])
    gpt_prompt = gpt_prompt.replace("<tgt>",data['tgt'])
    gpt_prompt = gpt_prompt.replace("<src>",data['src'])

    return chat_with_llm(gpt_prompt)

In [17]:
def PG_model_req(data, prompt):
    gpt_prompt = prompt[2].replace("<hyp>",data['hyp'])
    gpt_prompt = gpt_prompt.replace("<src>",data['src'])

    return chat_with_llm(gpt_prompt)

In [18]:
def data_enc(data,prompt):
    if data['task']=="MT" :
        result = MT_model_req(data,prompt)
    elif data['task']=="DM" :
        result = DM_model_req(data,prompt)
    elif data['task']=="PG" :
        result = PG_model_req(data,prompt)
    else:result="input data not cruct in column task"
    return result

In [19]:
import os

def write_log(task, qid, rid, content):
  path = 'hallucination_inference'
  if not os.path.exists(path):
    os.mkdir(path)

  path = os.path.join(path, task)
  if not os.path.exists(path):
    os.mkdir(path)

  path = os.path.join(path, str(qid))
  if not os.path.exists(path):
    os.mkdir(path)

  with open(os.path.join(path, str(rid)), "w", encoding='utf-8') as f:
    f.write(content)


In [45]:
import numpy as np
def set_label(data,prompt,n):
    data['Mistral_7B_label'] = None
    for i, idx in enumerate(data.index):
        if i >= n:
            break
        ds=data.loc[idx].copy()
        extracted_numbers = []

        print(f"[{i}] {ds['task']}:")
        responses = data_enc(ds,prompt)

        for idc, response in enumerate(responses, start=1):
          result = "Unknown"
          if labels['-'] in response:
            result = labels['-']
            extracted_numbers.append(0)
          elif labels['+'] in response:
            extracted_numbers.append(1)
            result = labels['+']

          write_log(ds['task'], idx, idc, response)
          print(f"\t[{idc}] {result}")

        extracted_number = np.mean(extracted_numbers)
        data.loc[idx, 'Mistral_7B_label']=extracted_number
        print()
    return data


In [21]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [22]:
file_to_result = {
    "SE2024_Task06_Group_01/test_GPT_data_MT.json": "MT_Mistral_7B_results.csv",
    "SE2024_Task06_Group_01/test_GPT_data_DM.json": "DM_Mistral_7B_results.csv",
    "SE2024_Task06_Group_01/test_GPT_data_PG.json": "PG_Mistral_7B_results.csv",
}

In [46]:
for of, rf in file_to_result.items():
  if os.path.exists(of):
    with open(of, "r") as f:
        data = pd.DataFrame(json.loads(f.read()))
        results =set_label(data,prompt,5)
        results.to_csv(rf,index=False)

[0] MT:
	[1] NOT-HALLUCINATED
	[2] NOT-HALLUCINATED
	[3] NOT-HALLUCINATED
	[4] HALLUCINATED

[1] MT:
	[1] HALLUCINATED
	[2] NOT-HALLUCINATED
	[3] Unknown
	[4] HALLUCINATED

[2] MT:
	[1] HALLUCINATED
	[2] HALLUCINATED
	[3] HALLUCINATED
	[4] Unknown

[3] MT:
	[1] HALLUCINATED
	[2] HALLUCINATED
	[3] HALLUCINATED
	[4] HALLUCINATED

[4] MT:
	[1] NOT-HALLUCINATED
	[2] HALLUCINATED
	[3] Unknown
	[4] HALLUCINATED

[0] DM:
	[1] Unknown
	[2] Unknown
	[3] Unknown
	[4] Unknown

[1] DM:


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


KeyboardInterrupt: 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!export LC_ALL=C.UTF-8
!mv /content/hallucination_inference /content/MyDrive/hallucination_inference

NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968