In [None]:
!pip install datasets
!pip install langchain
!pip install openai
!pip install backoff

Collecting backoff
  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Installing collected packages: backoff
Successfully installed backoff-2.2.1


In [None]:
from datasets import load_dataset
import os
import pandas as pd
import datasets
import json
from tqdm.notebook import tqdm_notebook

from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
import openai
import backoff  # for exponential backoff

# Upper bound on attempts for a rate-limited request, so a permanently
# exhausted quota cannot make the notebook retry forever.
@backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_tries=5)
def _invoke_chat(chat, messages):
    """Call the chat model once; rate-limit errors are retried with exponential backoff."""
    return chat(messages)


def get_dialog_turns(chat, prompt, query_text):
    """Send a system prompt plus a user query to the chat model.

    The raw model call is delegated to ``_invoke_chat`` so that
    ``openai.error.RateLimitError`` actually reaches the backoff decorator
    (previously the blanket ``except`` swallowed it and no retry ever ran).

    Args:
        chat: Callable chat model; invoked as ``chat(messages)`` and expected
            to return an object with a ``content`` attribute.
        prompt: Text used as the system message.
        query_text: Text used as the human message.

    Returns:
        dict with keys:
            'text':    the model reply, or '' on failure.
            'success': True when a reply was obtained, False otherwise.
            'Error':   '' on success, otherwise ``str(exception)``.

    This function does not raise; every failure (including a rate limit that
    persists after all retries) is reported through the returned dict.
    """
    dialog_response = {'Error': ''}
    try:
        messages = [
            SystemMessage(content=prompt),
            HumanMessage(content=query_text),
        ]
        dialog_message = _invoke_chat(chat, messages)
        dialog_response['text'] = dialog_message.content
        dialog_response['success'] = True
    except Exception as e:
        dialog_response['text'] = ''
        # A failed call must not be reported as a success.
        dialog_response['success'] = False
        dialog_response['Error'] = str(e)

    return dialog_response



# SECURITY: API keys must never be committed to source control. The key that
# used to be hard-coded on this line should be treated as leaked and revoked.
# The key is now taken from the environment; when it is absent, ask for it
# interactively without echoing it to the notebook output.
if not os.environ.get('OPENAI_API_KEY'):
    from getpass import getpass

    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

def get_dialog_query(record):
    """Build the user-message text for one dataset record.

    The tokens in ``record['source']`` are joined with single spaces, every
    ``<SEP>`` marker is replaced by a newline followed by ``Summary:``, and
    the result is prefixed with a ``Dialog:`` header line.
    """
    joined_tokens = " ".join(record['source'])
    body = joined_tokens.replace('<SEP>', '\nSummary:')
    return "".join(('Dialog:\n', body))

# Load the dataset from the Hugging Face hub and show its splits.
ds = load_dataset('Deojoandco/capstone_hal_without_gold')
print(ds)

# Chat model used to produce the token-level hallucination tags.
chat = ChatOpenAI(model_name='gpt-4', temperature=0.7, max_tokens=2048)

# System prompt: task description, label set, and one worked example.
# This text is sent to the model verbatim, so it is left exactly as written.
prompt = '''
Below is a dialog between people and its summary. Your task is identify how much the summary is hallucinated. The output should be token by token classification whether its hallucinated or not. Following are the available hallucination classification labels for each token. O : Not Hallucinated, W: Wrong person reference, C: circumstancial error, OB: Object error, N: uncommon error like tense errors. At the end you have to identify if there is any missing information in the summary. If there is missing information then add an extra label M else O. Remember to tag punctuations and not remove them.

Here is an example:
The Dialog is: Jesse : I have an idea that'll cheer u up ! Melvin : What is it ? Jesse : I was thinking about doing something 4 the less fortunate this year . Lee : Gr8 idea ! Anything in mind ? Maxine : So no presents 4 me ? : ( Jesse : U'll get ur presents , no worries ; ) Maxine : Phew ! Was getting a bit worried for a moment ; ) Melvin : Bt what do u have in store ? Jesse : Well , have u heard about the Refuge ? Lee : No . What's that ? Melvin : That's the Christmas foundation to help women and children ? Maxine : I think I've heard of them . So what about them ? Jesse : That's right ! They help women and children who escape from abuse . And every year they post wish lists of such ppl online and I thought that we could choose one and chip in . Melvin : That's a great idea ! Lee : Count me in ! Maxine : Me too . Jesse : Have a look at these 3 lists : <file_other> <file_other> <file_other> Lee : I think the second one would be the easiest to arrange . Maxine : Agree . Melvin : What about number 3 ? A bit ambitious , but if we pull together , we'll manage . Jesse : Actually , I'm in for the 3rd one . Maxine : I think the 2nd list would be better . The items cos more or less the same and we can easily divide it . Melvin : But if we agree to chip in the same amount of money , we can deal with the 3rd one easily . Lee : Come to think of it , the 3rd one is not that bad . A bit of planning and logistics and were good to go . Jesse : So it's settled ? Melvin : Yup . Lee : Sure . Maxine : Fine .

Summary is: Jesse , Lee and Maxine will chip in for the Refuge , a Christmas foundation for women and children who escape from abuse .

Expected answer is: <O O O O O O O O O O O O O O O O O O O O O O O O M>


Looking at the example above please look at the below dialog and its summary and analyse if the summary is hallucinated. First explain your thought process and then tag. Please folow the format of expected answer
'''
records = []

pbar = tqdm_notebook(ds['train'], desc='Creating Dialog')
for record in pbar:
    query_text = get_dialog_query(record)

    # Pessimistic defaults; overwritten only from an actual response.
    dialog_message_text = ''
    dialog_success = False
    dialog_error = ''

    try:
        dialog_response = get_dialog_turns(chat, prompt, query_text)
    except Exception as e:
        dialog_error = str(e)
    else:
        dialog_message_text = dialog_response['text']
        # Trust the status reported by the helper instead of assuming success.
        dialog_success = dialog_response['success']
        dialog_error = dialog_response['Error']

    record['query'] = prompt + '\n' + query_text
    record['dialog'] = dialog_message_text
    record['dialog_success'] = dialog_success
    # Keep the failure reason next to the row so failed calls can be diagnosed.
    record['dialog_error'] = dialog_error
    records.append(record)
    # NOTE(review): only the first training record is processed; this looks
    # like a smoke-test limit - remove the break to tag the whole split.
    break
# From here on `ds` is a pandas DataFrame of the processed rows, no longer the
# DatasetDict loaded at the start of this cell.
ds = pd.DataFrame(data=records)
ds.head()

DatasetDict({
    train: Dataset({
        features: ['dialog_id', 'source', 'tags'],
        num_rows: 76
    })
    validation: Dataset({
        features: ['dialog_id', 'source', 'tags'],
        num_rows: 12
    })
    test: Dataset({
        features: ['dialog_id', 'source', 'tags'],
        num_rows: 12
    })
})


Creating Dialog:   0%|          | 0/76 [00:00<?, ?it/s]

Unnamed: 0,dialog_id,source,tags,query,dialog,dialog_success
0,16,"[Ethan, :, somethin, for, Scott, <file_photo>,...","[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ...",\nBelow is a dialog between people and its sum...,The summary correctly mentions that Ethan and ...,True


In [None]:
# Print the untruncated model responses; `ds` here is the pandas DataFrame
# assigned at the end of the previous cell.
print(ds['dialog'].to_list())

['The summary correctly mentions that Ethan and Marshall enjoy making fun of Scott, which is aligned with what is stated in the dialog. However, Toby, who is also present in the dialog and participates in making fun of Scott, is not mentioned in the summary. This omission can be considered as missing information. \n\nTags: <O O O O O O O O M>']
