In [None]:
!pip install datasets
!pip install langchain
!pip install openai
!pip install backoff

Collecting datasets
  Downloading datasets-2.14.6-py3-none-any.whl (493 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dill, multiprocess, huggingface-hub, datasets
Successfully installed datasets-2.14.

In [None]:
import getpass
import json
import os

import backoff  # for exponential backoff
import datasets
import openai
import pandas as pd
from datasets import load_dataset
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)
from tqdm.notebook import tqdm_notebook

# Delimiters that enclose the tag sequence in the model's reply: 'Tags: <' ... '>'.
TAGS_BEGIN, TAGS_END = 'Tags: <', '>'

@backoff.on_exception(backoff.expo, openai.error.RateLimitError, max_tries=6)
def _chat_with_backoff(chat, messages):
    """Invoke the chat model, retrying with exponential backoff on rate limits.

    Only `openai.error.RateLimitError` triggers a retry; after `max_tries`
    attempts the last error is re-raised to the caller.
    """
    return chat(messages)


def get_dialog_turns(chat, prompt, query_text):
    """Send one system+human message pair to the chat model and capture the outcome.

    The retry decorator lives on the inner helper rather than on this function:
    this function catches every exception, so a decorator placed here would
    never observe a `RateLimitError` and would never retry.

    Args:
        chat: Callable chat model; called with a list of messages and expected
            to return an object exposing `.content`.
        prompt: Text sent as the system message.
        query_text: Text sent as the human message.

    Returns:
        dict with keys:
            'text'    -- the model's reply, or '' on failure,
            'success' -- True when a reply was obtained, else False,
            'Error'   -- '' on success, otherwise `str(exception)`.
        This function never raises; failures are reported through the dict.
    """
    dialog_response = {'Error': '', 'text': '', 'success': False}
    try:
        messages = [
            SystemMessage(content=prompt),
            HumanMessage(content=query_text)
        ]

        dialog_message = _chat_with_backoff(chat, messages)
        dialog_response['text'] = dialog_message.content
        dialog_response['success'] = True
    except Exception as e:
        # Includes a RateLimitError that survived all retries.
        dialog_response['text'] = ''
        dialog_response['success'] = False
        dialog_response['Error'] = str(e)

    return dialog_response



# SECURITY: never hardcode API keys in a notebook. The key that was previously
# committed on this line must be treated as compromised and revoked.
# Use the key already present in the environment, otherwise prompt for it
# without echoing it to the screen (and without storing it in the notebook).
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

def get_dialog_query(record):
    """Build the human-message text for one dataset record.

    Reads `record['dialogue']` and `record['summary']` and lays them out under
    'Dialog:' and 'Summary:' headings, one per line.
    """
    template = "Dialog:\n{}\nSummary:\n{}"
    dialogue_text = record['dialogue']
    summary_text = record['summary']
    return template.format(dialogue_text, summary_text)

# Fetch the evaluation data from the Hugging Face Hub and show its structure.
dataset_repo = 'Deojoandco/capstone_forgpt_without_gold'
ds = load_dataset(dataset_repo)
print(ds)

# Chat client used for every request in this notebook.
chat_settings = dict(model_name='gpt-4', temperature=0.7, max_tokens=2048)
chat = ChatOpenAI(**chat_settings)

# System prompt sent with every request. It defines the per-token labels
# (O, W, C, OB, N, plus a trailing M/O for missing information), gives one
# worked example, and asks the model to explain its reasoning and then answer
# in the form 'Tags: <...>' — the format the parsing code below searches for
# via TAGS_BEGIN / TAGS_END.
# NOTE(review): the text contains typos ("is identify", "circumstancial",
# "folow"). They are left untouched here because any change to the prompt can
# change the model's output and invalidate comparisons with earlier runs.
prompt = '''
Below is a dialog between people and its summary. Your task is identify how much the summary is hallucinated. The output should be token by token classification whether its hallucinated or not. Following are the available hallucination classification labels for each token. O : Not Hallucinated, W: Wrong person reference, C: circumstancial error, OB: Object error, N: uncommon error like tense errors. At the end you have to identify if there is any missing information in the summary. If there is missing information then add an extra label M else O. Remember to tag punctuations and not remove them.

Here is an example:
The Dialog is: Jesse : I have an idea that'll cheer u up ! Melvin : What is it ? Jesse : I was thinking about doing something 4 the less fortunate this year . Lee : Gr8 idea ! Anything in mind ? Maxine : So no presents 4 me ? : ( Jesse : U'll get ur presents , no worries ; ) Maxine : Phew ! Was getting a bit worried for a moment ; ) Melvin : Bt what do u have in store ? Jesse : Well , have u heard about the Refuge ? Lee : No . What's that ? Melvin : That's the Christmas foundation to help women and children ? Maxine : I think I've heard of them . So what about them ? Jesse : That's right ! They help women and children who escape from abuse . And every year they post wish lists of such ppl online and I thought that we could choose one and chip in . Melvin : That's a great idea ! Lee : Count me in ! Maxine : Me too . Jesse : Have a look at these 3 lists : <file_other> <file_other> <file_other> Lee : I think the second one would be the easiest to arrange . Maxine : Agree . Melvin : What about number 3 ? A bit ambitious , but if we pull together , we'll manage . Jesse : Actually , I'm in for the 3rd one . Maxine : I think the 2nd list would be better . The items cos more or less the same and we can easily divide it . Melvin : But if we agree to chip in the same amount of money , we can deal with the 3rd one easily . Lee : Come to think of it , the 3rd one is not that bad . A bit of planning and logistics and were good to go . Jesse : So it's settled ? Melvin : Yup . Lee : Sure . Maxine : Fine .

Summary is: Jesse , Lee and Maxine will chip in for the Refuge , a Christmas foundation for women and children who escape from abuse .

Expected answer is: <O O O O O O O O O O O O O O O O O O O O O O O O M>


Looking at the example above please look at the below dialog and its summary and analyse if the summary is hallucinated. First explain your thought process and then tag. Please folow the format of expected answer and place the answer between angular brackets in 'Tags: <>'
'''
# Upper bound on how many records are sent to the model. The loop previously
# ended with an unconditional `break`, i.e. it only ever processed the first
# record; that behaviour is kept as the default but is now an explicit knob.
# Set to None to process the whole split (one GPT-4 call per record).
MAX_RECORDS = 1


def _extract_tags(response_text):
    """Return the text between TAGS_BEGIN and the next TAGS_END, or None if absent."""
    marker_pos = response_text.find(TAGS_BEGIN)
    if marker_pos == -1:
        return None
    tags_start = marker_pos + len(TAGS_BEGIN)
    tags_end = response_text.find(TAGS_END, tags_start)
    if tags_end == -1:
        return None
    return response_text[tags_start:tags_end]


records = []

pbar = tqdm_notebook(ds['train'], desc='Creating Dialog')
for i, record in enumerate(pbar):
    query_text = get_dialog_query(record)

    dialog_response = get_dialog_turns(chat, prompt, query_text)

    record['query'] = prompt + '\n' + query_text
    record['gpt_success'] = dialog_response['success']
    record['gpt_response'] = dialog_response['text']

    gold_tag_tokens = record['gold_tags'].split()

    # Defaults so that every row has the same columns, even when the request
    # failed or the reply did not contain a 'Tags: <...>' section.
    record['gpt_tags'] = None
    record['gold_tags_tokens_count'] = len(gold_tag_tokens)
    record['gpt_tags_tokens_count'] = 0
    record['summary_gpt_tags_token_count_match'] = False
    record['gold_gpt_tags_match'] = False

    if dialog_response['success']:
        print(record['gpt_response'])
        # A malformed reply must not abort the whole run (and lose the records
        # collected so far), so a missing marker yields None instead of raising.
        extracted_tags = _extract_tags(record['gpt_response'])

        if extracted_tags is not None:
            gpt_tags = extracted_tags.split()

            record['gpt_tags'] = extracted_tags
            record['gpt_tags_tokens_count'] = len(gpt_tags)
            # Despite the column name, this compares the gold tag count with
            # the GPT tag count (name kept so existing consumers still work).
            record['summary_gpt_tags_token_count_match'] = len(gold_tag_tokens) == len(gpt_tags)
            # Compare token lists rather than raw strings so that stray or
            # repeated whitespace in the reply does not cause a false mismatch.
            record['gold_gpt_tags_match'] = gold_tag_tokens == gpt_tags

    records.append(record)
    if MAX_RECORDS is not None and i + 1 >= MAX_RECORDS:
        break
df = pd.DataFrame(data=records)
df.head()

DatasetDict({
    train: Dataset({
        features: ['dialogue', 'summary', 'gold_tags'],
        num_rows: 100
    })
})


Creating Dialog:   0%|          | 0/100 [00:00<?, ?it/s]

The dialog consists of interactions between Ethan, Scott, Toby, and Marshall. In the conversation, it is clear that Ethan and Marshall, along with Toby, enjoy making fun of Scott. The dialog doesnt provide any circumstantial information, or object, so we can't make any errors there. The summary correctly captures the essence of the dialog, mentioning that Ethan and Marshall enjoy making fun of Scott. Toby's involvement in making fun of Scott is omitted in the summary.

Tags: <O O O O O O O O O O O M>


Unnamed: 0,dialogue,summary,gold_tags,query,gpt_success,gpt_response,gpt_tags,gold_tags_tokens_count,gpt_tags_tokens_count,summary_gpt_tags_token_count_match,gold_gpt_tags_match
0,Ethan : somethin for Scott <file_photo> Toby :...,Ethan and Marshall enjoy making fun of Scott .,O O O O O O O O O M,\nBelow is a dialog between people and its sum...,True,The dialog consists of interactions between Et...,O O O O O O O O O O O M,10,12,False,False


In [None]:
# Wrap the collected results in a Hugging Face dataset for upload.
ds = datasets.Dataset.from_pandas(df, split='train')

# SECURITY: never hardcode access tokens in a notebook. The token that was
# previously committed on the upload line must be treated as compromised and
# revoked. Read it from the environment, otherwise prompt without echoing.
hf_token = os.environ.get('HF_TOKEN') or getpass.getpass('Hugging Face token: ')

# push splits to huggingface repo
print("Uploading to Huggingface")
ds.push_to_hub('capstone_fromgpt_without_gold', token=hf_token)