In [1]:
# !pip install nlp
# !pip install bert_score
# !pip install git+https://github.com/google-research/bleurt.git
from pathlib import Path

import pandas as pd
import asyncio
import aiohttp
import json
from aiohttp import ClientConnectorError, ClientSession
from nlp import load_metric
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [2]:
# Locations the notebook reads from. Each assert fails fast and names the
# missing path so a wrong working directory is obvious from the error.
data_path = Path("../data")
assert data_path.exists(), f"data path not found: {data_path.resolve()}"
datagen_path = Path("../data/datageneration")
assert datagen_path.exists(), f"data generation path not found: {datagen_path.resolve()}"
# JSON file holding the "api_key" entry used to authorise the completion requests.
keys_path = Path("key.json")
assert keys_path.exists(), f"API key file not found: {keys_path.resolve()}"

In [3]:
def _strip_reason_markup(text):
    """Remove the "\\nReason : " label, newlines and "Because " from a reason string."""
    for fragment in ("\nReason : ", "\n", "Because "):
        text = text.replace(fragment, "")
    return text


# Read the intents file, then unroll each row's list of reasons so that the
# frame ends up with one row per (sentence, intent, reason) combination.
data = pd.read_json(datagen_path / "reasons.json")
s = (
    data.apply(lambda record: pd.Series(record["Reasons"]), axis=1)
    .stack()
    .reset_index(level=1, drop=True)
    .rename("Reason")
)
data = data.drop("Reasons", axis=1).join(s)
data["Reason"] = data["Reason"].apply(_strip_reason_markup)
data

Unnamed: 0,Input Sentence,Intent,Reason
0,I would like to Activate my card,Activation,I want to use it in Italy
0,I would like to Activate my card,Activation,I need to use it
0,I would like to Activate my card,Activation,my card is not working
0,I would like to Activate my card,Activation,I have not receive my card
0,I would like to Activate my card,Activation,I want to do online transactions
...,...,...,...
44,change my phone passcode,e-PIN modification,I lost or forgot it
44,change my phone passcode,e-PIN modification,I forgot the passcode
44,change my phone passcode,e-PIN modification,I lost it
44,change my phone passcode,e-PIN modification,I forgot


In [4]:
# data.sample(10).to_json(data_path/"prompts/IntentWithReason.json", indent=2, index=False, orient="table")

In [5]:
def prompt_generation(prompt_path):
    """Build the few-shot prompt text from a JSON file of examples.

    Args:
        prompt_path: Path to a JSON file containing a sequence of objects;
            every key/value pair of every object becomes one prompt line.

    Returns:
        A single string made of ``"<key> : <value>\\n"`` lines, in file order.
        An empty sequence yields an empty string.
    """
    # Context manager closes the handle promptly (the original left it open);
    # JSON is read as UTF-8 regardless of the platform's locale default.
    with open(prompt_path, encoding="utf-8") as prompt_file:
        examples = json.load(prompt_file)
    return "".join(
        f"{key} : {value}\n"
        for example in examples
        for key, value in example.items()
    )
# File with the hand-written few-shot examples; the bare call below displays
# the prompt text that prompt_generation builds from it.
prompt_path = data_path/"prompts/IntentWithReason.json"
prompt_generation(prompt_path)

"Input Sentence : When is my credit card/covered card payment due\nContext : Bank\nIntent : Payment due Date\nReason : I want to check my payment status\nStructure : InformalWithoutGreetings, IntentFirst, WithReason\nSentence : I want to know payment due date because I want to check my payment status\nInput Sentence : Pay my <<vendor>> bill\nContext : Bank\nIntent : Bill Payment\nReason : I want to pay\nStructure : FormalWithGreetings, ReasonFirst\nSentence : Hello, I want to pay so I want to know more about bill payment, Thank you.\nInput Sentence : You have any covered card/credit card promotions?\nContext : Bank\nIntent : Promotion\nReason : I want to compare\nStructure : FormalWithGreetings, WithoutReason\nSentence : Hello, I want to know more about promotions, Thanks a lot.\nInput Sentence : I would like to donate to Zakat\nContext : Bank\nIntent : Donation\nReason : I have surplus money\nStructure : FormalWithGreetings, WithReason, IntentFirst\nSentence : Hi, I would like to dona

In [6]:
async def make_requests_completions(query_list, prompt) -> list:
    """Send one completion request per query and collect the parsed results.

    Args:
        query_list: Iterable of query dicts, each passed on unchanged to
            ``fetch_GPT3_completion_response``.
        prompt: Few-shot prompt text shared by every request.

    Returns:
        A list with one result per query. Results are collected with
        ``asyncio.as_completed``, so they are in completion order, not in the
        order of ``query_list``.
    """
    url = "https://api.openai.com/v1/engines/curie/completions"
    async with ClientSession() as session:
        # All requests share one session and are scheduled together; nothing
        # here limits how many are in flight at once.
        tasks = [
            fetch_GPT3_completion_response(
                url=url, session=session, prompt=prompt, query=query,
            )
            for query in query_list
        ]
        # tqdm wraps the completion iterator so the bar advances as each response arrives.
        results = [await f for f in tqdm(asyncio.as_completed(tasks), total=len(tasks))]
    return results

In [7]:
async def fetch_GPT3_completion_response(
    url: str, session: ClientSession, prompt, query
) -> dict:
    """Request one completion and parse it into a result record.

    Args:
        url: Completions endpoint to POST to.
        session: Open aiohttp session used to send the request.
        prompt: Few-shot prompt text; ``query["Prompt"]`` is appended to it.
        query: Dict with "Prompt", "Input Sentence", "Intent" and "Reason" keys.

    Returns:
        On success, a dict with "Structure", "Sentence", "Input Sentence",
        "Intent" and "Reason". If the connection fails or the response does
        not have the expected layout, the placeholder
        ``{"Structure": "NA", "Sentence": "NA"}`` is returned instead.
    """
    # Placeholder deliberately omits the query fields: the resulting NaN
    # columns are what the notebook's later dropna() uses to discard failures.
    fallback = {"Structure": "NA", "Sentence": "NA"}

    # Close the key file after reading instead of leaking one handle per request.
    with keys_path.open("r") as key_file:
        api_key = json.load(key_file)["api_key"]
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = json.dumps(
        {
            "prompt": prompt + query["Prompt"],
            "max_tokens": 50,
            "temperature": 0.55,
            "frequency_penalty": 0.80,
            "presence_penalty": 0.80,
            "stop" : "Input Sentence",
#                 "echo": True
        }
    )
    try:
        # `async with` releases the connection back to the pool once the body is read.
        async with session.request(
            method="POST", url=url, headers=headers, data=data
        ) as resp:
            r = await resp.json()
    except ClientConnectorError:
        # The original returned a tuple built from undefined names (`row`,
        # `query_intent`), which raised NameError; use the same placeholder
        # as the parse-failure path so every result is a dict.
        return fallback

    try:
        # The completion is expected to start with a newline, followed by a
        # "Structure : ..." line and a "Sentence : ..." line.
        lines = r["choices"][0]["text"].split("\n")
        return {
            "Structure" : lines[1].replace("Structure : ", ""),
            "Sentence" : lines[2].replace("Sentence :", ""),
            "Input Sentence" : query["Input Sentence"],
            "Intent" : query["Intent"],
            "Reason" : query["Reason"],
        }
    except (KeyError, IndexError, TypeError, AttributeError):
        # Unexpected payload (e.g. an API error body or too few lines);
        # narrower than the original bare `except`, which also hid cancellations.
        return fallback

In [8]:
results = await make_requests_completions(
    query_list=[
        {
            "Prompt" : f"""Input Sentence : {row["Input Sentence"]}\nContext : Bank\nIntent : {row['Intent']}\nReason : {row['Reason']}""",
            "Input Sentence" : row["Input Sentence"],
            "Intent" : row["Intent"],
            "Reason" : row["Reason"]
        }
        for _, row in data.iterrows()
    ],
    prompt=prompt_generation(prompt_path),
)

HBox(children=(FloatProgress(value=0.0, max=5950.0), HTML(value='')))




In [13]:
# One row per API response. Responses that could not be parsed only carry
# "Structure"/"Sentence" ("NA"), so their remaining columns come out as NaN.
df = pd.DataFrame(results)
df

Unnamed: 0,Structure,Sentence,Input Sentence,Intent,Reason
0,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I need to know the last transaction
1,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to check my transactions
2,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,To check my balance
3,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to see if my friend used it
4,"FormalWithGreetings, ReasonFirst","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to know what was the last transaction o...
...,...,...,...,...,...
5945,"FormalWithGreetings, ReasonFirst","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to check what was my last transaction
5946,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to check if I have been scammed
5947,"WithReason, FormalWithGreetings","Hello, Good Morning, I wanted to know what wa...",What was the last debit transaction on my Debi...,Debit Transaction,I want to check if it's my card
5948,"FormalWithGreetings, WithReason, ReasonFirst",Hello I want to know what was the last transa...,What was the last debit transaction on my Debi...,Debit Transaction,I want to know what was the last transaction done


In [14]:
# Persist the generated sentences as JSON using pandas' default DataFrame layout.
# NOTE(review): the last cell writes `results` to this same path with json.dump,
# replacing this file — confirm which of the two formats is intended.
df.to_json(datagen_path/"SentencesUsingReasons.json", indent=2)

In [15]:
# `load_metric` is already imported in the notebook's import cell, so it is not
# re-imported here; this only instantiates the BERTScore metric.
metric = load_metric("bertscore")

In [12]:
# Drop rows with missing values in place (this permanently shrinks `df` for
# every later cell), then score the remaining sentence pairs with BERTScore.
# NOTE(review): this cell's execution count (12) is lower than the cells above
# it, so it was run out of order — confirm it still works on Restart & Run All.
# NOTE(review): the first positional argument is presumably `predictions` and
# the second `references`; here the input sentence is passed first and the
# generated sentence second — verify the order, since it decides which score
# is reported as precision and which as recall.
df.dropna(inplace = True)
metric.compute(df["Input Sentence"], df["Sentence"], lang="en")



{'precision': tensor([0.9287, 0.9295, 0.9320,  ..., 0.9267, 0.9413, 0.9431]),
 'recall': tensor([0.8765, 0.8731, 0.8756,  ..., 0.8656, 0.8899, 0.8748]),
 'f1': tensor([0.9018, 0.9004, 0.9029,  ..., 0.8951, 0.9149, 0.9076]),
 'hashcode': 'roberta-large_L17_no-idf_version=0.3.5(hug_trans=3.0.2)'}

In [17]:
# Save the raw result records. The context manager guarantees the file is
# flushed and closed; the original passed an unclosed handle to json.dump.
with open(datagen_path/"SentencesUsingReasons.json", "w") as results_file:
    json.dump(results, results_file, indent=2)