In [1]:
# !pip install nlp
# !pip install bert_score
# !pip install git+https://github.com/google-research/bleurt.git
from pathlib import Path

import pandas as pd
import asyncio
import aiohttp
import json
from aiohttp import ClientConnectorError, ClientSession
from nlp import load_metric
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [2]:
# Locations this notebook depends on. Each check fails fast and reports the
# resolved path, so a wrong working directory is obvious from the message.
data_path = Path("../data")
assert data_path.exists(), f"data directory not found: {data_path.resolve()}"
datagen_path = Path("../data/datageneration")
assert datagen_path.exists(), f"data generation directory not found: {datagen_path.resolve()}"
# JSON file whose "api_key" entry is read when building the request headers.
keys_path = Path("key.json")
assert keys_path.exists(), f"API key file not found: {keys_path.resolve()}"

In [3]:
# Read the ADIB sheet and keep only its "Example" and "Intent" columns.
data = pd.read_csv(data_path / "prompts" / "ADIB.csv").loc[:, ["Example", "Intent"]]
# data.to_json(data_path/"prompts/ADIB_intent_examples.json", indent=2, index=False, orient="table")

FileNotFoundError: [Errno 2] No such file or directory: '../data/prompts/ADIB.csv'

In [None]:
def prompt_generation(prompt_path):
    """Build the prompt text from a JSON file of example records.

    The file must contain a JSON array of objects. Every key/value pair of
    every object becomes one ``"key : value"`` line, in file order.

    Args:
        prompt_path: Path (str or ``pathlib.Path``) of the JSON file.

    Returns:
        str: All generated lines concatenated, each ending in a newline.
        An empty array yields an empty string.
    """
    # Context manager so the handle is closed right after parsing; JSON is
    # read as UTF-8 rather than whatever the locale default happens to be.
    with open(prompt_path, encoding="utf-8") as prompt_file:
        examples = json.load(prompt_file)
    return "".join(
        f"{key} : {value}\n"
        for example in examples
        for key, value in example.items()
    )
# JSON file with the example records used to build the prompt; the bare call
# on the last line displays the resulting prompt text as the cell output.
prompt_path = data_path / "prompts" / "reasons.json"
prompt_generation(prompt_path)

In [None]:
async def make_requests_completions(query_list, prompt) -> list:
    """Send one completion request per query over a single shared session.

    Args:
        query_list: Sized collection of query strings; each one is sent as
            its own request.
        prompt: Text handed to ``fetch_GPT3_completion_response`` together
            with every query.

    Returns:
        list: One result per query, in the order the requests finished --
        NOT in the order of ``query_list``.
    """
    url = "https://api.openai.com/v1/engines/davinci/completions"
    async with ClientSession() as session:
        tasks = [
            fetch_GPT3_completion_response(
                url=url, session=session, prompt=prompt, query=query,
            )
            for query in query_list
        ]
        # as_completed yields each awaitable as soon as it finishes, which
        # lets the progress bar advance while requests are still in flight.
        results = [
            await finished
            for finished in tqdm(asyncio.as_completed(tasks), total=len(tasks))
        ]
    return results

In [None]:
async def fetch_GPT3_completion_response(
    url: str, session: ClientSession, prompt, query
) -> "str | tuple":
    """POST a single completion request and return the generated text.

    Args:
        url: Endpoint the request is sent to.
        session: Open ``ClientSession`` used to issue the request.
        prompt: Text placed in front of ``query`` in the request body.
        query: Text appended directly to ``prompt`` (no separator is added).

    Returns:
        The ``"text"`` field of the first entry of ``"choices"`` in the JSON
        response, or the tuple ``("error", query)`` when the connection
        could not be established.

    Raises:
        KeyError, IndexError: If the JSON response has no usable
            ``"choices"`` entry.
    """
    # Read the key through a context manager so the file handle is closed
    # instead of being left open once per request.
    with keys_path.open("r") as key_file:
        api_key = json.load(key_file)["api_key"]
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = json.dumps(
        {
            "prompt": prompt + query,
            "max_tokens": 50,
            "temperature": 0.55,
            "frequency_penalty": 0.80,
            "presence_penalty": 0.80,
            "stop" : "Input Sentence"
        }
    )
    try:
        # `async with` releases the connection even if reading the body fails.
        async with session.request(
            method="POST", url=url, headers=headers, data=payload
        ) as resp:
            body = await resp.json()
    except ClientConnectorError:
        # The previous fallback referenced names that do not exist in this
        # function and therefore raised; report the failed query instead.
        return ("error", query)
    return body["choices"][0]["text"]

In [None]:
# results = await make_requests_completions(
#     query_list=[
#         """Input Sentence : I want refund on my item\nContext : eCommerce\nIntent : Refund"""
#        for i in range(400)
#     ],
#     prompt=prompt_generation(prompt_path),
# )

In [None]:
reasons = []
for _, row in data.iterrows():
    sentence = (
        f"Input Sentence : {row['Example']}\nIntent : {row['Intent']}"
    )
    query_list = [sentence for i in range(400)]
    results = await make_requests_completions(
        query_list=query_list, prompt=prompt_generation(prompt_path)
    )
    reasons.append({"Input Sentence" : row['Example'], "Intent": row["Intent"], "Reasons": list(set(results))})

In [None]:
# Write through a context manager so the output is flushed and the file is
# closed as soon as the dump finishes.
with open(datagen_path / "reasons.json", "w") as reasons_file:
    json.dump(reasons, reasons_file, indent=2)