In [None]:
import datetime
import json
import sys
sys.path.insert(0, '../src')

import warnings
from multiprocessing import Pool
from threading import Thread

import jsonlines
import pandas as pd
import seaborn as sns
from fuzzywuzzy import process
from intents import *
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
from p_tqdm import p_umap
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             precision_recall_fscore_support)
from tqdm import tqdm
from utils import print_confusion_matrix
from IPython.display import clear_output
import random

tqdm.pandas()

# Binary Classification Prompt 
This could be used for one-vs-all classification. 

`intent_classification_one_feed_examples` builds the prompt: you supply both positive and negative examples for an intent, and it returns the prompt string.

In [None]:
# Few-shot utterances that do / do not express the "buy" intent.
buy_positive_examples = [
    "I want to buy shoes",
    "I would like to buy a chocolate",
    "How to purchase camera",
]
buy_negative_examples = [
    "I dance and sing",
    "I want to drink water",
    "Where can I talk to a real human here?",
]

# Build the binary prompt for "buy", then run it on a sample utterance and
# display the entire response.
in_ = intent_classification_one_feed_examples(
    intent="buy",
    intent_positive_examples=buy_positive_examples,
    intent_negative_examples=buy_negative_examples,
)
classify_intents(in_, "i want to buy", return_entire_resp=True)

# Multiclass classification prompt

In [None]:
# Example utterances keyed by the intent they illustrate.
example_utterances = {
    "buy": [
        "I want to buy shoes",
        "I would like to buy a chocolate",
        "How to purchase camera",
    ],
    "return": ["I want to return this product", "How does one exchange this?"],
}

# One prompt covering both intents, then a sample utterance that mentions both.
in_ = intent_classification_many_feed_examples(
    intents=list(example_utterances),
    intent_examples=example_utterances,
)
classify_intents(in_, "How do I return my shoes that I buy online yesterday?")

# Looking at Verloop-specific intents

Intents from Verdan are saved in `data/verloop_intents.json`.

In [None]:
# Load the saved intents; the cells below use this as a mapping from intent
# name to a list of example utterances. The context manager closes the file
# handle, which the bare `json.load(open(...))` form never did.
with open("../data/verloop_intents.json") as intents_file:
    verloop_intents = json.load(intents_file)

In [None]:
# Time one full classification: building a three-intent prompt from
# `verloop_intents` plus the `classify_intents` call on a sample utterance.
%timeit classify_intents( intent_classification_many_feed_examples(intents=["AddItem", "Returns", "Refunds"], intent_examples=verloop_intents),"How do I add things on my list",)

In [None]:
# Keep at most two example utterances per intent, considering prompt size
# constraints.
ver_intents = {
    intent_name: intent_examples[:2]
    for intent_name, intent_examples in verloop_intents.items()
}

In [None]:
# Multiclass prompt over every intent in `ver_intents`, using the trimmed
# example lists.
all_intent_names = list(ver_intents)
in_ = intent_classification_many_feed_examples(
    intents=all_intent_names,
    intent_examples=ver_intents,
)

In [None]:
# Engine name, read as a global by `classify_intents_jsonl`: it is passed as
# `engine=` to the completion call and is the prefix of the JSONL output file
# name (`../data/{model}_{query_intent}.jsonl`).
model = "ada"

In [None]:
def classify_intents_jsonl(query: "pd.Series") -> None:
    """
    Classify one message and append the result to a JSONL file.

    Because lambda functions can't be pickled (and therefore can't be handed to
    a multiprocessing pool), the prompt is not passed as a parameter. Instead
    this function reads three module-level globals, all of which must be set
    before it is called:

    * ``in_``          - the prompt prefix,
    * ``model``        - the engine name,
    * ``query_intent`` - the suffix of the output file name.

    If ``query.MessageId`` already appears in
    ``../data/{model}_{query_intent}.jsonl`` the function returns without
    calling the API. A missing output file is treated as "nothing classified
    yet" rather than an error.

    Args:
        query: A record exposing ``MessageId`` and ``Message`` attributes (the
            notebook passes rows produced by ``DataFrame.iterrows()``).

    Returns:
        None. The outcome is the line appended to the output file, holding the
        timestamp, message id, message text, parsed prediction, JSON-encoded
        logprobs and the raw completion text.
    """
    output_path = f"../data/{model}_{query_intent}.jsonl"

    # Skip messages that already have a record. The `with` block closes the
    # reader even on the early return.
    try:
        with jsonlines.open(output_path) as reader:
            for obj in reader:
                if query.MessageId == obj["MessageId"]:
                    return
    except FileNotFoundError:
        # No output file yet: nothing has been recorded, so fall through.
        pass

    gpt3_class_input = in_
    resp = openai.Completion.create(
        engine=model,
        prompt=f"{gpt3_class_input}\nS:{query.Message}\n",
        max_tokens=10,
        temperature=0,
        logprobs=10,
    )
    o = resp.choices[0].text
    write_output = {
        "time": str(datetime.datetime.now()),
        "MessageId": query.MessageId,
        "Message": query.Message,
        # Slice from 3 characters after the "I:" marker up to the first newline.
        # NOTE(review): assumes the completion looks like "I: <intent>\n";
        # `find` returns -1 when either marker is missing - confirm.
        "predicted": o[o.find("I:") + 3 : o.find("\n")],
        "logprobs": json.dumps(resp.choices[0].logprobs),
        "text": o,
    }
    with open(output_path, "a") as sink:
        sink.write(json.dumps(write_output) + "\n")

In [None]:
# Predictions with ground truth, minus one message that the original note
# flags as having FALSE as an intent.
df_preds = pd.read_csv("../data/predictions_with_groundtruth.csv").loc[
    lambda frame: frame["MessageId"] != "zoJ8RNwzaLxPTzW9x"
]

In [None]:
# Materialise every row of `df_preds` as its own object so the list can be
# mapped over by a process pool.
rows_iter = [index_and_row[1] for index_and_row in df_preds.iterrows()]

# Single-row call before any parallel run.
# NOTE(review): `classify_intents_jsonl` reads the global `query_intent`, which
# is only assigned by the per-intent loop in the notebook's last cell; on a
# fresh kernel this call raises NameError.
classify_intents_jsonl(rows_iter[0])

In [None]:
# Create or truncate the per-model output file. Opening in "w" mode already
# empties it, and the context manager closes the handle.
with open(f"../data/{model}.jsonl", "w"):
    pass

# NOTE(review): elsewhere in this notebook `classify_intents` is called as
# (prompt, text), whereas here each worker receives a single DataFrame row,
# and nothing in this cell writes to the file truncated above. This looks like
# a leftover from before `classify_intents_jsonl` existed - confirm.
with Pool(100) as p:
    r = list(p.imap(classify_intents, rows_iter))

# Looking at the output

In [None]:
field = "Refunds"

# Read every record written for this intent; the `with` block closes the reader.
# NOTE(review): the path hard-codes the "davinci" prefix while `model` is set
# to "ada" earlier in the notebook - confirm which run is meant to be analysed.
with jsonlines.open(f"../data/davinci_{field}.jsonl") as reader:
    output = list(reader)

df_out = pd.DataFrame(output)
# MessageId is the only column the two frames share, so naming it explicitly
# gives the same join as the implicit form.
df_out = df_out[["MessageId", "predicted"]].merge(
    df_preds[["MessageId", "VerdanPrediction", "Ground Truth"]],
    on="MessageId",
    how="left",
)

# Both helper columns are derived from the raw prediction, so they must be
# computed before `predicted` is collapsed below.
known_intents = list(verloop_intents.keys())
# True when the raw prediction is not one of the known intent names.
df_out["MadeUp"] = df_out["predicted"].apply(lambda x: x not in verloop_intents)
# Best fuzzy match of the raw prediction among the known intent names.
df_out["CloseMatch"] = df_out["predicted"].apply(
    lambda x: process.extract(x, known_intents, limit=1)[0][0]
)

# Collapse to a binary label: the intent under test, or the string "False".
df_out["predicted"] = df_out["predicted"].apply(lambda x: x if x == field else "False")
# Discards whatever the cell has printed so far (presumably fuzzy-matching warnings).
clear_output()
# Keep only the messages whose ground truth is the intent under test.
df_out = df_out[df_out["Ground Truth"] == field]
df_out

# Performance - GPT3 - Measured wrt Ground Truth

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and transposes the matrix.
y_true = df_out["Ground Truth"]
y_pred = df_out["predicted"]
# One label list, in first-seen order, covering both columns. It is shared by
# the matrix and its display, and guarantees a ground-truth label is included.
labels = list(dict.fromkeys([*y_true, *y_pred]))

print(len(df_out))
print(f'Accuracy : {accuracy_score(y_true, y_pred)}')
print(f'PRF : {precision_recall_fscore_support(y_true, y_pred, average="macro")}')
cf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
# Display the matrix, as the other performance cells do.
print_confusion_matrix(cf_matrix, labels)

# Performance - Verdan Predictions - Measured wrt Ground Truth

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and transposes the matrix.
y_true = df_out["Ground Truth"]
y_pred = df_out["VerdanPrediction"]
# One label list, in first-seen order, covering both columns. It is shared by
# the matrix and its display, and guarantees a ground-truth label is included.
labels = list(dict.fromkeys([*y_true, *y_pred]))

print(len(df_out))
print(f'Accuracy : {accuracy_score(y_true, y_pred)}')
print(f'PRF : {precision_recall_fscore_support(y_true, y_pred, average="macro")}')
cf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
print_confusion_matrix(cf_matrix, labels)

# Performance - GPT3 Fuzzy Matched with Standard Intents - Measured wrt Ground Truth

In [None]:
# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and transposes the matrix.
y_true = df_out["Ground Truth"]
y_pred = df_out["CloseMatch"]
# One label list, in first-seen order, covering both columns. It is shared by
# the matrix and its display, and guarantees a ground-truth label is included.
labels = list(dict.fromkeys([*y_true, *y_pred]))

print(len(df_out))
print(f'Accuracy : {accuracy_score(y_true, y_pred)}')
print(f'PRF : {precision_recall_fscore_support(y_true, y_pred, average="macro")}')
cf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
print_confusion_matrix(cf_matrix, labels)

# Performance - GPT3 only predictions matching standard intents - Measured wrt Ground Truth

In [None]:
# Keep only rows whose raw prediction was one of the known intents.
# NOTE: this rebinds `df_out`, so cells above that are re-run afterwards see
# the filtered frame.
df_out = df_out[~df_out["MadeUp"].astype(bool)]

# scikit-learn metrics take (y_true, y_pred) in that order; passing them the
# other way round swaps precision with recall and transposes the matrix.
y_true = df_out["Ground Truth"]
y_pred = df_out["predicted"]
# One label list, in first-seen order, covering both columns. It is shared by
# the matrix and its display, and guarantees a ground-truth label is included.
labels = list(dict.fromkeys([*y_true, *y_pred]))

print(len(df_out))
print(f'Accuracy : {accuracy_score(y_true, y_pred)}')
print(f'PRF : {precision_recall_fscore_support(y_true, y_pred, average="macro")}')
cf_matrix = confusion_matrix(y_true, y_pred, labels=labels)
print_confusion_matrix(cf_matrix, labels)

# Binary classification for each intent

In [None]:
# Upper bound on the negative examples sampled into each binary prompt.
NUM_NEGATIVE_EXAMPLES = 10
# Dedicated, seeded generator so the sampled negatives (and therefore the
# prompts) are identical on every run.
negative_sampler = random.Random(0)

# One binary prompt per intent: that intent's own examples are the positives,
# and a sample of every other intent's examples are the negatives.
intent_prompt = {}
for intent, pos_examples in verloop_intents.items():
    neg_examples = [
        utterance
        for other_intent, other_examples in verloop_intents.items()
        if other_intent != intent
        for utterance in other_examples
    ]
    # Cap the sample size at what is available; `sample` raises ValueError when
    # asked for more items than the population holds.
    sample_size = min(NUM_NEGATIVE_EXAMPLES, len(neg_examples))
    intent_prompt[intent] = intent_classification_one_feed_examples(
        intent_positive_examples=pos_examples,
        intent_negative_examples=negative_sampler.sample(neg_examples, sample_size),
        intent=intent,
    )

In [None]:
# Create or empty one output file per intent. WARNING: this discards any
# results already recorded for the current `model`.
# The name uses `model` so it matches the `../data/{model}_{query_intent}.jsonl`
# path that `classify_intents_jsonl` reads and appends to; a hard-coded prefix
# only lines up when `model` happens to be that value.
for key in intent_prompt.keys():
    with open(f"../data/{model}_{key}.jsonl", "w"):
        pass  # opening in "w" mode truncates; the context manager closes the handle

In [None]:
# `query_intent` and `in_` are deliberately module-level names:
# `classify_intents_jsonl` reads them as globals, so both are bound before each
# pool is created.
# NOTE(review): presumably relies on workers inheriting the notebook's globals
# (fork start method) - confirm on the target platform.
for query_intent, in_ in intent_prompt.items():
    print(f"Writing for {query_intent}")
    with Pool(50) as p:
        r = list(p.imap(classify_intents_jsonl, rows_iter))