In [None]:
import pandas as pd
import requests
import time
import tqdm
import numpy as np
import converters
import uuid
import analyzers
from ibm_watson import AssistantV2
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from sdk import Client, QnaAPI
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [None]:
# Just AI
## - Select classifier type in Just AI UI - STS, Deep Learning or other.
## https://app.jaicp.com

# Build the Just AI import spreadsheet from the training corpus.
train_path = 'data/russian/chatbot-intents/train-chatbot.csv'
docs, X, y = converters.parse_data_csv(train_path)

# One spreadsheet row per document. Line breaks inside a paraphrase are replaced
# by spaces, then all paraphrases of a document are joined with newlines.
entries = list(docs.values())
data_dict = {
    'Group': [None for _ in entries],
    'Question': [entry['question'] for entry in entries],
    'Enabled': ['true' for _ in entries],
    'Alternative phrases-Text': [
        "\n".join(phrase.replace("\n", " ") for phrase in entry['paraphrased_questions'])
        for entry in entries
    ],
    'Alternative phrases-Patterns': [None for _ in entries],
    'Answer': [entry['answer'] for entry in entries],
}
df = pd.DataFrame(data_dict)
df.to_excel('data/russian/hwu-20-ru/train-justai.xlsx', index=False)

# Next (manual) step: go to the Just AI UI and upload this Excel file.


In [None]:
# Just AI testing. Copy token from UI
token = 'xxx'
_, X_test, y_test = converters.parse_data_csv('data/russian/chatbot-intents/test-chatbot.csv')
times = []
y_pred = []
# A single tqdm progress bar replaces printing one counter line per request.
for row in tqdm.tqdm(X_test, desc='Just AI'):
    start = time.time()
    r = requests.get(f'https://app.jaicp.com/cailapub/api/caila/p/{token}/nlu/inference', params={'query': row})
    # Only the HTTP round trip is timed; the response is parsed after the clock stops.
    times.append(time.time() - start)
    y_pred.append(r.json()['intent']['answer'])

# A missing answer (None) is scored as the fallback label "other".
new_y_pred = ["other" if p is None else p for p in y_pred]

print(classification_report(y_test, new_y_pred))
print("Accuracy: ", accuracy_score(y_test, new_y_pred))
print("F1-Score: ", f1_score(y_test, new_y_pred, average='macro'))

print(f"Mean response time: {np.mean(times)} +- {np.std(times)} sec.")

In [None]:
# Autofaq http://chat.autofaq.ai
# Contact info@autofaq.ai to get user_id and user_token

# Dataset locations.
train_path = 'data/russian/chatbot-intents/train-chatbot.csv'
test_path = 'data/russian/chatbot-intents/test-chatbot.csv'

# Endpoints: host_url and namespace are handed to Client, api_url to QnaAPI
# (both inside metrics_calc).
host_url, namespace = 'https://chat.autofaq.ai', 'core-api/crud/api/v1'
api_url = 'https://api.autofaq.ai/v1'
# Alternative query endpoint:
# api_url = 'https://chat.autofaq.ai/core-api/query'

# Account credentials.
user_id, user_token = 11, 'xxx'

# Empty: metrics_calc creates and publishes a new service.
# To reuse an existing service, fill it in like this instead:
# service_response = {'service_id': 12345, 'tokens': ['xxx']}
service_response = {}
publish_time = 0

def metrics_calc(test_path, train_path, user_id, user_token, name, namespace, service_response):
    """Benchmark an Autofaq service on the test set and print the metrics.

    When ``service_response`` is falsy, a new service called ``name`` is
    created, filled with the documents parsed from ``train_path`` and
    published. Otherwise the given service is queried as it is.

    Args:
        test_path: CSV handed to ``converters.parse_data_csv`` for the test
            queries and their labels.
        train_path: CSV handed to ``converters.parse_data_csv`` for the
            training documents; only read when a service has to be created.
        user_id: Forwarded to ``Client``.
        user_token: Forwarded to ``Client``.
        name: Name given to the newly created service.
        namespace: Forwarded to ``Client``.
        service_response: Mapping with the ``'service_id'`` and ``'tokens'``
            of an existing service, or a falsy value to create a new one.

    Returns:
        None. The classification report, accuracy, macro F1, response-time
        statistics and publish time are printed.

    Note:
        Reads the notebook-level names ``host_url`` and ``api_url``.
    """
    # Bind publish_time up front: the assignments in the branch below make the
    # name local to this function, so reusing an existing service would
    # otherwise raise UnboundLocalError at the final print.
    publish_time = 0

    _, X_test, y_test = converters.parse_data_csv(test_path)
    if not service_response:
        docs_train, _, _ = converters.parse_data_csv(train_path)
        print("Parsed data")

        client = Client(host_url=host_url, user_id=user_id, user_token=user_token, namespace=namespace)
        # Assigned on the class, not on the instance.
        Client.HTTP_TIMEOUT = 180

        # The service is created unpublished, filled document by document,
        # then published once; only the publish call is timed.
        service_response = client.create_service({'preset': 'ru', 'name': name, 'skip_publish': True})
        for doc in docs_train.values():
            client.create_document(
                service_response['service_id'],
                question=doc['question'],
                answer=doc['answer'],
                name=doc['name'],
                paraphrases=doc['paraphrased_questions']
            )
        publish_start = time.time()
        client.publish_service(service_response['service_id'], wait_timeout=600)
        publish_time = time.time() - publish_start
        print("Service published for {} seconds".format(publish_time))

    qna = QnaAPI(api_url, service_response['service_id'], service_response['tokens'][0])

    print("Querying API ...")
    test_results = []
    times = []
    for row in X_test:
        start_time = time.time()
        res = qna.query(row)
        times.append(time.time() - start_time)
        test_results.append(res)

    y_pred = []
    for r in test_results:
        hits = r['results']
        # An empty result list is scored as "other", the same fallback label
        # the Just AI and Watson cells use, instead of raising IndexError
        # after every query has already been sent.
        y_pred.append(hits[0]['name'] if hits else 'other')
    print(classification_report(y_test, y_pred))
    print("Accuracy: ", accuracy_score(y_test, y_pred))
    print("F1-Score: ", f1_score(y_test, y_pred, average='macro'))
    print(f"Mean response time: {np.mean(times)} +- {np.std(times)} sec.")
    print(f"Publish time: {publish_time} sec.")

In [None]:
# Autofaq metrics calculation
metrics_calc(
    test_path=test_path,
    train_path=train_path,
    user_id=user_id,
    user_token=user_token,
    name='Hwu-20-ru',
    namespace=namespace,
    service_response=service_response,
)

In [None]:
# Prepare data for Autofaq Pro Experimental
# The test split is loaded here rather than taken from the X_test / y_test
# left behind by the Just AI testing cell, which needs a token and network
# access to run. test_path comes from the Autofaq configuration cell.
_, X_af_test, y_af_test = converters.parse_data_csv(test_path)
pd.DataFrame(
    {'is_test': ['test'] * len(X_af_test), 'question': X_af_test, 'label': y_af_test}
).to_csv('data/russian/hwu-20-ru/test-af-pro-experimental.csv', index=False, header=False)

In [None]:
# Cognigy http://cognigy.com/
train_path = 'data/russian/chatbot-intents/train-chatbot.csv'
test_path = 'data/russian/chatbot-intents/test-chatbot.csv'
docs, X, y = converters.parse_data_csv(train_path)

# Headerless CSV rows of the form: y value, the constant 'exampleSentence', X value.
mock = ['exampleSentence' for _ in X]
cognigy_rows = list(zip(y, mock, X))
cognigy_frame = pd.DataFrame(cognigy_rows)
cognigy_frame.to_csv('data/russian/hwu-20-ru/train-cognigy.csv', index=False, header=False)

In [None]:
# 1. Go to https://trial.cognigy.ai/login
# 2. Create a Flow with Intents
# 3. Import the Intents using the CSV import in the web interface
# 4. Build the model in the web interface
# 5. Add a Code node to the Flow Chart with the following code (actions.output("intentScore:" + input.intentScore +";intent:"+input.intent);)
# 6. Query the endpoint from this notebook

# Get Cognigy api key from their website
api_key = 'xxx'


def query_api(text, timeout=60):
    """Send one utterance to the Cognigy REST endpoint and return the raw response.

    Args:
        text: Utterance placed in the ``text`` field of the request body.
        timeout: Seconds handed to ``requests.post`` as its timeout, so that a
            stalled endpoint cannot block the benchmark loop indefinitely.
            Pass ``None`` to wait without limit.

    Returns:
        The response object returned by ``requests.post``, unparsed.

    Raises:
        requests.exceptions.Timeout: If the endpoint does not answer within
            ``timeout`` seconds.
    """
    # Get Cognigy rest_endpoint from their website
    rest_endpoint = 'https://endpoint-trial.cognigy.ai/xxx'
    rest_params = {
        "userId": "312312",
        # A new random session id is generated on every call, presumably so
        # that queries do not share conversation state (confirm with Cognigy docs).
        "sessionId": uuid.uuid4().hex,
        "text": text,
        "api_key": api_key,
    }
    return requests.post(rest_endpoint, json=rest_params, timeout=timeout)

def get_intent_name(response):
    """Return the intent name carried in a Cognigy response.

    The ``text`` field of the response JSON is split on ``;`` and the second
    part is read as ``intent:<name>``. A `` __GARBAGE__`` marker, if present,
    is stripped from the name.

    Args:
        response: Object whose ``json()`` returns a mapping with a ``'text'`` entry.

    Returns:
        The intent name as a string.

    Raises:
        IndexError: If the text has no ``;`` separator or no ``intent:`` prefix.
    """
    intent_field = response.json()['text'].split(';')[1]
    label = intent_field.split("intent:")[1]
    return label.replace(" __GARBAGE__", "")

In [None]:
# Cognigy testing
times = []
_, X, y = converters.parse_data_csv(test_path)
y_pred = []
# Every test sentence is queried. The former `if i > 75` guard skipped the
# first sentences, which left y_pred shorter than y and made the metric calls
# below fail on inconsistent lengths.
for sent in X:
    start_time = time.time()
    response = query_api(sent)
    # Only the request itself is timed; parsing happens after the clock stops.
    times.append(time.time() - start_time)
    y_pred.append(get_intent_name(response))

print(classification_report(y, y_pred))
print("Accuracy: ", accuracy_score(y, y_pred))
print("F1-Score: ", f1_score(y, y_pred, average='macro'))

print(f"Mean response time: {np.mean(times)} +- {np.std(times)} sec.")

In [None]:
# Google Vertex https://console.cloud.google.com/vertex-ai/
docs, X, y = converters.parse_data_csv(train_path)

# Hold out 20 % of the training corpus, then split that hold-out in half
# into a validation part and a test part.
X_train, X_devtest, y_train, y_devtest = train_test_split(X, y, test_size=0.2, random_state=42)
X_dev, X_test, y_dev, y_test = train_test_split(X_devtest, y_devtest, test_size=0.5, random_state=42)

# Dataframe for Google Vertex AutoML; rows are ordered test, training, validation.
df = pd.DataFrame({
    'type': ['test']*len(X_test) + ['training']*len(X_train) + ['validation']*len(X_dev),
    'text': X_test + X_train + X_dev,
    'label': y_test + y_train + y_dev,
})
df.to_csv('data/russian/hwu-20-ru/google_vertex.csv', index=False, header=False)

# This CSV can be uploaded in the Google Vertex UI, but the model cannot be
# built because the training data is too small.

In [None]:
# Ibm watson https://cloud.ibm.com/catalog/services/watson-assistant
docs, X, y = converters.parse_data_csv(train_path)

# Headerless two-column CSV: X value first, y value second.
watson_rows = list(zip(X, y))
watson_frame = pd.DataFrame(watson_rows)
watson_frame.to_csv('data/russian/hwu-20-ru/train-watson-X-y.csv', index=False, header=False)

In [None]:
# get assistant_id, apikey, url from watson ui
assistant_id = "xxx"
apikey = "xxx"
url = "https://api.eu-de.assistant.watson.cloud.ibm.com"

# Build the authenticated client, then point it at the service URL.
authenticator = IAMAuthenticator(apikey)
assistant = AssistantV2(version='2021-06-14', authenticator=authenticator)
assistant.set_service_url(url)

In [None]:
# Watson predict
docs, X, y = converters.parse_data_csv(test_path)
y_preds = []
times = []
for sent in X:
    query_time = time.time()
    response = assistant.message_stateless(
        assistant_id=assistant_id,
        input={
            'message_type': 'text',
            'text': sent
        }).get_result()
    # Stop the clock right after the API call, so response parsing and the
    # diagnostic print below are not counted. The other benchmark loops in
    # this notebook time their requests the same way.
    times.append(time.time() - query_time)
    try:
        y_preds.append(response['output']['intents'][0]['intent'])
    except IndexError:
        # Empty intent list: show the raw response and score it as "other".
        print(response)
        y_preds.append('other')
print(classification_report(y, y_preds))
print("Accuracy: ", accuracy_score(y, y_preds))
print("F1-Score: ", f1_score(y, y_preds, average='macro'))
print(f"Mean response time: {np.mean(times)} +- {np.std(times)} sec.")

In [None]:
# Dialogflow http://dialogflow.cloud.google.com
# Import the training corpus into the converter and write it out as a zip file.
corpus_name = 'HWU_RuBench_DialogFlow'
archive_name = "HWU_RuBench_DialogFlow.zip"

dc = converters.DialogflowConverter()
dc.import_corpus(train_path, corpus_name, 'ru')
dc.export(archive_name)
# Next (manual) step: upload this zip in the Dialogflow UI.

In [None]:
# set your app_id
app_id = ''
# put your gcloud credentials in env_path below, e.g. /home/xxx/.config/gcloud/application_default_credentials.json
env_path = ''

da = analyzers.DialogflowAnalyser(app_id, env_path)

# Annotate every test query; get_annotations returns the results together with the timings.
docs, X, y = converters.parse_data_csv(test_path)
annotations = da.get_annotations(X, 'ru')
dialogflow_results, times = annotations

In [None]:
# Dialogflow test
# The predicted label is the display name of the intent in each query result.
y_preds = []
for result in dialogflow_results:
    y_preds.append(result.query_result.intent.display_name)

print(classification_report(y, y_preds))
print("Accuracy: ", accuracy_score(y, y_preds))
print("F1-Score: ", f1_score(y, y_preds, average='macro'))

mean_time, std_time = np.mean(times), np.std(times)
print(f"Mean response time: {mean_time} +- {std_time} sec.")