In [1]:
import pandas as pd
import requests
import json
import time
from sklearn.metrics import precision_recall_fscore_support

In [2]:
train_data_path = 'data/import_questions_chinese.csv'
test_data_path = 'data/test_questions_chinese.csv'

In [17]:
train_df = pd.read_csv(train_data_path, encoding='utf-8')
train_df.head()

Unnamed: 0,question,intent
0,何謂等候期？,591b04694e9337459e1c9f96
1,保費是否保證維持不變？,591be84894cab79e6d265552
2,假若我身處海外，如何延長保障期？可否通過電話辦理手續？,591bd01894cab79e6d26552a
3,同一賬戶內，如果有多個用戶擁有一個以上嘅電話號碼，「易登入」對我哋有咩幫助?,5910b2f3722b5c5f63cfdc56
4,如何聯絡AIG旅遊的緊急服務團隊？,596f041146fa08f7d2e84022


In [18]:
test_df = pd.read_csv(test_data_path, encoding='utf-8')
test_df.head()

Unnamed: 0,intent,question
0,59717c017cf3bfd43b62ec57,AIG 有冇緊急服務嘅電話號碼？
1,591bdba194cab79e6d26554c,Claim保險前洗唔洗俾一筆錢？
2,591bdba194cab79e6d26554c,do i need 承擔自負額
3,596f041146fa08f7d2e84025,Exchange可唔可以買呢份旅遊保險？
4,5910b2b7722b5c5f63cfdc53,Password有冇得改？


In [20]:
test_df = test_df.merge(train_df, left_on='intent', right_on='intent')
test_df.columns = ['intent', 'test', 'truth']
test_df.head()

Unnamed: 0,intent,test,truth
0,59717c017cf3bfd43b62ec57,AIG 有冇緊急服務嘅電話號碼？,如果我一次過去幾個國家，我需要分開購買幾份旅遊保險嗎？
1,59717c017cf3bfd43b62ec57,一份旅遊保險只會包一個國家內的旅行嗎？,如果我一次過去幾個國家，我需要分開購買幾份旅遊保險嗎？
2,59717c017cf3bfd43b62ec57,呢份旅遊保險系唔系只會包一個目的地嘅旅行？,如果我一次過去幾個國家，我需要分開購買幾份旅遊保險嗎？
3,59717c017cf3bfd43b62ec57,如果我一次過去幾個國家，呢份旅遊保險會唔會包埋？,如果我一次過去幾個國家，我需要分開購買幾份旅遊保險嗎？
4,59717c017cf3bfd43b62ec57,如果我一次過去幾個國家，旅遊保險可以保障嗎？,如果我一次過去幾個國家，我需要分開購買幾份旅遊保險嗎？


# Clare

In [6]:
key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiJjZDg4ZWMyMC02MGM1LTQ5NzMtOTBlMi1kOGU2NTU1ZDQ1MTUiLCJpc3MiOiJDbGFyZV9BSSIsImF1ZCI6IkNsYXJlX0FJIn0.UwOBPd3Ml4vjD0CGtuf1A1TQubSMkZn_KR-2oDnUKHU'
headers = {'Authorization': 'Bearer {}'.format(key),
           'Accept': 'application/json',
           'Content-Type': 'application/json'}

In [7]:
# Add intent category

data = {
    'language': 'zh-hk',
    'name': 'test',
    'feedback': True,
    'suggestion': True,
    'active': True,
    'ordering': 0,
    'confidenceOverride': False,
    'nerDisabled': False
}

r = requests.post('https://hk-demo56.clare.ai/api/v1/AddOrUpdateIntentCategory', data=json.dumps(data), headers=headers)
category_id = json.loads(r.text)['categoryId']

In [8]:
# Add intents

for question in train_df['question']:

    data = {
        'categoryId': category_id,
        'language': 'zh-hk',
        'question': question,
        'answer': '-',
        'answerFacebook': {},
        'labels': [],
        'userSays': [],
        'active': True,
    }
    
    r = requests.post('https://hk-demo56.clare.ai/api/v1/AddOrUpdateIntent', data=json.dumps(data), headers=headers)

In [23]:
# Test questions

start = time.time()

correct, pred = [], []
for i, question in enumerate(test_df['test']):
    
    payload = {
        'Query': question,
        'SessionId': str(i),
        'Language': 'zh-hk'
    }
    
    r = requests.get('https://hk-demo56.clare.ai/api/v1/MessageBot', params=payload, headers=headers)
    
    if json.loads(r.text)[0]['predictionResult']['question'] == test_df['truth'][i]:
        correct.append(1)
    else:
        correct.append(0)
    
    pred.append(json.loads(r.text)[0]['predictionResult']['question'])
        
end = time.time()
        
print('# Tested: {}'.format(len(correct)))
print('# Correct: {}'.format(sum(correct)))
print('# Wrong: {}'.format(len(correct) - sum(correct)))
print('% correct: {}'.format(100.0 * sum(correct) / len(correct)))
print('Processing time: {} seconds'.format(end - start))
print('Precision: {}'.format(precision_recall_fscore_support(test_df['truth'], pred, average='macro')[0]))
print('Recall: {}'.format(precision_recall_fscore_support(test_df['truth'], pred, average='macro')[1]))
print('F-1: {}'.format(precision_recall_fscore_support(test_df['truth'], pred, average='macro')[2]))

# Tested: 181
# Correct: 66
# Wrong: 115
% correct: 36.4640883978
Processing time: 48.2330060005 seconds
Precision: 0.411769700887
Recall: 0.389346193023
F-1: 0.366498262887


# Dialogflow

In [24]:
url = 'https://api.dialogflow.com/v1/intents?v=20180910&lang=zh-HK'

headers = {'Authorization': 'Bearer {}'.format('6c839e7ce072446bb3d81b22837d1272'),
           'Accept': 'application/json',
           'Content-Type': 'application/json'}

In [25]:
# Create intents

for i, question in enumerate(train_df['question']):
    
    data = {'languageCode': 'zh-HK',
            'name': train_df['intent'][i],
            'userSays': [{'data': [{'text': question}]}],
            'auto': True}

    r = requests.post(url, data=json.dumps(data), headers=headers)
    
    time.sleep(1)
    
    if r.status_code != 200:
        print r.text

In [28]:
# Test questions

start = time.time()

correct, pred = [], []
for i, question in enumerate(test_df['test']):
    
    url = 'https://api.dialogflow.com/v1/query?v=20180910'
    
    data = {'lang': 'zh-HK',
            'query': question,
            'sessionId': str(i)}
    
    r = requests.post(url, data=json.dumps(data), headers=headers)
    r = json.loads(r.text)
            
    if r['result']['score'] > 0.0 and r['result']['metadata']['intentName'] == test_df['intent'][i]:
        correct.append(1)
        pred.append(r['result']['metadata']['intentName'])
    else:
        correct.append(0)
        pred.append('-')
        
end = time.time()
        
print('# Tested: {}'.format(len(correct)))
print('# Correct: {}'.format(sum(correct)))
print('# Wrong: {}'.format(len(correct) - sum(correct)))
print('% correct: {}'.format(100.0 * sum(correct) / len(correct)))
print('Processing time: {} seconds'.format(end - start))
print('Precision: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[0]))
print('Recall: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[1]))
print('F-1: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[2]))

# Tested: 181
# Correct: 47
# Wrong: 134
% correct: 25.9668508287
Processing time: 35.3327209949 seconds
Precision: 0.628571428571
Recall: 0.270476190476
F-1: 0.349715522573


# Watson

In [29]:
from watson_developer_cloud import AssistantV1

In [30]:
assistant = AssistantV1(
    version='2018-09-20',
    iam_apikey='BIScBMKEuhjrKtNVx6QnmyTWyZYpAC_cSO3KjHApwc5R',
    url='https://gateway.watsonplatform.net/assistant/api'
)

In [31]:
# Add intents

for i, question in enumerate(train_df['question']):
    r = assistant.create_intent(
        workspace_id='314cdbae-1126-47fd-835e-00d1fe529d2b',
        intent=train_df['intent'][i],
        examples=[{'text': question}]).get_result()

In [50]:
# Test questions

start = time.time()

correct, pred = [], []
for i, question in enumerate(test_df['test']):
    r = assistant.message(
        workspace_id='314cdbae-1126-47fd-835e-00d1fe529d2b',
        input={
            'text': question
        }
    ).get_result()
    
    if r['intents'] and r['intents'][0]['intent'] == test_df['intent'][i]:
        correct.append(1)
        pred.append(r['intents'][0]['intent'])
    else:
        correct.append(0)
        pred.append('-')
        
end = time.time()
        
print('# Tested: {}'.format(len(correct)))
print('# Correct: {}'.format(sum(correct)))
print('# Wrong: {}'.format(len(correct) - sum(correct)))
print('% correct: {}'.format(100.0 * sum(correct) / len(correct)))
print('Processing time: {} seconds'.format(end - start))
print('Precision: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[0]))
print('Recall: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[1]))
print('F-1: {}'.format(precision_recall_fscore_support(test_df['intent'], pred, average='macro')[2]))

# Tested: 181
# Correct: 61
# Wrong: 120
% correct: 33.7016574586
Processing time: 53.0855669975 seconds
Precision: 0.657142857143
Recall: 0.33925170068
F-1: 0.421756338899
