In [488]:
import requests
import json
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [489]:
# Base URL that every Wit.ai HTTP call in this notebook is built from.
WIT_API_HOST = 'https://api.wit.ai'
# API version date; it is embedded in the 'accept' header sent with each request.
WIT_API_VERSION = '20200513'

In [490]:
# Read the Wit.ai access token from a local file so it never appears in the notebook.
with open('token.txt') as file:
    # Strip surrounding whitespace: a trailing newline left by an editor would
    # otherwise end up inside the Authorization header value and invalidate it.
    TOKEN = file.read().strip()

# Headers shared by every request: bearer authentication plus the pinned API version.
HEADERS = {
    'authorization': 'Bearer ' + TOKEN,
    'accept': 'application/vnd.wit.' + WIT_API_VERSION + '+json'
}

In [491]:
def getResponse(query, n=1):
    """Ask the Wit.ai /message endpoint to interpret `query`.

    Args:
        query: Text sent as the 'q' request parameter.
        n: Value sent as the 'n' request parameter (presumably the maximum
            number of intents to return -- confirm against the Wit.ai docs).

    Returns:
        The response body decoded from JSON.
    """
    endpoint = WIT_API_HOST + '/message'
    reply = requests.get(endpoint, headers=HEADERS, params={'q': query, 'n': n})
    return reply.json()

In [492]:
def getQueries():
    """Fetch the utterances stored in the Wit.ai app (requesting up to 1000).

    Returns:
        The response body of GET /utterances decoded from JSON.
    """
    endpoint = WIT_API_HOST + '/utterances'
    reply = requests.get(endpoint, headers=HEADERS, params={'limit': 1000})
    return reply.json()

In [493]:
def delQueries(queries):
    """Delete the given utterances from the Wit.ai app after interactive confirmation.

    Args:
        queries: Iterable of utterance dicts (as returned by `getQueries`). Only
            keys starting with 'text' are forwarded in the delete request.

    Returns:
        None. Progress and the decoded API response are printed.
    """
    if not queries:
        print('empty list, aborting')
        return
    # Deletion is destructive, so require the user to type 'yes' explicitly.
    confirm = input('CONFIRM DELETION: ')
    if confirm != 'yes':
        print('aborting delete')
        return
    print('deleting the following. \n')
    print(queries)

    queriesDelete = [{k: v for k, v in query.items() if k.startswith('text')} for query in queries]
    # Serialize with json.dumps: str() on a list of dicts yields a Python repr
    # (single quotes, None/True/False), which is not valid JSON.
    rsp = requests.delete(WIT_API_HOST + '/utterances', headers=HEADERS, data=json.dumps(queriesDelete))

    print(rsp.json())

In [494]:
# [a,b,c]
def getExamples(filename):
    """Load one example utterance per line from a text file.

    Replaces `np.loadtxt(..., delimiter='\\n')`, which current NumPy rejects
    (a newline is not accepted as a delimiter) and which, on older versions,
    collapsed a one-line file into a 0-d array. The line handling mirrors what
    loadtxt did: text after a '#' is treated as a comment, line endings are
    removed, other whitespace is kept, and lines that end up empty are skipped.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        A 1-D NumPy array of strings: [a, b, c]. Always 1-D, even for files
        with zero or one usable line.
    """
    examples = []
    with open(filename, encoding='utf-8') as handle:
        for line in handle:
            text = line.split('#', 1)[0].strip('\r\n')
            if text:
                examples.append(text)
    return np.array(examples, dtype=str)

In [495]:
# [[a,x],[b,x],[c,x]]
def labelExamples(examples, label):
    """Pair every example with the same label: [[a, x], [b, x], [c, x]].

    Args:
        examples: NumPy array of examples; it is flattened into a single column.
        label: Value placed in the second column of every row (may be None).

    Returns:
        A 2-D array with the examples in column 0 and `label` in column 1.
    """
    textColumn = examples.reshape(-1, 1)
    labelColumn = np.full((examples.shape[0], 1), label)
    return np.hstack((textColumn, labelColumn))

In [496]:
# [ task1 [[a,x],[b,x],[c,x]] , task2 [[d,y],[e,y],[f,y]] ]
def getLabeledExamples(filenames, labels):
    """Load each file and tag its examples with the label at the same position.

    Args:
        filenames: Sequence of example-file paths.
        labels: Sequence of labels; labels[i] is applied to filenames[i].

    Returns:
        A list with one labeled array per file:
        [ task1 [[a,x],[b,x],[c,x]] , task2 [[d,y],[e,y],[f,y]] ]
    """
    return [
        labelExamples(getExamples(name), labels[position])
        for position, name in enumerate(filenames)
    ]

In [497]:
# [ train: task1 + task2 , test: task1 + task2 ]
def formTrainTest(data, split, random_state=None):
    """Split every labeled dataset and merge the pieces into one train and one test set.

    Result layout: [ train: task1 + task2 , test: task1 + task2 ]

    Args:
        data: Iterable of 2-column arrays (one per task), as produced by
            `getLabeledExamples`.
        split: Splitting strategy.
            'alt'  -- even-indexed rows go to train, odd-indexed rows to test.
            'full' -- every row is used for both train and test.
            anything else is passed to `train_test_split` as `test_size`.
        random_state: Optional seed forwarded to `train_test_split` so random
            splits can be reproduced. The default (None) keeps the previous
            unseeded behaviour.

    Returns:
        Tuple `(train, test)` of 2-column arrays.
    """
    trains = []
    tests = []
    for dataset in data:
        if split == 'alt':
            trainPart = dataset[0::2]
            testPart = dataset[1::2]
        elif split == 'full':
            trainPart = dataset
            testPart = dataset
        else:
            trainPart, testPart = train_test_split(
                dataset, test_size=split, random_state=random_state
            )
        trains.append(trainPart)
        tests.append(testPart)

    # The empty (0, 2) seed keeps the result 2-column even when `data` is empty.
    # Concatenating once avoids re-copying the accumulated rows for every task.
    seed = np.empty((0, 2))
    train = np.concatenate([seed] + trains)
    test = np.concatenate([seed] + tests)

    return train, test

In [498]:
def examplesToJson(examples):
    """Convert labeled examples into the utterance dicts sent to Wit.ai.

    Args:
        examples: Iterable of rows where row[0] is the utterance text and
            row[1] is the intent name, or None for "no intent".

    Returns:
        A list of dicts with keys 'text', 'entities', 'traits' and, when the
        intent is not None, 'intent'.
    """
    # Named `payload` rather than `json` so the imported json module is not shadowed.
    payload = []
    for example in examples:
        # Cast to built-in str: rows taken from a NumPy array hold NumPy string
        # scalars, whose repr differs from plain strings on recent NumPy versions.
        entry = {'text': str(example[0]), 'entities': [], 'traits': []}
        intent = example[1]
        if intent is not None:
            entry['intent'] = str(intent)
        payload.append(entry)
    return payload

In [525]:
# One example file per intent; the last file is paired with None (no intent).
exampleFiles = ['class.txt', 'reg.txt', 'nlp.txt', 'NONE.txt']
exampleLabels = ['task_class', 'task_reg', 'task_nlp', None]
data = getLabeledExamples(exampleFiles, exampleLabels)

# 'full' puts every example in both the training and the test set.
train, test = formTrainTest(data, 'full')
trainJson, testJson = examplesToJson(train), examplesToJson(test)

In [526]:
# Show how many training utterances were built, then the utterances themselves.
print(len(trainJson))
print(trainJson)

269
[{'text': 'sex classification', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'image classification on cifar-10', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'classification of audio samples', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'classification model for movie genres', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'classifier to identify high-risk populations', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'classifier for detecting frowns', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'facial recognition ', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'voice recognition ', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'audio recognition that figures out the speaker', 'entities': [], 'traits': [], 'intent': 'task_class'}, {'text': 'automated detection of safe drinking water', 'entities': [], 'traits': [], 'intent': 'task_class'

In [538]:
# Upload the first 155 training utterances to the Wit.ai app.
# NOTE(review): the 155 cut-off is kept from the original cell; its reason is
# not visible here (possibly an API batch or rate limit) -- confirm.
# The body is serialized with json.dumps because str() on a list of dicts
# produces a Python repr (single quotes, None), which is not valid JSON.
rsp = requests.post(WIT_API_HOST + '/utterances', headers=HEADERS, data=json.dumps(trainJson[0:155]))
rsp.json()

{'sent': True, 'n': 155}

In [None]:
time.sleep(180) # Pause for 180 seconds so the uploaded utterances can be trained before the app is queried.

In [554]:
#getResponse('predict the election', 2)

{'text': 'predict the election',
 'intents': [{'id': '1728532153951407',
   'name': 'task_class',
   'confidence': 0.0328},
  {'id': '184248982902938', 'name': 'task_reg', 'confidence': 0.0189}],
 'entities': {},
 'traits': {}}

In [None]:
# Query Wit.ai with every test utterance and report whether the top predicted
# intent equals the expected one.
for testExample in testJson:
    # Examples without an 'intent' key are expected to yield no intent at all.
    trueIntent = testExample.get('intent', None)

    rspJson = getResponse(testExample['text'], 2)
    predictedIntents = rspJson['intents']
    # Only index into the predictions when there are any: an empty list is the
    # legitimate "no intent" answer and previously raised IndexError here.
    topPrediction = predictedIntents[0]['name'] if predictedIntents else None

    # With no predictions topPrediction is None, so this is a match exactly
    # when no intent was expected; otherwise the top name must equal the label.
    match = (topPrediction == trueIntent)

    print(rspJson)
    print()
    print(match)
    print()
    print()

In [551]:
# Count the utterances currently stored in the Wit.ai app.
storedQueries = getQueries()
len(storedQueries)
# Optional inspection: list the text of every utterance labeled 'task_reg'.
# for query in getQueries():
#     if (query.get('intent') and query.get('intent').get('name') == 'task_reg'):
#         print(query.get('text'))

269

In [None]:
# Fetch everything currently stored in the app and delete it (asks for confirmation).
queriesToDelete = getQueries()
delQueries(queriesToDelete)