In [None]:
import requests
import json
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Base URL of the Wit.ai HTTP API; endpoint paths such as '/utterances' are appended to it.
WIT_API_HOST = 'https://api.wit.ai'
# Version string embedded in the 'accept' header of every request
# (presumably a YYYYMMDD API version date -- confirm against the Wit.ai docs).
WIT_API_VERSION = '20200513'

In [None]:
# Load the API token from a local file so the secret is not hardcoded in the notebook.
with open('token.txt') as file:
    # Strip surrounding whitespace: text files usually end with a newline, and a
    # newline inside the Authorization header value makes the request invalid.
    TOKEN = file.read().strip()

# Headers sent with every request: bearer authentication plus the versioned
# media type built from WIT_API_VERSION.
HEADERS = {
    'authorization': 'Bearer ' + TOKEN,
    'accept': 'application/vnd.wit.' + WIT_API_VERSION + '+json'
}

In [None]:
def getQueries(limit=1000):
    """Fetch utterances from the '/utterances' endpoint.

    Parameters
    ----------
    limit : int, optional
        Value sent as the ``limit`` query parameter. Defaults to 1000, the
        value that used to be hard-coded. (The allowed range is set by the
        API -- check the Wit.ai reference before raising it.)

    Returns
    -------
    The decoded JSON body of the response, returned as-is. No HTTP status
    check is performed, so an error payload is returned rather than raised.
    """
    rsp = requests.get(WIT_API_HOST + '/utterances', headers=HEADERS, params={'limit': limit})
    return rsp.json()

In [None]:
def delQueries(queries):
    """Delete the given utterances after an interactive confirmation.

    Parameters
    ----------
    queries : list of dict
        Utterance records (e.g. the result of ``getQueries()``). Only keys
        starting with ``'text'`` are forwarded in the delete request.

    Behaviour
    ---------
    * Empty/falsy ``queries``: prints a message and returns without any request.
    * Prompts with ``input``; anything other than the exact string ``'yes'``
      aborts without any request.
    * Otherwise prints the records, issues a DELETE to '/utterances' and prints
      the decoded JSON response.

    Returns None in every case.
    """
    if not queries:
        print('empty list, aborting')
        return
    confirm = input('CONFIRM DELETION: ')
    if confirm != 'yes':
        print('aborting delete')
        return
    print('deleting the following. \n')
    print(queries)

    # Reduce each record to its text* fields before sending it.
    queriesDelete = [{k: v for k, v in query.items() if k.startswith('text')} for query in queries]
    # Send real JSON. str(list_of_dicts) yields a Python repr (single quotes,
    # unescaped apostrophes), which is not valid JSON. `json=` serialises the
    # payload and sets the Content-Type header to application/json.
    rsp = requests.delete(WIT_API_HOST + '/utterances', headers=HEADERS, json=queriesDelete)

    print(rsp.json())

In [None]:
# [a,b,c]
def getExamples(filename):
    """Read one example per line from a text file.

    The file is read directly instead of through ``np.loadtxt`` because:
    * recent NumPy releases reject a newline as ``delimiter``;
    * ``loadtxt`` treats ``#`` as a comment marker and would cut example text;
    * ``loadtxt`` returns a 0-d array for a one-line file.

    Parameters
    ----------
    filename : str or path-like
        Path of a UTF-8 text file with one example per line.

    Returns
    -------
    numpy.ndarray
        Always a 1-D array of strings (possibly empty). Surrounding whitespace
        is stripped from each line and blank lines are skipped.
    """
    with open(filename, encoding='utf-8') as handle:
        stripped = (line.strip() for line in handle)
        examples = [line for line in stripped if line]
    return np.array(examples, dtype=str)

In [None]:
# [[a,x],[b,x],[c,x]]
def labelExamples(examples, label):
    """Pair every example with the same label.

    Parameters
    ----------
    examples : array-like of str
        The examples; a NumPy array, a plain list/tuple, or a single scalar
        string are all accepted and flattened to one example per row.
    label : str
        Label placed in the second column of every row.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2)`` whose rows are ``[example, label]``;
        ``(0, 2)`` when there are no examples.
    """
    # asarray + reshape handles lists and 0-d arrays, which have no usable
    # `.shape[0]`; dtype=str keeps an empty input from defaulting to float.
    column = np.asarray(examples, dtype=str).reshape(-1, 1)
    labels = np.full((column.shape[0], 1), label)
    return np.concatenate((column, labels), axis=1)

In [193]:
# [ task1 [[a,x],[b,x],[c,x]] , task2 [[d,y],[e,y],[f,y]] ]
def getLabeledExamples(filenames, labels):
    """Load each file and tag its examples with the label at the same position.

    ``filenames[i]`` is read with ``getExamples`` and labeled with ``labels[i]``
    via ``labelExamples``. Returns a list with one labeled result per file, in
    the order of ``filenames``. Raises ``IndexError`` if ``labels`` is shorter
    than ``filenames``.
    """
    return [
        labelExamples(getExamples(path), labels[position])
        for position, path in enumerate(filenames)
    ]

In [216]:
# [ train: task1 + task2 , test: task1 + task2 ]
def formTrainTest(data, test_size=0.5, random_state=None):
    """Split every per-task dataset and stack the pieces into one train and one test set.

    Each dataset is split on its own, so every task contributes to both the
    train and the test set; the pieces are then concatenated in task order.

    Parameters
    ----------
    data : iterable of array-like
        One ``(n_i, 2)`` array of ``[example, label]`` rows per task.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. Defaults to 0.5, the value that
        used to be hard-coded.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Pass an int for a reproducible
        split; the default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    (train, test) : tuple of numpy.ndarray
        Row-wise concatenation of the per-task train parts and test parts.
        For empty ``data`` both are empty arrays of shape ``(0, 2)``.
    """
    datasets = list(data)
    if not datasets:
        # np.concatenate rejects an empty sequence; keep the original result.
        return np.empty((0, 2)), np.empty((0, 2))

    splits = [
        train_test_split(dataset, test_size=test_size, random_state=random_state)
        for dataset in datasets
    ]
    # Concatenate once rather than re-copying a growing array on every iteration.
    train = np.concatenate([train_part for train_part, _ in splits])
    test = np.concatenate([test_part for _, test_part in splits])
    return train, test

In [217]:
# One labeled array per task file: 'class.txt' -> 'task_class', 'reg.txt' -> 'task_reg'.
data = getLabeledExamples(
    filenames=['class.txt', 'reg.txt'],
    labels=['task_class', 'task_reg'],
)
# Last expression of the cell: displays the (train, test) tuple.
formTrainTest(data)

(array([['detect if a patient has bronchitis from xrays', 'task_class'],
        ['label news as real or fake', 'task_class'],
        ['be able to tell whether a string is a phone number or not',
         'task_class'],
        ['classifies spam texts', 'task_class'],
        ['build a model that knows whether a medical procedure will succeed',
         'task_class'],
        ['decide what kind of animal this is', 'task_class'],
        ['determine whether a picture was drawn by a gan', 'task_class'],
        ['label photos by if they are authentic or not', 'task_class'],
        ['predict whether trump will get re-elected', 'task_class'],
        ['phishing detection', 'task_class'],
        ['classify between different types of groceries', 'task_class'],
        ['determines the level of security of a codebase', 'task_class'],
        ['categorize car pics between sedan or minivan', 'task_class'],
        ['detect fraud', 'task_class'],
        ['automated detection of safe drinking

In [None]:
#getQueries()

In [None]:
#delQueries(getQueries())