In [1]:
import json
import spacy

In [2]:
def load_test_data(root='data/testing_set.json'):
    """Load the test set and build a one-hot category dict for every sentence.

    The JSON file must contain a list of objects, each providing an
    ``"intent"`` and a ``"sentence"`` key.

    Args:
        root: Path of the JSON file to read (opened as UTF-8).

    Returns:
        A ``(texts, cats)`` pair. ``texts`` is a tuple with the sentences in
        file order. ``cats`` is a list, in the same order, of dicts mapping
        every intent found in the file to ``True`` for the sentence's own
        intent and to ``False`` for all the others. When the file holds no
        samples the result is ``((), [])``.
    """
    with open(root, encoding="utf8") as json_file:
        data = json.load(json_file)

    if not data:
        # Nothing to unpack: return empty containers instead of failing.
        return (), []

    # Every distinct intent becomes a key of each per-sentence category dict.
    intents = {sample["intent"] for sample in data}
    texts = tuple(sample["sentence"] for sample in data)

    cats = []
    for sample in data:
        one_hot = dict.fromkeys(intents, False)
        one_hot[sample["intent"]] = True
        cats.append(one_hot)
    return texts, cats

def evaluate(tokenizer, textcat, texts, cats, threshold=0.5):
    """Compute precision, recall and F-score of a text categorizer.

    Each text is tokenized, run through ``textcat.pipe`` and its predicted
    scores (``doc.cats``) are compared label by label against the gold
    annotations. Labels that are absent from the gold dict are ignored.

    Args:
        tokenizer: Callable turning a text into a document object.
        textcat: Object whose ``pipe(docs)`` yields documents exposing a
            ``cats`` mapping of label -> score.
        texts: Iterable of raw texts.
        cats: Sequence of gold dicts (label -> truth value), indexed in the
            same order as ``texts``.
        threshold: Minimum predicted score for a label to count as a positive
            prediction. Gold values are always binarized at 0.5.

    Returns:
        Dict with the keys ``"textcat_p"``, ``"textcat_r"`` and
        ``"textcat_f"``.
    """
    docs = (tokenizer(text) for text in texts)
    tp = 0.0  # True positives
    # The false counters start at a tiny epsilon so the divisions below can
    # never hit zero, even when nothing was predicted or nothing was gold.
    fp = 1e-8  # False positives
    fn = 1e-8  # False negatives
    for i, doc in enumerate(textcat.pipe(docs)):
        gold = cats[i]
        for label, score in doc.cats.items():
            if label not in gold:
                continue
            predicted = score >= threshold
            expected = gold[label] >= 0.5
            if predicted and expected:
                tp += 1.0
            elif predicted:
                fp += 1.0
            elif expected:
                fn += 1.0
            # True negatives do not enter precision or recall, so they are
            # not counted.
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    if (precision + recall) == 0:
        f_score = 0.0
    else:
        f_score = 2 * (precision * recall) / (precision + recall)
    return {"textcat_p": precision, "textcat_r": recall, "textcat_f": f_score}

In [3]:
# Load the saved pipeline from the 'model_save' directory.
nlp = spacy.load('model_save')

# Fetch the text categorizer component by its pipeline name.
textcat = nlp.get_pipe('textcat')

test_texts, test_cats = load_test_data()

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'textcat']
with nlp.disable_pipes(*other_pipes):  # evaluate with only textcat enabled
    print('{:^5}\t{:^5}\t{:^5}'.format('P', 'R', 'F'))

    # Score the categorizer on the test set and print precision, recall and F-score.
    scores = evaluate(nlp.tokenizer, textcat, test_texts, test_cats)
    print('{0:.3f}\t{1:.3f}\t{2:.3f}'.format(scores['textcat_p'], scores['textcat_r'], scores['textcat_f']))

  P  	  R  	  F  
0.826	0.824	0.825
