Loading data into representations that Pandas can handle.

In [5]:
import pandas as pd
import json

# Read the dataset splits. The encoding is passed explicitly so the file is
# decoded the same way on every platform instead of depending on the locale's
# default encoding.
with open('data_full.json', encoding='utf-8') as json_file:
    data_dict = json.load(json_file)

# In-scope splits.
# NOTE(review): each split is presumably a list of [query, intent] pairs, given
# the list concatenation and the two-column frames built below -- confirm.
train = data_dict['train']
val = data_dict['val']
test = data_dict['test']

# Out-of-scope ('oos') splits.
oos_train = data_dict['oos_train']
oos_val = data_dict['oos_val']
oos_test = data_dict['oos_test']


# Train and validation are merged into a single training frame; test stays separate.
train_val_df = pd.DataFrame(train + val, columns=['query', 'intent'])
test_df = pd.DataFrame(test, columns=['query', 'intent'])

# Same frames with the out-of-scope examples appended.
train_val_oos_df = pd.DataFrame(train + val + oos_train + oos_val, columns=['query', 'intent'])
test_oos_df = pd.DataFrame(test + oos_test, columns=['query', 'intent'])

# Collecting intent->domain mappings for later analysis.
with open('domains.json', encoding='utf-8') as json_file:
    domain_dict = json.load(json_file)
# domain_dict maps each domain to a list of its intents; invert it so that every
# intent points back to its domain.
inv_domain_dict = {
    intent: domain
    for domain, intents in domain_dict.items()
    for intent in intents
}

Preparing datasets for training, merging train and validation datasets.

In [6]:
# In-scope data: queries and intent labels, plus domain labels derived from each
# intent through the intent->domain lookup.
sents_train, y_train_intents = train_val_df['query'], train_val_df['intent']
y_train_domains = y_train_intents.map(inv_domain_dict)

sents_test, y_test_intents = test_df['query'], test_df['intent']
y_test_domains = y_test_intents.map(inv_domain_dict)

# Creating a separate dataset that includes 'out-of-scope' as an other intent.
# The extended lookup is a copy, so the in-scope mapping above stays untouched;
# 'oos' simply maps to itself.
inv_domain_dict_oos = {**inv_domain_dict, 'oos': 'oos'}

sents_train_oos = train_val_oos_df['query']
sents_test_oos = test_oos_df['query']
y_train_oos_domains = train_val_oos_df['intent'].map(inv_domain_dict_oos)
y_test_oos_domains = test_oos_df['intent'].map(inv_domain_dict_oos)

Feature extraction using bag of words and TF-IDF.

In [7]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

def _fit_dense_features(vectorizer, train_texts, test_texts):
    """Fit `vectorizer` on the training texts and return dense (train, test) matrices.

    The vocabulary is learned from `train_texts` only; `test_texts` is merely
    transformed with the already-fitted vectorizer. Both results are converted
    from the vectorizer's output to dense arrays via `.toarray()`.
    """
    train_matrix = vectorizer.fit_transform(train_texts).toarray()
    test_matrix = vectorizer.transform(test_texts).toarray()
    return train_matrix, test_matrix


# Bag-of-words counts on the in-scope data.
count_vectorizer = CountVectorizer()
X_train_BOF, X_test_BOF = _fit_dense_features(count_vectorizer, sents_train, sents_test)

# TF-IDF weights on the in-scope data.
tfidf_vectorizer = TfidfVectorizer()
X_train_TFIDF, X_test_TFIDF = _fit_dense_features(tfidf_vectorizer, sents_train, sents_test)

# Bag-of-words counts on the data that also contains out-of-scope queries; this
# uses its own vectorizer, fitted on the OOS-augmented training queries.
count_vectorizer_oos = CountVectorizer()
X_train_oos_BOF, X_test_oos_BOF = _fit_dense_features(
    count_vectorizer_oos, sents_train_oos, sents_test_oos
)

Function for evaluating classifier performance by printing scores for 4 different metrics.

In [9]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

def print_res(classifier, X_test, y_test):
    """Print accuracy, precision, recall and F1 of `classifier` on a test set.

    Precision, recall and F1 are macro-averaged. Nothing is returned; the four
    scores are written to standard output, one per line.

    Parameters:
        classifier: fitted model exposing `predict(X_test)`.
        X_test: feature matrix passed to `classifier.predict`.
        y_test: true labels that the predictions are compared against.
    """
    predictions = classifier.predict(X_test)

    # All four scores are computed before anything is printed.
    scores = (
        ("Accuracy:", accuracy_score(y_test, predictions)),
        ("Precision:", precision_score(y_test, predictions, average="macro")),
        ("Recall:", recall_score(y_test, predictions, average="macro")),
        ("F1:", f1_score(y_test, predictions, average="macro")),
    )

    for label, value in scores:
        print(label, value)

Some preliminary experiments showed that among the basic models, logistic regression is the fastest at training while retaining pretty high performance (according to the scores). Two tasks (intent and intent-domain prediction), two feature extraction methods (Bag of Words and TF-IDF) and two datasets (without or with out-of-scope entries) were tested during the experiments.

In [10]:
from sklearn.linear_model import LogisticRegression

Intent prediction, bag of words.

In [6]:
# Intent classifier on bag-of-words features, using LogisticRegression's default
# settings. `fit` returns the estimator itself, so construction and training
# can be chained.
clf_log_int_BOF = LogisticRegression().fit(X_train_BOF, y_train_intents)
print_res(clf_log_int_BOF, X_test_BOF, y_test_intents)

Accuracy: 0.908
Precision: 0.9124231868458526
Recall: 0.9080000000000001
F1: 0.9078813647351022


Domain prediction, bag of words.

In [11]:
# Domain classifier on bag-of-words features, with the solver's iteration limit
# set to 300. `fit` returns the estimator itself, so the calls can be chained.
clf_log_dom_BOF = LogisticRegression(max_iter=300).fit(X_train_BOF, y_train_domains)
print_res(clf_log_dom_BOF, X_test_BOF, y_test_domains)

Accuracy: 0.954
Precision: 0.9545715463290531
Recall: 0.9540000000000001
F1: 0.9540870037465297


Intent prediction, TF-IDF.

In [13]:
# Intent classifier on TF-IDF features, using LogisticRegression's default
# settings. `fit` returns the estimator itself, so construction and training
# can be chained.
clf_log_int_TFIDF = LogisticRegression().fit(X_train_TFIDF, y_train_intents)
print_res(clf_log_int_TFIDF, X_test_TFIDF, y_test_intents)

Accuracy: 0.9004444444444445
Precision: 0.9055589994843706
Recall: 0.9004444444444446
F1: 0.8998474385369699


Domain prediction, TF-IDF.

In [15]:
# Domain classifier on TF-IDF features, with the solver's iteration limit set
# to 300. `fit` returns the estimator itself, so the calls can be chained.
clf_log_dom_TFIDF = LogisticRegression(max_iter=300).fit(X_train_TFIDF, y_train_domains)
print_res(clf_log_dom_TFIDF, X_test_TFIDF, y_test_domains)

Accuracy: 0.9535555555555556
Precision: 0.9542313025907816
Recall: 0.9535555555555556
F1: 0.9536453047160508


Domain prediction with OOS, Bag of Words.

In [12]:
# Domain classifier trained on the OOS-augmented bag-of-words features, so 'oos'
# is one of the labels it can predict. Iteration limit set to 300; `fit`
# returns the estimator itself, so the calls can be chained.
clf_oos_log_dom_BOF = LogisticRegression(max_iter=300).fit(
    X_train_oos_BOF, y_train_oos_domains
)
print_res(clf_oos_log_dom_BOF, X_test_oos_BOF, y_test_oos_domains)

Accuracy: 0.8029090909090909
Precision: 0.8189435571270347
Recall: 0.8780101010101009
F1: 0.812591250960203


In [None]:
# from sklearn.naive_bayes import MultinomialNB

# clf = MultinomialNB()
# clf.fit(X_train_BOF, y_train_intents)

Simple code to test one of the models on user-given data. (With Bag of Words feature extraction.)

In [15]:
# Read one query from the user and wrap it in a list: the vectorizers expect an
# iterable of documents, not a bare string.
sentence = input("Write in a sentence:")
single_query = [sentence]

# Each classifier must be fed features from the vectorizer it was trained with.
sentence_vec = count_vectorizer.transform(single_query).toarray()
sentence_vec_oos = count_vectorizer_oos.transform(single_query).toarray()

print("Sentence given: ", sentence)

# `predict` returns one label per input row; there is exactly one row here.
domain_in_scope = clf_log_dom_BOF.predict(sentence_vec)[0]
print("Domain prediction: ", domain_in_scope)

domain_with_oos = clf_oos_log_dom_BOF.predict(sentence_vec_oos)[0]
print("Domain prediction (oos included): ", domain_with_oos)

Sentence given:  Can you decide if this question belongs to oos or not?
Domain prediction:  meta
Domain prediction (oos included):  meta
