In [1]:
#importing packages
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import time
import torchvision.transforms as transforms

In [2]:
#loading data
with open('data_full.json') as file:
    oos = json.load(file)

In [3]:
#listing files
oos.keys()

dict_keys(['oos_val', 'val', 'train', 'oos_test', 'test', 'oos_train'])

In [4]:
#assembling files into pandas dataframes
temp = oos['train']
train = pd.DataFrame(temp).rename(columns={0:"query", 1:"domain"})
temp = oos['oos_test']
oos_test = pd.DataFrame(temp).rename(columns={0:"query", 1:"domain"})
temp = oos['test']
test = pd.DataFrame(temp).rename(columns={0:"query", 1:"domain"})
temp = oos['oos_train']
oos_train = pd.DataFrame(temp).rename(columns={0:"query", 1:"domain"})

In [5]:
#inspecting dataframes
print(train.head())
print(oos_test.head())
print(test.head())
print(oos_train.head())


                                               query     domain
0  what expression would i use to say i love you ...  translate
1  can you tell me how to say 'i do not speak muc...  translate
2  what is the equivalent of, 'life is good' in f...  translate
3  tell me how to say, 'it is a beautiful morning...  translate
4  if i were mongolian, how would i say that i am...  translate
                                               query domain
0                 how much has the dow changed today    oos
1  how many prime numbers are there between 0 and...    oos
2  can you tell me how to solve simple algebraic ...    oos
3            can you dim the brightness of my screen    oos
4  what is the account number to the internet ser...    oos
                                 query     domain
0     how would you say fly in italian  translate
1    what's the spanish word for pasta  translate
2  how would they say butter in zambia  translate
3       how do you say fast in spanish  translate
4  wha

In [6]:
#our training protocol will use the 'out of scope' training data to train for this class, so appending the data to one dataframe
train = train.append(oos_train, ignore_index=True)

In [7]:
#defining the vectorizer that will be used for this dataset
vectorizer = TfidfVectorizer()

In [8]:
#fitting the TFIDF vectorizer to the training data queries and transforming it
X = vectorizer.fit_transform(train['query']).toarray()

In [9]:
# checking the size of the array
X.shape

(15100, 5146)

In [10]:
#inspecting vectorizer features.
print(vectorizer.get_feature_names()[:20])
print(vectorizer.get_feature_names()[-20:])

['00', '000', '005', '00am', '00pm', '01', '02', '03', '05', '098098', '10', '100', '1000', '10000', '100000', '10294', '104', '10500', '10am', '10kg']
['zales', 'zander', 'zazie', 'zealand', 'zebras', 'zen', 'zenith', 'zepher', 'zephers', 'zeppelin', 'zesty', 'zeus', 'zion', 'zippy', 'zippys', 'ziti', 'zombie', 'zone', 'zoo', 'zulu']


These aren't all words, but this reflects the fact that inputs will not always be words. Equally some are variations (zippy/zippys) but these will be preserved to minimise the work done in preprocessing input data during deployment.

In [11]:
#making dataframe of X
X_df = pd.DataFrame(X)

In [12]:
#shape of X dataframe
X_df.shape

(15100, 5146)

In [13]:
#shape of initial training dataframe
train.shape

(15100, 2)

In [14]:
#joining training dataframe to vectorized words
train_vec = train.join(X_df)

In [15]:
#finding shape of joined dataframe
train_vec.shape

(15100, 5148)

In [16]:
#dropping the text queries
train_vec = train_vec.drop('query', axis=1)

In [17]:
#Inspecting first few rows of dataframe
train_vec.head()

Unnamed: 0,domain,0,1,2,3,4,5,6,7,8,...,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145
0,translate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,translate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,translate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,translate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,translate,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
#inspecting values of a randomly chosen column to check that not all entries are 0.0
train_vec[5136].value_counts().head(10)

0.000000    15095
0.744097        1
0.731775        1
0.694293        1
0.577886        1
0.698728        1
Name: 5136, dtype: int64

In [19]:
#inspecting unique domains
train_vec['domain'].unique()

array(['translate', 'transfer', 'timer', 'definition', 'meaning_of_life',
       'insurance_change', 'find_phone', 'travel_alert', 'pto_request',
       'improve_credit_score', 'fun_fact', 'change_language', 'payday',
       'replacement_card_duration', 'time', 'application_status',
       'flight_status', 'flip_coin', 'change_user_name',
       'where_are_you_from', 'shopping_list_update', 'what_can_i_ask_you',
       'maybe', 'oil_change_how', 'restaurant_reservation', 'balance',
       'confirm_reservation', 'freeze_account', 'rollover_401k',
       'who_made_you', 'distance', 'user_name', 'timezone', 'next_song',
       'transactions', 'restaurant_suggestion', 'rewards_balance',
       'pay_bill', 'spending_history', 'pto_request_status',
       'credit_score', 'new_card', 'lost_luggage', 'repeat', 'mpg',
       'oil_change_when', 'yes', 'travel_suggestion', 'insurance',
       'todo_list_update', 'reminder', 'change_speed', 'tire_pressure',
       'no', 'apr', 'nutrition_info', 'c

In [20]:
#constructing a mapping dictionary for domains to tranform them into nubmers
y_dic = {}
domain = 0
for item in train_vec['domain'].unique():
    y_dic[item] = domain
    domain += 1

In [21]:
#shuffling training data
train_vec = shuffle(train_vec, random_state=0)
train_vec.head()

Unnamed: 0,domain,0,1,2,3,4,5,6,7,8,...,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145
8218,do_you_have_pets,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8136,routing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
577,insurance_change,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7446,todo_list,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3978,pto_request_status,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
#creating a column of target values
train_vec['y'] = train_vec['domain'].replace(y_dic)
#removing the text domains
train_vec = train_vec.iloc[:, 1:]
#defining x and y data
train_x = train_vec.iloc[:,:-1]
train_y = train_vec.iloc[:,-1]

In [23]:
#Transforming queries from other datasets to vectors
Xtest = vectorizer.transform(test['query']).toarray()
Xto = vectorizer.transform(oos_test['query']).toarray()

In [24]:
#turning arrays to dataframes
Xtest_df = pd.DataFrame(Xtest)
Xto_df = pd.DataFrame(Xto)

In [25]:
#creating vector datasets for testing set (excluding out of scope values)
test_vec = pd.concat([test, Xtest_df], axis=1)
test_vec['y']= test_vec['domain'].replace(y_dic)
test_x = test_vec.iloc[:,2:-1]
test_y = test_vec.iloc[:,-1]

In [26]:
#creating vector datasets for out of scope testing set
test_oos_vec = pd.concat([oos_test, Xto_df], axis=1)
test_oos_vec['y'] = test_oos_vec['domain'].replace(y_dic)
test_oos_x = test_oos_vec.iloc[:,2:-1]
test_oos_y = test_oos_vec.iloc[:,-1]

In [27]:
#importing torch packages
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils


In [28]:
#importing svc packages
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [29]:
#re-defining optimal parameters for MLP
class CLINCModule(nn.Module):
    def __init__(
            self,
            input_dim=5146, #vocab size
            hidden_dim=800, #number of hidden neurons
            output_dim=151, #number of output classes
            dropout=0.75 #dropout rate
    ):
        super(CLINCModule, self).__init__()
        self.dropout = nn.Dropout(dropout)

        self.hidden = nn.Linear(input_dim, hidden_dim)
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, X, **kwargs):
        X = F.relu(self.hidden(X)) #ReLU activation function
        X = self.dropout(X)
        X = F.softmax(self.output(X), dim=-1) #softmax activation function
        return X

In [30]:
#importing skorch modules
from skorch import NeuralNetClassifier
from skorch.callbacks import EarlyStopping
from skorch.dataset import Dataset
from skorch.helper import predefined_split

In [31]:
#transforming data to arrays & tensors

test_x = np.array(test_x)
tensor_test_x = torch.tensor(test_x).float()
test_oos_x = np.array(test_oos_x)
tensor_test_oos_x = torch.tensor(test_oos_x).float()

In [32]:
#transforming targets to arrays
test_y = np.array(test_y)
test_oos_y = np.array(test_oos_y)

In [33]:
#loading models
import pickle
with open('oklsvc.pkl', 'rb') as f: #import best svm model
    lsvc = pickle.load(f)
with open('okmlp.pkl', 'rb') as f: #import best mlp model
    mlp = pickle.load(f)

In [34]:
#predicting in scope test data
time1 = time.time()
ls_labels = lsvc.predict(test_x)
time2 = time.time()
test_vec['ls_labels'] = ls_labels
ls_acc= accuracy_score(ls_labels, test_y)
print('linearSVC accuracy')
print(ls_acc)
#predicting out of scope data
time3 = time.time()
ols_labels = lsvc.predict(test_oos_x)
time4 = time.time()
ols_acc= accuracy_score(ols_labels, test_oos_y)
test_oos_vec['ls_labels'] = ols_labels
print('linearSVC OOS accuracy')
print(ols_acc)
svc_time = (time2-time1)+(time4-time3)
print('svc prediction time (both sets)')
print(svc_time)


time5 = time.time()
m_labels = mlp.predict(tensor_test_x)
time6 = time.time()
m_acc= accuracy_score(m_labels, test_y)
test_vec['mlp_labels'] = m_labels
print('mlp accuracy')
print(m_acc)
time7 = time.time()
om_labels = mlp.predict(tensor_test_oos_x)
time8 = time.time()
om_acc= accuracy_score(om_labels, test_oos_y)
test_oos_vec['mlp_labels'] = om_labels
print('mlp OOS accuracy')
print(om_acc)
mlp_time = (time6-time5)+(time8-time7)
print('mlp prediction time (both sets)')
print(mlp_time)


linearSVC accuracy
0.9117777777777778
linearSVC OOS accuracy
0.148
svc prediction time (both sets)
0.22853302955627441
mlp accuracy
0.9133333333333333
mlp OOS accuracy
0.156
mlp prediction time (both sets)
0.7070567607879639


Following cells are for inspection of test results

In [35]:
#inverse mapping to enable human view of output
label_to_domain = {label : domain for domain, label in y_dic.items()}

In [36]:
test_output_text = test_vec.iloc[:,[0,1,-3,-2,-1]] #selecting only relevant columns, dropping word vectors
test_output_text['ls_labels'] = test_output_text['ls_labels'].map(label_to_domain) #mapping svc output to class names
test_output_text['mlp_labels'] = test_output_text['mlp_labels'].map(label_to_domain) #mapping mlp output to class names
test_output_text = test_output_text.drop('y', axis=1) #dropping numerical target
print(test_output_text.head(10)) #inspecting

#as above but for out of scope queries
test_oos_output_text = test_oos_vec.iloc[:,[0,1,-3,-2,-1]]
test_oos_output_text['ls_labels'] = test_oos_output_text['ls_labels'].map(label_to_domain)
test_oos_output_text['mlp_labels'] = test_oos_output_text['mlp_labels'].map(label_to_domain)
test_oos_output_text = test_oos_output_text.drop('y', axis=1)
print(test_oos_output_text.head(10))




                                         query     domain   ls_labels  \
0             how would you say fly in italian  translate   translate   
1            what's the spanish word for pasta  translate   translate   
2          how would they say butter in zambia  translate   translate   
3               how do you say fast in spanish  translate   translate   
4          what's the word for trees in norway  translate         oos   
5         how does one say wonderful in german  translate   translate   
6              how do they say tacos in mexico  translate   translate   
7           how would one say cruiser in china  translate   translate   
8    what's the french word you use for potato  translate  definition   
9  what would the word for grass be in finland  translate   translate   

        mlp_labels  
0        translate  
1        translate  
2        translate  
3        translate  
4              oos  
5        translate  
6        translate  
7        translate  
8  chan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_output_text['ls_labels'] = test_output_text['ls_labels'].map(label_to_domain) #mapping svc output to class names
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_output_text['mlp_labels'] = test_output_text['mlp_labels'].map(label_to_domain) #mapping mlp output to class names
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#

In [37]:
#selecting the items that either classifier failed to classify
test_failures = test_output_text[(test_output_text['domain']!=test_output_text['ls_labels']) | (test_output_text['domain']!=test_output_text['mlp_labels'])]
test_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
4,what's the word for trees in norway,translate,oos,oos
8,what's the french word you use for potato,translate,definition,change_language
9,what would the word for grass be in finland,translate,translate,spelling
25,english to spanish for dog,translate,change_language,translate
28,dog in spanish,translate,change_language,translate
53,repeat what the weather will be like,transfer,weather,weather
65,who set up the numbers for it,timer,damaged_card,smart_home
92,what is the meaning of incandescent,definition,definition,meaning_of_life
96,what is the definiton of auspicious,definition,calculator,calculator
106,can you tell me the meaning of fluctuate,definition,definition,meaning_of_life


In [38]:
#selecting the items that only SVM failed to classify
test_SVC_failures = test_output_text[(test_output_text['domain']!=test_output_text['ls_labels']) & (test_output_text['domain']==test_output_text['mlp_labels'])]
test_SVC_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
25,english to spanish for dog,translate,change_language,translate
28,dog in spanish,translate,change_language,translate
171,what is the procedure for signing up for a new...,insurance_change,pin_change,insurance_change
256,can you help me plan a vacation,pto_request,pto_balance,pto_request
258,is it ok if i use some of my pto on may 24th t...,pto_request,ingredient_substitution,pto_request
269,vacation request please,pto_request,pto_request_status,pto_request
270,how can i increase my credit score,improve_credit_score,credit_score,improve_credit_score
279,give me some ideas for boosting my credit score,improve_credit_score,credit_score,improve_credit_score
287,can you tell me some strategies for raising my...,improve_credit_score,credit_score,improve_credit_score
349,i'd like to use you using russian,change_language,international_fees,change_language


In [39]:
#selecting the items that only MLP failed to classify
test_MLP_failures = test_output_text[(test_output_text['domain']==test_output_text['ls_labels']) & (test_output_text['domain']!=test_output_text['mlp_labels'])]
test_MLP_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
9,what would the word for grass be in finland,translate,translate,spelling
92,what is the meaning of incandescent,definition,definition,meaning_of_life
106,can you tell me the meaning of fluctuate,definition,definition,meaning_of_life
112,i heard some woman say she was going to yerd m...,definition,definition,repeat
115,what's the meaning of naff,definition,definition,meaning_of_life
239,can people safely travel to cambodia,travel_alert,travel_alert,travel_suggestion
245,how is a vacation request done,pto_request,pto_request,pto_request_status
374,when's my next pay day,payday,payday,next_holiday
383,what day can i expect to receive payment on,payday,payday,bill_due
406,what is the shipping time for my card,replacement_card_duration,replacement_card_duration,time


In [40]:
#selecting the items that both classifiers failed to classify
mutual_failures = test_output_text[(test_output_text['domain']!=test_output_text['ls_labels']) & (test_output_text['domain']!=test_output_text['mlp_labels'])]
mutual_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
4,what's the word for trees in norway,translate,oos,oos
8,what's the french word you use for potato,translate,definition,change_language
53,repeat what the weather will be like,transfer,weather,weather
65,who set up the numbers for it,timer,damaged_card,smart_home
96,what is the definiton of auspicious,definition,calculator,calculator
143,is there a reason beyond biology about why hum...,meaning_of_life,account_blocked,account_blocked
148,what's the answer to it all,meaning_of_life,maybe,maybe
170,i need to sign up for a new allstate plan,insurance_change,new_card,new_card
172,how do i sign up for a new allstatedplan,insurance_change,new_card,new_card
227,what are the travel conditions for haiti,travel_alert,weather,weather


In [41]:
#selecting the items that both classified correctly
mutuals = test_output_text[(test_output_text['domain']==test_output_text['ls_labels']) & (test_output_text['domain']==test_output_text['mlp_labels'])]
mutuals.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
0,how would you say fly in italian,translate,translate,translate
1,what's the spanish word for pasta,translate,translate,translate
2,how would they say butter in zambia,translate,translate,translate
3,how do you say fast in spanish,translate,translate,translate
5,how does one say wonderful in german,translate,translate,translate
6,how do they say tacos in mexico,translate,translate,translate
7,how would one say cruiser in china,translate,translate,translate
10,how do you say please in french,translate,translate,translate
11,how would i say nice to meet you if i were rus...,translate,translate,translate
12,what is the right way to say excuse me in spanish,translate,translate,translate


In [42]:
#inspecting size of each of these reduced datasets
print(test_MLP_failures.shape)
print(test_SVC_failures.shape)
print(mutual_failures.shape)
print(mutuals.shape)

(82, 4)
(89, 4)
(308, 4)
(4021, 4)


In [43]:
##selecting the out of scope items that either classifier failed to classify
otest_failures = test_oos_output_text[(test_oos_output_text['domain']!=test_oos_output_text['ls_labels']) | (test_oos_output_text['domain']!=test_oos_output_text['mlp_labels'])]
otest_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
0,how much has the dow changed today,oos,income,date
1,how many prime numbers are there between 0 and...,oos,measurement_conversion,greeting
3,can you dim the brightness of my screen,oos,smart_home,w2
4,what is the account number to the internet ser...,oos,account_blocked,account_blocked
5,can you see a hdmi cord,oos,greeting,greeting
6,what veggies can i pair with mushrooms,oos,sync_device,sync_device
7,can you put the car in fuel efficient mode,oos,gas_type,gas_type
8,at what age can someone get a card,oos,how_old_are_you,how_old_are_you
9,please find today's most read stories from the...,oos,book_hotel,date
10,how do i get red wine out of a couch cushion,oos,ingredient_substitution,goodbye


In [44]:
#selecting the out of scope items that MLP failed to classify
otest_MLP_failures = test_oos_output_text[(test_oos_output_text['domain']==test_oos_output_text['ls_labels']) & (test_oos_output_text['domain']!=test_oos_output_text['mlp_labels'])]
otest_MLP_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
30,what is the population of south africa,oos,oos,vaccines
48,what is the best product for removing soap scum,oos,oos,thank_you
61,how do i make my android phone more secure,oos,oos,sync_device
78,which stocks gained the most today,oos,oos,date
87,can you tell me the score of the heat game,oos,oos,credit_score
102,open up internet browser,oos,oos,greeting
107,what are the highest-rated android phones,oos,oos,weather
112,how does my current htc phone compare to other...,oos,oos,sync_device
127,how long is the bank open until,oos,oos,distance
128,what are some good games for my android phone,oos,oos,find_phone


In [45]:
#selecting the out of scope items that SVM failed to classify
otest_SVC_failures = test_oos_output_text[(test_oos_output_text['domain']!=test_oos_output_text['ls_labels']) & (test_oos_output_text['domain']==test_oos_output_text['mlp_labels'])]
otest_SVC_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
39,is anything being newscasted about the threat ...,oos,account_blocked,oos
80,tell me about the earthquake in california tha...,oos,alarm,oos
83,is the bank open on mlk day,oos,change_ai_name,oos
124,how many moons does mars have,oos,rewards_balance,oos
133,how many people died in yesterday's storm,oos,jump_start,oos
152,what stores are at my local mall,oos,restaurant_reviews,oos
161,who has the cheapest insurance for my model ca...,oos,smart_home,oos
172,does centurytel offer any better plans than th...,oos,improve_credit_score,oos
177,what stores are in the local mall,oos,translate,oos
183,what are cars like mine selling for online,oos,car_rental,oos


In [46]:
#selecting the out of scope items that both classifiers failed to classify
omutual_failures = test_oos_output_text[(test_oos_output_text['domain']!=test_oos_output_text['ls_labels']) & (test_oos_output_text['domain']!=test_oos_output_text['mlp_labels'])]
omutual_failures.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
0,how much has the dow changed today,oos,income,date
1,how many prime numbers are there between 0 and...,oos,measurement_conversion,greeting
3,can you dim the brightness of my screen,oos,smart_home,w2
4,what is the account number to the internet ser...,oos,account_blocked,account_blocked
5,can you see a hdmi cord,oos,greeting,greeting
6,what veggies can i pair with mushrooms,oos,sync_device,sync_device
7,can you put the car in fuel efficient mode,oos,gas_type,gas_type
8,at what age can someone get a card,oos,how_old_are_you,how_old_are_you
9,please find today's most read stories from the...,oos,book_hotel,date
10,how do i get red wine out of a couch cushion,oos,ingredient_substitution,goodbye


In [47]:
#selecting the out of scope items that both models classified correctly
omutuals = test_oos_output_text[(test_oos_output_text['domain']==test_oos_output_text['ls_labels']) & (test_oos_output_text['domain']==test_oos_output_text['mlp_labels'])]
omutuals.head(10)

Unnamed: 0,query,domain,ls_labels,mlp_labels
2,can you tell me how to solve simple algebraic ...,oos,oos,oos
23,what's at movies,oos,oos,oos
24,what is the amount of blood needed to stay alive,oos,oos,oos
38,who won the patriots game last nightu,oos,oos,oos
53,do i have overdraft protection,oos,oos,oos
55,can you give me the most current market news,oos,oos,oos
60,open cnn websiteo,oos,oos,oos
64,which airports do southwest service around the...,oos,oos,oos
68,can you tell me the s&p average,oos,oos,oos
81,change color contrast,oos,oos,oos


In [48]:
#inspecting the shape of these datasets
print(otest_MLP_failures.shape)
print(otest_SVC_failures.shape)
print(omutual_failures.shape)
print(omutuals.shape)

(47, 4)
(55, 4)
(797, 4)
(101, 4)


In [49]:
#importing McNemar's test
from statsmodels.stats.contingency_tables import mcnemar

In [50]:
#creating arrays with the values found above, to enable McNemar's test
in_scope = [[308, 89], 
            [82, 4021]]
out_of_scope = [[797, 55],
               [47, 101]]

In [51]:
#performing McNemar's test
mcN_is = mcnemar(in_scope, exact=True)
mcN_oos = mcnemar(out_of_scope, exact=True)

In [52]:
print(mcN_is)

pvalue      0.646483344615315
statistic   82.0


In [53]:
print(mcN_oos)

pvalue      0.4884344645326324
statistic   47.0


Both of these p values are substantially greater than 0.05, indicating that the result is not statistically significant. That is, both classifiers have performed equally on the test data in terms of classification accuracy.

In [54]:
#creation of successful classification datasets, to enable inspection of which classes are least well classified
test_SVC_succ = test_output_text[(test_output_text['domain']==test_output_text['ls_labels'])]
test_MLP_succ = test_output_text[(test_output_text['domain']==test_output_text['mlp_labels'])]
otest_SVC_succ = test_oos_output_text[(test_oos_output_text['domain']!=test_oos_output_text['ls_labels'])]
otest_MLP_succ = test_oos_output_text[(test_oos_output_text['domain']!=test_oos_output_text['mlp_labels'])]

In [55]:
#to allow for inspection of examples poorly classified labels
omutual_failures[omutual_failures['mlp_labels']=='w2']

Unnamed: 0,query,domain,ls_labels,mlp_labels
3,can you dim the brightness of my screen,oos,smart_home,w2
100,can you tell me how to get gum out of my rug,oos,w2,w2
109,find articles about the protests in parisi,oos,book_hotel,w2
163,where can i list my vehicle for sell,oos,smart_home,w2
195,i need to find a black suit under $100,oos,book_flight,w2
204,can you check my voicemail,oos,confirm_reservation,w2
213,i would like to find a stylish suit for a party,oos,w2,w2
323,where is te lawrence,oos,where_are_you_from,w2
326,what's the most practiced form of mma in the w...,oos,w2,w2
337,how can i get help for my mental health,oos,w2,w2


In [56]:
#least well classified intents by each classifier, and intents most OOS misclassified to
print(test_SVC_succ['domain'].value_counts().tail(5))
print(test_MLP_succ['domain'].value_counts().tail(5))
print(otest_SVC_succ['ls_labels'].value_counts().head(5))
print(otest_MLP_succ['mlp_labels'].value_counts().head(5))

calendar            20
yes                 20
change_user_name    20
distance            18
shopping_list       17
Name: domain, dtype: int64
play_music          20
yes                 19
ingredients_list    19
distance            18
change_user_name    18
Name: domain, dtype: int64
who_made_you             31
calculator               20
w2                       20
restaurant_suggestion    17
where_are_you_from       17
Name: ls_labels, dtype: int64
greeting             56
w2                   43
directions           27
calculator           25
travel_suggestion    21
Name: mlp_labels, dtype: int64


Timing function for benchmarking. Takes time to run as averages over 1000 iterations each.

In [None]:
time1 = time.time() #start timer
for i in range(1000): #loop 1000 times for accuracy
    ls_labels = lsvc.predict(test_x) #predict test values using SVM
    ols_labels = lsvc.predict(test_oos_x) #predict oos test values
time2 = time.time() # stop timer
svc_time = (time2-time1)/1000 #divide by 1000 to get time for one set
print('svc prediction time (both sets)')
print(svc_time) #print

#as above but for mlp
time5 = time.time()
for i in range(1000):
    m_labels = mlp.predict(tensor_test_x)
    om_labels = mlp.predict(tensor_test_oos_x)
time6 = time.time()
mlp_time = (time6-time5)/1000
print('mlp prediction time (both sets)')
print(mlp_time)