In [1]:
import numpy as np
import pickle
from mltools.preprocessing import Tokenizer, Indexer, Pipeline, LabelIndexer
from sklearn.metrics import classification_report, confusion_matrix, f1_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Arrays stored under ../00_data/encoded/: the x_* files (presumably the
# indexed sentences -- confirm against the encoding notebook) for each split.
x_train, x_test = (
    np.load('../00_data/encoded/snips_x_train.npy'),
    np.load('../00_data/encoded/snips_x_test.npy'),
)
# Gold tag arrays (y_tags) for each split.
yt_train, yt_test = (
    np.load('../00_data/encoded/snips_y_tags_train.npy'),
    np.load('../00_data/encoded/snips_y_tags_test.npy'),
)
# Gold intent arrays (y_int) for each split.
yi_train, yi_test = (
    np.load('../00_data/encoded/snips_y_int_train.npy'),
    np.load('../00_data/encoded/snips_y_int_test.npy'),
)

# Saved prediction arrays from ../00_data/model/: tags first, then intents.
yt_preds, yi_preds = (
    np.load('../00_data/model/snips_pred_tags.npy'),
    np.load('../00_data/model/snips_pred_ints.npy'),
)

In [3]:
# Restore the three pickled objects saved under ../00_data/encoded/.
# Each file is opened in a `with` block so its handle is closed as soon as
# unpickling finishes (the bare `pickle.load(open(...))` form leaks the handle).
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point these paths at files produced by this project.
with open("../00_data/encoded/snips_intent_indexer.pkl", "rb") as fh:
    intent_indexer = pickle.load(fh)
with open("../00_data/encoded/snips_label_indexer.pkl", "rb") as fh:
    label_indexer = pickle.load(fh)
with open("../00_data/encoded/snips_sent_indexer.pkl", "rb") as fh:
    word_idxpipe = pickle.load(fh)

In [4]:
# Take the arg-max index along axis 1 of the intent predictions, then map the
# indices back through intent_indexer.inverse_transform.
# NOTE: this rebinds yi_preds, so the cell is meant to run once per fresh load.
yi_preds = intent_indexer.inverse_transform(np.argmax(yi_preds, axis=1))

In [5]:
# Drop the size-1 axes of the gold intent array and decode it with the same
# indexer used for the predictions.
# NOTE: this rebinds yi_test, so the cell is meant to run once per fresh load.
yi_test = intent_indexer.inverse_transform(np.squeeze(yi_test))

### Intents: classification results on the test set

In [6]:
# Micro-averaged F1 over the decoded intents, followed by a blank line and
# the per-class report.
print(f1_score(yi_test, yi_preds, average='micro'), end='\n\n')
print(classification_report(yi_test, yi_preds))

0.7642857142857142

                      precision    recall  f1-score   support

       AddToPlaylist       0.82      0.99      0.90       100
      BookRestaurant       0.91      0.98      0.94       100
          GetWeather       0.94      0.58      0.72       100
           PlayMusic       0.15      0.08      0.10       100
            RateBook       0.99      0.99      0.99       100
  SearchCreativeWork       0.55      0.97      0.71       100
SearchScreeningEvent       0.94      0.76      0.84       100

         avg / total       0.76      0.76      0.74       700



In [7]:
# Intent names ordered by their index in intent_indexer.tag2idx, so the
# confusion-matrix rows/columns follow that order.
labels = [
    tag
    for tag, _idx in sorted(intent_indexer.tag2idx.items(), key=lambda pair: pair[1])
]
print(labels)
# Passing `labels` fixes the row/column order of the matrix to the list above.
print(confusion_matrix(yi_test, yi_preds, labels=labels))

['SearchScreeningEvent', 'GetWeather', 'BookRestaurant', 'AddToPlaylist', 'SearchCreativeWork', 'RateBook', 'PlayMusic']
[[76  3  3  0 11  0  7]
 [ 3 58  0  0  0  0 39]
 [ 0  0 98  0  2  0  0]
 [ 0  0  0 99  1  0  0]
 [ 1  0  2  0 97  0  0]
 [ 0  0  1  0  0 99  0]
 [ 1  1  4 21 64  1  8]]


### Labels: token-level tag results (PAD positions excluded)

In [8]:
# Reduce the tag predictions to arg-max indices over the last axis and drop
# the trailing size-1 axis of the gold tags.
yt_preds = yt_preds.argmax(axis=-1)
yt_test = yt_test.squeeze(axis=-1)
# Decode predictions first, then gold tags, through label_indexer.
# NOTE: both names are rebound, so the cell is meant to run once per fresh load.
yt_preds, yt_test = map(label_indexer.inverse_transform, (yt_preds, yt_test))

In [11]:
# Flatten the per-sentence tag sequences into two parallel lists, keeping only
# positions whose gold tag is not 'PAD'. The prediction is taken from the same
# (sentence, token) position, whatever its value.
y_test_tot, y_pred_tot = [], []
for sent_idx, gold_tags in enumerate(yt_test):
    for tok_idx, gold_tag in enumerate(gold_tags):
        if gold_tag == 'PAD':
            continue
        y_test_tot.append(gold_tag)
        y_pred_tot.append(yt_preds[sent_idx][tok_idx])

In [12]:
# Micro-averaged F1 over the flattened non-PAD tokens, followed by a blank
# line and the per-class report.
print(f1_score(y_test_tot, y_pred_tot, average='micro'), end='\n\n')
print(classification_report(y_test_tot, y_pred_tot))

0.5569014982467325

                            precision    recall  f1-score   support

                      NONE       0.61      0.88      0.72      3127
                     album       0.00      0.00      0.00        35
                    artist       0.00      0.00      0.00       222
               best_rating       0.00      0.00      0.00        51
                      city       0.13      0.04      0.06        84
     condition_description       0.00      0.00      0.00        22
     condition_temperature       0.00      0.00      0.00        21
                   country       0.00      0.00      0.00        64
                   cuisine       0.00      0.00      0.00        13
          current_location       0.00      0.00      0.00        20
               entity_name       0.00      0.00      0.00        72
                  facility       0.00      0.00      0.00         7
                     genre       0.00      0.00      0.00         6
            geographic_poi 

  'precision', 'predicted', average, warn_for)
