In [1]:
import tensorflow_text
import tensorflow as tf
import tensorflow_hub as hub

import glob, pickle, os
import numpy as np

In [2]:
from maupassant.feature_extraction.embedding import BertEmbedding
from maupassant.training_utils import TrainerHelper

In [3]:
# Frozen (trainable=False) TF-Hub layer that takes raw strings as input; every
# model built in the cells below calls this same layer instance and reshapes
# its output to (1, 512).
# NOTE(review): despite the name `bert_module`, the URL points at
# universal-sentence-encoder-multilingual/3 — confirm the name is intentional.
bert_module = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3", input_shape=[], dtype=tf.string, trainable=False, name='multilingual_embed')

In [None]:
def load_encoder(model_dir):
    """Load every pickled label encoder found directly inside ``model_dir``.

    Files matching ``*encoder.pkl`` are unpickled and each one is converted
    into an ``{index: class_label}`` mapping built from the encoder's
    ``classes_`` attribute.

    Args:
        model_dir: Directory to search (a trailing slash is accepted).

    Returns:
        dict: ``{encoder_name: {index: label}}`` where ``encoder_name`` is the
        file name up to its first dot. Entries follow the order returned by
        ``glob.glob``, which is not guaranteed to be sorted. An empty dict is
        returned when no file matches.
    """
    encoders = {}
    for file in glob.glob(os.path.join(model_dir, "*encoder.pkl")):
        # SECURITY: unpickling can execute arbitrary code; only point this at
        # model directories you trust.
        # The context manager makes sure the file handle is closed promptly.
        with open(file, "rb") as handle:
            encoder = pickle.load(handle)
        encoder_name = os.path.basename(file).split('.')[0]
        encoders[encoder_name] = dict(enumerate(encoder.classes_))

    return encoders

In [22]:
# Checkpoint directories of the two single-task models: index 0 is the
# directory used by the Sentiment section, index 1 the one used by the Intent
# section.
# NOTE(review): these are absolute, machine-specific paths — consider making
# the base directory configurable.
paths = [
    "/home/jwuthri/Documents/GitHub/Maupassant/maupassant/models/one_to_one_2020_03_24_21_13_29", 
    "/home/jwuthri/Documents/GitHub/Maupassant/maupassant/models/one_to_one_2020_03_25_10_52_14"
]

## Sentiment

In [166]:
# Sentiment checkpoint directory: reuse the first entry of `paths` instead of
# repeating the absolute path literal.
path = paths[0]
# Checkpoint prefix to restore; latest_checkpoint returns None when the
# directory holds no checkpoint, so fail here with a clear message rather than
# later inside load_weights.
latest = tf.train.latest_checkpoint(path)
if latest is None:
    raise FileNotFoundError("No checkpoint found in %s" % path)
# {encoder_name: {index: label}} for the sentiment model.
# NOTE: `path`, `latest` and `encoder` are rebound by the Intent section below.
encoder = load_encoder(path)

In [169]:
# Rebuild the sentiment classifier so the checkpoint weights can be restored
# into it: string input -> sentence embedding -> Conv1D -> global max pool ->
# Dense(250) -> Dropout -> 8 sigmoid outputs.
# NOTE(review): presumably this has to mirror the architecture used at
# training time for load_weights to match — confirm against the training code.
input_layer = tf.keras.Input((), dtype=tf.string, name="input_layer")
embedding_layer = bert_module(input_layer)
# Add a length-1 axis so the 512-wide embedding can be fed to Conv1D.
reshape_layer = tf.keras.layers.Reshape(target_shape=(1, 512))(embedding_layer)
conv_layer = tf.keras.layers.Conv1D(512, 3, padding='same', activation='relu', strides=1)(reshape_layer)
gpooling_layer = tf.keras.layers.GlobalMaxPooling1D()(conv_layer)
# NOTE(review): Flatten right after global pooling looks redundant — confirm
# before removing, because the "Combine model" cell addresses layers by index.
flatten_layer = tf.keras.layers.Flatten()(gpooling_layer)
dense_layer = tf.keras.layers.Dense(250, activation="relu")(flatten_layer)
dropout_layer = tf.keras.layers.Dropout(0.25)(dense_layer)
# Sigmoid head with 8 units: each class gets its own independent score.
layer1 = tf.keras.layers.Dense(8, activation="sigmoid", name="sentiment")(dropout_layer)
sentiment = tf.keras.models.Model(inputs=input_layer, outputs=layer1)
# `latest` is the checkpoint prefix resolved in the previous cell.
sentiment.load_weights(latest)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc652550a90>

In [91]:
def pred_sentiment(x, model=None, encoders=None, threshold=0.5):
    """Return the sentiment labels predicted for a single text.

    Args:
        x: The text to classify (one string).
        model: Object exposing ``predict``; defaults to the notebook-level
            ``sentiment`` model.
        encoders: ``{name: {index: label}}`` mapping as produced by
            ``load_encoder``; defaults to the notebook-level ``encoder``.
            NOTE(review): that global is rebound in the Intent section, so
            pass the sentiment encoders explicitly once those cells have run.
        threshold: Minimum score for a label to be reported (default 0.5).

    Returns:
        list: ``(label, score)`` tuples for every class whose score is at
        least ``threshold``; an empty list when ``encoders`` is empty.
    """
    model = sentiment if model is None else model
    encoders = encoder if encoders is None else encoders

    # The model is fed a batch of one text; keep only that row of scores.
    proba = model.predict(np.asarray([x]))[0]
    # Start from an empty result so an empty encoder mapping yields [] instead
    # of raising UnboundLocalError.
    preds = []
    # When several encoders are present the last one wins.
    for labels in encoders.values():
        preds = [(labels[idx], score) for idx, score in enumerate(proba) if score >= threshold]

    return preds

In [92]:
pred_sentiment("I will not buy from you anymore")

[('toxic', 0.95566)]

## Intent

In [178]:
# Intent checkpoint directory: reuse the second entry of `paths` instead of
# repeating the absolute path literal.
path = paths[1]
# Checkpoint prefix to restore; latest_checkpoint returns None when the
# directory holds no checkpoint, so fail here with a clear message rather than
# later inside load_weights.
latest = tf.train.latest_checkpoint(path)
if latest is None:
    raise FileNotFoundError("No checkpoint found in %s" % path)
# {encoder_name: {index: label}} for the intent model.
# NOTE: this rebinds the `path`, `latest` and `encoder` globals that the
# Sentiment section set earlier.
encoder = load_encoder(path)

In [180]:
# Rebuild the intent classifier so the checkpoint weights can be restored into
# it. Same stack as the sentiment model, but with a 25-unit output head.
# NOTE(review): presumably this has to mirror the architecture used at
# training time for load_weights to match — confirm against the training code.
input_layer = tf.keras.Input((), dtype=tf.string, name="input_layer")
embedding_layer = bert_module(input_layer)
# Add a length-1 axis so the 512-wide embedding can be fed to Conv1D.
reshape_layer = tf.keras.layers.Reshape(target_shape=(1, 512))(embedding_layer)
conv_layer = tf.keras.layers.Conv1D(512, 3, padding='same', activation='relu', strides=1)(reshape_layer)
gpooling_layer = tf.keras.layers.GlobalMaxPooling1D()(conv_layer)
# NOTE(review): Flatten right after global pooling looks redundant — confirm
# before removing, because the "Combine model" cell addresses layers by index.
flatten_layer = tf.keras.layers.Flatten()(gpooling_layer)
dense_layer = tf.keras.layers.Dense(250, activation="relu")(flatten_layer)
dropout_layer = tf.keras.layers.Dropout(0.25)(dense_layer)
# Sigmoid head with 25 units: each class gets its own independent score.
layer = tf.keras.layers.Dense(25, activation="sigmoid", name="intent")(dropout_layer)
intent = tf.keras.models.Model(inputs=input_layer, outputs=layer)
# `latest` is the checkpoint prefix resolved in the previous cell.
intent.load_weights(latest)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc65214a590>

In [115]:
def pred_intent(x, model=None, encoders=None, threshold=0.5):
    """Return the intent labels predicted for a single text.

    Args:
        x: The text to classify (one string).
        model: Object exposing ``predict``; defaults to the notebook-level
            ``intent`` model.
        encoders: ``{name: {index: label}}`` mapping as produced by
            ``load_encoder``; defaults to the notebook-level ``encoder``.
            NOTE(review): that global is shared with the Sentiment section, so
            pass the intent encoders explicitly if it may have been rebound.
        threshold: Minimum score for a label to be reported (default 0.5).

    Returns:
        list: ``(label, score)`` tuples for every class whose score is at
        least ``threshold``; an empty list when ``encoders`` is empty.
    """
    model = intent if model is None else model
    encoders = encoder if encoders is None else encoders

    # The model is fed a batch of one text; keep only that row of scores.
    proba = model.predict(np.asarray([x]))[0]
    # Start from an empty result so an empty encoder mapping yields [] instead
    # of raising UnboundLocalError.
    preds = []
    # When several encoders are present the last one wins.
    for labels in encoders.values():
        preds = [(labels[idx], score) for idx, score in enumerate(proba) if score >= threshold]

    return preds

In [116]:
pred_intent("Where is my order?")

[('shipping/status', 1.0)]

## Combine model

In [198]:
# Build one two-headed model: a single shared input/embedding/reshape trunk
# feeding the existing layer objects of the `sentiment` and `intent` models
# (the layers themselves are called again here, not re-created).
input_layer = tf.keras.Input((), dtype=tf.string, name="input_layer")
embedding_layer = bert_module(input_layer)
reshape_layer = tf.keras.layers.Reshape(target_shape=(1, 512))(embedding_layer)
# Collected head outputs, in order: sentiment first, then intent.
outs = []
# NOTE: the loop variable `model` is a notebook global; the next cell rebinds
# it to the combined model.
for model in [sentiment, intent]:
    # Indices 3..8 are assumed to be Conv1D, GlobalMaxPooling1D, Flatten,
    # Dense(250), Dropout and the output Dense, following the order in which
    # the single-task cells created them — TODO confirm with model.summary().
    conv_layer = model.layers[3](reshape_layer)
    gpooling_layer = model.layers[4](conv_layer)
    flatten_layer = model.layers[5](gpooling_layer)
    dense_layer = model.layers[6](flatten_layer)
    dropout_layer = model.layers[7](dense_layer)
    layer = model.layers[8](dropout_layer)
    outs.append(layer)

In [199]:
# Two-output model; outputs follow `outs`, i.e. [sentiment, intent].
# NOTE: this rebinds `model`, which was the loop variable of the previous cell.
model = tf.keras.models.Model(inputs=input_layer, outputs=outs)

In [205]:
%%timeit
model.predict(np.asarray(['Hello retard, where is my order?']))

5.71 ms ± 76.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [204]:
model.predict(np.asarray(['Hello retard, where is my order?']))[0]

array([[0.0000000e+00, 0.0000000e+00, 9.9997437e-01, 1.6412139e-04,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3952255e-02]],
      dtype=float32)

In [206]:
model.predict(np.asarray(['Hello retard, where is my order?']))[1]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)

In [207]:
# Label maps of the two single-task models, kept under distinct names so they
# do not overwrite each other. Directories come from `paths` (index 0 =
# sentiment, index 1 = intent) rather than repeated absolute-path literals.
path = paths[0]
sent_encoder = load_encoder(path)

path = paths[1]
intent_encoder = load_encoder(path)

In [212]:
# Save the combined model's weights under the prefix
# 'intent_sentiment_model/model'; the "Load combined model" section looks up
# the checkpoint in that directory.
model.save_weights('intent_sentiment_model/model')

## Load combined model

In [218]:
# Resolve the checkpoint prefix inside the directory that model.save_weights
# wrote to. NOTE: this rebinds the `latest` global used by earlier sections.
latest = tf.train.latest_checkpoint('intent_sentiment_model/')

In [221]:
# Re-create the two-headed architecture with fresh layers so the saved weights
# can be restored into it in the next cell.
input_layer = tf.keras.Input((), dtype=tf.string, name="input_layer")
embedding_layer = bert_module(input_layer)
reshape_layer = tf.keras.layers.Reshape(target_shape=(1, 512))(embedding_layer)
outs = []
# Heads are created in the same order as in the combined model that was saved:
# sentiment first, then intent.
# NOTE: `model` is a plain string (the head name) in this loop and replaces the
# combined model object that the name referred to earlier.
for model in ['sentiment', 'intent']:
    # Output width per head: 25 for intent, 8 for sentiment (the same sizes as
    # the single-task models built above).
    n_classes = 25 if model == 'intent' else 8
    conv_layer = tf.keras.layers.Conv1D(512, 3, padding='same', activation='relu', strides=1)(reshape_layer)
    gpooling_layer = tf.keras.layers.GlobalMaxPooling1D()(conv_layer)
    flatten_layer = tf.keras.layers.Flatten()(gpooling_layer)
    dense_layer = tf.keras.layers.Dense(250, activation="relu")(flatten_layer)
    dropout_layer = tf.keras.layers.Dropout(0.25)(dense_layer)
    # The head's layer name is the task name ('sentiment' or 'intent').
    layer = tf.keras.layers.Dense(n_classes, activation="sigmoid", name=model)(dropout_layer)
    outs.append(layer)

In [222]:
# Wrap the freshly built heads in a model and restore the combined weights.
# NOTE(review): load_weights returns a CheckpointLoadStatus that is not checked
# here; restoration presumably relies on this architecture matching the saved
# one — confirm.
loaded_model = tf.keras.models.Model(inputs=input_layer, outputs=outs)
loaded_model.load_weights(latest)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc7a14f6150>

In [223]:
loaded_model.predict(np.asarray(['Hello retard, where is my order?']))

[array([[0.0000000e+00, 0.0000000e+00, 9.9997437e-01, 1.6412139e-04,
         0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3952255e-02]],
       dtype=float32),
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 1., 0., 0.]], dtype=float32)]

In [226]:
loaded_model.summary()

Model: "model_34"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None,)]            0                                            
__________________________________________________________________________________________________
multilingual_embed (KerasLayer) (None, 512)          68927232    input_layer[0][0]                
__________________________________________________________________________________________________
reshape_21 (Reshape)            (None, 1, 512)       0           multilingual_embed[22][0]        
__________________________________________________________________________________________________
conv1d_22 (Conv1D)              (None, 1, 512)       786944      reshape_21[0][0]                 
___________________________________________________________________________________________

In [228]:
loaded_model.layers[-1].name

'intent'

In [229]:
loaded_model.layers[-2].name

'sentiment'

In [230]:
# Label maps for the heads, read from the combined-model directory.
# NOTE(review): no visible cell writes *encoder.pkl files into
# 'intent_sentiment_model/' — presumably they were copied there by hand;
# confirm.
encoders = load_encoder('intent_sentiment_model/')

In [342]:
# One score array per head, in the model's output order:
# pba[0] -> sentiment, pba[1] -> intent.
pba = loaded_model.predict(np.asarray(['I am angry, I want a refund']))

In [343]:
# Pair each encoder name with one head's scores, purely by position.
# NOTE(review): this relies on the iteration order of `encoders` (the glob
# order inside load_encoder) matching the model's output order
# [sentiment, intent], which is not guaranteed — verify.
preds = list(zip(encoders, pba))

In [344]:
# For each (encoder name, scores) pair, print the labels scoring at least 0.5.
for pred in preds:
    encoder_name, batch_proba = pred
    v = encoders[encoder_name]
    res = []
    # batch_proba[0] is the score row of the single text that was predicted.
    for label, th in enumerate(batch_proba[0]):
        if th >= 0.5:
            res.append((v[label], th))
    print(res)

[('negativ', 1.0), ('toxic', 0.9999999)]
[('refund/request', 1.0)]


In [234]:
def predict_format(x):
    """Normalise model input to a NumPy array where possible.

    A single string becomes a one-element array, a list is converted to an
    array, and any other value is handed back unchanged.
    """
    if isinstance(x, str):
        return np.asarray([x])
    if isinstance(x, list):
        return np.asarray(x)
    return x