In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
from keras.layers import Input, Lambda, Dropout, Dense
from keras.models import Model
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as K

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


# Data preparation

In [2]:
# Load the raw tweets table from the project's local data directory.
data = pd.read_csv(filepath_or_buffer='data/Tweets.csv')

In [3]:
def process_text(text):
    """
    Clean a single tweet before it is passed to the model.

    - every http(s) link is replaced with the ULMFiT-style token ``xxlink``
    - the first space-separated token is dropped, because the model can become
      biased by the imbalance in class distribution across companies
      (presumably that token is the airline handle -- verify against the data)

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text; an empty string when the tweet holds a single token.
    """
    # Raw string: `\.`, `\w` and `\d` are regex escapes, not Python string
    # escapes, and trigger invalid-escape warnings in a plain literal.
    text = re.sub(r'http[:\.\w\d/_]+', 'xxlink', text)
    # Split on a single space (not arbitrary whitespace) and drop token 0.
    text = ' '.join(text.split(' ')[1:])
    return text

In [4]:
# Work on an independent copy so the raw `data` frame stays untouched.
cls_df = data.copy(deep=True)

In [5]:
# Clean every tweet. NOTE: this overwrites the column in place, so running the
# cell twice strips one more leading token from each tweet.
cls_df['text'] = cls_df['text'].apply(process_text)

In [6]:
# Preview of the one-hot encoding of the sentiment column (one column per class).
pd.get_dummies(cls_df['airline_sentiment']).values.astype(np.int8)

array([[0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       ...,
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0]], dtype=int8)

In [7]:
# Mapping from sentiment name to a signed integer code.
label_dict = {
    'negative': -1,
    'neutral': 0,
    'positive': 1,
}

In [8]:
# Keep the sentiment strings as the label (the numeric codes in `label_dict`
# are not applied here).
cls_df['label'] = cls_df.loc[:, 'airline_sentiment']

In [9]:
# the same splitting as for baseline
train_df, test_df = train_test_split(cls_df, test_size=0.2, stratify=data[['airline_sentiment', 'airline']],
                               random_state=21)

In [10]:
# the same splitting as in flair notebook
train_df, val_df = train_test_split(train_df, test_size=0.15, stratify=train_df[['airline_sentiment', 'airline']],
                               random_state=21)

In [11]:
# Keep only the model input and the target in every split.
train_df = train_df.loc[:, ['text', 'label']]
val_df = val_df.loc[:, ['text', 'label']]
test_df = test_df.loc[:, ['text', 'label']]

In [12]:
# Quick look at the first rows of the training split.
train_df.head(n=5)

Unnamed: 0,text,label
8938,Got flight reschedule to flight form PIT to FL...,negative
1935,more lies... xxlink,negative
5517,I'm on the 10:55 flight! Everyone has been so ...,positive
2030,you have Cancelled Flightled my flight UA922 f...,negative
13916,because you won't get our bags for us because ...,negative


In [13]:
# Training inputs as an (n, 1) object array of strings, targets as one-hot int8 rows.
train_text = np.array(train_df['text'].tolist(), dtype=object).reshape(-1, 1)
train_label = pd.get_dummies(train_df['label']).values.astype(np.int8)

In [14]:
# Validation inputs as an (n, 1) object array of strings, targets as one-hot int8 rows.
val_text = np.array(val_df['text'].tolist(), dtype=object).reshape(-1, 1)
val_label = pd.get_dummies(val_df['label']).values.astype(np.int8)

In [15]:
# Test inputs as an (n, 1) object array of strings, targets as one-hot int8 rows.
test_text = np.array(test_df['text'].tolist(), dtype=object).reshape(-1, 1)
test_label = pd.get_dummies(test_df['label']).values.astype(np.int8)

# Training

In [16]:
# TF Hub handle of the Universal Sentence Encoder (large, version 3).
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
# Import the module once; it is reused by the embedding function below.
# NOTE(review): created without `trainable=True`, so presumably the encoder
# weights stay fixed regardless of the Keras `trainable` flags -- confirm.
embed = hub.Module(spec=module_url)

In [17]:
def UniversalEmbedding(x):
    """
    Embed a batch of sentences with the Universal Sentence Encoder module.

    Parameters
    ----------
    x : tensor
        String tensor holding one sentence per row; the model feeds it with
        shape (batch, 1).

    Returns
    -------
    tensor
        The module's "default" output for the batch.
    """
    # Squeeze only axis 1. A bare tf.squeeze would also remove the batch axis
    # for a batch of one sample, handing the module a scalar instead of a
    # 1-D batch of sentences.
    sentences = tf.squeeze(tf.cast(x, tf.string), axis=1)
    return embed(sentences, signature="default", as_dict=True)["default"]

In [18]:
def get_model(n_labels, drop_rate=0.5):
    """
    Build a classification head on top of Universal Sentence Encoder embeddings.

    Architecture: string input -> USE embedding (declared as 512-d) -> dropout
    -> Dense(256, relu) -> dropout -> Dense(n_labels, softmax).

    Parameters
    ----------
    n_labels : int
        Number of output classes, i.e. the width of the softmax layer.
    drop_rate : float, default 0.5
        Dropout rate used after the embedding and after the hidden layer.

    Returns
    -------
    Model
        The uncompiled Keras model.
    """
    inputs = Input(shape=(1,), dtype=tf.string)
    emb = Lambda(UniversalEmbedding, output_shape=(512,))(inputs)
    emb = Dropout(drop_rate)(emb)
    dense = Dense(256, activation='relu')(emb)
    dense = Dropout(drop_rate)(dense)
    # The output width follows `n_labels` instead of a hard-coded 3.
    pred = Dense(n_labels, activation='softmax')(dense)

    model = Model(inputs=[inputs], outputs=pred)

    return model

In [19]:
# Build the three-class model, compile it for one-hot targets, and show its layout.
model = get_model(n_labels=3)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

W0728 21:05:15.330640 140408322873152 deprecation_wrapper.py:119] From /home/egor/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0728 21:05:15.331425 140408322873152 deprecation_wrapper.py:119] From /home/egor/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0728 21:05:15.904527 140408322873152 deprecation_wrapper.py:119] From /home/egor/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0728 21:05:15.911577 140408322873152 deprecation.py:506] From /home/egor/.local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated an

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 771       
Total params: 132,099
Trainable params: 132,099
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Keep a handle on the layer list; index 1 is the Lambda (embedding) layer,
# which is marked as frozen for the first training stage.
layers = [*model.layers]
layers[1].trainable = False

In [21]:
# Show the `trainable` flag of every layer, one per line.
print('\n'.join(str(lyr.trainable) for lyr in model.layers))

False
False
True
True
True
True


In [22]:
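# NOTE(review): Keras documents that a change to `layer.trainable` is only picked up
# once the model is compiled again; confirm whether this re-compile should be
# re-enabled after the freeze above.
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])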

In [23]:
# Stage 1: fit the model for a single epoch and store the resulting weights.
with tf.Session() as session:
    # Register this session with the Keras backend.
    K.set_session(session)
    # Run the variable and table initializers in the fresh session before
    # fitting.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    history = model.fit(train_text, train_label,
                    validation_data=(val_text, val_label),
                    epochs=1,
                    batch_size=64)
    # Saved inside the `with` block, i.e. before the session is closed.
    model.save_weights('head_trained')

W0728 21:05:19.086076 140408322873152 deprecation.py:323] From /home/egor/.local/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 9955 samples, validate on 1757 samples
Epoch 1/1


In [24]:
# Mark the Lambda (embedding) layer as trainable again for the second stage
# and show the resulting flags, one per line.
layers[1].trainable = True
print('\n'.join(str(lyr.trainable) for lyr in model.layers))

False
True
True
True
True
True


In [25]:
# Checkpoint callback: writes weights only, and only when the monitored
# quantity improves, to the path 'best_USE'.
save_callback = ModelCheckpoint(
    filepath='best_USE',
    save_best_only=True,
    save_weights_only=True,
)

In [26]:
# Stage 2: continue training for 13 epochs, starting from the stage-1 weights.
with tf.Session() as session:
    # Register this session with the Keras backend.
    K.set_session(session)
    # Run the initializers first; the stage-1 weights are loaded from
    # 'head_trained' only afterwards, so they are not overwritten by
    # the initialisation.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    model.load_weights('head_trained')
    # `save_callback` is passed so checkpoints are written during the fit.
    history = model.fit(train_text, train_label,
                    validation_data=(val_text, val_label),
                    epochs=13,
                    batch_size=64,
                    callbacks=[save_callback])

Train on 9955 samples, validate on 1757 samples
Epoch 1/13
  64/9955 [..............................] - ETA: 1:49 - loss: 0.4573 - acc: 0.7812

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fb2b00bbd68>>
Traceback (most recent call last):
  File "/home/egor/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')




Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fb2b00a9438>>
Traceback (most recent call last):
  File "/home/egor/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')


Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


In [31]:
%%time
# Predict on the test set with the weights stored in 'best_USE'.
with tf.Session() as session:
    # Register this session with the Keras backend.
    K.set_session(session)
    # Run the initializers first, then load the checkpoint so the loaded
    # weights are the ones used for prediction.
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    model.load_weights('best_USE')
    test_preds = model.predict(test_text)

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fb27c7a8828>>
Traceback (most recent call last):
  File "/home/egor/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')


CPU times: user 53.1 s, sys: 973 ms, total: 54.1 s
Wall time: 9.89 s


In [33]:
# Turn per-class scores / one-hot rows into class indices.
# Guarded so the cell can be re-run: after the first run `test_preds` is already
# a 1-D vector of indices and has no axis 1 left to reduce over.
if test_preds.ndim > 1:
    test_preds = test_preds.argmax(axis=1)
test_labels = test_label.argmax(axis=1)

<b>I didn't expect this, but USE showed the same results as BERT, and it is much faster.</b>

In [34]:
# Macro- and micro-averaged F1 on the test set.
# scikit-learn's signature is f1_score(y_true, y_pred, ...): ground truth goes first.
(
    f1_score(test_labels, test_preds, average='macro'),
    f1_score(test_labels, test_preds, average='micro'),
)

(0.783511065796814, 0.835724043715847)