In [13]:
!pip install bert
!pip install transformers
!pip install tf-models-official



In [14]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers
import bert
import random
import pandas as pd
import numpy as np
# from keras import backend as K
from sklearn.metrics import classification_report

from google.colab import drive
drive.mount('/content/drive')

import os

from transformers import BertTokenizer

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Dense, Lambda, Dot, Activation, Concatenate, Layer

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
def recall_m(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(tf.squeeze(y_pred, axis=-1), tf.float32)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(tf.squeeze(y_pred, axis=-1), tf.float32)
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

def dataset_embedding(dataset_path, tokenizer, batch_size=32):
    dataset = pd.read_csv(dataset_path)[["tweet", "sarcastic"]]
    dataset = dataset[dataset['tweet'].notna()]

    tokenized_tweets = [tokenizer.convert_tokens_to_ids(tokenizer.tokenize(tweet)) for tweet in dataset['tweet']]

    tweets_with_len = [[tweet, dataset['sarcastic'].iloc[i], len(tweet)] for i, tweet in enumerate(tokenized_tweets)]
    random.Random(42).shuffle(tweets_with_len)

    tweets_with_len.sort(key=lambda x: x[2])
    sorted_tweets_labels = [(tweet_lab[0], tweet_lab[1]) for tweet_lab in tweets_with_len] # remove tweet len
    processed_dataset = tf.data.Dataset.from_generator(lambda: sorted_tweets_labels, output_types=(tf.int32, tf.int32))

    return processed_dataset.padded_batch(batch_size, padded_shapes=((None, ), ()))


def prepare_datasets(train_path, test_path):
    # BertTokenizer = bert.bert_tokenization.FullTokenizer
    bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1", trainable=True)
    vocabulary_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
    to_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

    vocabulary_file_str = vocabulary_file.decode("utf-8")
    do_lower_case_bool = bool(to_lower_case)

    # tokenizer = BertTokenizer(vocabulary_file, to_lower_case)
    tokenizer = BertTokenizer(
        vocab_file=vocabulary_file_str,
        do_lower_case=do_lower_case_bool
    )

    dataset_train = dataset_embedding(train_path, tokenizer)
    dataset_test = dataset_embedding(test_path, tokenizer)

    return dataset_train, dataset_test, tokenizer

In [16]:
debug_flag = int(os.environ.get('KERAS_ATTENTION_DEBUG', 0))

class Attention(object if debug_flag else Layer):
    def __init__(self, units=128, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        input_dim = int(input_shape[-1])
        with K.name_scope(self.name if not debug_flag else 'attention'):
            self.attention_score_vec = Dense(input_dim, use_bias=False, name='attention_score_vec')
            self.h_t = Lambda(lambda x: x[:, -1, :], output_shape=(input_dim,), name='last_hidden_state')
            self.attention_score = Dot(axes=[1, 2], name='attention_score')
            self.attention_weight = Activation('softmax', name='attention_weight')
            self.context_vector = Dot(axes=[1, 1], name='context_vector')
            self.attention_output = Concatenate(name='attention_output')
            self.attention_vector = Dense(self.units, use_bias=False, activation='tanh', name='attention_vector')
        if not debug_flag:
            super(Attention, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.units

    # def __call__(self, inputs, training=None, **kwargs):
    #     if debug_flag:
    #         return self.call(inputs, training, **kwargs)
    #     else:
    #         return super(Attention, self).__call__(inputs, training, **kwargs)

    def __call__(self, inputs, training=None, **kwargs):
        if debug_flag:
            return self.call(inputs, training=training, **kwargs)
        else:
            return super(Attention, self).__call__(inputs, training=training, **kwargs)

    def call(self, inputs, training=None, **kwargs):
        """
        Many-to-one attention mechanism for Keras.
        @param inputs: 3D tensor with shape (batch_size, time_steps, input_dim).
        @param training: not used in this layer.
        @return: 2D tensor with shape (batch_size, units)
        @author: felixhao28, philipperemy.
        """
        if debug_flag:
            self.build(inputs.shape)
        score_first_part = self.attention_score_vec(inputs)
        h_t = self.h_t(inputs)
        score = self.attention_score([h_t, score_first_part])
        attention_weights = self.attention_weight(score)
        context_vector = self.context_vector([inputs, attention_weights])
        pre_activation = self.attention_output([context_vector, h_t])
        attention_vector = self.attention_vector(pre_activation)
        return attention_vector

    def get_config(self):
        """
        Returns the config of a the layer. This is used for saving and loading from a model
        :return: python dictionary with specs to rebuild layer
        """
        config = super(Attention, self).get_config()
        config.update({'units': self.units})
        return config


In [17]:
train_path = '/content/drive/My Drive/Colab Notebooks/6812_gwp/data/Train_Dataset.csv'
test_path = '/content/drive/My Drive/Colab Notebooks/6812_gwp/data/Test_Dataset.csv'

train_data, test_data, tokenizer = prepare_datasets(train_path, test_path)

att_lstm = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(tokenizer.vocab), 128),
    tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(64, activation='relu')),
    tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3),
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(64, activation='relu')),
    tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3),
    Attention(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# att_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])
att_lstm.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy', f1_m]
)
print(att_lstm.summary())

att_lstm.fit(train_data, epochs=10, validation_data = test_data, class_weight={1:4, 0:1})

loss_test, acc_test, f1_test = att_lstm.evaluate(test_data)
print("Loss:", loss_test, "Accuracy:", acc_test, "F1:", f1_test)

None
Epoch 1/10
    217/Unknown [1m12s[0m 27ms/step - accuracy: 0.4743 - f1_m: 0.2775 - loss: 1.1921



[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 33ms/step - accuracy: 0.4736 - f1_m: 0.2778 - loss: 1.1922 - val_accuracy: 0.1429 - val_f1_m: 0.2337 - val_loss: 0.7422
Epoch 2/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - accuracy: 0.2666 - f1_m: 0.3950 - loss: 1.1663 - val_accuracy: 0.4829 - val_f1_m: 0.2461 - val_loss: 0.7848
Epoch 3/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - accuracy: 0.5385 - f1_m: 0.4946 - loss: 1.0051 - val_accuracy: 0.5357 - val_f1_m: 0.2492 - val_loss: 0.9936
Epoch 4/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - accuracy: 0.7510 - f1_m: 0.6100 - loss: 0.8488 - val_accuracy: 0.5129 - val_f1_m: 0.2246 - val_loss: 1.1067
Epoch 5/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - accuracy: 0.7439 - f1_m: 0.6452 - loss: 0.7287 - val_a

In [18]:
all_labels = []
all_preds = []

for x_batch, y_batch in test_data:
    preds = att_lstm.predict(x_batch)
    preds = (preds > 0.5).astype(int).flatten()
    all_preds.extend(preds)
    all_labels.extend(y_batch.numpy())

print(classification_report(all_labels, all_preds))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 471ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 659ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [19]:
att_Blstm = tf.keras.Sequential([
  tf.keras.layers.Embedding(len(tokenizer.vocab), 128),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)),
  tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(128, activation='relu')),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)),
  tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(128, activation='relu')),
  tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.3, recurrent_dropout=0.3)),
  Attention(),
  tf.keras.layers.Dropout(0.5),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

# att_Blstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy', f1_m])
att_Blstm.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy', f1_m]
)
print(att_Blstm.summary())

att_Blstm.fit(train_data, epochs=10, validation_data = test_data, class_weight={1:4, 0:1})

loss_test, acc_test, f1_test = att_Blstm.evaluate(test_data)
print("Loss:", loss_test, "Accuracy:", acc_test, "F1:", f1_test)

None
Epoch 1/10
    217/Unknown [1m75s[0m 245ms/step - accuracy: 0.3811 - f1_m: 0.3631 - loss: 1.1897



[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 268ms/step - accuracy: 0.3807 - f1_m: 0.3632 - loss: 1.1898 - val_accuracy: 0.1429 - val_f1_m: 0.2337 - val_loss: 0.7371
Epoch 2/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 257ms/step - accuracy: 0.2565 - f1_m: 0.3911 - loss: 1.1679 - val_accuracy: 0.5657 - val_f1_m: 0.1993 - val_loss: 0.7905
Epoch 3/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 258ms/step - accuracy: 0.6654 - f1_m: 0.5566 - loss: 0.9062 - val_accuracy: 0.6086 - val_f1_m: 0.2054 - val_loss: 0.7925
Epoch 4/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 256ms/step - accuracy: 0.8511 - f1_m: 0.6844 - loss: 0.6774 - val_accuracy: 0.5836 - val_f1_m: 0.2174 - val_loss: 1.0865
Epoch 5/10
[1m217/217[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 256ms/step - accuracy: 0.8461 - f1_m: 0.7446 - loss: 0.51

In [20]:
all_labels = []
all_preds = []

for x_batch, y_batch in test_data:
    preds = att_Blstm.predict(x_batch)
    preds = (preds > 0.5).astype(int).flatten()
    all_preds.extend(preds)
    all_labels.extend(y_batch.numpy())

print(classification_report(all_labels, all_preds))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/s