In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import transformers

import nltk
import re


from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve

plt.style.use('seaborn')

In [2]:
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.5.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [3]:
df = pd.read_csv('memotion_dataset_7k/labels.csv')
df.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
df = df.drop(columns = ['text_ocr', 'humour', 'sarcasm', 'offensive', 'motivational'])
df.head()

Unnamed: 0,image_name,text_corrected,overall_sentiment
0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,positive
1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,positive
2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,positive
3,image_4.png,10 Year Challenge - Sweet Dee Edition,positive
4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,neutral


In [4]:
meme_text = pd.DataFrame()
meme_text['text'] = df.text_corrected
meme_text['sentiment'] = df.overall_sentiment
meme_text.head()
cleaned = meme_text.copy()
cleaned.dropna(inplace=True)
cleaned.isnull().any()

text         False
sentiment    False
dtype: bool

In [5]:
cleaned.head()

Unnamed: 0,text,sentiment
0,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,positive
1,The best of #10 YearChallenge! Completed in le...,positive
2,Sam Thorne @Strippin ( Follow Follow Saw every...,positive
3,10 Year Challenge - Sweet Dee Edition,positive
4,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,neutral


In [6]:
from transformers import TFXLNetModel, XLNetTokenizer

In [10]:
xlnet_model = 'xlnet-large-cased'
xlnet_tokenizer = XLNetTokenizer.from_pretrained(xlnet_model)

In [11]:
def create_xlnet(mname):
    """ Creates the model. It is composed of the XLNet main block and then
    a classification head its added
    """
    # Define token ids as inputs
    word_inputs = tf.keras.Input(shape=(120,), name='word_inputs', dtype='int32')

    # Call XLNet model
    xlnet = TFXLNetModel.from_pretrained(mname)
    xlnet_encodings = xlnet(word_inputs)[0]

    # CLASSIFICATION HEAD 
    # Collect last step from last hidden state (CLS)
    doc_encoding = tf.squeeze(xlnet_encodings[:, -1:, :], axis=1)
    # Apply dropout for regularization
    doc_encoding = tf.keras.layers.Dropout(.1)(doc_encoding)
    # Final output 
    outputs = tf.keras.layers.Dense(3, activation='softmax', name='outputs')(doc_encoding)

    # Compile model
    model = tf.keras.Model(inputs=[word_inputs], outputs=[outputs])
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=2e-5), loss='categorical_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), ])

    return model

In [None]:
xlnet = create_xlnet(xlnet_model)

In [10]:
xlnet.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
word_inputs (InputLayer)     [(None, 120)]             0         
_________________________________________________________________
tfxl_net_model (TFXLNetModel TFXLNetModelOutput(last_h 360268800 
_________________________________________________________________
tf.__operators__.getitem (Sl (None, 1, 1024)           0         
_________________________________________________________________
tf.compat.v1.squeeze (TFOpLa (None, 1024)              0         
_________________________________________________________________
dropout_73 (Dropout)         (None, 1024)              0         
_________________________________________________________________
outputs (Dense)              (None, 5)                 5125      
Total params: 360,273,925
Trainable params: 360,273,925
Non-trainable params: 0
_______________________________________________

In [11]:
tweets = cleaned['text']
Y = pd.get_dummies(cleaned['sentiment']).values
#labels = cleaned['sentiment']

X_train, X_test, y_train, y_test = train_test_split(tweets, Y)

In [12]:
X_train.shape, y_train.shape

((5240,), (5240, 5))

In [13]:
def get_inputs(tweets, tokenizer, max_len=120):
    """ Gets tensors from text using the tokenizer provided"""
    inps = [tokenizer.encode_plus(t, max_length=max_len, pad_to_max_length=True, add_special_tokens=True) for t in tweets]
    inp_tok = np.array([a['input_ids'] for a in inps])
    ids = np.array([a['attention_mask'] for a in inps])
    segments = np.array([a['token_type_ids'] for a in inps])
    return inp_tok, ids, segments

def warmup(epoch, lr):
    """Used for increasing the learning rate slowly, this tends to achieve better convergence.
    However, as we are finetuning for few epoch it's not crucial.
    """
    return max(lr +1e-6, 2e-5)

def plot_metrics(pred, true_labels):
    """Plots a ROC curve with the accuracy and the AUC"""
    acc = accuracy_score(true_labels, np.array(pred.flatten() >= .5, dtype='int'))
    fpr, tpr, thresholds = roc_curve(true_labels, pred)
    auc = roc_auc_score(true_labels, pred)

    fig, ax = plt.subplots(1, figsize=(8,8))
    ax.plot(fpr, tpr, color='red')
    ax.plot([0,1], [0,1], color='black', linestyle='--')
    ax.set_title(f"AUC: {auc}\nACC: {acc}");
    return fig

In [14]:
inp_tok, ids, segments = get_inputs(X_train, xlnet_tokenizer)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [15]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=4, min_delta=0.02, restore_best_weights=True),
    tf.keras.callbacks.LearningRateScheduler(warmup, verbose=0),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=1e-6, patience=2, verbose=0, mode='auto', min_delta=0.001, cooldown=0, min_lr=1e-6)
]

In [16]:
hist = xlnet.fit(x=inp_tok, y=y_train, epochs=1, batch_size=4, validation_split=.15, callbacks=callbacks)



ResourceExhaustedError: in user code:

    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\engine\training.py:855 train_function  *
        return step_function(self, iterator)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\engine\training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\engine\training.py:838 run_step  **
        outputs = model.train_step(data)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\engine\training.py:799 train_step
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:530 minimize
        return self.apply_gradients(grads_and_vars, name=name)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:636 apply_gradients
        self._create_all_weights(var_list)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:823 _create_all_weights
        self._create_slots(var_list)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\optimizer_v2\adam.py:124 _create_slots
        self.add_slot(var, 'm')
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:909 add_slot
        weight = tf_variables.Variable(
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:262 __call__
        return cls._variable_v2_call(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:244 _variable_v2_call
        return previous_getter(
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:67 getter
        return captured_getter(captured_previous, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3523 creator
        return next_creator(**kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:67 getter
        return captured_getter(captured_previous, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3523 creator
        return next_creator(**kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:67 getter
        return captured_getter(captured_previous, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3523 creator
        return next_creator(**kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:67 getter
        return captured_getter(captured_previous, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\eager\def_function.py:750 variable_capturing_scope
        v = UnliftedInitializerVariable(
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\variables.py:264 __call__
        return super(VariableMetaclass, cls).__call__(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\eager\def_function.py:293 __init__
        initial_value = initial_value()
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\keras\initializers\initializers_v2.py:151 __call__
        return array_ops.zeros(shape, dtype)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\array_ops.py:2911 wrapped
        tensor = fun(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\array_ops.py:2972 zeros
        output = fill(shape, constant(zero, dtype=dtype), name=name)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\array_ops.py:239 fill
        result = gen_array_ops.fill(dims, value, name=name)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:3367 fill
        _ops.raise_from_not_ok_status(e, name)
    C:\Users\hp\anaconda3\envs\Rohit\lib\site-packages\tensorflow\python\framework\ops.py:6897 raise_from_not_ok_status
        six.raise_from(core._status_to_exception(e.code, message), None)
    <string>:3 raise_from
        

    ResourceExhaustedError: OOM when allocating tensor with shape[1024,4096] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Fill]
