# 01 - BERT: Multi-task learning on TPU

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'arabic-youtube-comments-by-khalaya:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4605500%2F7852763%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240524%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240524T231813Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D85525cb905776cf0a1cdbe2eac8d0031abf975fd145c664935453bd98ea51773a96b18342eac576a8229b4149c579c6cc6087b66f558085bf0c1233beff83e0a2e99b975fde18884a7af19ca7759572e113a53d8850ede5a1fc8640a955d2c8d8afbbda81cd7071ecc381823ff349ef4a2d18341215a546b54b6dd642675f2561eccea88dea7e09b7ee829fd46f1e31f34bdec4f739b6ab2b78581eb931c01d5d69863a890ba44308f997bac8e64f14fe5e6969612c62e5c626c604c9ea9b8970ef48b6c4752d9ad6077fee33250b666fbb1a278c4b1eea71ebf7bb3bb94db25526c32012da98f51ee0b37f1e98f86ea3a84274c5b7583228db8235e57d17c38'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING
# and unpack it into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only; the URL part is percent-encoded.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be absent; the progress bar then stays empty
            # instead of crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            chunk = fileres.read(CHUNK_SIZE)
            while len(chunk) > 0:
                dl += len(chunk)
                tfile.write(chunk)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                chunk = fileres.read(CHUNK_SIZE)
            # tarfile re-opens the file by name, so buffered bytes must be
            # on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would
                # shadow the module and break the next loop iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is handled before its base class OSError.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Failed to load (likely expired) https://storage.googleapis.com/kaggle-data-sets/4605500/7852763/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240524%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240524T231813Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=85525cb905776cf0a1cdbe2eac8d0031abf975fd145c664935453bd98ea51773a96b18342eac576a8229b4149c579c6cc6087b66f558085bf0c1233beff83e0a2e99b975fde18884a7af19ca7759572e113a53d8850ede5a1fc8640a955d2c8d8afbbda81cd7071ecc381823ff349ef4a2d18341215a546b54b6dd642675f2561eccea88dea7e09b7ee829fd46f1e31f34bdec4f739b6ab2b78581eb931c01d5d69863a890ba44308f997bac8e64f14fe5e6969612c62e5c626c604c9ea9b8970ef48b6c4752d9ad6077fee33250b666fbb1a278c4b1eea71ebf7bb3bb94db25526c32012da98f51ee0b37f1e98f86ea3a84274c5b7583228db8235e57d17c38 to path /kaggle/input/arabic-youtube-comments-by-khalaya
Data source import complete.


# Import libs

In [None]:
%%capture
!pip install tensorflow==2.15.0
!pip install transformers==4.37.2

In [None]:
import tensorflow as tf
from transformers import AutoTokenizer,TFAutoModelForMaskedLM,TFAutoModelForSequenceClassification
import pandas as pd
import numpy as np
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split,KFold
from sklearn.metrics import confusion_matrix,f1_score,classification_report,auc,roc_curve,RocCurveDisplay,precision_score,recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm import tqdm
import warnings
# Suppress FutureWarning messages
import logging, os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.chained_assignment = None

In [None]:
# Turn on the "auto_mixed_precision" graph-optimizer option for this session.
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
print('Mixed precision enabled')

Mixed precision enabled


In [None]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)

# Locate the TPU; TPUClusterResolver raises ValueError when none is attached.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError as err:
    # RuntimeError (not a bare BaseException) so ordinary `except Exception`
    # handlers see it; the original cause is kept in the traceback.
    raise RuntimeError('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!') from err

# Connect to the TPU cluster, initialize it and build the strategy used by
# the training cells.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
tpu_strategy = tf.distribute.TPUStrategy(tpu)

Tensorflow version 2.15.0


# Hyperparameters

In [None]:
# First trial
# Number of epochs passed to model.fit; also sizes the one-cycle schedule.
EPOCHS = 7
# lr_max handed to OneCycleScheduler.
LEARNING_RATE_MAX = 2e-5
# Initial AdamW learning rate; OneCycleScheduler sets the optimizer's lr
# itself when training begins.
LEARNING_RATE = 2e-5
# Passed to OneCycleScheduler as phase_1_pct (fraction of steps in phase 1).
PCT = 0.02
# batch_size used when building the train/validation datasets with get_ds.
BATCH_SIZE = 512
# weight_decay of the AdamW optimizer.
WD = 0.001
# max_padding given to get_ds (tokenizer max_length).
MAX_LENGTH = 128
# Rate of the Dropout layer in get_model.
DROP_OUT = 0.1

# Functions

In [None]:
def f_beta_score(y_true, y_pred, beta=1):
    """F-beta score computed with Keras backend ops.

    Products and predictions are clipped to [0, 1] and rounded before being
    summed, so the counts are taken over every element of the tensors.

    Args:
        y_true: Ground-truth tensor with values in {0, 1}.
        y_pred: Prediction tensor; values are rounded to 0/1.
        beta: Weight of recall relative to precision. The default of 1 gives
            the F1 score, which is what the original implementation returned.

    Returns:
        Scalar tensor ``(1 + beta^2) * P * R / (beta^2 * P + R)``, with
        ``K.epsilon()`` added to each denominator to avoid division by zero.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (actual_positives + K.epsilon())

    # The general F-beta weighting uses beta squared; (beta + 1) with an
    # unweighted denominator is only correct when beta == 1.
    beta_sq = beta ** 2
    return (1 + beta_sq) * ((precision * recall) / (beta_sq * precision + recall + K.epsilon()))

In [None]:
def get_ds(data,Xcol,ycol,max_padding,tokenizer, batch_size=32):
    """Tokenize the text column and build a batched ``tf.data.Dataset``.

    Each example's label is a 12-wide vector made of three one-hot groups:
    columns 0-3 sentiment, 4-9 speech act, 10-11 sarcasm.

    Args:
        data: DataFrame holding the text column and the label columns.
        Xcol: Name of the text column.
        ycol: List of label column names; must contain 'sentiment',
            'speech_act' and 'sarcasm' (their order does not matter).
        max_padding: ``max_length`` used for truncation and padding.
        tokenizer: Callable invoked as ``tokenizer(list_of_texts,
            truncation=True, padding='max_length', max_length=max_padding)``.
        batch_size: Batch size of the returned dataset.

    Returns:
        ``tf.data.Dataset`` of ``(tokenizer_output_dict, label_vector)``
        pairs, batched by ``batch_size``.

    Raises:
        KeyError: If a sentiment or speech-act value is not in the encoders.
    """
    speech_act_encoder = {'Expression':0,'Assertion':1,'Question':2,'Recommendation':3,'Request':4,'Miscellaneous':5}
    sentiment_encoder = {'Positive':0,'Neutral':1,'Negative':2,'Mixed':3}

    X = data[Xcol]
    # Explicit copy: the encoding below must never write into the caller's frame.
    y = data[ycol].copy()

    y['speech_act'] = y['speech_act'].apply(lambda x:speech_act_encoder[x])
    y['sentiment'] = y['sentiment'].apply(lambda x:sentiment_encoder[x])
    y['sarcasm'] = y['sarcasm'].astype(int)

    def transform(frame):
        # Select the label columns by name so the one-hot layout does not
        # depend on the order in which the caller listed `ycol`.
        arr = frame[['sentiment','speech_act','sarcasm']].values
        rows = np.arange(arr.shape[0])
        result = np.zeros([arr.shape[0],12])
        # Writing through the column slices keeps NumPy's bounds check per
        # group: an out-of-range class index raises instead of spilling into
        # a neighbouring group.
        result[:,0:4][rows, arr[:,0]] = 1
        result[:,4:10][rows, arr[:,1]] = 1
        result[:,10:12][rows, arr[:,2]] = 1
        return result

    y = transform(y)
    # Exactly one active class per task and row.
    assert y.shape[0]*3 == np.sum(y)
    X, y = X.tolist(), y.tolist()

    X = tokenizer(X, truncation=True, padding='max_length',max_length=max_padding)

    data = tf.data.Dataset.from_tensor_slices((
        dict(X),
        y
    ))

    data = data.batch(batch_size)
    return data

In [None]:
class Categorical_loss(tf.keras.losses.Loss):
    """Sum of three categorical focal losses, one per task.

    The loss reads fixed column ranges of the label / prediction tensors:
    0-3 for sentiment, 4-9 for speech act and 10-11 for sarcasm. Each task
    loss is averaged with ``tf.reduce_mean`` and the three means are added.

    Args:
        speech_act_alpha: ``alpha`` for the speech-act focal loss.
        sentiment_alpha: ``alpha`` for the sentiment focal loss.
        sarcasm_alpha: ``alpha`` for the sarcasm focal loss.
        dangerous_alpha: Accepted for interface compatibility; not used.
        reduction: Reduction given to this loss and to each inner loss.
        name: Name of the loss.
    """

    def __init__(self,speech_act_alpha,
                 sentiment_alpha,
                 sarcasm_alpha,
                 dangerous_alpha,reduction=tf.keras.losses.Reduction.NONE,
                 name='Categorical_loss',):
        super().__init__(reduction=reduction, name=name)
        focal = tf.keras.losses.CategoricalFocalCrossentropy
        self.cce_sen = focal(reduction=reduction,alpha=sentiment_alpha)
        self.cce_sa = focal(reduction=reduction,alpha=speech_act_alpha)
        self.cce_sar = focal(reduction=reduction,alpha=sarcasm_alpha)

    def call(self, y_true, y_pred):
        def task_mean(loss_fn, first, last):
            # Mean focal loss over the columns [first, last) of both tensors.
            return tf.reduce_mean(loss_fn(y_true[:, first:last], y_pred[:, first:last]))

        sa_loss = task_mean(self.cce_sa, 4, 10)
        sen_loss = task_mean(self.cce_sen, 0, 4)
        sar_loss = task_mean(self.cce_sar, 10, 12)

        return sa_loss + sen_loss + sar_loss
class Categorical_loss_sentiment(tf.keras.losses.Loss):
    """Mean categorical cross-entropy over the sentiment columns (0-3)."""

    def __init__(self, reduction=tf.keras.losses.Reduction.NONE, name='Categorical_loss_sentiment'):
        super().__init__(reduction=reduction, name=name)
        # The inner loss uses the same reduction as this wrapper.
        self.cce = tf.keras.losses.CategoricalCrossentropy(reduction=reduction)

    def call(self, y_true, y_pred):
        true_sentiment, pred_sentiment = y_true[:, 0:4], y_pred[:, 0:4]
        return tf.reduce_mean(self.cce(true_sentiment, pred_sentiment))

class Categorical_loss_sarcasm(tf.keras.losses.Loss):
    """Mean categorical cross-entropy over the sarcasm columns (10-11)."""

    def __init__(self, reduction=tf.keras.losses.Reduction.NONE, name='Categorical_loss_sarcasm'):
        super().__init__(reduction=reduction, name=name)
        # The inner loss uses the same reduction as this wrapper.
        self.cce = tf.keras.losses.CategoricalCrossentropy(reduction=reduction)

    def call(self, y_true, y_pred):
        true_sarcasm, pred_sarcasm = y_true[:, 10:12], y_pred[:, 10:12]
        return tf.reduce_mean(self.cce(true_sarcasm, pred_sarcasm))

class Categorical_loss_speech_act(tf.keras.losses.Loss):
    """Mean categorical cross-entropy over the speech-act columns (4-9)."""

    def __init__(self, reduction=tf.keras.losses.Reduction.NONE, name='Categorical_loss_speech_act'):
        super().__init__(reduction=reduction, name=name)
        # The inner loss uses the same reduction as this wrapper.
        self.cce = tf.keras.losses.CategoricalCrossentropy(reduction=reduction)

    def call(self, y_true, y_pred):
        true_speech_act, pred_speech_act = y_true[:, 4:10], y_pred[:, 4:10]
        return tf.reduce_mean(self.cce(true_speech_act, pred_speech_act))

class Categorical_loss_dangerous(tf.keras.losses.Loss):
    """Mean categorical cross-entropy over the columns from index 12 onward.

    NOTE(review): get_ds and get_model in this notebook produce 12 columns,
    so this slice would be empty for them; the class is not used by the
    visible training cells.
    """

    def __init__(self, reduction=tf.keras.losses.Reduction.NONE, name='Categorical_loss_dangerous'):
        super().__init__(reduction=reduction, name=name)
        # The inner loss uses the same reduction as this wrapper.
        self.cce = tf.keras.losses.CategoricalCrossentropy(reduction=reduction)

    def call(self, y_true, y_pred):
        true_dangerous, pred_dangerous = y_true[:, 12:], y_pred[:, 12:]
        return tf.reduce_mean(self.cce(true_dangerous, pred_dangerous))


In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import logging

logging.getLogger('tensorflow').setLevel(logging.ERROR)

from tensorflow.keras.callbacks import Callback

class CosineAnnealer:
    """Cosine interpolation from ``start`` to ``end`` over ``steps`` calls.

    Every call to :meth:`step` advances an internal counter ``n`` and returns
    ``end + (start - end) / 2 * (cos(pi * n / steps) + 1)``.
    """

    def __init__(self, start, end, steps):
        self.start, self.end, self.steps = start, end, steps
        self.n = 0  # number of step() calls made so far

    def step(self):
        """Advance by one step and return the annealed value."""
        self.n += 1
        progress = self.n / self.steps
        cosine_term = np.cos(np.pi * progress) + 1
        half_range = (self.start - self.end) / 2.
        return self.end + half_range * cosine_term


class OneCycleScheduler(Callback):
    """Keras callback applying a two-phase cosine schedule to lr and momentum.

    Phase 1 (the first ``steps * phase_1_pct`` batches) anneals the learning
    rate from ``lr_max / div_factor`` up to ``lr_max`` and the momentum from
    ``mom_max`` down to ``mom_min``. Phase 2 (the remaining steps) anneals
    the learning rate from ``lr_max`` to ``lr_max / (div_factor * 100)`` and
    the momentum back to ``mom_max``.

    Values are read from and written to ``self.model.optimizer.lr`` and
    ``self.model.optimizer.momentum``; an optimizer lacking one of these
    attributes is skipped for that quantity.
    """

    def __init__(self, lr_max, steps, mom_min=0.85, mom_max=0.95, phase_1_pct=0.25, div_factor=2.):
        super().__init__()
        self.phase_1_steps = steps * phase_1_pct
        self.phase_2_steps = steps - self.phase_1_steps
        self.phase = 0
        self.step = 0

        lr_start = lr_max / div_factor
        lr_final = lr_max / (div_factor * 1e2)
        warm_up = [CosineAnnealer(lr_start, lr_max, self.phase_1_steps),
                   CosineAnnealer(mom_max, mom_min, self.phase_1_steps)]
        cool_down = [CosineAnnealer(lr_max, lr_final, self.phase_2_steps),
                     CosineAnnealer(mom_min, mom_max, self.phase_2_steps)]
        # One [lr annealer, momentum annealer] pair per phase.
        self.phases = [warm_up, cool_down]

        # History of the values seen at the start of every batch.
        self.lrs = []
        self.moms = []

    def on_train_begin(self, logs=None):
        self.phase = 0
        self.step = 0

        self.set_lr(self.lr_schedule().start)
        self.set_momentum(self.mom_schedule().start)

    def on_train_batch_begin(self, batch, logs=None):
        self.lrs.append(self.get_lr())
        self.moms.append(self.get_momentum())

    def on_train_batch_end(self, batch, logs=None):
        self.step += 1
        if self.step >= self.phase_1_steps:
            self.phase = 1

        self.set_lr(self.lr_schedule().step())
        self.set_momentum(self.mom_schedule().step())

    def _read_hyper(self, attr):
        # Current value of optimizer.<attr>, or None when it does not exist.
        try:
            return tf.keras.backend.get_value(getattr(self.model.optimizer, attr))
        except AttributeError:
            return None

    def _write_hyper(self, attr, value):
        # Assign optimizer.<attr>; silently skipped when it does not exist.
        try:
            tf.keras.backend.set_value(getattr(self.model.optimizer, attr), value)
        except AttributeError:
            pass # ignore

    def get_lr(self):
        return self._read_hyper('lr')

    def get_momentum(self):
        return self._read_hyper('momentum')

    def set_lr(self, lr):
        self._write_hyper('lr', lr)

    def set_momentum(self, mom):
        self._write_hyper('momentum', mom)

    def lr_schedule(self):
        return self.phases[self.phase][0]

    def mom_schedule(self):
        return self.phases[self.phase][1]

    def plot(self):
        """Plot the recorded learning rates and momenta side by side."""
        for position, (values, title) in enumerate(
                [(self.lrs, 'Learning Rate'), (self.moms, 'Momentum')], start=1):
            ax = plt.subplot(1, 2, position)
            ax.plot(values)
            ax.set_title(title)

In [None]:
def get_model():
    """Build the multi-task classifier on top of UBC-NLP/MARBERTv2.

    The final hidden state at sequence position 0 (named ``cls`` below;
    presumably the [CLS] token) goes through dropout and then into three
    independent heads of three 768-unit ReLU layers plus a softmax layer:
    sentiment (4 classes), speech act (6 classes) and sarcasm (2 classes).

    Returns:
        ``tf.keras.Model`` with inputs ``input_ids``, ``token_type_ids`` and
        ``attention_mask``, whose output concatenates the three softmax
        vectors in the order sentiment, speech act, sarcasm (12 columns).
    """
    def build_head(features, prefix, n_classes):
        # Layer names: <prefix>, <prefix>2, <prefix>3, then <prefix>_out.
        hidden = features
        for suffix in ('', '2', '3'):
            hidden = tf.keras.layers.Dense(768,activation='relu',name=prefix + suffix)(hidden)
        return tf.keras.layers.Dense(n_classes,activation='softmax',name=prefix + '_out')(hidden)

    model = TFAutoModelForMaskedLM.from_pretrained("UBC-NLP/MARBERTv2",name='BERT')
    input_ids = tf.keras.Input(shape=(None, ),dtype='int32',name='input_ids')
    token_type_ids = tf.keras.Input(shape=(None, ),dtype='int32',name='token_type_ids')
    attention_mask = tf.keras.Input(shape=(None, ), dtype='int32',name='attention_mask')

    # Hidden states are requested explicitly so the last one can be sliced.
    transformer = model(input_ids,attention_mask, token_type_ids,output_hidden_states=True)
    cls = transformer.hidden_states[-1][:,0,:]
    drop_out = tf.keras.layers.Dropout(DROP_OUT)(cls)

    sentiment = build_head(drop_out, 'sentiment', 4)
    speech_act = build_head(drop_out, 'speech_act', 6)
    sarcasm = build_head(drop_out, 'sarcasm', 2)

    output = tf.keras.layers.Concatenate(axis = -1)([sentiment,speech_act,sarcasm])

    return tf.keras.Model(inputs=[input_ids,token_type_ids, attention_mask], outputs=output)

In [None]:
import pandas as pd


In [None]:
# Load the comments dataset from the Kaggle input directory populated by the
# data-source import cell at the top of the notebook.
data = pd.read_csv('/kaggle/input/arabic-youtube-comments-by-khalaya/data.csv')

In [None]:
# Normalize misspelled speech-act labels to their canonical spelling.
data['speech_act'] = (
    data['speech_act']
    .str.replace("Recomendation","Recommendation")
    .str.replace("Recommmendation","Recommendation")
    .str.replace("Recommenation","Recommendation")
    .str.replace("Experssion","Expression")
)

In [None]:
# Number of rows per speech_act label; used below to find rare labels.
speech_act_counts = data.value_counts('speech_act')

In [None]:
# Drop the rows whose speech_act label occurs fewer than 100 times. The mask
# is built from the speech_act column only: indexing with a frame-wide
# `data.isin(...)` mask would blank matching cells in every column instead of
# removing rows.
data = data[~data['speech_act'].isin(list(speech_act_counts[speech_act_counts < 100].index))]

In [None]:
# Focal-loss alpha per speech-act class: 1 - relative frequency. value_counts
# sorts by frequency, so the result is re-ordered to the class indices used by
# the encoder in get_ds (Expression=0 ... Miscellaneous=5); a class with no
# rows gets alpha 1.
speech_act_alpha = (
    1 - data['speech_act']
    .value_counts(normalize=True)
    .reindex(['Expression','Assertion','Question','Recommendation','Request','Miscellaneous'], fill_value=0)
).tolist()

In [None]:
# Focal-loss alpha per sentiment class: 1 - relative frequency. value_counts
# sorts by frequency, so the result is re-ordered to the class indices used by
# the encoder in get_ds (Positive=0, Neutral=1, Negative=2, Mixed=3); a class
# with no rows gets alpha 1.
sentiment_alpha = (
    1 - data['sentiment']
    .value_counts(normalize=True)
    .reindex(['Positive','Neutral','Negative','Mixed'], fill_value=0)
).tolist()

In [None]:
# Focal-loss alpha per sarcasm class: 1 - relative frequency, ordered as
# [False, True] to match the 0/1 indices produced by astype(int) in get_ds
# rather than the frequency order returned by value_counts.
sarcasm_alpha = (
    1 - data['sarcasm']
    .value_counts(normalize=True)
    .reindex([False, True], fill_value=0)
).tolist()

In [None]:
# Alpha for the 'dangerous' label: 1 - relative frequency, ordered as
# [False, True] instead of by frequency, consistent with the other alpha
# lists. Categorical_loss accepts this value but does not use it.
dangerous_alpha = (
    1 - data['dangerous']
    .value_counts(normalize=True)
    .reindex([False, True], fill_value=0)
).tolist()

In [None]:
# Keep only the rows that have a speech_act label.
data = data.loc[data['speech_act'].notna()]

In [None]:
# Shuffle all rows. The seed makes the row order, and therefore the
# train/validation/test splits derived from it, reproducible across runs;
# 42 matches the random_state used by the splits.
data = data.sample(frac=1, random_state=42)

In [None]:
# Preview the shuffled frame.
data

Unnamed: 0,title,comment,video_id,channel_id,sentiment,sarcasm,speech_act,dangerous,sentiment_reasoning,sarcasm_reasoning,speech_act_reasoning,channel_name
69821,«مدائن صالح» مسمى خاطئ | #بودكاست_مربع,سواءً مدائن صالح والا غيره المنطقة مر بها وان...,cUh7fvj2mQc,UCwjLh640nGXSGa9iHRS31ag,Negative,False,Expression,False,The comment suggests rushing through the area ...,,,thmanyahPodcasts
67275,ضيوف لم نراهم في فنجان | #بودكاست_فنجان,الدحيح احمد هبول هذا تافه واسائة لكم استقباله,1LW_ukrku5g,UCwjLh640nGXSGa9iHRS31ag,Negative,False,Expression,False,The commenter expresses a negative opinion abo...,,The commenter is expressing their negative opi...,thmanyahPodcasts
9258,مواطن مع وقف التنفيذ | بدون ورق 108 | حجاج فهد...,شوفو لقاءه مع المديفر وتعرفونه على حقيقته . لا...,ukbiDTahq1I,UC7mCgzz-LYRt-a3mCvUbccg,Negative,False,Recommendation,False,The commenter is expressing a negative opinion...,,The commenter is making a recommendation to wa...,BidonWaraq
5776,كيف تعيش في اليابان؟ | بدون ورق 80 | فيصل السالم,الحلقه ممتعه جدًا ولا تحس بالوقت،..نبي حلقه ثا...,Hq-51kLlOgM,UC7mCgzz-LYRt-a3mCvUbccg,Positive,False,Request,False,The commenter finds the episode very enjoyable...,,The commenter is requesting for another episod...,BidonWaraq
33976,بودكاست دكة | قوة الهلال رغم الغيابات | عودة ب...,حسافه معاذ مو موجود 😔💔💔,v-IUSR3mbJs,UClVSGDODgPxFZKNvtj5AuAg,Negative,False,Expression,False,The commenter is expressing regret and sadness...,,The commenter is expressing feelings of regret...,mmr_sa1
...,...,...,...,...,...,...,...,...,...,...,...,...
5642,بين الإبل والتكنولوجيا | بدون ورق 93 | عبدالله...,اتمنى عدم الحديث باللغه الانجليزيه لأنه يثر ال...,Zl8TQ_c1ez0,UC7mCgzz-LYRt-a3mCvUbccg,Negative,False,Assertion,False,Feeling disgusted when others speak in a diffe...,,Expressing strong opinion against using Englis...,BidonWaraq
66036,هل #ميسي تقبّل البشت العربي؟ | #بودكاست_مربع ...,ومن يكون هذا حتى يرضى او مايرضى.كبروا عقولكم,sQlPq0Ei_to,UCwjLh640nGXSGa9iHRS31ag,Neutral,False,Expression,False,,,,thmanyahPodcasts
13035,ايران والخليج | بدون ورق 100 | د. عبدالله فهد ...,النواب ذوم قادوا الشعب لبسوه في الحيطة وتهجروا,LyefkxZi0H4,UC7mCgzz-LYRt-a3mCvUbccg,Negative,False,Assertion,False,The commenter is expressing dissatisfaction wi...,,The commenter is stating a fact or opinion abo...,BidonWaraq
42394,اغنية شونق وباور 🔥 | #WeArePOWR,اخيرن رجعو احلا اغنية فلعالم,4ke2HyntAbY,UCm6dEXyAMIy0njEOW-suLww,Positive,False,Expression,False,The comment expresses happiness and approval r...,,The comment is expressing feelings and opinions.,POWR-Esports


In [None]:
# Tokenizer for the same "UBC-NLP/MARBERTv2" checkpoint that get_model loads.
# Requires the import cell (AutoTokenizer) to have been run first.
tokenizer = AutoTokenizer.from_pretrained("UBC-NLP/MARBERTv2")

NameError: name 'AutoTokenizer' is not defined

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    data['comment'],
    data[['sentiment','speech_act','sarcasm']],
    test_size=0.1, random_state=42, shuffle=True,
)
# Then take 10% of the remaining rows as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1, random_state=42, shuffle=True,
)

In [None]:
# Preview the training comments.
X_train

38627                                              Sxd sxd
65663    الله يرحم والديك على عيني وراسي اهل القصيم كله...
42760                               الله نزلت ثانيييهه 💔💔😭
46355    ترا مو غصب يقاطعو ومو على كيفكم ومزاجكم تخلون ...
27210    الله متع اسد السنة بالصحة والعافية الى إن يلقا...
                               ...                        
14566    عبد الله النفيسي شخص كذاب لي متابعة منذ القدم ...
37612                ديمممممممم وربييييي عجبنييييي الطرببب
38385                                  دييييم ماشاء الله 👌
20026    يعني الاستاذ المؤرخ مهنا حمد المهنا  يتم تعريف...
886                                   حلقة أسطورية تاريخية
Name: comment, Length: 56336, dtype: object

In [None]:
# Preview the training labels (still in their raw, un-encoded form).
y_train

Unnamed: 0,sentiment,speech_act,sarcasm
38627,Neutral,Miscellaneous,False
65663,Neutral,Expression,False
42760,Negative,Expression,False
46355,Negative,Expression,False
27210,Positive,Expression,False
...,...,...,...
14566,Negative,Assertion,False
37612,Positive,Expression,False
38385,Positive,Expression,False
20026,Negative,Assertion,False


In [None]:
# Build the training and validation datasets with identical settings; the
# training set is built first, then the validation set.
train_tensor, val_tensor = (
    get_ds(pd.concat([features, labels],axis=1),
           Xcol='comment',
           ycol=['sentiment','speech_act','sarcasm'],
           max_padding=MAX_LENGTH,
           tokenizer=tokenizer,
           batch_size=BATCH_SIZE)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

NameError: name 'MAX_LENGTH' is not defined

In [None]:
# One-cycle schedule spanning every training batch of every epoch.
lr_schedule = OneCycleScheduler(LEARNING_RATE_MAX, len(train_tensor) * EPOCHS,phase_1_pct=PCT)

# Model, optimizer and compile step all live inside the TPU strategy scope so
# that every variable (including the optimizer's) is created under it.
with tpu_strategy.scope():
    model = get_model()
    optimizer = tf.keras.optimizers.AdamW(learning_rate=LEARNING_RATE,epsilon=1e-8,beta_1=0.9,beta_2=0.999,weight_decay=WD)
    model.compile(optimizer = optimizer,
                loss = Categorical_loss(speech_act_alpha=speech_act_alpha,
                 sentiment_alpha=sentiment_alpha,
                 sarcasm_alpha=sarcasm_alpha,
                 dangerous_alpha=dangerous_alpha),
                metrics = [f_beta_score,Categorical_loss_speech_act(),Categorical_loss_sentiment(),Categorical_loss_sarcasm()]
                )

# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()
model.fit(train_tensor,validation_data = val_tensor,epochs=EPOCHS,callbacks=[lr_schedule])

config.json:   0%|          | 0.00/757 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/652M [00:00<?, ?B/s]

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, None)]               0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                                  
 token_type_ids (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                              

<keras.src.callbacks.History at 0x7f1d49dfcb80>

**168267436 (641.89 MB)**

In [None]:
# Build the test dataset. batch_size=1 is relied on further down, where the
# true labels are collected by taking element [0] of every batch.
test_tensor = get_ds(pd.concat([X_test,y_test],axis=1),
                      Xcol='comment',
                      ycol=['sentiment','speech_act','sarcasm'],
                      max_padding=MAX_LENGTH,
                      tokenizer=tokenizer,
                      batch_size=1)

In [None]:
# Run the trained model over the test dataset.
preds = model.predict(test_tensor)



In [None]:
# Re-stack the prediction rows into a single NumPy array.
preds = np.stack(list(preds))

In [None]:
# Gather the true label vector of every test example; each batch holds one
# example, so element [0] of the batch's label tensor is that example.
test = [labels.numpy()[0] for _, labels in test_tensor]

Exception ignored in: <function Executor.__del__ at 0x7f1e98c175b0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 46, in __del__
    self.wait()
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 65, in wait
    pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence


In [None]:
# Turn the list of label vectors into one 2-D array (one row per example).
test = np.stack(test)

In [None]:
# Sanity check: (number of test examples, 12 label columns).
test.shape

(6956, 12)

In [None]:
# Predicted class index per task: arg-max inside each task's column range
# (0-3 sentiment, 4-9 speech act, 10-11 sarcasm).
sentiment_pred = preds[:,0:4].argmax(axis=1)
speech_act_pred = preds[:,4:10].argmax(axis=1)
sarcasm_pred = preds[:,10:12].argmax(axis=1)

# True class index per task, decoded from the one-hot labels the same way.
sentiment_test = test[:,0:4].argmax(axis=1)
speech_act_test = test[:,4:10].argmax(axis=1)
sarcasm_test = test[:,10:12].argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 for each task, printed in the order
# sentiment, speech act, sarcasm (true labels first, predictions second).
print(classification_report(sentiment_test, sentiment_pred))
print(classification_report(speech_act_test, speech_act_pred))
print(classification_report(sarcasm_test, sarcasm_pred))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91      3719
           1       0.70      0.65      0.67      1101
           2       0.85      0.88      0.86      2129
           3       0.00      0.00      0.00         7

    accuracy                           0.86      6956
   macro avg       0.61      0.61      0.61      6956
weighted avg       0.86      0.86      0.86      6956

              precision    recall  f1-score   support

           0       0.93      0.82      0.87      4326
           1       0.70      0.84      0.76      1599
           2       0.77      0.83      0.80       338
           3       0.61      0.70      0.66       366
           4       0.68      0.78      0.73       241
           5       0.19      0.36      0.25        86

    accuracy                           0.81      6956
   macro avg       0.65      0.72      0.68      6956
weighted avg       0.83      0.81      0.82      6956

              precisio

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Persist the weights of the first-trial model to the working directory.
model.save_weights('model_3L4.h5')

# Change hyperparameters (second trial)

In [None]:
# Second trial: train for more epochs with a smaller batch size.
EPOCHS = 15 #was 7
# Passed to OneCycleScheduler as its first argument.
# NOTE(review): this is 100x smaller than LEARNING_RATE below — confirm which of
# the two values actually drives the optimizer once the scheduler callback runs.
LEARNING_RATE_MAX = 2e-7 #Was 2e-5
# Initial learning rate handed to the AdamW optimizer.
LEARNING_RATE = 2e-5
# Passed to OneCycleScheduler as phase_1_pct.
PCT = 0.02
# Only affects datasets that are built with get_ds(...) after this cell has run.
BATCH_SIZE = 256 #Was 512
# AdamW weight_decay.
WD = 0.001
# Passed to get_ds(...) as max_padding.
MAX_LENGTH = 128
# Presumably read inside get_model() — not referenced directly in these cells.
DROP_OUT = 0.1

In [None]:
# Rebuild the training/validation datasets so the BATCH_SIZE and MAX_LENGTH chosen
# for this trial are the ones actually used; otherwise the tensors built for the
# previous trial are silently reused and the new hyperparameters have no effect.
train_tensor = get_ds(pd.concat([X_train,y_train],axis=1),
                      Xcol='comment',
                      ycol=['sentiment','speech_act','sarcasm'],
                      max_padding=MAX_LENGTH,
                      tokenizer=tokenizer,
                      batch_size=BATCH_SIZE)
val_tensor = get_ds(pd.concat([X_val,y_val],axis=1),
                      Xcol='comment',
                      ycol=['sentiment','speech_act','sarcasm'],
                      max_padding=MAX_LENGTH,
                      tokenizer=tokenizer,
                      batch_size=BATCH_SIZE)

# Second argument is len(dataset) * EPOCHS — presumably the total number of
# training steps the one-cycle schedule should span.
lr_schedule = OneCycleScheduler(LEARNING_RATE_MAX, len(train_tensor) * EPOCHS,phase_1_pct=PCT)

# Create the model, optimizer, loss and metrics inside the strategy scope so that
# every variable they own is created under the same distribution strategy.
with tpu_strategy.scope():
    model = get_model()
    optimizer = tf.keras.optimizers.AdamW(learning_rate=LEARNING_RATE,epsilon=1e-8,beta_1=0.9,beta_2=0.999,weight_decay=WD)
    model.compile(optimizer = optimizer,
                loss = Categorical_loss(speech_act_alpha=speech_act_alpha,
                 sentiment_alpha=sentiment_alpha,
                 sarcasm_alpha=sarcasm_alpha,
                 dangerous_alpha=dangerous_alpha),
                metrics = [f_beta_score,Categorical_loss_speech_act(),Categorical_loss_sentiment(),Categorical_loss_sarcasm()]
                )

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
model.fit(train_tensor,validation_data = val_tensor,epochs=EPOCHS,callbacks=[lr_schedule])

config.json:   0%|          | 0.00/757 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/652M [00:00<?, ?B/s]

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, None)]               0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                                  
 token_type_ids (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                              

<keras.src.callbacks.History at 0x7aa855a42770>

**Parameters**

In [None]:
# Tokenize the held-out split with the current MAX_LENGTH. batch_size=1 keeps one
# example per dataset element, which the label-extraction cell below relies on.
test_tensor = get_ds(
    pd.concat([X_test, y_test], axis="columns"),
    Xcol="comment",
    ycol=["sentiment", "speech_act", "sarcasm"],
    tokenizer=tokenizer,
    max_padding=MAX_LENGTH,
    batch_size=1,
)

In [None]:
# Run inference over the whole test dataset (one example per step, since the
# dataset was built with batch_size=1).
preds = model.predict(test_tensor)



In [None]:
# Re-stack the per-example prediction rows into a single ndarray so it can be
# column-sliced per task below.
preds = np.stack(list(preds))

In [None]:
# Collect the ground-truth label vector of every test example, in dataset order.
# Each dataset element is indexed as (inputs, labels, ...); with batch_size=1 the
# labels carry a leading batch axis of length 1, which [0] removes.
# The dataset is iterated directly instead of going through list(test_tensor), so
# the tokenized inputs of every example are not all held in memory at once.
# NOTE(review): assumes test_tensor yields examples in the same order as it did
# for model.predict (i.e. get_ds does not reshuffle per iteration) — confirm.
test = [batch[1].numpy()[0] for batch in test_tensor]

Exception ignored in: <function Executor.__del__ at 0x7aa9a2000f70>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 46, in __del__
    self.wait()
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 65, in wait
    pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence


In [None]:
# Turn the list of per-example label vectors into one 2-D array
# (rows = examples, columns = concatenated one-hot task labels).
test = np.stack(test)

In [None]:
# Sanity check on the stacked ground-truth labels: (n_examples, n_label_columns).
test.shape

(6956, 12)

In [None]:
# Column layout shared by predictions and labels:
#   [0:4] sentiment, [4:10] speech act, [10:12] sarcasm.
# The "dangerous" head (columns 12:14) is disabled in this run.
# argmax over each task's columns turns the per-class scores into a class index.
sentiment_pred, speech_act_pred, sarcasm_pred = [
    np.argmax(preds[:, lo:hi], axis=1) for lo, hi in ((0, 4), (4, 10), (10, 12))
]

sentiment_test, speech_act_test, sarcasm_test = [
    np.argmax(test[:, lo:hi], axis=1) for lo, hi in ((0, 4), (4, 10), (10, 12))
]

In [None]:
# One report per task, printed in the order: sentiment, speech act, sarcasm.
# (The "dangerous" task is disabled, so it has no report.)
print(
    *(
        classification_report(y_true=truth, y_pred=guess)
        for truth, guess in (
            (sentiment_test, sentiment_pred),
            (speech_act_test, speech_act_pred),
            (sarcasm_test, sarcasm_pred),
        )
    ),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.73      0.80      0.76      3664
           1       0.47      0.07      0.12      1140
           2       0.61      0.79      0.69      2146
           3       0.00      0.00      0.00         6

    accuracy                           0.68      6956
   macro avg       0.45      0.41      0.39      6956
weighted avg       0.65      0.68      0.63      6956

              precision    recall  f1-score   support

           0       0.74      0.83      0.78      4356
           1       0.51      0.68      0.58      1570
           2       0.00      0.00      0.00       378
           3       0.00      0.00      0.00       314
           4       0.00      0.00      0.00       230
           5       0.00      0.00      0.00       108

    accuracy                           0.67      6956
   macro avg       0.21      0.25      0.23      6956
weighted avg       0.58      0.67      0.62      6956

              precisio

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Persist the second trial's weights under their own filename (the "2" prefix
# keeps them separate from the other runs' checkpoints).
model.save_weights('2model_3L4.h5')

Trial 3 - Hyperparameters

In [None]:
# Third trial: more epochs, smaller batches, shorter sequences, lower learning rate.
EPOCHS = 25 #was 15
# Passed to OneCycleScheduler as its first argument.
LEARNING_RATE_MAX = 2e-7
# Initial learning rate handed to the AdamW optimizer.
LEARNING_RATE = 2e-7
# Passed to OneCycleScheduler as phase_1_pct.
PCT = 0.02
# Only affects datasets that are built with get_ds(...) after this cell has run.
BATCH_SIZE = 128 #was 256
# AdamW weight_decay.
WD = 0.001
# Passed to get_ds(...) as max_padding.
MAX_LENGTH = 64 #was 128
# Presumably read inside get_model() — not referenced directly in these cells.
DROP_OUT = 0.1

In [None]:
# Rebuild the training and validation datasets so they pick up the BATCH_SIZE and
# MAX_LENGTH set for this trial. Both splits go through the same get_ds(...) call;
# only the (features, labels) frames differ.
train_tensor, val_tensor = [
    get_ds(
        pd.concat([features, labels], axis=1),
        Xcol='comment',
        ycol=['sentiment', 'speech_act', 'sarcasm'],
        max_padding=MAX_LENGTH,
        tokenizer=tokenizer,
        batch_size=BATCH_SIZE,
    )
    for features, labels in ((X_train, y_train), (X_val, y_val))
]

In [None]:
# Second argument is len(dataset) * EPOCHS — presumably the total number of
# training steps the one-cycle schedule should span.
lr_schedule = OneCycleScheduler(LEARNING_RATE_MAX, len(train_tensor) * EPOCHS,phase_1_pct=PCT)

# Create the model, optimizer, loss and metrics inside the strategy scope so that
# every variable they own is created under the same distribution strategy.
with tpu_strategy.scope():
    model = get_model()
    optimizer = tf.keras.optimizers.AdamW(learning_rate=LEARNING_RATE,epsilon=1e-8,beta_1=0.9,beta_2=0.999,weight_decay=WD)
    model.compile(optimizer = optimizer,
                loss = Categorical_loss(speech_act_alpha=speech_act_alpha,
                 sentiment_alpha=sentiment_alpha,
                 sarcasm_alpha=sarcasm_alpha,
                 dangerous_alpha=dangerous_alpha),
                metrics = [f_beta_score,Categorical_loss_speech_act(),Categorical_loss_sentiment(),Categorical_loss_sarcasm()]
                )

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
model.fit(train_tensor,validation_data = val_tensor,epochs=EPOCHS,callbacks=[lr_schedule])

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_ids (InputLayer)      [(None, None)]               0         []                            
                                                                                                  
 attention_mask (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                                  
 token_type_ids (InputLayer  [(None, None)]               0         []                            
 )                                                                                                
                                                                                            

<keras.src.callbacks.History at 0x7aa84a4cbeb0>

**Trainable parameters**

In [None]:
# Tokenize the held-out split with the current MAX_LENGTH. batch_size=1 keeps one
# example per dataset element, which the label-extraction cell below relies on.
test_tensor = get_ds(
    pd.concat([X_test, y_test], axis="columns"),
    Xcol="comment",
    ycol=["sentiment", "speech_act", "sarcasm"],
    tokenizer=tokenizer,
    max_padding=MAX_LENGTH,
    batch_size=1,
)

In [None]:
# Run inference over the whole test dataset (one example per step, since the
# dataset was built with batch_size=1).
preds = model.predict(test_tensor)



In [None]:
# Re-stack the per-example prediction rows into a single ndarray so it can be
# column-sliced per task below.
preds = np.stack(list(preds))

In [None]:
# Collect the ground-truth label vector of every test example, in dataset order.
# Each dataset element is indexed as (inputs, labels, ...); with batch_size=1 the
# labels carry a leading batch axis of length 1, which [0] removes.
# The dataset is iterated directly instead of going through list(test_tensor), so
# the tokenized inputs of every example are not all held in memory at once.
# NOTE(review): assumes test_tensor yields examples in the same order as it did
# for model.predict (i.e. get_ds does not reshuffle per iteration) — confirm.
test = [batch[1].numpy()[0] for batch in test_tensor]

Exception ignored in: <function Executor.__del__ at 0x7aa9a2000f70>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 46, in __del__
    self.wait()
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/executor.py", line 65, in wait
    pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence


In [None]:
# Turn the list of per-example label vectors into one 2-D array
# (rows = examples, columns = concatenated one-hot task labels).
test = np.stack(test)

In [None]:
# Sanity check on the stacked ground-truth labels: (n_examples, n_label_columns).
test.shape

(6956, 12)

In [None]:
# Column layout shared by predictions and labels:
#   [0:4] sentiment, [4:10] speech act, [10:12] sarcasm.
# The "dangerous" head (columns 12:14) is disabled in this run.
# argmax over each task's columns turns the per-class scores into a class index.
sentiment_pred, speech_act_pred, sarcasm_pred = [
    np.argmax(preds[:, lo:hi], axis=1) for lo, hi in ((0, 4), (4, 10), (10, 12))
]

sentiment_test, speech_act_test, sarcasm_test = [
    np.argmax(test[:, lo:hi], axis=1) for lo, hi in ((0, 4), (4, 10), (10, 12))
]

In [None]:
# One report per task, printed in the order: sentiment, speech act, sarcasm.
# (The "dangerous" task is disabled, so it has no report.)
print(
    *(
        classification_report(y_true=truth, y_pred=guess)
        for truth, guess in (
            (sentiment_test, sentiment_pred),
            (speech_act_test, speech_act_pred),
            (sarcasm_test, sarcasm_pred),
        )
    ),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.87      0.85      0.86      3664
           1       0.60      0.45      0.51      1140
           2       0.74      0.87      0.80      2146
           3       0.00      0.00      0.00         6

    accuracy                           0.79      6956
   macro avg       0.55      0.54      0.54      6956
weighted avg       0.78      0.79      0.78      6956

              precision    recall  f1-score   support

           0       0.86      0.79      0.82      4356
           1       0.56      0.81      0.66      1570
           2       0.72      0.75      0.73       378
           3       0.59      0.14      0.23       314
           4       0.66      0.55      0.60       230
           5       0.00      0.00      0.00       108

    accuracy                           0.74      6956
   macro avg       0.56      0.51      0.51      6956
weighted avg       0.75      0.74      0.73      6956

              precisio

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Save the third trial's weights under their own filename. The previous name,
# 'model_3L4.h5', is the file the first run's weights were written to, so reusing
# it here replaced that checkpoint; the "3" prefix follows the naming already
# used for the second trial ('2model_3L4.h5').
model.save_weights('3model_3L4.h5')