In [None]:
import numpy as np
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from transformers import BertTokenizer
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from keras.callbacks import Callback
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, GlobalAveragePooling1D, Dropout, LayerNormalization, MultiHeadAttention
from tensorflow.keras.layers import Layer, GRU, Bidirectional, Dense, Input, Reshape, GlobalAveragePooling1D
import nltk
from nltk.corpus import wordnet
import random
from lime.lime_text import LimeTextExplainer




nltk.download('punkt')
%matplotlib inline

In [None]:
# Load the raw comments workbook (path is relative to the notebook's working directory)
df = pd.read_excel('./bangla_online_comments_dataset.xlsx')
# Preview the first rows
df.head()

In [None]:
# Drop rows that contain a missing value (mutates df in place)
df.dropna(inplace=True)

In [None]:
# Class distribution of the raw labels
df['label'].value_counts()

In [None]:
# remove punctuation
# Literal substrings that the cleaning loop swaps for a single space via str.replace.
# Order matters for overlapping entries (e.g. "/::\)" is listed before "/::", "..." before ".").
# NOTE(review): str.replace matches literally, so entries written like "\u00C0-\u017F" are
# three-character strings rather than character ranges, and "\u1F600-\u1F64F" is parsed as
# U+1F60 followed by "0-..." because \u consumes exactly four hex digits. They are kept
# unchanged here; emoji are removed by the regex in remove_emoji instead.
remove_punctuations = [
    "/::\\)","/::","(-_-)","(*_*)","(>_<)",":)",";)",":P","xD","-_-","#","(>_<)","...",".",",",";",":","!","?","'","অ�", "অাবার", "।","?",
    "\"","-","_","/","\\","|","{","}","[","]","(",")","<",">","@","#","$","%","^","&","*","~","`","+","=","0","1","2","3","4","5","6","7","8","9","৳","০",
    "১","২","৩","৪","৫","৬","৭","৮","৯","\n","\t","\r","\f","\v","\u00C0-\u017F","\u2000-\u206F","\u25A0-\u25FF","\u2600-\u26FF","\u2B00-\u2BFF","\u3000-\u303F",
    "\uFB00-\uFB4F","\uFE00-\uFE0F","\uFE30-\uFE4F","\u1F600-\u1F64F","\u1F300-\u1F5FF","\u1F680-\u1F6FF","\u1F1E0-\u1F1FF","\u2600-\u26FF","\u2700-\u27BF",
    "\u1F300-\u1F5FF","\u1F900-\u1F9FF","\u1F600-\u1F64F","\u1F680-\u1F6FF","\u1F1E0-\u1F1FF","\u1F600-\u1F64F",
]
# reset index of the dataframe
# drop=True: the row-wise loops below only need labels 0..len(df)-1; keeping the old index
# as an extra 'index' column serves no purpose
df.reset_index(drop=True, inplace=True)

In [None]:
def _strip_listed_tokens(text):
    """Replace every entry of remove_punctuations found in text with a space, in list order."""
    for token in remove_punctuations:
        text = text.replace(token, ' ')
    return text

# Clean the whole column in one pass instead of writing cell by cell
df['comment'] = df['comment'].apply(_strip_listed_tokens)

In [None]:
# remove emoji
def remove_emoji(text):
    """Return text with every run of characters in the emoji/symbol ranges deleted.

    Note that the last range (U+24C2 to U+1F251) is very wide and also covers
    many non-emoji code points in between.
    """
    code_point_ranges = (
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
        u"\U00002702-\U000027B0"
        u"\U000024C2-\U0001F251"
    )
    emoji_pattern = re.compile("[" + code_point_ranges + "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r"", text)

In [None]:
# remove emoji
# Column-wise application of remove_emoji to every comment
df['comment'] = df['comment'].map(remove_emoji)

In [None]:
# remove english character
def remove_english_character(text):
    """Delete every run of ASCII letters (a-z, A-Z) from text; digits and other characters stay."""
    return re.sub("[a-zA-Z]+", r"", text)

In [None]:
# remove english character
# Column-wise application of remove_english_character to every comment
df['comment'] = df['comment'].map(remove_english_character)

In [None]:
# remove extra space
def remove_extra_space(text):
    """Collapse every run of whitespace in text into a single space.

    Leading and trailing whitespace is reduced to one space, not stripped.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    extra_space = re.compile(r"\s+")
    return extra_space.sub(r" ", text)

In [None]:
def remove_single_bengali_character(text):
    """Remove stand-alone single Bengali code points that sit between whitespace.

    The trailing whitespace is checked with a lookahead instead of being
    consumed, so it can also serve as the leading whitespace of the next match.
    Without that, every second character in a run such as " ক খ গ " survived.
    A single character at the very start or end of the string is left alone,
    as before.
    """
    # Regular expression pattern to match single Bengali characters
    single_character = re.compile(r'\s[ঀ-৿](?=\s)')
    return single_character.sub("", text)

# Identify data to check if the remove_single_bengali_character function works
# Show the first five comments before and after the filter (df itself is not modified here)
for sample in (df['comment'][i] for i in range(5)):
    print("Original data:-\n", sample)
    print("Processed data:-\n", remove_single_bengali_character(sample))
    print("-----------------------------------------------------------")


In [None]:
# Apply the single-character filter to every comment and preview the result
df['comment'] = df['comment'].apply(remove_single_bengali_character)
df.head()

In [None]:
# explore the datasets
def explore_data(data, n_samples=5):
    """Print a few sample rows and length statistics of the 'comment' column.

    Args:
        data: DataFrame with 'comment' (strings) and 'label' columns.
        n_samples: Maximum number of leading rows to print (default 5).

    Returns:
        None. Everything is written to stdout.
    """
    comments = data['comment']
    labels = data['label']

    # Positional access so frames with fewer rows or a non-0..n index do not raise KeyError
    for pos in range(min(n_samples, len(data))):
        print("Sample Comment:-\n", comments.iloc[pos])
        print("-----------------------------------------------------------")
        print("Sample Label:-\n", labels.iloc[pos])
        print("-----------------------------------------------------------")

    # analyse the length of text (length is counted in characters, not words)
    text_len = [len(text) for text in comments]
    if not text_len:
        # np.max / np.min / np.percentile raise on an empty sequence
        print("No comments to analyse.")
        return
    print("Average length of text:-", np.mean(text_len))
    print("Max length of text:-", np.max(text_len))
    print("Min length of text:-", np.min(text_len))
    print("Standard deviation of length of text:-", np.std(text_len))
    print("Median length of text:-", np.median(text_len))
    print("25 percentile of length of text:-", np.percentile(text_len, 25))
    print("50 percentile of length of text:-", np.percentile(text_len, 50))
    print("75 percentile of length of text:-", np.percentile(text_len, 75))
    print("100 percentile of length of text:-", np.percentile(text_len, 100))
    print("-----------------------------------------------------------")


# Print sample rows and character-length statistics for the cleaned comments
explore_data(df)

In [None]:
# remove extra space
# Column-wise application of remove_extra_space to every comment
df['comment'] = df['comment'].map(remove_extra_space)

In [None]:
# number unique words
# Vocabulary = distinct whitespace-separated tokens over all comments
unique_words = {word for comment in df['comment'] for word in comment.split()}

print(len(unique_words))

In [None]:
# total number of words
# Flat list of every whitespace-separated token, duplicates included
total_words = []
for comment in df['comment']:
    total_words.extend(comment.split())
print(len(total_words))

In [None]:
# Keep only the text and target columns; any other column is dropped from here on
df = df[['comment', 'label']]

In [None]:
# Balance the classes by randomly duplicating minority-class rows.
# random_state pins which rows get duplicated so a rerun yields the same balanced frame
# (42 matches the seed used for train_test_split later in this notebook).
# NOTE(review): the duplicates are created before the train/validation/test split, so
# copies of one comment can end up in several splits.
oversampler = RandomOverSampler(random_state=42)

X = df.drop(columns=['label'])
y = df['label']

X_resampled, y_resampled = oversampler.fit_resample(X, y)
balanced_df = pd.concat([X_resampled, y_resampled], axis=1)

print(balanced_df['label'].value_counts())

balanced_df.shape

In [None]:
def replace_synonyms(text):
    """Replace each word that has WordNet synonyms with one randomly chosen synonym.

    Words without any synset, or whose only lemma names equal the word itself,
    are kept unchanged. Tokens are re-joined with single spaces.

    NOTE(review): needs the NLTK 'wordnet' corpus, while the import cell only
    downloads 'punkt'. The lookup presumably uses WordNet's default (English)
    language, and Latin letters were stripped from the comments earlier, so
    this may never find a synonym for this dataset. Confirm.
    """
    augmented_text = []
    for word in text.split():
        candidates = [
            lemma
            for synset in wordnet.synsets(word)
            for lemma in synset.lemma_names()
            if lemma != word
        ]
        augmented_text.append(random.choice(candidates) if candidates else word)
    return ' '.join(augmented_text)

def insert_random_word(text):
    """Insert the placeholder token 'random_word' at up to three random positions.

    The number of insertions is min(3, number of words), so empty text comes
    back empty. Positions are drawn from the list as it grows.
    """
    tokens = text.split()
    remaining = min(3, len(tokens))
    while remaining > 0:
        # randint is inclusive on both ends, so the end of the list is a valid slot
        tokens.insert(random.randint(0, len(tokens)), 'random_word')
        remaining -= 1
    return ' '.join(tokens)

In [None]:
# Build two augmented copies of the balanced data and stack them under the original.
# NOTE(review): insert_random_word only adds the literal token 'random_word', which is
# deleted again right below, so that copy differs from the original by whitespace only.
# final_df is therefore roughly three near-identical copies of balanced_df.
augmented_df_synonyms = balanced_df.copy()
augmented_df_synonyms['comment'] = augmented_df_synonyms['comment'].apply(replace_synonyms)
# regex=False: 'random_word' is a plain substring, not a pattern
augmented_df_synonyms['comment'] = augmented_df_synonyms['comment'].str.replace('random_word', '', regex=False)

augmented_df_random = balanced_df.copy()
augmented_df_random['comment'] = augmented_df_random['comment'].apply(insert_random_word)
augmented_df_random['comment'] = augmented_df_random['comment'].str.replace('random_word', '', regex=False)

final_df = pd.concat([balanced_df, augmented_df_synonyms, augmented_df_random], ignore_index=True)

# Seeded shuffle: the row order fixes the label -> index mapping built later from
# final_df['label'].unique(), so it must be reproducible across runs
final_df = final_df.sample(frac=1, random_state=42).reset_index(drop=True)

print(final_df.head())

In [None]:
# Make sure every comment is one plain string: list entries are joined with
# spaces (items converted via str), anything else goes through str().
# NOTE(review): this rewrites df, while the tokenization below reads final_df.
df['comment'] = df['comment'].apply(
    lambda entry: ' '.join(map(str, entry)) if isinstance(entry, list) else str(entry)
)

In [None]:
# Pretrained Bangla BERT vocabulary; only the tokenizer is used, not the BERT weights
tokenizer = BertTokenizer.from_pretrained("sagorsarker/bangla-bert-base")

def bert_tokenizer(text):
    """Encode one text into fixed-length token ids and an attention mask.

    The text is padded or truncated to 120 tokens including special tokens.

    Returns:
        (input_ids, attention_mask) as nested Python lists. Because the encoder
        is asked for NumPy tensors, each list keeps a leading batch axis of
        size 1, which callers squeeze away.
    """
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=120,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='np',
    )
    return encoded['input_ids'].tolist(), encoded['attention_mask'].tolist()


# Split the dataset into train, validation, and test sets (80% / 10% / 10%)
# NOTE(review): final_df already contains oversampled duplicates and augmented copies,
# so identical or near-identical comments can land in different splits. Validation
# and test scores are likely optimistic; consider splitting before oversampling.
train_texts, holdout_texts, train_labels, holdout_labels = train_test_split(
    final_df['comment'], final_df['label'], test_size=0.2, random_state=42
)
# Second split halves the 20% hold-out into validation and test
val_texts, test_texts, val_labels, test_labels = train_test_split(
    holdout_texts, holdout_labels, test_size=0.5, random_state=42
)

In [None]:
# Convert labels to one-hot encoding
# Class index = order of first appearance of each label in final_df
label_dict = {label: idx for idx, label in enumerate(final_df['label'].unique())}
num_classes = len(label_dict)
# num_classes is passed explicitly so all three splits get the same width even if
# the highest-index class happens to be missing from one of them
train_labels = tf.keras.utils.to_categorical(train_labels.map(label_dict), num_classes=num_classes)
val_labels = tf.keras.utils.to_categorical(val_labels.map(label_dict), num_classes=num_classes)
test_labels = tf.keras.utils.to_categorical(test_labels.map(label_dict), num_classes=num_classes)

In [None]:
# Apply the tokenizer function to each text entry in the DataFrame
def _tokenize_split(texts):
    """Tokenize a Series of texts; returns (input_ids, attention_mask) as NumPy arrays."""
    ids, masks = zip(*texts.map(bert_tokenizer))
    # Convert lists to arrays
    return np.array(ids), np.array(masks)

train_input_ids, train_attention_mask = _tokenize_split(train_texts)
val_input_ids, val_attention_mask = _tokenize_split(val_texts)
test_input_ids, test_attention_mask = _tokenize_split(test_texts)

# Print shapes
for split_name, ids, masks, one_hot in (
    ("Train", train_input_ids, train_attention_mask, train_labels),
    ("Validation", val_input_ids, val_attention_mask, val_labels),
    ("Test", test_input_ids, test_attention_mask, test_labels),
):
    print(split_name + " shapes:", ids.shape, masks.shape, one_hot.shape)

In [None]:
# Reshape input_ids and attention_mask arrays to remove the extra dimension
# (axis 1 is the size-1 batch axis that bert_tokenizer keeps for every text)
train_input_ids, train_attention_mask = (
    np.squeeze(arr, axis=1) for arr in (train_input_ids, train_attention_mask)
)
val_input_ids, val_attention_mask = (
    np.squeeze(arr, axis=1) for arr in (val_input_ids, val_attention_mask)
)
test_input_ids, test_attention_mask = (
    np.squeeze(arr, axis=1) for arr in (test_input_ids, test_attention_mask)
)

# Print shapes
for split_name, ids, masks, one_hot in (
    ("Train", train_input_ids, train_attention_mask, train_labels),
    ("Validation", val_input_ids, val_attention_mask, val_labels),
    ("Test", test_input_ids, test_attention_mask, test_labels),
):
    print(split_name + " shapes:", ids.shape, masks.shape, one_hot.shape)

Transformer

In [None]:
# Symbolic model inputs reused by every model builder in this notebook.
# The length 120 matches the max_length used in bert_tokenizer.
input_ids = Input(shape=(120,), dtype=tf.int32, name="input_ids")
attention_mask = Input(shape=(120,), dtype=tf.int32, name="attention_mask")

# Define the Transformer block
class TransformerBlock(tf.keras.layers.Layer):
    """Transformer encoder block: self-attention then a feed-forward net, each with dropout, residual add and layer norm.

    Args:
        embed_dim: Size of the last axis of the input. Also used as the
            per-head key dimension and as the feed-forward output size.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after attention and after the feed-forward net.
        **kwargs: Standard Keras layer arguments (e.g. ``name``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super(TransformerBlock, self).__init__(**kwargs)
        # Kept so get_config can rebuild the layer
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        ``training`` defaults to None so the layer can be called without it,
        as several model builders in this notebook do. Keras then decides
        whether dropout is active (on in fit, off in predict/evaluate).
        """
        # Self-attention: query and value are both the block input
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a saved model can recreate this layer."""
        config = super(TransformerBlock, self).get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

In [None]:
# Define the Transformer model
def create_transformer_model(input_shape, vocab_size, embed_dim=128, num_heads=2, ff_dim=128, num_transformer_blocks=2, rate=0.1):
    """Build the Transformer-only classifier on the shared input tensors.

    Pipeline: token embedding -> stacked TransformerBlocks -> average pooling
    over the sequence -> Dense(64, relu) -> 5-way softmax.

    Args:
        input_shape: Accepted for interface compatibility; not read. The
            sequence length comes from the module-level ``input_ids`` tensor.
        vocab_size: Number of rows in the embedding table.
        embed_dim: Embedding width.
        num_heads: Attention heads per block.
        ff_dim: Feed-forward width per block.
        num_transformer_blocks: Number of stacked blocks.
        rate: Dropout rate inside each block.

    Returns:
        An uncompiled Keras Model taking [input_ids, attention_mask].

    NOTE(review): attention_mask is a model input, but no layer consumes it, so
    padding positions take part in attention and in the average pooling.
    """
    inputs = [input_ids, attention_mask]
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs[0])
    transformer_blocks = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_transformer_blocks)]
    x = embedding_layer
    for transformer_block in transformer_blocks:
        # Do not force training=True here: that keeps dropout active during
        # predict/evaluate and makes test-time outputs random. Leaving it unset
        # lets Keras set the phase, like the other builders in this notebook.
        x = transformer_block(x)
    x = GlobalAveragePooling1D()(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(5, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

# NOTE(review): input_shape is handed to the builder, but the builder never reads it;
# the sequence length (120) comes from the shared Input tensors
input_shape = (128,)
# One extra embedding row beyond the tokenizer vocabulary
vocab_size = tokenizer.vocab_size + 1

transformer_model = create_transformer_model(input_shape, vocab_size)

# One-hot targets -> categorical cross-entropy
transformer_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
transformer_model.summary()

In [None]:
# Define early stopping callback
# Stops after 2 epochs without val_loss improvement and restores the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Fit the model with early stopping (no learning-rate scheduler is configured)
history_1 = transformer_model.fit(
    x=[train_input_ids, train_attention_mask],  
    y=train_labels, 
    epochs = 10,  
    batch_size=32,  
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Plot training & validation accuracy and loss values in a single plot
plt.figure(figsize=(12, 6))

# One curve per recorded metric, accuracy first and loss second (shared y-axis)
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_1.history[metric_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Predict labels for the test set
test_pred = transformer_model.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)
# Integer class index of each one-hot test label
test_true_labels = test_labels.argmax(axis=1)

# Evaluate the model
loss, accuracy = transformer_model.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Calculate test accuracy
test_accuracy = accuracy_score(test_true_labels, test_pred_labels)
print("Test Accuracy:", test_accuracy)

# Class names in class-index order. label_dict maps label -> index in insertion order,
# so iterating it gives the names by index. A hand-written list would only be right if
# its order happened to match the order in which labels first appear in final_df.
class_names = [str(label) for label in label_dict]

# Define true and predicted class names
true_class_names = np.array([class_names[idx] for idx in test_true_labels])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print("Classification Report:")
# labels= keeps target_names aligned even if a class is missing from the test split
print(classification_report(test_true_labels, test_pred_labels,
                            labels=np.arange(len(class_names)), target_names=class_names))

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        y_true: True labels, one per sample.
        y_pred: Predicted labels, one per sample.
        classes: Preferred label order for rows and columns. Used when it covers
            every label present in the data; otherwise the sorted labels found
            in y_true / y_pred are used.
        normalize: If True, each row is divided by its total.
        title: Plot title; a default is chosen when falsy.
        cmap: Matplotlib colormap for the heat map.
    """
    # Imported here because the notebook's import cell does not provide this name
    from sklearn.utils.multiclass import unique_labels

    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data, unless the caller's list covers
    # them all; in that case keep the caller's order
    present = unique_labels(y_true, y_pred)
    requested = [] if classes is None else np.asarray(classes).tolist()
    if requested and set(present.tolist()) <= set(requested):
        classes = np.asarray(requested)
    else:
        classes = present

    # Compute confusion matrix with rows/columns in exactly the order of `classes`
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any true sample stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Plot non-normalized confusion matrix
plot_confusion_matrix(true_class_names, predicted_class_names,
                      classes=np.array(class_names), title='Confusion Matrix')

# Calculate precision, recall, and F1-score (macro average: every class weighs the same)
_true_idx = test_labels.argmax(axis=1)
precision = precision_score(_true_idx, test_pred_labels, average='macro')
recall = recall_score(_true_idx, test_pred_labels, average='macro')
f1 = f1_score(_true_idx, test_pred_labels, average='macro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Compute and plot one-vs-rest ROC curve and ROC area for each class
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i in range(len(class_names)):
    # Column i of the one-hot labels against the predicted probability of class i
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_names[i]))

# Diagonal = chance level
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Return transformer_model class probabilities for an iterable of raw texts.

    Each text is encoded with bert_tokenizer; the size-1 axis that the
    tokenizer keeps per text is squeezed away before prediction.
    """
    encoded = [bert_tokenizer(text) for text in texts]

    token_ids = np.squeeze(np.array([pair[0] for pair in encoded]), axis=1)
    masks = np.squeeze(np.array([pair[1] for pair in encoded]), axis=1)

    return transformer_model.predict([token_ids, masks])

In [None]:
# Instantiate a LIME text explainer
# class_names follow the class-index order of label_dict.
# split_expression: split on whitespace only. LIME's default splitter (\W+) treats
# Bengali vowel signs as non-word characters and would cut words apart.
explainer = LimeTextExplainer(
    class_names=[str(label) for label in label_dict],
    split_expression=r'\s+',
)

def explain_instance(text, model_predict):
    """Explain the model's top predicted class for text and render it in the notebook.

    Args:
        text: Raw comment to explain.
        model_predict: Callable mapping a list of texts to class probabilities.

    Returns:
        Whatever show_in_notebook returns (it renders as a side effect).
    """
    # top_labels=1 explains the predicted class rather than LIME's default label index 1
    exp = explainer.explain_instance(text, model_predict, num_features=10, top_labels=1)
    # `text` here is LIME's show-the-highlighted-text flag, not the comment itself
    exp_html = exp.show_in_notebook(text=True)

    return exp_html

text_to_explain = "আমি একজন উদার মানুষ হিসেবে পরিচিত হতে চাই। আমি যখন আমার পরিবারের সদস্যদের সাথে সময় কাটাই, তখন আমি সবার কাছে সত্যিকারের সাথে থাকতে চেষ্টা করি। আমি অনেক বিষয়ে আগ্রহী এবং শিখতে চাই। আমি আমার দৈনন্দিন জীবনে নতুন চ্যালেঞ্জ এবং অভিজ্ঞতা অনুভব করতে ভালবাসি। আমার জীবনে সময় কাটানোর প্রধান উপাদান হল পরিবার এবং সামাজিক সম্পর্ক। আমি আমার পরিবারের সদস্যদের সাথে সময় কাটাতে ভালবাসি এবং তাদের সাথে অনেক মজা করি। সাথে থাকা সময়ে আমি সবার কাছে আমার ভাবনা এবং আলোচনা শেয়ার করতে পছন্দ করি।"
explanation = explain_instance(text_to_explain, model_predict)
explanation

Transformer-GRU

In [None]:
def create_transformer_gru_model(input_shape, vocab_size, embed_dim=128, num_heads=4, ff_dim=128, num_transformer_blocks=2, rate=0.1):
    """Build the Transformer + GRU classifier on the shared input tensors.

    Pipeline: embedding -> TransformerBlocks -> average pooling -> Dense(64)
    -> Reshape -> GRU(64) -> average pooling -> 5-way softmax.
    ``input_shape`` is accepted but not read.

    NOTE(review): the first pooling removes the sequence axis, so Reshape((-1, 64))
    yields a single timestep and the GRU runs over a length-1 sequence.
    attention_mask is a model input, but no layer consumes it.
    """
    model_inputs = [input_ids, attention_mask]
    x = Embedding(input_dim=vocab_size, output_dim=embed_dim)(model_inputs[0])
    blocks = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_transformer_blocks)]
    for block in blocks:
        x = block(x)
    # Apply the head layers in order
    for head_layer in (
        GlobalAveragePooling1D(),
        Dense(64, activation='relu'),
        Reshape((-1, 64)),
        GRU(64, return_sequences=True),
        GlobalAveragePooling1D(),
    ):
        x = head_layer(x)
    outputs = Dense(5, activation='softmax')(x)
    return Model(inputs=model_inputs, outputs=outputs)

# NOTE(review): input_shape is handed to the builder, but the builder never reads it
input_shape = (128,)
vocab_size = tokenizer.vocab_size
transformer_gru_model = create_transformer_gru_model(input_shape, vocab_size)
# One-hot targets -> categorical cross-entropy
transformer_gru_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
transformer_gru_model.summary()

In [None]:
# Train the Transformer-GRU model; reuses the early_stopping callback defined for the first model
history_2 = transformer_gru_model.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=10,
    batch_size=32,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Plot training & validation accuracy and loss values in a single plot
plt.figure(figsize=(12, 6))

# One curve per recorded metric, accuracy first and loss second (shared y-axis)
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_2.history[metric_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Predict labels for the test set
test_pred = transformer_gru_model.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)
# Integer class index of each one-hot test label
test_true_labels = test_labels.argmax(axis=1)

# Evaluate the model
loss, accuracy = transformer_gru_model.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Calculate test accuracy
test_accuracy = accuracy_score(test_true_labels, test_pred_labels)
print("Test Accuracy:", test_accuracy)

# Class names in class-index order. label_dict maps label -> index in insertion order,
# so iterating it gives the names by index. A hand-written list would only be right if
# its order happened to match the order in which labels first appear in final_df.
class_names = [str(label) for label in label_dict]

# Define true and predicted class names
true_class_names = np.array([class_names[idx] for idx in test_true_labels])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print("Classification Report:")
# labels= keeps target_names aligned even if a class is missing from the test split
print(classification_report(test_true_labels, test_pred_labels,
                            labels=np.arange(len(class_names)), target_names=class_names))

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        y_true: True labels, one per sample.
        y_pred: Predicted labels, one per sample.
        classes: Preferred label order for rows and columns. Used when it covers
            every label present in the data; otherwise the sorted labels found
            in y_true / y_pred are used.
        normalize: If True, each row is divided by its total.
        title: Plot title; a default is chosen when falsy.
        cmap: Matplotlib colormap for the heat map.
    """
    # Imported here because the notebook's import cell does not provide this name
    from sklearn.utils.multiclass import unique_labels

    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data, unless the caller's list covers
    # them all; in that case keep the caller's order
    present = unique_labels(y_true, y_pred)
    requested = [] if classes is None else np.asarray(classes).tolist()
    if requested and set(present.tolist()) <= set(requested):
        classes = np.asarray(requested)
    else:
        classes = present

    # Compute confusion matrix with rows/columns in exactly the order of `classes`
    cm = confusion_matrix(y_true, y_pred, labels=classes)

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any true sample stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Plot non-normalized confusion matrix
plot_confusion_matrix(true_class_names, predicted_class_names,
                      classes=np.array(class_names), title='Confusion Matrix')

# Calculate precision, recall, and F1-score (macro average: every class weighs the same)
_true_idx = test_labels.argmax(axis=1)
precision = precision_score(_true_idx, test_pred_labels, average='macro')
recall = recall_score(_true_idx, test_pred_labels, average='macro')
f1 = f1_score(_true_idx, test_pred_labels, average='macro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Compute and plot one-vs-rest ROC curve and ROC area for each class
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i in range(len(class_names)):
    # Column i of the one-hot labels against the predicted probability of class i
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_names[i]))

# Diagonal = chance level
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Return transformer_gru_model class probabilities for an iterable of raw texts.

    Each text is encoded with bert_tokenizer; the size-1 axis that the
    tokenizer keeps per text is squeezed away before prediction.
    """
    encoded = [bert_tokenizer(text) for text in texts]

    token_ids = np.squeeze(np.array([pair[0] for pair in encoded]), axis=1)
    masks = np.squeeze(np.array([pair[1] for pair in encoded]), axis=1)

    return transformer_gru_model.predict([token_ids, masks])

In [None]:
# Instantiate a LIME text explainer
# class_names follow the class-index order of label_dict.
# split_expression: split on whitespace only. LIME's default splitter (\W+) treats
# Bengali vowel signs as non-word characters and would cut words apart.
explainer = LimeTextExplainer(
    class_names=[str(label) for label in label_dict],
    split_expression=r'\s+',
)

def explain_instance(text, model_predict):
    """Explain the model's top predicted class for text and render it in the notebook.

    Args:
        text: Raw comment to explain.
        model_predict: Callable mapping a list of texts to class probabilities.

    Returns:
        Whatever show_in_notebook returns (it renders as a side effect).
    """
    # top_labels=1 explains the predicted class rather than LIME's default label index 1
    exp = explainer.explain_instance(text, model_predict, num_features=10, top_labels=1)
    # `text` here is LIME's show-the-highlighted-text flag, not the comment itself
    exp_html = exp.show_in_notebook(text=True)

    return exp_html

text_to_explain = "তুমি কি সেরা হ্যাকার? কাজের পাঁচ মিনিটে তোর হ্যাক করে দিব। এক বিশ্বাস নিবে?"
explanation = explain_instance(text_to_explain, model_predict)
explanation

Transformer-LSTM

In [None]:
from tensorflow.keras.layers import Reshape, LSTM

def create_transformer_gru_model(input_shape, vocab_size, embed_dim=128, num_heads=4, ff_dim=128, num_transformer_blocks=2, rate=0.1):
    """Build the Transformer + LSTM classifier on the shared input tensors.

    Despite the function name, the recurrent layer here is an LSTM.
    Pipeline: embedding -> TransformerBlocks -> average pooling -> Dense(64)
    -> Reshape -> LSTM(64) -> average pooling -> 5-way softmax.
    ``input_shape`` is accepted but not read.

    NOTE(review): the first pooling removes the sequence axis, so the Reshape
    yields (batch_size, 1, 64) and the LSTM runs over a length-1 sequence.
    attention_mask is a model input, but no layer consumes it.
    """
    model_inputs = [input_ids, attention_mask]
    x = Embedding(input_dim=vocab_size, output_dim=embed_dim)(model_inputs[0])
    blocks = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_transformer_blocks)]
    for block in blocks:
        x = block(x)
    # Apply the head layers in order
    for head_layer in (
        GlobalAveragePooling1D(),
        Dense(64, activation='relu'),
        Reshape((-1, 64)),  # Reshape the output to (batch_size, 1, 64)
        LSTM(64, return_sequences=True),
        GlobalAveragePooling1D(),
    ):
        x = head_layer(x)
    outputs = Dense(5, activation='softmax')(x)
    return Model(inputs=model_inputs, outputs=outputs)

# NOTE(review): input_shape is handed to the builder, but the builder never reads it
input_shape = (128,)
vocab_size = tokenizer.vocab_size
# The builder defined in this cell uses an LSTM even though its name says "gru"
transformer_lstm = create_transformer_gru_model(input_shape, vocab_size)
# One-hot targets -> categorical cross-entropy
transformer_lstm.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
transformer_lstm.summary()

In [None]:
# Train the Transformer-LSTM model; reuses the early_stopping callback defined for the first model
history_3 = transformer_lstm.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=10,
    batch_size=32,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Plot training & validation accuracy and loss values in a single plot
plt.figure(figsize=(12, 6))

# One curve per recorded metric, accuracy first and loss second (shared y-axis)
for metric_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_3.history[metric_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Predict labels for the test set
test_pred = transformer_lstm.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)
# Integer class index of each one-hot test label
test_true_labels = test_labels.argmax(axis=1)

# Evaluate the model
loss, accuracy = transformer_lstm.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Calculate test accuracy
test_accuracy = accuracy_score(test_true_labels, test_pred_labels)
print("Test Accuracy:", test_accuracy)

# Class names in class-index order. label_dict maps label -> index in insertion order,
# so iterating it gives the names by index. A hand-written list would only be right if
# its order happened to match the order in which labels first appear in final_df.
class_names = [str(label) for label in label_dict]

# Define true and predicted class names
true_class_names = np.array([class_names[idx] for idx in test_true_labels])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print("Classification Report:")
# labels= keeps target_names aligned even if a class is missing from the test split
print(classification_report(test_true_labels, test_pred_labels,
                            labels=np.arange(len(class_names)), target_names=class_names))

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    Args:
        y_true: True labels, one per sample.
        y_pred: Predicted labels, one per sample.
        classes: Preferred label order for rows and columns. Used when it covers
            every label present in the data; otherwise the sorted labels found
            in y_true / y_pred are used.
        normalize: If True, each row is divided by its total.
        title: Plot title; a default is chosen when falsy.
        cmap: Matplotlib colormap for the heat map.
    """
    # Imported here because the notebook's import cell does not provide this name
    from sklearn.utils.multiclass import unique_labels

    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data, unless the caller's list covers
    # them all; in that case keep the caller's order
    present = unique_labels(y_true, y_pred)
    requested = [] if classes is None else np.asarray(classes).tolist()
    if requested and set(present.tolist()) <= set(requested):
        classes = np.asarray(requested)
    else:
        classes = present

    # Compute confusion matrix with rows/columns in exactly the order of `classes`
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any true sample stay at 0 instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the maximum get white text for contrast
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Plot non-normalized confusion matrix
plot_confusion_matrix(true_class_names, predicted_class_names,
                      classes=np.array(class_names), title='Confusion Matrix')

# Calculate precision, recall, and F1-score (macro average: every class weighs the same)
_true_idx = test_labels.argmax(axis=1)
precision = precision_score(_true_idx, test_pred_labels, average='macro')
recall = recall_score(_true_idx, test_pred_labels, average='macro')
f1 = f1_score(_true_idx, test_pred_labels, average='macro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# Compute and plot one-vs-rest ROC curve and ROC area for each class
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i in range(len(class_names)):
    # Column i of the one-hot labels against the predicted probability of class i
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_names[i]))

# Diagonal = chance level
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Return transformer_lstm class probabilities for an iterable of raw texts.

    Each text is encoded with bert_tokenizer; the size-1 axis that the
    tokenizer keeps per text is squeezed away before prediction.
    """
    encoded = [bert_tokenizer(text) for text in texts]

    token_ids = np.squeeze(np.array([pair[0] for pair in encoded]), axis=1)
    masks = np.squeeze(np.array([pair[1] for pair in encoded]), axis=1)

    return transformer_lstm.predict([token_ids, masks])

# Instantiate a LIME text explainer
# class_names follow the class-index order of label_dict.
# split_expression: split on whitespace only. LIME's default splitter (\W+) treats
# Bengali vowel signs as non-word characters and would cut words apart.
explainer = LimeTextExplainer(
    class_names=[str(label) for label in label_dict],
    split_expression=r'\s+',
)

def explain_instance(text, model_predict):
    """Explain the model's top predicted class for text and render it in the notebook.

    Args:
        text: Raw comment to explain.
        model_predict: Callable mapping a list of texts to class probabilities.

    Returns:
        Whatever show_in_notebook returns (it renders as a side effect).
    """
    # top_labels=1 explains the predicted class rather than LIME's default label index 1
    exp = explainer.explain_instance(text, model_predict, num_features=10, top_labels=1)
    # `text` here is LIME's show-the-highlighted-text flag, not the comment itself
    exp_html = exp.show_in_notebook(text=True)

    return exp_html

text_to_explain = "আমি একজন উদার মানুষ হিসেবে পরিচিত হতে চাই। আমি যখন আমার পরিবারের সদস্যদের সাথে সময় কাটাই"
explanation = explain_instance(text_to_explain, model_predict)
explanation

Transformer-GRU-LSTM

In [None]:
from tensorflow.keras.layers import Reshape

def create_transformer_gru_model(input_shape, vocab_size, embed_dim=128, num_heads=4, ff_dim=128, num_transformer_blocks=2, rate=0.1):
    """Build the Transformer -> GRU -> LSTM classifier (5-way softmax).

    Parameters
    ----------
    input_shape : tuple
        Shape of one tokenised example, e.g. ``(sequence_length,)``.
    vocab_size : int
        Number of rows in the embedding table.
    embed_dim, num_heads, ff_dim, rate
        Forwarded to each `TransformerBlock` (defined elsewhere in the notebook).
    num_transformer_blocks : int
        Number of stacked `TransformerBlock` layers.

    Returns
    -------
    An uncompiled Keras `Model` taking ``[input_ids, attention_mask]``.
    Only `input_ids` is connected to the network; `attention_mask` is accepted
    so the model can be fed the same two-array input as the other cells.
    """
    # Create the symbolic inputs here. The previous version read `input_ids`
    # and `attention_mask` from notebook globals and ignored `input_shape`,
    # so it only worked with leftover kernel state.
    input_ids = Input(shape=input_shape, name="input_ids")
    attention_mask = Input(shape=input_shape, name="attention_mask")
    inputs = [input_ids, attention_mask]
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs[0])
    transformer_blocks = [TransformerBlock(embed_dim, num_heads, ff_dim, rate) for _ in range(num_transformer_blocks)]
    x = embedding_layer
    for transformer_block in transformer_blocks:
        x = transformer_block(x)
    x = GlobalAveragePooling1D()(x)
    x = Dense(64, activation='relu')(x)
    # The pooled vector becomes a length-1 sequence for the recurrent layers.
    x = Reshape((-1, 64))(x)
    x = GRU(64, return_sequences=True)(x)
    x = LSTM(64, return_sequences=True)(x)
    x = GlobalAveragePooling1D()(x)
    outputs = Dense(5, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Take the sequence length from the training data so the Input layers match
# what fit() is given (the hard-coded 128 here disagreed with the 120 used by
# the other model cells and was never actually applied).
input_shape = (train_input_ids.shape[1],)
vocab_size = tokenizer.vocab_size
Hybird_transformer = create_transformer_gru_model(input_shape, vocab_size)
Hybird_transformer.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
Hybird_transformer.summary()

In [None]:
# Train the Transformer-GRU-LSTM model (up to 10 epochs, batches of 32) and
# keep the Keras History in `history_4` for the plotting cell below.
# `early_stopping` is a callback created in an earlier cell.
history_4 = Hybird_transformer.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=10,
    batch_size=32,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Accuracy and loss for the training and validation sets, all on one axes.
plt.figure(figsize=(12, 6))

# (History key, legend label) for each curve, in drawing order.
for history_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_4.history[history_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Per-class scores for every test example, then the arg-max class index.
test_pred = Hybird_transformer.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)

# Loss / accuracy as reported by Keras.
loss, accuracy = Hybird_transformer.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# The same accuracy recomputed with scikit-learn from the class indices.
test_accuracy = accuracy_score(test_labels.argmax(axis=1), test_pred_labels)
print("Test Accuracy:", test_accuracy)

class_names = ["Not Bully", "Troll", "Sexual", "Religious", "Threat"]

# Class indices translated to their display names.
true_class_names = np.array([class_names[idx] for idx in test_labels.argmax(axis=1)])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print(
    "Classification Report:",
    classification_report(test_labels.argmax(axis=1), test_pred_labels, target_names=class_names),
    sep="\n",
)

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a one-line banner and plot the confusion matrix of y_pred vs y_true.

    Parameters
    ----------
    y_true, y_pred : 1-D array-like
        True and predicted labels drawn from the same label space.
    classes : array-like or None
        Preferred display order of the labels. Only labels that occur in
        `y_true` or `y_pred` are drawn. If the data contains a label that is
        not listed in `classes` (or `classes` is None), the sorted labels
        found in the data are used instead.
    normalize : bool
        If True, each row is divided by its total (rows without any true
        sample stay at zero).
    title : str or None
        Figure title; a default is chosen from `normalize` when falsy.
    cmap : matplotlib colormap
        Colormap passed to `imshow`.
    """
    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data (np.unique returns them sorted).
    observed = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    present = set(observed.tolist())
    requested = [] if classes is None else list(dict.fromkeys(np.ravel(classes).tolist()))
    if requested and present <= set(requested):
        # Honour the caller's ordering instead of silently discarding `classes`.
        classes = np.array([name for name in requested if name in present])
    else:
        classes = observed

    # Pass the labels explicitly so matrix rows/columns match the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guard against 0/0 for labels that only ever appear as predictions.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; white text on dark cells, black on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Draw the raw-count confusion matrix for the test-set predictions.
plot_confusion_matrix(
    true_class_names,
    predicted_class_names,
    classes=np.array(class_names),
    title='Confusion Matrix',
)

# Macro-averaged precision, recall and F1 on the integer class indices.
precision, recall, f1 = (
    score_fn(test_labels.argmax(axis=1), test_pred_labels, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# One-vs-rest ROC curve and AUC per class (dicts keyed by class index),
# drawn on a single figure as each curve is computed.
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i, class_name in enumerate(class_names):
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_name))

# Chance diagonal, axis limits and labels.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Prediction function handed to LIME: raw texts -> model output array.

    Every text is passed through `bert_tokenizer` (defined elsewhere in the
    notebook); the per-text results are stacked, split into ids and masks,
    squeezed on axis 1 and fed to `Hybird_transformer.predict`.

    NOTE(review): assumes `bert_tokenizer` returns an
    (input_ids, attention_mask) pair whose arrays carry a singleton axis 1 --
    confirm against its definition.
    """
    encoded = np.array([bert_tokenizer(text) for text in texts])
    ids_per_text, mask_per_text = zip(*encoded)

    ids_batch = np.squeeze(np.array(ids_per_text), axis=1)
    mask_batch = np.squeeze(np.array(mask_per_text), axis=1)

    return Hybird_transformer.predict([ids_batch, mask_batch])


# Instantiate a LIME text explainer.
# LIME labels column k of the prediction array with class_names[k], so the
# names must follow the model's output order. `class_names` is the
# index -> name mapping the evaluation cells use for these same predictions;
# final_df['label'].unique() is ordered by first appearance in the dataframe
# and is not guaranteed to line up with the output columns.
explainer = LimeTextExplainer(class_names=class_names)

def explain_instance(text, model_predict, num_features=10, top_labels=1):
    """Explain one text with LIME, render it in the notebook, return the result.

    Parameters
    ----------
    text : str
        The raw text to explain.
    model_predict : callable
        Maps a list of texts to an array of per-class scores.
    num_features : int
        Maximum number of words in the explanation.
    top_labels : int
        Explain the `top_labels` highest-scoring classes. LIME's default
        (`labels=(1,)`) always explains class index 1, which is only
        meaningful for binary problems.

    Returns
    -------
    The LIME Explanation object. `show_in_notebook` only displays the HTML
    and returns None, so the explanation itself is returned to keep it usable.
    """
    exp = explainer.explain_instance(
        text, model_predict, num_features=num_features, top_labels=top_labels
    )
    exp.show_in_notebook(text=text)

    return exp

text_to_explain = "তুমি কি সেরা হ্যাকার? কাজের পাঁচ মিনিটে তোর হ্যাক করে দিব। এক বিশ্বাস নিবে?"
# The visualisation is rendered inside explain_instance; keep the object for later inspection.
explanation = explain_instance(text_to_explain, model_predict)

Transformer-BiGRU-BiLSTM

In [None]:
def create_hybrid_transformer_model(input_shape, vocab_size, embed_dim=128, num_heads=4, ff_dim=128, num_transformer_blocks=2, rate=0.1):
    """Build the Transformer -> BiGRU -> BiLSTM classifier (5-way softmax).

    The token ids are embedded, passed through `num_transformer_blocks`
    `TransformerBlock` layers (defined elsewhere in the notebook), average
    pooled, projected to 64 units, reshaped to a length-1 sequence and run
    through a bidirectional GRU and a bidirectional LSTM before the final
    pooling and softmax.

    The `attention_mask` input is part of the model signature but is not
    connected to any layer.

    Returns an uncompiled Keras `Model` taking ``[input_ids, attention_mask]``.
    """
    token_ids_in = Input(shape=input_shape, name="input_ids")
    mask_in = Input(shape=input_shape, name="attention_mask")

    hidden = Embedding(input_dim=vocab_size, output_dim=embed_dim)(token_ids_in)

    # Encoder stack: all blocks are instantiated first, then applied in order.
    encoder_stack = [
        TransformerBlock(embed_dim, num_heads, ff_dim, rate)
        for _ in range(num_transformer_blocks)
    ]
    for encoder in encoder_stack:
        hidden = encoder(hidden)

    hidden = GlobalAveragePooling1D()(hidden)

    # Dense projection, then recurrent layers over the length-1 sequence.
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Reshape((-1, 64))(hidden)
    hidden = Bidirectional(GRU(64, return_sequences=True))(hidden)
    hidden = Bidirectional(LSTM(64, return_sequences=True))(hidden)

    hidden = GlobalAveragePooling1D()(hidden)
    class_probs = Dense(5, activation='softmax')(hidden)

    return Model(inputs=[token_ids_in, mask_in], outputs=class_probs)

input_shape = (120,)
vocab_size = tokenizer.vocab_size

# Create and compile the model (built once; the earlier duplicate
# construction created a second, immediately discarded model).
hybrid_transformer = create_hybrid_transformer_model(input_shape, vocab_size)
hybrid_transformer.compile(optimizer='nadam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Train the Transformer-BiGRU-BiLSTM model (up to 10 epochs, batches of 32)
# and keep the Keras History in `history_5` for the plotting cell below.
# `early_stopping` is a callback created in an earlier cell.
history_5 = hybrid_transformer.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=10,
    batch_size=32,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Accuracy and loss for the training and validation sets, all on one axes.
plt.figure(figsize=(12, 6))

# (History key, legend label) for each curve, in drawing order.
for history_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_5.history[history_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Per-class scores for every test example, then the arg-max class index.
test_pred = hybrid_transformer.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)

# Loss / accuracy as reported by Keras.
loss, accuracy = hybrid_transformer.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# The same accuracy recomputed with scikit-learn from the class indices.
test_accuracy = accuracy_score(test_labels.argmax(axis=1), test_pred_labels)
print("Test Accuracy:", test_accuracy)

class_names = ["Not Bully", "Troll", "Sexual", "Religious", "Threat"]

# Class indices translated to their display names.
true_class_names = np.array([class_names[idx] for idx in test_labels.argmax(axis=1)])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print(
    "Classification Report:",
    classification_report(test_labels.argmax(axis=1), test_pred_labels, target_names=class_names),
    sep="\n",
)

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a one-line banner and plot the confusion matrix of y_pred vs y_true.

    Parameters
    ----------
    y_true, y_pred : 1-D array-like
        True and predicted labels drawn from the same label space.
    classes : array-like or None
        Preferred display order of the labels. Only labels that occur in
        `y_true` or `y_pred` are drawn. If the data contains a label that is
        not listed in `classes` (or `classes` is None), the sorted labels
        found in the data are used instead.
    normalize : bool
        If True, each row is divided by its total (rows without any true
        sample stay at zero).
    title : str or None
        Figure title; a default is chosen from `normalize` when falsy.
    cmap : matplotlib colormap
        Colormap passed to `imshow`.
    """
    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data (np.unique returns them sorted).
    observed = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    present = set(observed.tolist())
    requested = [] if classes is None else list(dict.fromkeys(np.ravel(classes).tolist()))
    if requested and present <= set(requested):
        # Honour the caller's ordering instead of silently discarding `classes`.
        classes = np.array([name for name in requested if name in present])
    else:
        classes = observed

    # Pass the labels explicitly so matrix rows/columns match the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guard against 0/0 for labels that only ever appear as predictions.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; white text on dark cells, black on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Draw the raw-count confusion matrix for the test-set predictions.
plot_confusion_matrix(
    true_class_names,
    predicted_class_names,
    classes=np.array(class_names),
    title='Confusion Matrix',
)

# Macro-averaged precision, recall and F1 on the integer class indices.
precision, recall, f1 = (
    score_fn(test_labels.argmax(axis=1), test_pred_labels, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# One-vs-rest ROC curve and AUC per class (dicts keyed by class index),
# drawn on a single figure as each curve is computed.
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i, class_name in enumerate(class_names):
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_name))

# Chance diagonal, axis limits and labels.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Prediction function handed to LIME: raw texts -> model output array.

    Every text is passed through `bert_tokenizer` (defined elsewhere in the
    notebook); the per-text results are stacked, split into ids and masks,
    squeezed on axis 1 and fed to `hybrid_transformer.predict`.

    NOTE(review): assumes `bert_tokenizer` returns an
    (input_ids, attention_mask) pair whose arrays carry a singleton axis 1 --
    confirm against its definition.
    """
    encoded = np.array([bert_tokenizer(text) for text in texts])
    ids_per_text, mask_per_text = zip(*encoded)

    ids_batch = np.squeeze(np.array(ids_per_text), axis=1)
    mask_batch = np.squeeze(np.array(mask_per_text), axis=1)

    return hybrid_transformer.predict([ids_batch, mask_batch])

# Instantiate a LIME text explainer.
# LIME labels column k of the prediction array with class_names[k], so the
# names must follow the model's output order. `class_names` is the
# index -> name mapping the evaluation cells use for these same predictions;
# final_df['label'].unique() is ordered by first appearance in the dataframe
# and is not guaranteed to line up with the output columns.
explainer = LimeTextExplainer(class_names=class_names)

def explain_instance(text, model_predict, num_features=10, top_labels=1):
    """Explain one text with LIME, render it in the notebook, return the result.

    Parameters
    ----------
    text : str
        The raw text to explain.
    model_predict : callable
        Maps a list of texts to an array of per-class scores.
    num_features : int
        Maximum number of words in the explanation.
    top_labels : int
        Explain the `top_labels` highest-scoring classes. LIME's default
        (`labels=(1,)`) always explains class index 1, which is only
        meaningful for binary problems.

    Returns
    -------
    The LIME Explanation object. `show_in_notebook` only displays the HTML
    and returns None, so the explanation itself is returned to keep it usable.
    """
    exp = explainer.explain_instance(
        text, model_predict, num_features=num_features, top_labels=top_labels
    )
    exp.show_in_notebook(text=text)

    return exp

text_to_explain = "তুমি কি সেরা হ্যাকার? কাজের পাঁচ মিনিটে তোর হ্যাক করে দিব। এক বিশ্বাস নিবে?"
# The visualisation is rendered inside explain_instance; keep the object for later inspection.
explanation = explain_instance(text_to_explain, model_predict)

Transformer-XL

In [None]:
class RelativePositionalEncoding(Layer):
    """Sinusoidal encodings for positions 0..seq_len-1 of the input sequence.

    Despite the name, the encoding depends only on each absolute position
    index. The output has shape (1, seq_len, d_model): sines of the
    even-indexed channels followed by cosines of the odd-indexed channels.
    """

    def __init__(self, d_model, **kwargs):
        super(RelativePositionalEncoding, self).__init__(**kwargs)
        self.d_model = d_model

    def call(self, inputs):
        """Return the encodings for the sequence axis (axis 1) of `inputs`."""
        seq_len = tf.shape(inputs)[1]
        position_ids = tf.cast(tf.range(seq_len), tf.float32)[tf.newaxis, :]
        position_encodings = self._get_position_encodings(position_ids)
        return position_encodings

    def _get_position_encodings(self, position_ids):
        """Map (1, seq_len) float positions to (1, seq_len, d_model) encodings."""
        # Cast before dividing: TensorFlow does not divide int32 by float32.
        channel_ids = tf.range(self.d_model)
        exponents = tf.cast(2 * (channel_ids // 2), tf.float32) / tf.cast(self.d_model, tf.float32)
        angles = 1.0 / tf.pow(10000.0, exponents)  # shape (d_model,)
        # `angles` is rank 1, so its einsum subscript is a single index.
        positions = tf.einsum('bi,j->bij', position_ids, angles)
        position_encodings = tf.concat([tf.sin(positions[:, :, 0::2]), tf.cos(positions[:, :, 1::2])], axis=-1)
        return position_encodings

In [None]:
class TransformerXLBlock(Layer):
    """Post-norm Transformer encoder block: self-attention + feed-forward.

    Parameters
    ----------
    d_model : int
        Width of the input/output features; also used as the per-head key size.
    num_heads : int
        Number of attention heads.
    d_ff : int
        Hidden width of the feed-forward sub-network.
    dropout : float
        Dropout rate applied after attention and after the feed-forward net.
    mem_len : int
        Stored on the layer but not used by `call`.

    As written, `call` applies plain (unmasked) self-attention: the `memory`
    argument, `mem_len`, `pos_encoding` and `causal_attention_mask` are not
    part of the forward pass.
    """

    def __init__(self, d_model, num_heads, d_ff, dropout, mem_len, **kwargs):
        super(TransformerXLBlock, self).__init__(**kwargs)
        self.mem_len = mem_len
        self.num_heads = num_heads
        self.d_model = d_model
        self.d_ff = d_ff
        self.dropout_rate = dropout

        # Keras' signature is MultiHeadAttention(num_heads, key_dim). Passing
        # (d_model, num_heads) positionally created d_model heads of size
        # num_heads, so use keywords to make the intent explicit.
        self.self_attention = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)

        self.ffn = tf.keras.Sequential([
            Dense(d_ff, activation='relu'),
            Dense(d_model)
        ])

        self.ln1 = LayerNormalization(epsilon=1e-6)
        self.ln2 = LayerNormalization(epsilon=1e-6)

        self.dropout1 = Dropout(dropout)
        self.dropout2 = Dropout(dropout)

    def build(self, input_shape):
        # Created here but not applied in `call`.
        self.pos_encoding = RelativePositionalEncoding(self.d_model)
        super(TransformerXLBlock, self).build(input_shape)

    def call(self, inputs, memory=None, training=None):
        """Apply attention and feed-forward sub-layers, each with residual + LayerNorm."""
        query = inputs

        attn_output = self.self_attention(query, query, training=training)

        attn_output = self.dropout1(attn_output, training=training)
        attn_output += query
        attn_output = self.ln1(attn_output)

        ffn_output = self.ffn(attn_output)

        ffn_output = self.dropout2(ffn_output, training=training)
        ffn_output += attn_output
        ffn_output = self.ln2(ffn_output)

        return ffn_output

    def causal_attention_mask(self, query):
        """Return a (seq, seq) matrix that is 1 where column j > row i, else 0.

        NOTE(review): this marks *future* positions with 1, whereas Keras'
        `attention_mask` uses 1 for positions that may be attended to; invert
        it before wiring it into `self_attention`.
        """
        seq_length = tf.shape(query)[1]
        mask = 1 - tf.linalg.band_part(tf.ones((seq_length, seq_length)), -1, 0)
        return mask

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super(TransformerXLBlock, self).get_config()
        config.update({
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "d_ff": self.d_ff,
            "dropout": self.dropout_rate,
            "mem_len": self.mem_len,
        })
        return config

In [None]:
def create_hybrid_transformerxl_model(input_shape, vocab_size, d_model=128, num_heads=4, d_ff=128, num_blocks=2, rate=0.1):
    """Build the Transformer-XL-block classifier (5-way softmax).

    Token ids are embedded to `d_model` channels, passed through `num_blocks`
    `TransformerXLBlock` layers (each created with ``mem_len=128``), average
    pooled over the sequence axis and classified.

    The `attention_mask` input is part of the model signature but is not
    connected to any layer.

    Returns an uncompiled Keras `Model` taking ``[input_ids, attention_mask]``.
    """
    token_ids_in = Input(shape=input_shape, name="input_ids")
    mask_in = Input(shape=input_shape, name="attention_mask")

    hidden = Embedding(input_dim=vocab_size, output_dim=d_model)(token_ids_in)

    # All blocks are instantiated first, then applied in order.
    xl_stack = [
        TransformerXLBlock(d_model, num_heads, d_ff, rate, mem_len=128)
        for _ in range(num_blocks)
    ]
    for xl_block in xl_stack:
        hidden = xl_block(hidden)

    pooled = GlobalAveragePooling1D()(hidden)
    class_probs = Dense(5, activation='softmax')(pooled)

    return Model(inputs=[token_ids_in, mask_in], outputs=class_probs)

In [None]:
# Build, compile and summarise the Transformer-XL classifier.
# Both assignments below overwrite the notebook-level `input_shape` and
# `vocab_size` used by later cells.
input_shape = (120,)
# NOTE(review): the earlier model cells use tokenizer.vocab_size without the
# +1; presumably this reserves one spare embedding row -- confirm.
vocab_size = tokenizer.vocab_size + 1
hybrid_transformerxl_model = create_hybrid_transformerxl_model(input_shape, vocab_size)
hybrid_transformerxl_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
hybrid_transformerxl_model.summary()

In [None]:
# Train the Transformer-XL model (up to 10 epochs, batches of 32) and keep the
# Keras History in `history_6` for the plotting cell below.
# `early_stopping` is a callback created in an earlier cell.
history_6 = hybrid_transformerxl_model.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=10,
    batch_size=32,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

In [None]:
# Accuracy and loss for the training and validation sets, all on one axes.
plt.figure(figsize=(12, 6))

# (History key, legend label) for each curve, in drawing order.
for history_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_6.history[history_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Per-class scores for every test example, then the arg-max class index.
test_pred = hybrid_transformerxl_model.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)

# Loss / accuracy as reported by Keras.
loss, accuracy = hybrid_transformerxl_model.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# The same accuracy recomputed with scikit-learn from the class indices.
test_accuracy = accuracy_score(test_labels.argmax(axis=1), test_pred_labels)
print("Test Accuracy:", test_accuracy)

class_names = ["Not Bully", "Troll", "Sexual", "Religious", "Threat"]

# Class indices translated to their display names.
true_class_names = np.array([class_names[idx] for idx in test_labels.argmax(axis=1)])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print(
    "Classification Report:",
    classification_report(test_labels.argmax(axis=1), test_pred_labels, target_names=class_names),
    sep="\n",
)

# Plot Confusion Matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a one-line banner and plot the confusion matrix of y_pred vs y_true.

    Parameters
    ----------
    y_true, y_pred : 1-D array-like
        True and predicted labels drawn from the same label space.
    classes : array-like or None
        Preferred display order of the labels. Only labels that occur in
        `y_true` or `y_pred` are drawn. If the data contains a label that is
        not listed in `classes` (or `classes` is None), the sorted labels
        found in the data are used instead.
    normalize : bool
        If True, each row is divided by its total (rows without any true
        sample stay at zero).
    title : str or None
        Figure title; a default is chosen from `normalize` when falsy.
    cmap : matplotlib colormap
        Colormap passed to `imshow`.
    """
    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data (np.unique returns them sorted).
    observed = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    present = set(observed.tolist())
    requested = [] if classes is None else list(dict.fromkeys(np.ravel(classes).tolist()))
    if requested and present <= set(requested):
        # Honour the caller's ordering instead of silently discarding `classes`.
        classes = np.array([name for name in requested if name in present])
    else:
        classes = observed

    # Pass the labels explicitly so matrix rows/columns match the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guard against 0/0 for labels that only ever appear as predictions.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; white text on dark cells, black on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Draw the raw-count confusion matrix for the test-set predictions.
plot_confusion_matrix(
    true_class_names,
    predicted_class_names,
    classes=np.array(class_names),
    title='Confusion Matrix',
)

# Macro-averaged precision, recall and F1 on the integer class indices.
precision, recall, f1 = (
    score_fn(test_labels.argmax(axis=1), test_pred_labels, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# One-vs-rest ROC curve and AUC per class (dicts keyed by class index),
# drawn on a single figure as each curve is computed.
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i, class_name in enumerate(class_names):
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_name))

# Chance diagonal, axis limits and labels.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()


In [None]:
def model_predict(texts):
    """Prediction function handed to LIME: raw texts -> model output array.

    Every text is passed through `bert_tokenizer` (defined elsewhere in the
    notebook); the per-text results are stacked, split into ids and masks,
    squeezed on axis 1 and fed to `hybrid_transformerxl_model.predict`.

    NOTE(review): assumes `bert_tokenizer` returns an
    (input_ids, attention_mask) pair whose arrays carry a singleton axis 1 --
    confirm against its definition.
    """
    encoded = np.array([bert_tokenizer(text) for text in texts])
    ids_per_text, mask_per_text = zip(*encoded)

    ids_batch = np.squeeze(np.array(ids_per_text), axis=1)
    mask_batch = np.squeeze(np.array(mask_per_text), axis=1)

    return hybrid_transformerxl_model.predict([ids_batch, mask_batch])

# Instantiate a LIME text explainer.
# LIME labels column k of the prediction array with class_names[k], so the
# names must follow the model's output order. `class_names` is the
# index -> name mapping the evaluation cells use for these same predictions;
# final_df['label'].unique() is ordered by first appearance in the dataframe
# and is not guaranteed to line up with the output columns.
explainer = LimeTextExplainer(class_names=class_names)

def explain_instance(text, model_predict, num_features=10, top_labels=1):
    """Explain one text with LIME, render it in the notebook, return the result.

    Parameters
    ----------
    text : str
        The raw text to explain.
    model_predict : callable
        Maps a list of texts to an array of per-class scores.
    num_features : int
        Maximum number of words in the explanation.
    top_labels : int
        Explain the `top_labels` highest-scoring classes. LIME's default
        (`labels=(1,)`) always explains class index 1, which is only
        meaningful for binary problems.

    Returns
    -------
    The LIME Explanation object. `show_in_notebook` only displays the HTML
    and returns None, so the explanation itself is returned to keep it usable.
    """
    exp = explainer.explain_instance(
        text, model_predict, num_features=num_features, top_labels=top_labels
    )
    exp.show_in_notebook(text=text)

    return exp

text_to_explain = "তুমি কি সেরা হ্যাকার? কাজের পাঁচ মিনিটে তোর হ্যাক করে দিব। এক বিশ্বাস নিবে?"
# The visualisation is rendered inside explain_instance; keep the object for later inspection.
explanation = explain_instance(text_to_explain, model_predict)

Transformer-XL-BiGRU-BiLSTM (Fusion Transformer-XL)

In [None]:
def create_hybrid_transformerxl_model(input_shape, vocab_size, d_model=128, num_heads=6, d_ff=128, num_blocks=4, rate=0.1):
    """Build the fusion Transformer-XL -> BiGRU -> BiLSTM classifier (5-way softmax).

    This definition reuses the name of the plain Transformer-XL builder from
    an earlier cell and replaces it once this cell has run.

    Token ids are embedded to `d_model` channels and passed through
    `num_blocks` `TransformerXLBlock` layers (each created with
    ``mem_len=128``). The per-token features are projected to 64 units, run
    through a bidirectional GRU and a bidirectional LSTM (128 units each),
    average pooled over the sequence axis and classified.

    The `attention_mask` input is part of the model signature but is not
    connected to any layer.

    Returns an uncompiled Keras `Model` taking ``[input_ids, attention_mask]``.
    """
    token_ids_in = Input(shape=input_shape, name="input_ids")
    mask_in = Input(shape=input_shape, name="attention_mask")

    hidden = Embedding(input_dim=vocab_size, output_dim=d_model)(token_ids_in)

    # All blocks are instantiated first, then applied in order.
    xl_stack = [
        TransformerXLBlock(d_model, num_heads, d_ff, rate, mem_len=128)
        for _ in range(num_blocks)
    ]
    for xl_block in xl_stack:
        hidden = xl_block(hidden)

    # Per-token dense projection followed by the recurrent layers.
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Reshape((-1, 64))(hidden)
    hidden = Bidirectional(GRU(128, return_sequences=True))(hidden)
    hidden = Bidirectional(LSTM(128, return_sequences=True))(hidden)

    pooled = GlobalAveragePooling1D()(hidden)
    class_probs = Dense(5, activation='softmax')(pooled)

    return Model(inputs=[token_ids_in, mask_in], outputs=class_probs)

In [None]:
# Build, compile and summarise the fusion Transformer-XL-BiGRU-BiLSTM model.
# `create_hybrid_transformerxl_model` here is the fusion builder from the cell
# above, which shares its name with the plain Transformer-XL builder.
input_shape = (120,)
# NOTE(review): the non-XL model cells use tokenizer.vocab_size without the
# +1; presumably this reserves one spare embedding row -- confirm.
vocab_size = tokenizer.vocab_size + 1
hybrid_transformerxl = create_hybrid_transformerxl_model(input_shape, vocab_size)
hybrid_transformerxl.compile(optimizer='nadam', loss='categorical_crossentropy', metrics=['accuracy'])
hybrid_transformerxl.summary()

In [None]:
# Train the fusion model (up to 20 epochs, batches of 64) and keep the Keras
# History in `history_7` for the plot that follows.
# `early_stopping` is a callback created in an earlier cell.
history_7 = hybrid_transformerxl.fit(
    x=[train_input_ids, train_attention_mask],
    y=train_labels,
    epochs=20,
    batch_size=64,
    validation_data=([val_input_ids, val_attention_mask], val_labels),
    callbacks=[early_stopping]
)

# Accuracy and loss for the training and validation sets, all on one axes.
plt.figure(figsize=(12, 6))

# (History key, legend label) for each curve, in drawing order.
for history_key, curve_label in (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history_7.history[history_key], label=curve_label, marker='o')

plt.title('Model Training Metrics')
plt.xlabel('Epoch')
plt.ylabel('Metrics Value')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Per-class scores for every test example, then the arg-max class index.
test_pred = hybrid_transformerxl.predict([test_input_ids, test_attention_mask])
test_pred_labels = np.argmax(test_pred, axis=1)

# Loss / accuracy as reported by Keras.
loss, accuracy = hybrid_transformerxl.evaluate([test_input_ids, test_attention_mask], test_labels)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# The same accuracy recomputed with scikit-learn from the class indices.
test_accuracy = accuracy_score(test_labels.argmax(axis=1), test_pred_labels)
print("Test Accuracy:", test_accuracy)

class_names = ["Not Bully", "Troll", "Sexual", "Religious", "Threat"]

# Class indices translated to their display names.
true_class_names = np.array([class_names[idx] for idx in test_labels.argmax(axis=1)])
predicted_class_names = np.array([class_names[idx] for idx in test_pred_labels])

print(
    "Classification Report:",
    classification_report(test_labels.argmax(axis=1), test_pred_labels, target_names=class_names),
    sep="\n",
)

def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print a one-line banner and plot the confusion matrix of y_pred vs y_true.

    Parameters
    ----------
    y_true, y_pred : 1-D array-like
        True and predicted labels drawn from the same label space.
    classes : array-like or None
        Preferred display order of the labels. Only labels that occur in
        `y_true` or `y_pred` are drawn. If the data contains a label that is
        not listed in `classes` (or `classes` is None), the sorted labels
        found in the data are used instead.
    normalize : bool
        If True, each row is divided by its total (rows without any true
        sample stay at zero).
    title : str or None
        Figure title; a default is chosen from `normalize` when falsy.
    cmap : matplotlib colormap
        Colormap passed to `imshow`.
    """
    if not title:
        title = 'Normalized Confusion Matrix' if normalize else 'Confusion Matrix'

    # Only use the labels that appear in the data (np.unique returns them sorted).
    observed = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    present = set(observed.tolist())
    requested = [] if classes is None else list(dict.fromkeys(np.ravel(classes).tolist()))
    if requested and present <= set(requested):
        # Honour the caller's ordering instead of silently discarding `classes`.
        classes = np.array([name for name in requested if name in present])
    else:
        classes = observed

    # Pass the labels explicitly so matrix rows/columns match the tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Guard against 0/0 for labels that only ever appear as predictions.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; white text on dark cells, black on light ones.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(len(classes)):
        for j in range(len(classes)):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    plt.show()

# Draw the raw-count confusion matrix for the test-set predictions.
plot_confusion_matrix(
    true_class_names,
    predicted_class_names,
    classes=np.array(class_names),
    title='Confusion Matrix',
)

# Macro-averaged precision, recall and F1 on the integer class indices.
precision, recall, f1 = (
    score_fn(test_labels.argmax(axis=1), test_pred_labels, average='macro')
    for score_fn in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

# One-vs-rest ROC curve and AUC per class (dicts keyed by class index),
# drawn on a single figure as each curve is computed.
fpr, tpr, roc_auc = {}, {}, {}
plt.figure(figsize=(8, 6))
for i, class_name in enumerate(class_names):
    fpr[i], tpr[i], _ = roc_curve(test_labels[:, i], test_pred[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f) for class %s' % (roc_auc[i], class_name))

# Chance diagonal, axis limits and labels.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
def model_predict(texts):
    """Prediction function handed to LIME: raw texts -> model output array.

    Every text is passed through `bert_tokenizer` (defined elsewhere in the
    notebook); the per-text results are stacked, split into ids and masks,
    squeezed on axis 1 and fed to `hybrid_transformerxl.predict`.

    NOTE(review): assumes `bert_tokenizer` returns an
    (input_ids, attention_mask) pair whose arrays carry a singleton axis 1 --
    confirm against its definition.
    """
    encoded = np.array([bert_tokenizer(text) for text in texts])
    ids_per_text, mask_per_text = zip(*encoded)

    ids_batch = np.squeeze(np.array(ids_per_text), axis=1)
    mask_batch = np.squeeze(np.array(mask_per_text), axis=1)

    return hybrid_transformerxl.predict([ids_batch, mask_batch])

# Instantiate a LIME text explainer.
# LIME labels column k of the prediction array with class_names[k], so the
# names must follow the model's output order. `class_names` is the
# index -> name mapping the evaluation cells use for these same predictions;
# final_df['label'].unique() is ordered by first appearance in the dataframe
# and is not guaranteed to line up with the output columns.
explainer = LimeTextExplainer(class_names=class_names)

def explain_instance(text, model_predict, num_features=10, top_labels=1):
    """Explain one text with LIME, render it in the notebook, return the result.

    Parameters
    ----------
    text : str
        The raw text to explain.
    model_predict : callable
        Maps a list of texts to an array of per-class scores.
    num_features : int
        Maximum number of words in the explanation.
    top_labels : int
        Explain the `top_labels` highest-scoring classes. LIME's default
        (`labels=(1,)`) always explains class index 1, which is only
        meaningful for binary problems.

    Returns
    -------
    The LIME Explanation object. `show_in_notebook` only displays the HTML
    and returns None, so the explanation itself is returned to keep it usable.
    """
    exp = explainer.explain_instance(
        text, model_predict, num_features=num_features, top_labels=top_labels
    )
    exp.show_in_notebook(text=text)

    return exp

text_to_explain = "তুমি কি সেরা হ্যাকার? কাজের পাঁচ মিনিটে তোর হ্যাক করে দিব। এক বিশ্বাস নিবে?"
# The visualisation is rendered inside explain_instance; keep the object for later inspection.
explanation = explain_instance(text_to_explain, model_predict)

In [None]:
# Keras callback that scores a held-out split with scikit-learn after each epoch
class MetricsCallback(Callback):
    """Track weighted precision/recall/F1 and accuracy on a validation split.

    After every epoch the model is run on the validation inputs, the scores are
    appended to `precisions`, `recalls`, `f1s` and `accuracies` (one entry per
    epoch), and a summary line is printed.

    Args:
        val_data: indexable where element 0 is what gets passed to
            `model.predict` and element 1 holds the targets. argmax over axis 1
            is applied to both predictions and targets, so targets are
            presumably one-hot encoded — confirm against the caller.
    """

    def __init__(self, val_data):
        super().__init__()
        self.validation_data = val_data
        # Per-epoch score histories, filled in by on_epoch_end.
        self.precisions, self.recalls = [], []
        self.f1s, self.accuracies = [], []

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation split, record the results and print them."""
        val_inputs = self.validation_data[0]
        val_targets = self.validation_data[1]

        # Collapse both predictions and targets to class indices.
        val_pred = np.argmax(self.model.predict(val_inputs), axis=1)
        val_true = np.argmax(val_targets, axis=1)

        precision = precision_score(val_true, val_pred, average='weighted')
        recall = recall_score(val_true, val_pred, average='weighted')
        f1 = f1_score(val_true, val_pred, average='weighted')
        accuracy = accuracy_score(val_true, val_pred)

        for history, score in (
            (self.precisions, precision),
            (self.recalls, recall),
            (self.f1s, f1),
            (self.accuracies, accuracy),
        ):
            history.append(score)

        print(f' - val_accuracy: {accuracy:.4f} - val_precision: {precision:.4f} - val_recall: {recall:.4f} - val_f1: {f1:.4f}')

# Number of cross-validation folds
k_folds = 5

# Container for the training history of each fold
fold_histories = []

# Stratified splitter: shuffled, with a fixed seed so the folds are reproducible
kfold = StratifiedKFold(
    n_splits=k_folds,
    shuffle=True,
    random_state=42,
)

In [None]:
# Per-fold validation scores: each list ends up with exactly one entry per fold.
all_accuracies = []
all_precisions = []
all_recalls = []
all_f1s = []

# Reset here as well so that re-running this cell does not accumulate histories
# from a previous run.
fold_histories = []

# Stratify on the class index recovered from the label rows (argmax over axis 1).
fold_splits = kfold.split(train_input_ids, np.argmax(train_labels, axis=1))

for fold, (train_index, val_index) in enumerate(fold_splits):
    print(f'Fold {fold + 1}/{k_folds}')

    # Split the data into train and validation sets for this fold
    fold_train_input_ids, fold_val_input_ids = train_input_ids[train_index], train_input_ids[val_index]
    fold_train_attention_mask, fold_val_attention_mask = train_attention_mask[train_index], train_attention_mask[val_index]
    fold_train_labels, fold_val_labels = train_labels[train_index], train_labels[val_index]

    # Build and compile a fresh model so no weights carry over between folds
    hybrid_transformerxl_model = create_hybrid_transformerxl_model(input_shape, vocab_size)
    hybrid_transformerxl_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Callback that scores this fold's validation split after every epoch
    metrics_callback = MetricsCallback(([fold_val_input_ids, fold_val_attention_mask], fold_val_labels))

    # Train the model for this fold
    fold_history = hybrid_transformerxl_model.fit(
        x=[fold_train_input_ids, fold_train_attention_mask],
        y=fold_train_labels,
        epochs=10,
        batch_size=32,
        validation_data=([fold_val_input_ids, fold_val_attention_mask], fold_val_labels),
        callbacks=[early_stopping, metrics_callback]
    )

    # Keep this fold's training history; previously it was overwritten on the
    # next iteration and fold_histories stayed empty.
    fold_histories.append(fold_history)

    # Score each fold exactly once, using the last epoch that actually ran.
    # Extending with every epoch's score would mix under-trained early epochs
    # into the cross-validation estimate and weight folds by how many epochs
    # they ran before early stopping.
    all_accuracies.append(metrics_callback.accuracies[-1])
    all_precisions.append(metrics_callback.precisions[-1])
    all_recalls.append(metrics_callback.recalls[-1])
    all_f1s.append(metrics_callback.f1s[-1])

# Cross-validated estimate: mean of the per-fold scores
avg_accuracy = np.mean(all_accuracies)
avg_precision = np.mean(all_precisions)
avg_recall = np.mean(all_recalls)
avg_f1 = np.mean(all_f1s)

print(f'Average Accuracy: {avg_accuracy:.4f}')
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1-score: {avg_f1:.4f}')
