# Toxic Comment Classification

#### Dissertation Project
### Name: Renee Mendonca
### Student Number-221040908



# Models Used on Toxicity Type
I have used 5 models:
1. CNN
2. Bidirectional LSTM
3. Bidirectional GRU
4. Bidirectional CNN
5. Ensemble Model

Because our dataset is multi-label, I have used OneVsRestClassifier(): the toxic-label columns contain only 1's and 0's as values. OneVsRestClassifier performs better on this type of dataset because it accepts binary data over multiple labels, and its predictions are also returned as 0's and 1's.

Mount Your Google Drive

In [None]:
# Mount Google Drive at /content/drive; the data file paths used in this
# notebook all live under "/content/drive/My Drive/...".
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


CNN Model

In [None]:
from keras.layers import Dense, Input, Conv2D, Embedding, Dropout, Reshape, Flatten, Lambda, concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool2D
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
import tensorflow as tf
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)  # Use tf.random.set_seed in newer TensorFlow versions
random.seed(7)

def get_f1(y_true, y_pred):
    """Return the mean of the per-column F1 scores.

    Predictions are thresholded at 0.5, then true positives, false positives
    and false negatives are summed along axis 0 (one count per column).
    K.epsilon() is added to every denominator so empty columns do not divide
    by zero.
    """
    eps = K.epsilon()
    hard_pred = tf.cast(y_pred >= 0.5, tf.float32)

    def _column_sum(values):
        # Sum along axis 0, i.e. one total per label column.
        return tf.reduce_sum(tf.cast(values, tf.float32), axis=0)

    true_pos = _column_sum(y_true * hard_pred)
    false_pos = _column_sum((1 - y_true) * hard_pred)
    false_neg = _column_sum(y_true * (1 - hard_pred))

    precision = true_pos / (true_pos + false_pos + eps)
    recall = true_pos / (true_pos + false_neg + eps)

    per_column_f1 = 2 * (precision * recall) / (precision + recall + eps)
    return tf.reduce_mean(per_column_f1)

def f1_score_metric(y_true, y_pred):
    """Metric entry point handed to model.compile; delegates to get_f1."""
    score = get_f1(y_true, y_pred)
    return score

# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"

# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: first `max_comment_length` columns -> input,
# next 6 columns -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print(training_data.shape)

training_input = training_data[:, :max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:max_comment_length + 6]
print(training_labels.shape)

# The number of rows in the embedding matrix is used as the Embedding input_dim.
embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
validation_input = validation_data[:, :max_comment_length]
validation_labels = validation_data[:, max_comment_length:max_comment_length + 6]
print(validation_labels.shape)

# Per-label positive counts and frequencies on the validation set.
# They are only printed here; this cell's f1_score_metric does not read them.
total_records_count = validation_data.shape[0]
class_records_count = np.sum(validation_labels, axis=0)
print(class_records_count)
print(total_records_count)

class_percentage = class_records_count / total_records_count
print(class_percentage)

target = np.sum(class_percentage)
print(target)

# Build model
input_layer = Input(shape=(max_comment_length, ))

# Embedding is initialised from the loaded matrix and fine-tuned (trainable=True).
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input_layer)
# Add a trailing channel axis so the sequence can be fed to Conv2D.
x_reshaped = Reshape((max_comment_length, vector_size, 1))(x)
# The kernel spans the full embedding width, so it slides over 4 positions at a time.
cnn_output = Conv2D(filters=300, kernel_size=(4, vector_size), activation='relu', data_format="channels_last")(x_reshaped)
pooling_output = GlobalMaxPool2D()(cnn_output)
dense1_output = Dense(50, activation="relu")(pooling_output)
dropout1_output = Dropout(0.2)(dense1_output)
# Six independent sigmoid outputs, one per label column.
output_layer = Dense(6, activation="sigmoid")(dropout1_output)

# Compile model
model = Model(inputs=input_layer, outputs=output_layer)
print(model.summary())

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002), metrics=['accuracy', f1_score_metric])

# Callbacks for early stopping and saving the best model
# Both callbacks track the validation value of f1_score_metric (higher is better).
checkpoint = ModelCheckpoint('model-cnn.h5', monitor='val_f1_score_metric', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_f1_score_metric', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_input, validation_labels))

print("--- %s seconds ---" % (time.time() - start_time))


(36491, 156)
(36491, 150)
(36491, 6)
(29355, 300)
29355
(1622, 6)
[1531.  148.  850.   54.  833.  142.]
1622
[0.94389642 0.09124538 0.52404439 0.03329223 0.5135635  0.08754624]
2.193588162762022
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 150)]             0         
                                                                 
 embedding_1 (Embedding)     (None, 150, 300)          8806500   
                                                                 
 reshape_1 (Reshape)         (None, 150, 300, 1)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 147, 1, 300)       360300    
                                                                 
 global_max_pooling2d_1 (Gl  (None, 300)               0         
 obalMaxPooling2D)                                            

Bidirectional LSTM Model

In [None]:
!pip install --upgrade tensorflow




In [None]:
from keras.layers import Dense, Input, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool1D
from keras.optimizers import Adam
from sklearn.metrics import roc_auc_score
from keras.models import Model
import tensorflow as tf
from tensorflow.keras.metrics import AUC # Import AUC here
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)  # Use tf.random.set_seed in newer TensorFlow versions
random.seed(7)

# Initialize AUC metric outside the tf.function
# A single module-level AUC object; every call to get_roc_auc below updates it.
auc_metric = AUC()

# Remove .numpy() call within the metric function
def get_roc_auc(actual, predicted):
    """Feed one batch into the shared `auc_metric` and return its current value.

    NOTE(review): nothing in this function resets `auc_metric`, so the returned
    value appears to accumulate over every batch it has ever seen (presumably
    training and validation batches alike) — confirm whether that is intended.
    """
    # Assuming 'actual' and 'predicted' are TensorFlow tensors
    auc_metric.update_state(actual, predicted)
    # Return the symbolic tensor representing AUC
    return auc_metric.result()

def get_f1(actual, predicted):
  """Return the binary F1 score for a single label column.

  Args:
    actual: 0/1-valued integer tensor of ground-truth labels.
    predicted: 0/1-valued integer tensor of thresholded predictions, same
      shape as `actual`.

  Returns:
    Scalar float64 tensor. When precision or recall is undefined (no
    predicted positives or no actual positives) the result is 0.0 rather
    than NaN, so a batch without positives no longer poisons the metric.
  """
  # count_nonzero returns int64; cast to float64, the dtype tf.divide
  # produced for these counts before.
  TP = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
  # Non-zero only where predicted == 1 and actual == 0.
  FP = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
  # Non-zero only where predicted == 0 and actual == 1.
  FN = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

  # divide_no_nan returns 0 when the denominator is 0 instead of NaN.
  precision = tf.math.divide_no_nan(TP, TP + FP)
  recall = tf.math.divide_no_nan(TP, TP + FN)

  f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)
  return f1

def f1_score_metric(actual, predicted):
  """Frequency-weighted F1 over the six label columns of one batch.

  Predictions are thresholded at 0.5. Each column's F1 (from get_f1) is
  weighted by that column's share of positive labels in the batch, and the
  weighted sum is divided by the sum of the weights.
  """
  hard_predictions = tf.cast(predicted >= 0.5, tf.int32)
  int_labels = tf.cast(actual, tf.int32)

  # Per-batch label frequencies act as the class weights.
  batch_rows = tf.cast(tf.shape(int_labels)[0], tf.float32)
  positives_per_class = tf.reduce_sum(tf.cast(int_labels, tf.float32), axis=0)
  class_weights = tf.cast(positives_per_class / batch_rows, tf.float32)
  weight_total = tf.reduce_sum(class_weights)

  weighted_f1_sum = 0.0
  for class_id in range(6):
    class_f1 = get_f1(actual=int_labels[:, class_id],
                      predicted=hard_predictions[:, class_id])
    # get_f1's result is cast to float32 so it can be multiplied by the weights.
    class_f1 = tf.cast(class_f1, tf.float32)
    weighted_f1_sum = weighted_f1_sum + class_f1 * class_weights[class_id]

  return weighted_f1_sum / weight_total

# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"

# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: columns [0, 150) -> input, columns [150, 156) -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print(training_data.shape)

training_input = training_data[:, 0:max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:156]
print(training_labels.shape)

embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')

# Build model
input = Input(shape=(max_comment_length, ))

# Embedding is initialised from the loaded matrix and fine-tuned (trainable=True).
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input)

# return_sequences=True keeps every timestep so the pooling layer can take
# the maximum over the sequence.
lstm_output = Bidirectional(LSTM(units=30, activation="tanh", return_sequences=True))(x)

pooling_output = GlobalMaxPool1D()(lstm_output)

dense1_output = Dense(60, activation="relu")(pooling_output)
dropout1_output = Dropout(0.2)(dense1_output)

dense2_output = Dense(60, activation="relu")(dropout1_output)
dropout2_output = Dropout(0.2)(dense2_output)

# Six independent sigmoid outputs, one per label column.
output = Dense(6, activation="sigmoid")(dropout2_output)


# Compile Model
model = Model(inputs=input, outputs=output)
print(model.summary())

# AUC is passed as a Keras Metric object so that Keras owns its state and
# resets it for each epoch and before validation. Updating a shared global
# AUC from inside a metric function never resets it, which lets training
# batches leak into the reported validation AUC. The metric keeps the name
# 'get_roc_auc' so the log keys and the monitors below are unchanged.
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.003),
              metrics=['accuracy', f1_score_metric, AUC(name='get_roc_auc')])

# Callbacks for early stopping and saving the best model
# Both callbacks track the validation AUC (higher is better).
checkpoint = ModelCheckpoint('model-lstm.keras', monitor='val_get_roc_auc',
                             verbose=1, save_best_only=True, mode='max')

early_stopping = EarlyStopping(monitor='val_get_roc_auc', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_data[:, 0:max_comment_length], validation_data[:,max_comment_length:156]))
print("--- %s seconds ---" % (time.time() - start_time))

(36491, 156)
(36491, 150)
(36491, 6)
(29355, 300)
29355


None
Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.8214 - f1_score_metric: nan - get_roc_auc: 0.7752 - loss: 0.4932
Epoch 1: val_get_roc_auc improved from -inf to 0.89683, saving model to model-lstm.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 130ms/step - accuracy: 0.8211 - f1_score_metric: nan - get_roc_auc: 0.7768 - loss: 0.4909 - val_accuracy: 0.8681 - val_f1_score_metric: nan - val_get_roc_auc: 0.8968 - val_loss: 0.3267
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.6922 - f1_score_metric: 0.8888 - get_roc_auc: 0.9102 - loss: 0.2613
Epoch 2: val_get_roc_auc improved from 0.89683 to 0.93641, saving model to model-lstm.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 114ms/step - accuracy: 0.6925 - f1_score_metric: 0.8892 - get_roc_auc: 0.9105 - loss: 0.2606 - val_accuracy: 0.8576 - val_f1_score_metric: 0.8262 - val_get_roc_auc: 0

Bidirectional GRU Model

In [None]:
from keras.layers import Dense, Input, GRU, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool1D
from keras.optimizers import Adam
from keras.models import Model
import tensorflow as tf
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)
random.seed(7)

def get_f1(actual, predicted):
  """Return the binary F1 score for a single label column.

  Args:
    actual: 0/1-valued integer tensor of ground-truth labels.
    predicted: 0/1-valued integer tensor of thresholded predictions, same
      shape as `actual`.

  Returns:
    Scalar float64 tensor. When precision or recall is undefined (no
    predicted positives or no actual positives) the result is 0.0 rather
    than NaN, so a batch without positives no longer poisons the metric.
  """
  # count_nonzero returns int64; cast to float64, the dtype tf.divide
  # produced for these counts before.
  TP = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
  # Non-zero only where predicted == 1 and actual == 0.
  FP = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
  # Non-zero only where predicted == 0 and actual == 1.
  FN = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

  # divide_no_nan returns 0 when the denominator is 0 instead of NaN.
  precision = tf.math.divide_no_nan(TP, TP + FP)
  recall = tf.math.divide_no_nan(TP, TP + FN)

  f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

  return f1

def f1_score_metric(actual, predicted):
  """Weighted F1 over the six label columns.

  Predictions are thresholded at 0.5. Each column's F1 (from get_f1) is
  weighted by the module-level `class_percentage` entry for that column,
  and the weighted sum is divided by the module-level `target`.
  """
  hard_predictions = tf.cast(predicted >= 0.5, tf.int32)
  int_labels = tf.cast(actual, tf.int32)

  weighted_f1_sum = 0
  for class_id in range(6):
    class_f1 = get_f1(actual=int_labels[:, class_id],
                      predicted=hard_predictions[:, class_id])
    weighted_f1_sum = weighted_f1_sum + class_f1 * class_percentage[class_id]

  return weighted_f1_sum / target


# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"

# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: columns [0, 150) -> input, columns [150, 156) -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print(training_data.shape)

training_input = training_data[:, 0:max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:156]
print(training_labels.shape)

# The number of rows in the embedding matrix is used as the Embedding input_dim.
embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
validation_labels = validation_data[:, max_comment_length:156]

# Per-label positive counts and frequencies on the validation set.
# `class_percentage` and `target` are read as globals by f1_score_metric.
total_records_count = validation_data.shape[0]
class_records_count = np.sum(validation_labels, axis=0)

print(class_records_count)
print(total_records_count)

class_percentage = (class_records_count/total_records_count)
print(class_percentage)

# Sum of the per-label frequencies; f1_score_metric divides by it.
target = np.sum(class_percentage)
print(target)


# Build model
input = Input(shape=(max_comment_length, ))

# Embedding is initialised from the loaded matrix and fine-tuned (trainable=True).
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input)

# return_sequences=True keeps every timestep so the pooling layer can take
# the maximum over the sequence.
gru_output = Bidirectional(GRU(units=40, activation="tanh", return_sequences=True))(x)

pooling_output = GlobalMaxPool1D()(gru_output)

dense1_output = Dense(80, activation="relu")(pooling_output)
dropout1_output = Dropout(0.2)(dense1_output)

dense2_output = Dense(80, activation="relu")(dropout1_output)
dropout2_output = Dropout(0.2)(dense2_output)

# Six independent sigmoid outputs, one per label column.
output = Dense(6, activation="sigmoid")(dropout2_output)


# Compile Model
model = Model(inputs=input, outputs=output)
print(model.summary())

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0045), metrics=['accuracy', f1_score_metric])

# Callbacks for early stopping and saving the best model
# Both callbacks track the validation value of f1_score_metric (higher is better).
checkpoint = ModelCheckpoint('model-gru.keras', monitor='val_f1_score_metric', # Changed file extension to .keras
                             verbose=1, save_best_only=True, mode='max')

early_stopping = EarlyStopping(monitor='val_f1_score_metric', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_data[:, 0:max_comment_length], validation_data[:,max_comment_length:156]))

print("--- %s seconds ---" % (time.time() - start_time))

(36491, 156)
(36491, 150)
(36491, 6)
(29355, 300)
29355
[1531.  148.  850.   54.  833.  142.]
1622
[0.94389642 0.09124538 0.52404439 0.03329223 0.5135635  0.08754624]
2.193588162762022


None
Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.7796 - f1_score_metric: nan - loss: 0.4599
Epoch 1: val_f1_score_metric improved from -inf to 0.83336, saving model to model-gru.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 137ms/step - accuracy: 0.7796 - f1_score_metric: nan - loss: 0.4574 - val_accuracy: 0.8989 - val_f1_score_metric: 0.8334 - val_loss: 0.3072
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step - accuracy: 0.8090 - f1_score_metric: 0.9330 - loss: 0.2134
Epoch 2: val_f1_score_metric did not improve from 0.83336
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.8087 - f1_score_metric: 0.9332 - loss: 0.2128 - val_accuracy: 0.8878 - val_f1_score_metric: 0.8272 - val_loss: 0.3085
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step - accuracy: 0.7742 - f1_score_metric: 0.9585 - loss: 0.1319


Bidirectional CNN

In [None]:
from keras.layers import Dense, Input, Conv2D, Embedding, Dropout, Reshape, Flatten, Lambda, concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool2D
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
import tensorflow as tf
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)
random.seed(7)

def get_f1(actual, predicted):
  """Return the binary F1 score for a single label column.

  Args:
    actual: 0/1-valued integer tensor of ground-truth labels.
    predicted: 0/1-valued integer tensor of thresholded predictions, same
      shape as `actual`.

  Returns:
    Scalar float64 tensor. When precision or recall is undefined (no
    predicted positives or no actual positives) the result is 0.0 rather
    than NaN, so a batch without positives no longer poisons the metric.
  """
  # count_nonzero returns int64; cast to float64, the dtype tf.divide
  # produced for these counts before.
  TP = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
  # Non-zero only where predicted == 1 and actual == 0.
  FP = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
  # Non-zero only where predicted == 0 and actual == 1.
  FN = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

  # divide_no_nan returns 0 when the denominator is 0 instead of NaN.
  precision = tf.math.divide_no_nan(TP, TP + FP)
  recall = tf.math.divide_no_nan(TP, TP + FN)

  f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

  return f1

def f1_score_metric(actual, predicted):
  """Weighted F1 over the six label columns.

  Predictions are thresholded at 0.5. Each column's F1 (from get_f1) is
  weighted by the module-level `class_percentage` entry for that column,
  and the weighted sum is divided by the module-level `target`.
  """
  hard_predictions = tf.cast(predicted >= 0.5, tf.int32)
  int_labels = tf.cast(actual, tf.int32)

  weighted_f1_sum = 0
  for class_id in range(6):
    class_f1 = get_f1(actual=int_labels[:, class_id],
                      predicted=hard_predictions[:, class_id])
    weighted_f1_sum = weighted_f1_sum + class_f1 * class_percentage[class_id]

  return weighted_f1_sum / target

def flip(x):
  """Return `x` with the order of its elements reversed along axis 1."""
  reversed_x = tf.reverse(x, axis=[1])
  return reversed_x

# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"

# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: columns [0, 150) -> input, columns [150, 156) -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print(training_data.shape)

training_input = training_data[:, 0:max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:156]
print(training_labels.shape)

embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
validation_labels = validation_data[:, max_comment_length:156]

# Per-label positive counts and frequencies on the validation set.
# `class_percentage` and `target` are read as globals by f1_score_metric.
total_records_count = validation_data.shape[0]
class_records_count = np.sum(validation_labels, axis=0)

print(class_records_count)
print(total_records_count)

class_percentage = (class_records_count/total_records_count)
print(class_percentage)

target = np.sum(class_percentage)
print(target)

# Build model

input = Input(shape=(max_comment_length, ))

# Embedding is initialised from the loaded matrix and fine-tuned (trainable=True).
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input)

# Add a trailing channel axis so the sequence can be fed to Conv2D.
# Shapes are derived from the parameters above rather than repeated as literals.
x_reshaped = Reshape((max_comment_length, vector_size, 1))(x)

# Single Lambda layer that reverses the sequence axis; it was previously
# constructed twice, with the first instance never used.
layer = Lambda(flip, output_shape=(max_comment_length, vector_size, 1))

# Two convolution branches with separate weights: one over the sequence as
# given, one over the reversed sequence. Each kernel spans the full embedding width.
cnn_output1 = Conv2D(filters=200, kernel_size=(8, vector_size), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output2 = Conv2D(filters=200, kernel_size=(8, vector_size), activation='relu', data_format="channels_last")(layer(x_reshaped))

pooling_output1 = GlobalMaxPool2D()(cnn_output1)
pooling_output2 = GlobalMaxPool2D()(cnn_output2)

pooling_output_merged = concatenate([pooling_output1, pooling_output2])

dense1_output = Dense(50, activation="relu")(pooling_output_merged)
dropout1_output = Dropout(0.2)(dense1_output)
# Six independent sigmoid outputs, one per label column.
output = Dense(6, activation="sigmoid")(dropout1_output)

# Compile Model
model = Model(inputs=input, outputs=output)
print(model.summary())

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0005), metrics=['accuracy', f1_score_metric])

# Callbacks for early stopping and saving the best model
# Both callbacks track the validation value of f1_score_metric (higher is better).
checkpoint = ModelCheckpoint('model-cnn-bidirectional.keras', monitor='val_f1_score_metric',
                             verbose=1, save_best_only=True, mode='max')

early_stopping = EarlyStopping(monitor='val_f1_score_metric', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_data[:, 0:max_comment_length], validation_data[:,max_comment_length:156]))


print("--- %s seconds ---" % (time.time() - start_time))

(36491, 156)
(36491, 150)
(36491, 6)
(29355, 300)
29355
[1531.  148.  850.   54.  833.  142.]
1622
[0.94389642 0.09124538 0.52404439 0.03329223 0.5135635  0.08754624]
2.193588162762022


None
Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.8785 - f1_score_metric: nan - loss: 0.5134   
Epoch 1: val_f1_score_metric improved from -inf to 0.82028, saving model to model-cnn-bidirectional.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m431s[0m 5s/step - accuracy: 0.8774 - f1_score_metric: nan - loss: 0.5112 - val_accuracy: 0.8859 - val_f1_score_metric: 0.8203 - val_loss: 0.3458
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step - accuracy: 0.6847 - f1_score_metric: 0.9008 - loss: 0.3119
Epoch 2: val_f1_score_metric improved from 0.82028 to 0.83615, saving model to model-cnn-bidirectional.keras
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 365ms/step - accuracy: 0.6849 - f1_score_metric: 0.9010 - loss: 0.3114 - val_accuracy: 0.8773 - val_f1_score_metric: 0.8362 - val_loss: 0.3128
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33

Merge different kernel sizes in CNN

In [None]:
from keras.layers import Dense, Input, Conv2D, Embedding, Dropout, Reshape, Flatten, Lambda, concatenate
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool2D
from keras.optimizers import Adam
from keras.models import Model
import keras.backend as K
import tensorflow as tf
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)
random.seed(7)

def get_f1(actual, predicted):
  """Return the binary F1 score for a single label column.

  Uses tf.math.count_nonzero (the TF2 location of the former
  tf.count_nonzero), matching the other cells of this notebook.

  Args:
    actual: 0/1-valued integer tensor of ground-truth labels.
    predicted: 0/1-valued integer tensor of thresholded predictions, same
      shape as `actual`.

  Returns:
    Scalar float64 tensor. When precision or recall is undefined (no
    predicted positives or no actual positives) the result is 0.0 rather
    than NaN.
  """
  # count_nonzero returns int64; cast to float64, the dtype tf.divide
  # yields for int64 operands.
  TP = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
  # Non-zero only where predicted == 1 and actual == 0.
  FP = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
  # Non-zero only where predicted == 0 and actual == 1.
  FN = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

  # divide_no_nan returns 0 when the denominator is 0 instead of NaN.
  precision = tf.math.divide_no_nan(TP, TP + FP)
  recall = tf.math.divide_no_nan(TP, TP + FN)

  f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

  return f1

def f1_score_metric(actual, predicted):
  """Weighted F1 over the six label columns.

  Predictions are thresholded at 0.5. Each column's F1 (from get_f1) is
  weighted by the module-level `class_percentage` entry for that column,
  and the weighted sum is divided by the module-level `target`.

  Args:
    actual: ground-truth label tensor with one column per label.
    predicted: model output tensor with the same layout as `actual`.

  Returns:
    Scalar tensor holding the weighted F1 score.
  """
  # tf.to_int32 was removed in TensorFlow 2; tf.cast is the replacement and
  # is what the other cells of this notebook already use.
  predicted = tf.cast(predicted >= 0.5, tf.int32)
  actual = tf.cast(actual, tf.int32)

  new_overall_f1_weighted = 0

  for class_id in range(0, 6):

    # F1 score
    f1 = get_f1(actual=actual[:, class_id], predicted=predicted[:, class_id])

    new_overall_f1_weighted += f1 * class_percentage[class_id]


  return new_overall_f1_weighted/target

# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

# TRAIN_DATA_FILE = "drive/toxicity type classifier/training-data-tokenized-augemented.csv"
# VALIDATION_DATA_FILE = "drive/toxicity type classifier/validation-data-tokenized.csv"
# EMBEDDING_FILE = "drive/toxicity type classifier/fasttext-embedding-matrix.csv"

TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized-augmented.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"


# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: columns [0, 150) -> input, columns [150, 156) -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print(training_data.shape)

training_input = training_data[:, 0:max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:156]
print(training_labels.shape)

embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
validation_labels = validation_data[:, max_comment_length:156]

# Per-label positive counts and frequencies on the validation set.
# `class_percentage` and `target` are read as globals by f1_score_metric.
total_records_count = validation_data.shape[0]
class_records_count = np.sum(validation_labels, axis=0)

print(class_records_count)
print(total_records_count)

class_percentage = (class_records_count/total_records_count)
print(class_percentage)

target = np.sum(class_percentage)
print(target)

# Build model

input = Input(shape=(max_comment_length, ))

# Embedding is initialised from the loaded matrix and fine-tuned (trainable=True).
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input)

# Add a trailing channel axis so the sequence can be fed to Conv2D.
x_reshaped = Reshape((150, 300, 1))(x)

# Four parallel convolutions with kernel heights 2, 4, 6 and 8; every kernel
# spans the full embedding width.
cnn_output1 = Conv2D(filters=300, kernel_size=(2,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output2 = Conv2D(filters=300, kernel_size=(4,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output3 = Conv2D(filters=300, kernel_size=(6,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output4 = Conv2D(filters=300, kernel_size=(8,300), activation='relu', data_format="channels_last")(x_reshaped)


pooling_output1 = GlobalMaxPool2D()(cnn_output1)
pooling_output2 = GlobalMaxPool2D()(cnn_output2)
pooling_output3 = GlobalMaxPool2D()(cnn_output3)
pooling_output4 = GlobalMaxPool2D()(cnn_output4)

pooling_output_merged = concatenate([pooling_output1, pooling_output2, pooling_output3, pooling_output4])

dense1_output = Dense(50, activation="relu")(pooling_output_merged)
dropout1_output = Dropout(0.2)(dense1_output)
# Six independent sigmoid outputs, one per label column.
output = Dense(6, activation="sigmoid")(dropout1_output)

# Compile Model
model = Model(inputs=input, outputs=output)
print(model.summary())

# `learning_rate` replaces the deprecated `lr` keyword, which current Keras
# optimizers no longer accept; the other cells already use `learning_rate`.
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy', f1_score_metric])

# Callbacks for early stopping and saving the best model
# Both callbacks track the validation value of f1_score_metric (higher is better).
checkpoint = ModelCheckpoint('model-cnn-ensemble.h5', monitor='val_f1_score_metric',
                             verbose=1, save_best_only=True, mode='max')

early_stopping = EarlyStopping(monitor='val_f1_score_metric', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_data[:, 0:max_comment_length], validation_data[:,max_comment_length:156]))


print("--- %s seconds ---" % (time.time() - start_time))

FileNotFoundError: /content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized-augmented.csv not found.

Ensemble Model

In [None]:
!pip install tensorflow==2.0.0 # Install tensorflow version 2.0.0


In [None]:
!pip install scikit-learn # Install scikit-learn as it provides the roc_auc_score function


In [None]:
from keras.layers import Dense, Input, Conv2D, Embedding, Dropout, Reshape, Flatten, concatenate, LSTM, GRU, Bidirectional
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import GlobalMaxPool1D, GlobalMaxPool2D
from keras.optimizers import Adam
from keras.models import Model
from sklearn.metrics import roc_auc_score # Import the roc_auc_score function
import keras.backend as K
import tensorflow as tf
import numpy as np
import random
import time

# Fix seeds for reproducibility
np.random.seed(7)
tf.random.set_seed(7)
random.seed(7)

def roc_auc_metric(y_true, y_pred):
  """Compute scikit-learn's roc_auc_score on the batch via tf.py_function.

  roc_auc_score is called with its default arguments and the result is
  declared as a tf.double tensor.
  """
  auc_value = tf.py_function(func=roc_auc_score, inp=(y_true, y_pred), Tout=tf.double)
  return auc_value

def get_f1(actual, predicted):
  """Return the binary F1 score for a single label column.

  Args:
    actual: 0/1-valued integer tensor of ground-truth labels.
    predicted: 0/1-valued integer tensor of thresholded predictions, same
      shape as `actual`.

  Returns:
    Scalar float64 tensor. When precision or recall is undefined (no
    predicted positives or no actual positives) the result is 0.0 rather
    than NaN, so a batch without positives no longer poisons the metric.
  """
  # count_nonzero returns int64; cast to float64, the dtype tf.divide
  # produced for these counts before.
  TP = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
  # Non-zero only where predicted == 1 and actual == 0.
  FP = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
  # Non-zero only where predicted == 0 and actual == 1.
  FN = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

  # divide_no_nan returns 0 when the denominator is 0 instead of NaN.
  precision = tf.math.divide_no_nan(TP, TP + FP)
  recall = tf.math.divide_no_nan(TP, TP + FN)

  f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)
  return f1

def f1_score_metric(actual, predicted):
  """Weighted F1 over the six label columns.

  Predictions are thresholded at 0.5. Each column's F1 (from get_f1) is
  weighted by the module-level `class_percentage` entry for that column,
  and the weighted sum is divided by the module-level `target`.
  """
  hard_predictions = tf.cast(predicted >= 0.5, tf.int32)
  int_labels = tf.cast(actual, tf.int32)

  weighted_f1_sum = 0
  for class_id in range(6):
    class_f1 = get_f1(actual=int_labels[:, class_id],
                      predicted=hard_predictions[:, class_id])
    weighted_f1_sum = weighted_f1_sum + class_f1 * class_percentage[class_id]

  return weighted_f1_sum / target

# Wall-clock timer for the whole cell (data loading + training); reported at the end.
start_time = time.time()

# TRAIN_DATA_FILE = "drive/toxicity type classifier/training-data-tokenized-augemented.csv"
TRAIN_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv"
VALIDATION_DATA_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/validation-data-tokenized.csv"
EMBEDDING_FILE = "/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/fasttext-embedding-matrix.csv"

# TRAIN_DATA_FILE = "training-data-tokenized-augmented-1-2.csv"
# VALIDATION_DATA_FILE = "validation-data-tokenized-augmented-1-2.csv"
# EMBEDDING_FILE = "fasttext-embedding-matrix-augmented-1-2.csv"


# Parameters
max_comment_length = 150  # number of leading columns used as model input
vector_size = 300         # embedding output dimension

# Read data
# Each row is sliced as: columns [0, 150) -> input, columns [150, 156) -> labels.
training_data = np.loadtxt(TRAIN_DATA_FILE, delimiter=',')
print('shape of training data', training_data.shape)

training_input = training_data[:, 0:max_comment_length]
print(training_input.shape)

training_labels = training_data[:, max_comment_length:156]
print(training_labels.shape)

embedding_matrix = np.loadtxt(EMBEDDING_FILE, delimiter=',')
print(embedding_matrix.shape)
vocab_count = embedding_matrix.shape[0]
print(vocab_count)

validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
validation_labels = validation_data[:, max_comment_length:156]

# Per-label positive counts and frequencies on the validation set.
# `class_percentage` and `target` are read as globals by f1_score_metric.
total_records_count = validation_data.shape[0]
class_records_count = np.sum(validation_labels, axis=0)

print(class_records_count)
print(total_records_count)

class_percentage = (class_records_count/total_records_count)
print(class_percentage)

target = np.sum(class_percentage)
print(target)

# Build model


input = Input(shape=(max_comment_length, ))

# One shared embedding feeds the CNN, LSTM and GRU branches.
x = Embedding(input_dim=vocab_count, output_dim=vector_size, weights=[embedding_matrix], trainable=True)(input)

# CNN branch: four parallel convolutions with kernel heights 2, 4, 6 and 8,
# each spanning the full embedding width, followed by global max pooling.
x_reshaped = Reshape((150, 300, 1))(x)
cnn_output1 = Conv2D(filters=300, kernel_size=(2,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output2 = Conv2D(filters=300, kernel_size=(4,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output3 = Conv2D(filters=300, kernel_size=(6,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_output4 = Conv2D(filters=300, kernel_size=(8,300), activation='relu', data_format="channels_last")(x_reshaped)
cnn_pooling_output1 = GlobalMaxPool2D()(cnn_output1)
cnn_pooling_output2 = GlobalMaxPool2D()(cnn_output2)
cnn_pooling_output3 = GlobalMaxPool2D()(cnn_output3)
cnn_pooling_output4 = GlobalMaxPool2D()(cnn_output4)

# Bidirectional LSTM branch.
lstm_output = Bidirectional(LSTM(units=30, activation="tanh", return_sequences=True))(x)
lstm_pooling_output = GlobalMaxPool1D()(lstm_output)

# Bidirectional GRU branch.
gru_output = Bidirectional(GRU(units=40, activation="tanh", return_sequences=True))(x)
gru_pooling_output = GlobalMaxPool1D()(gru_output)


# All pooled branch outputs are concatenated into one feature vector.
pooling_output_merged = concatenate([cnn_pooling_output1, cnn_pooling_output2, cnn_pooling_output3, cnn_pooling_output4, lstm_pooling_output, gru_pooling_output])

dense1_output = Dense(200, activation="relu")(pooling_output_merged)
dropout1_output = Dropout(0.2)(dense1_output)

dense2_output = Dense(200, activation="relu")(dropout1_output)
dropout2_output = Dropout(0.2)(dense2_output)

# Six independent sigmoid outputs, one per label column.
output = Dense(6, activation="sigmoid")(dropout2_output)

# Compile Model
model = Model(inputs=input, outputs=output)
print(model.summary())
print(validation_data.shape)
model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002), metrics=['accuracy', f1_score_metric, roc_auc_metric])

# Callbacks for early stopping and saving the best model
# The monitored key must match the compiled metric function's name
# (`roc_auc_metric`). The previous key 'val_get_roc_auc' is not produced by
# this model, so neither callback could ever see the value it was watching.
checkpoint = ModelCheckpoint('model-ensemble.h5', monitor='val_roc_auc_metric',
                             verbose=1, save_best_only=True, mode='max')

early_stopping = EarlyStopping(monitor='val_roc_auc_metric', patience=5, mode='max')

callbacks_list = [checkpoint, early_stopping]

model.fit(training_input, training_labels, batch_size=1024, epochs=100, callbacks=callbacks_list,
          validation_data=(validation_data[:, 0:max_comment_length], validation_data[:,max_comment_length:156]))


print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Mount Google Drive at /content/drive so the dataset and model paths used
# in the following cells can be resolved.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Notebook shell commands: install the packages imported by the next cell.
!pip install tensorflow keras
!pip install scikit-learn




In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, GlobalMaxPooling1D, Embedding, LSTM, GRU, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import random
import keras.backend as K

# Fix seeds for reproducibility
random.seed(7)
np.random.seed(7)
tf.random.set_seed(7)

# Load the dataset.
# The file is already tokenized: it has no header row and no raw-text column.
# Reading it with a header turned the first data row into numeric column names
# and made every 'comment_text' lookup fail with KeyError, so it is read
# header-less here and sliced by column position instead.
train_data = pd.read_csv('/content/drive/My Drive/toxic-comments-classification/toxicity-types-data/training-data-tokenized.csv', header=None)  # Adjust the path as needed

# Print the shape to check the expected layout (token ids followed by labels)
print(train_data.shape)

# Preprocess the data
max_len = 150
# Label columns, in the order they follow the token-id columns.
# NOTE(review): the order is taken from the original column selection; confirm
# it matches the order used when the tokenized file was written.
label_names = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

expected_columns = max_len + len(label_names)
if train_data.shape[1] != expected_columns:
    raise ValueError(
        "Expected %d columns (%d token ids + %d labels), found %d"
        % (expected_columns, max_len, len(label_names), train_data.shape[1])
    )

# Comments are already converted to padded id sequences, so no Tokenizer or
# pad_sequences pass is needed.
X = train_data.iloc[:, :max_len].to_numpy(dtype='int32')
y = train_data.iloc[:, max_len:].to_numpy(dtype='int32')

# The Embedding layer needs input_dim > largest token id; keep the original
# 50000 as a floor and grow it if the data contains larger ids.
max_words = max(50000, int(X.max()) + 1)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# Define the model
def build_model():
    """Build and compile the bidirectional-LSTM classifier.

    The network embeds `max_len` token ids from a vocabulary of `max_words`
    (both module-level names) into 128 dimensions, runs a bidirectional
    LSTM(64) that returns the full sequence, max-pools over the sequence and
    ends in Dense(64, relu) -> Dense(6, sigmoid).

    Returns:
        The compiled Sequential model (binary cross-entropy loss, Adam
        optimizer, accuracy metric).
    """
    layer_stack = [
        Embedding(max_words, 128, input_length=max_len),
        Bidirectional(LSTM(64, return_sequences=True)),
        GlobalMaxPooling1D(),
        Dense(64, activation='relu'),
        Dense(6, activation='sigmoid'),
    ]

    classifier = Sequential()
    for layer in layer_stack:
        classifier.add(layer)

    classifier.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Instantiate the model
model = build_model()
model.summary()

# Train the model for 5 epochs, validating on the held-out split after each one
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_data=(X_val, y_val))

# Save the model
# NOTE(review): the file is named model-ensemble.h5 although build_model()
# returns a single bidirectional-LSTM network - confirm the name is intended.
model.save('/content/drive/My Drive/toxic-comments-classification/model-ensemble.h5')

# Evaluate the model
# Keep the raw sigmoid outputs: ROC-AUC ranks examples by score, so it has to
# be computed from the probabilities, not from the thresholded 0/1 labels.
y_prob = model.predict(X_val)

# Hard labels for F1: 1 where the probability is at least 0.5, else 0.
y_pred = (y_prob >= 0.5).astype(y_prob.dtype)

f1 = f1_score(y_val, y_pred, average='macro')
roc_auc = roc_auc_score(y_val, y_prob)

print('Macro F1 Score:', f1)
print('ROC-AUC Score:', roc_auc)


Index(['2.390000000000000000e+02', '1.306100000000000000e+04',
       '2.390000000000000000e+02.1', '2.286000000000000000e+03',
       '1.656200000000000000e+04', '2.017000000000000000e+03',
       '1.125700000000000000e+04', '1.710000000000000000e+02',
       '2.327000000000000000e+03', '9.870000000000000000e+02',
       ...
       '0.000000000000000000e+00.113', '0.000000000000000000e+00.114',
       '0.000000000000000000e+00.115', '0.000000000000000000e+00.116',
       '1.000000000000000000e+00', '0.000000000000000000e+00.117',
       '0.000000000000000000e+00.118', '0.000000000000000000e+00.119',
       '0.000000000000000000e+00.120', '0.000000000000000000e+00.121'],
      dtype='object', length=156)


KeyError: 'comment_text'

In [None]:
# Re-run training. This cell relies on `model`, `X_train`, `y_train`, `X_val`
# and `y_val` having been created by the previous cell.
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_data=(X_val, y_val))


AttributeError: 'NoneType' object has no attribute 'shape'

Evaluate Single Model

In [None]:
from sklearn.metrics import auc, roc_curve, confusion_matrix, f1_score
from sklearn.metrics import roc_auc_score
from keras.models import load_model
from keras.layers import Lambda
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import random
import keras.backend as K

# Fix seeds for reproducibility (Python, NumPy and TensorFlow generators)
random.seed(7)
np.random.seed(7)
tf.random.set_seed(7)

def get_f1(actual, predicted):
    """Return the F1 score of binary predictions against binary labels.

    The counts rely on both tensors holding only 0s and 1s: `predicted * actual`
    is non-zero for true positives, `predicted * (actual - 1)` for false
    positives and `(predicted - 1) * actual` for false negatives.

    Args:
        actual: integer tensor of ground-truth labels (0 or 1).
        predicted: integer tensor of predicted labels (0 or 1), same shape.

    Returns:
        Scalar float64 tensor with the F1 score. When precision or recall is
        undefined (no positive predictions or no positive labels) the result
        is 0 instead of NaN.
    """
    # tf.math.count_nonzero is used because the bare tf.count_nonzero alias is
    # not part of the TF2 API this cell otherwise relies on.
    tp = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
    fp = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
    fn = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

    # divide_no_nan yields 0 for a zero denominator, so an empty class does
    # not poison the score with NaN.
    precision = tf.math.divide_no_nan(tp, tp + fp)
    recall = tf.math.divide_no_nan(tp, tp + fn)

    return tf.math.divide_no_nan(2 * precision * recall, precision + recall)

# def f1_score_metric(actual, predicted):
#   predicted = tf.to_int32(predicted>=0.5)
#   actual = tf.to_int32(actual)

#   new_overall_f1_weighted = 0

#   for class_id in range(0, 6):

#     # F1 score
#     f1 = get_f1(actual=actual[:, class_id], predicted=predicted[:, class_id])

#     new_overall_f1_weighted += f1 * class_percentage[class_id]

#   return new_overall_f1_weighted/target

def f1_score_metric(y_true, y_pred):
    """Macro-averaged F1 over the label columns, usable as a Keras metric.

    Predictions are thresholded at 0.5, then true positives, false positives
    and false negatives are summed along axis 0 (one count per column). The
    per-column F1 scores are averaged into a single scalar.

    Args:
        y_true: tensor of 0/1 ground-truth labels.
        y_pred: tensor of predicted scores with the same shape as `y_true`.

    Returns:
        Scalar tensor: the mean of the per-column F1 scores.
    """
    y_pred = tf.cast(y_pred>=0.5, tf.int32)
    y_true = tf.cast(y_true, tf.int32)

    # True negatives are not needed for F1, so they are not computed.
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    # K.epsilon() keeps the denominators non-zero for columns without
    # positive labels or positive predictions.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2*p*r / (p+r+K.epsilon())
    # Defensive: replace any NaN with 0 before averaging.
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

def flip(x):
    """Return `x` reversed along axis 0 using the Keras backend.

    The backend is imported inside the function, presumably so the function
    is self-contained when wrapped in a Lambda layer - confirm before moving
    the import to module level.
    """
    import keras.backend as backend
    reversed_x = backend.reverse(x, axes=0)
    return reversed_x

# Tokenized validation set: a comma-separated numeric matrix.
VALIDATION_DATA_FILE = "drive/My Drive/Deep Learning in NLP/toxicity type classifier/validation-data-tokenized.csv"

# Number of leading columns that hold the model input for each record.
max_comment_length = 150

# Read data
validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
print(validation_data.shape)

# Columns [0, max_comment_length) are the model inputs.
validation_input = validation_data[:, 0:max_comment_length]
print(validation_input.shape)

# Columns [max_comment_length, 156) are the labels.
validation_labels = validation_data[:, max_comment_length:156]
print(validation_labels.shape)

total_records_count = validation_labels.shape[0]

# Column sums of the label matrix (the number of positives per class when the
# labels are 0/1).
class_records_count = np.sum(validation_labels, axis=0)

print(class_records_count)
print(total_records_count)
# Fraction of records that are positive for each class.
class_percentage = (class_records_count/total_records_count)
print(class_percentage)
# Sum of the class fractions; used further down as the upper bound of the
# prevalence-weighted F1 total.
target = np.sum(class_percentage)
print(target)

layer = Lambda(flip)

# load model
# NOTE(review): custom_objects maps the names stored in the saved model to
# Python objects. Confirm that 'layer' is a name the saved model actually
# references, and that every custom metric the model was compiled with is
# listed here.
model = load_model('/content/drive/My Drive/Deep Learning in NLP/toxicity type**/model-ensemble.h5', custom_objects={'f1_score_metric': f1_score_metric, 'layer': layer})
# model = load_model('model-ensemble.h5', custom_objects={'f1_score_metric': f1_score_metric, 'layer': layer})
# model = load_model('drive/My Drive/Deep Learning in NLP/toxicity type**/model-cnn-ensemble.h5', custom_objects={'f1_score_metric': f1_score_metric, 'layer': layer})

# Keep the raw probabilities for ROC-AUC (a ranking metric that needs scores)
# and derive hard 0/1 labels from them for the F1 computations.
y_prob = model.predict(validation_input)
y_pred = (y_prob >= 0.5).astype(y_prob.dtype)

xx = f1_score(y_true=validation_labels, y_pred=y_pred, average=None)
print(xx)
print('macro', sum(xx)/6)

# Compute ROC from the probabilities, not from the thresholded labels
roc_auc = roc_auc_score(y_true=validation_labels, y_score=y_prob)
print('new roc-auc', roc_auc)

# Confusion matrix and F1 scores
print('**************************************\n')
new_overall_f1_weighted = 0

for class_id in range(0, 6):
  print('Class ', class_id)

  class_true = validation_labels[:, class_id]
  class_pred = y_pred[:, class_id]

  # F1 score for the negative (index 0) and positive (index 1) class.
  # labels=[0, 1] guarantees two entries even when one of the classes is
  # absent from both the labels and the predictions, so [1] cannot fail.
  f1_not_weighted = f1_score(y_true=class_true, y_pred=class_pred, average=None, labels=[0, 1])
  print('F1 Score =', f1_not_weighted)

  print('F1 score for positive class, weighted by the class percentage = ', f1_not_weighted[1]*class_percentage[class_id])
  new_overall_f1_weighted += f1_not_weighted[1]*class_percentage[class_id]

  # Support-weighted average, matching the printed label.
  f1_weighted = f1_score(y_true=class_true, y_pred=class_pred, average='weighted')
  print('Weighted F1 Score =', f1_weighted)

  roc_auc = roc_auc_score(class_true, y_score=y_prob[:, class_id])
  print('class roc-auc', roc_auc)

  print('**************************************\n')


f1_all_classes = f1_score(y_true=validation_labels, y_pred=y_pred, average='macro')
print('macro average F1 score over all classes =', f1_all_classes)
f1_all_classes = f1_score(y_true=validation_labels, y_pred=y_pred, average='micro')
print('micro average F1 score over all classes =', f1_all_classes)

# Prevalence-weighted total of the positive-class F1 scores, its distance to
# the maximum attainable value (`target`) and the ratio of the two.
print('new_overall_f1_weighted = ', new_overall_f1_weighted)
print('to go= ', target-new_overall_f1_weighted)
print('to go ratio = ',  new_overall_f1_weighted/target)

# "new f1": each record contributes 2*correct/12, i.e. the fraction of its six
# labels that were predicted correctly, and the contributions are averaged
# over all records. That is the mean element-wise agreement between the label
# matrix and the prediction matrix, computed here in one vectorised step.
new_f1 = float(np.mean(validation_labels == y_pred))

print('new f1 = ', new_f1)


Evaluate Ensemble Model

In [None]:
from sklearn.metrics import roc_auc_score
from keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve, confusion_matrix, f1_score
import tensorflow as tf
import numpy as np
import random

# Fix seeds for reproducibility (Python, NumPy and TensorFlow generators).
# tf.random.set_seed is the TF2 name, as used by the other cells of this
# notebook; the TF1 alias tf.set_random_seed no longer exists there.
random.seed(7)
np.random.seed(7)
tf.random.set_seed(7)

def f1_score_metric(actual, predicted):
    """F1 score over all entries of the label matrix, usable as a Keras metric.

    Predictions are thresholded at 0.5 and both tensors are cast to int32, so
    that products of the 0/1 values identify true positives, false positives
    and false negatives. Counts are taken over the whole tensor, not per
    column.

    Args:
        actual: tensor of 0/1 ground-truth labels.
        predicted: tensor of predicted scores with the same shape.

    Returns:
        Scalar float64 tensor with the F1 score; 0 instead of NaN when
        precision or recall is undefined.
    """
    # tf.cast / tf.math.count_nonzero replace the TF1-only tf.to_int32 and
    # tf.count_nonzero aliases.
    predicted = tf.cast(predicted >= 0.5, tf.int32)
    actual = tf.cast(actual, tf.int32)

    tp = tf.cast(tf.math.count_nonzero(predicted * actual), tf.float64)
    fp = tf.cast(tf.math.count_nonzero(predicted * (actual - 1)), tf.float64)
    fn = tf.cast(tf.math.count_nonzero((predicted - 1) * actual), tf.float64)

    # divide_no_nan returns 0 for a zero denominator.
    precision = tf.math.divide_no_nan(tp, tp + fp)
    recall = tf.math.divide_no_nan(tp, tp + fn)

    f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

    return f1

VALIDATION_DATA_FILE = "drive/toxicity type classifier/validation-data-tokenized.csv"

# Read data
validation_data = np.loadtxt(VALIDATION_DATA_FILE, delimiter=',')
print(validation_data.shape)

# Columns [0, 150) are the model inputs, columns [150, 156) the labels.
validation_input = validation_data[:, 0:150]
print(validation_input.shape)

validation_labels = validation_data[:, 150:156]
print(validation_labels.shape)

# load models
cnn_model = load_model('drive/Deep Learning in NLP /toxicity type**/model-cnn-ensemble.h5', custom_objects={'f1_score_metric': f1_score_metric})
lstm_model = load_model('drive/Deep Learning in NLP /toxicity type**/model-lstm.h5', custom_objects={'f1_score_metric': f1_score_metric})
gru_model = load_model('drive/Deep Learning in NLP /toxicity type**/model-gru.h5', custom_objects={'f1_score_metric': f1_score_metric})

print('models loaded')

# Per-model probabilities; record 5 is printed from each model as a spot check
# that can be compared with the thresholded values printed further down.
cnn_y_pred = cnn_model.predict(validation_input)
print(cnn_y_pred[5])

lstm_y_pred = lstm_model.predict(validation_input)
print(lstm_y_pred[5])

gru_y_pred = gru_model.predict(validation_input)
print(gru_y_pred[5])


# Soft vote: the summed probabilities serve as the ensemble score. ROC-AUC
# only depends on the ranking, so dividing by 3 would not change it.
sum_prediction1 = cnn_y_pred + lstm_y_pred + gru_y_pred
print(sum_prediction1[5])

# Compute ROC
roc_auc = roc_auc_score(y_true=validation_labels, y_score=sum_prediction1, average='weighted', sample_weight=None)
print('roc-auc', roc_auc)

# Default (macro) average over the same soft-vote scores. The hard-vote array
# `sum_prediction` does not exist yet at this point, so it cannot be used here.
roc_auc = roc_auc_score(y_true=validation_labels, y_score=sum_prediction1)
print('new roc-auc', roc_auc)

# Threshold each model's probabilities at 0.5 (in place) to get 0/1 votes.
cnn_y_pred[cnn_y_pred >= 0.5] = 1
cnn_y_pred[cnn_y_pred < 0.5] = 0

lstm_y_pred[lstm_y_pred >= 0.5] = 1
lstm_y_pred[lstm_y_pred < 0.5] = 0

gru_y_pred[gru_y_pred >= 0.5] = 1
gru_y_pred[gru_y_pred < 0.5] = 0

print(cnn_y_pred[5])
print(lstm_y_pred[5])
print(gru_y_pred[5])


# Hard vote: count the models that predict each label ...
sum_prediction = cnn_y_pred + lstm_y_pred + gru_y_pred
print(sum_prediction.shape)
print(sum_prediction[5])

# ... and keep the label when at least two of the three models agree.
sum_prediction[sum_prediction < 2] = 0
sum_prediction[sum_prediction >= 2] = 1
print(sum_prediction[5])

# Confusion matrix and F1 scores
print('**************************************\n')
y_pred = sum_prediction
print(y_pred.shape)

# Class prevalence and its sum are computed from this cell's own labels, so
# the report does not depend on variables left behind by another cell (which
# may have been built from a different validation file).
class_percentage = np.sum(validation_labels, axis=0) / validation_labels.shape[0]
target = np.sum(class_percentage)

new_overall_f1_weighted = 0

for class_id in range(0, 6):
  print('Class ', class_id)
  class_true = validation_labels[:, class_id]
  class_pred = y_pred[:, class_id]
  print(class_true)

  # F1 score for the negative (index 0) and positive (index 1) class.
  # labels=[0, 1] guarantees two entries even when one of the classes is
  # absent from both the labels and the predictions, so [1] cannot fail.
  f1_not_weighted = f1_score(y_true=class_true, y_pred=class_pred, average=None, labels=[0, 1])
  print('F1 Score =', f1_not_weighted)

  print('F1 score for positive class, weighted by the class percentage = ', f1_not_weighted[1]*class_percentage[class_id])
  new_overall_f1_weighted += f1_not_weighted[1]*class_percentage[class_id]

  f1_weighted = f1_score(y_true=class_true, y_pred=class_pred, average='weighted')
  print('Weighted F1 Score =', f1_weighted)

  print('**************************************\n')


f1_all_classes = f1_score(y_true=validation_labels, y_pred=y_pred, average='micro')
print('micro F1 score over all classes =', f1_all_classes)

# Prevalence-weighted total of the positive-class F1 scores, its distance to
# the maximum attainable value (`target`) and the ratio of the two.
print('new_overall_f1_weighted = ', new_overall_f1_weighted)
print('to go= ', target-new_overall_f1_weighted)
print('to go ratio = ',  new_overall_f1_weighted/target)

# "new f1": each record contributes 2*correct/12, i.e. the fraction of its six
# labels that were predicted correctly, and the contributions are averaged
# over all records. That is the mean element-wise agreement between the label
# matrix and the prediction matrix. Taking the mean directly also removes the
# dependency on a record count defined in another cell.
new_f1 = float(np.mean(validation_labels == y_pred))

print('new f1 = ', new_f1)

