In [1]:
import time
start_time = time.time()
from sklearn.model_selection import train_test_split
import sys, os, re, csv, codecs, numpy as np, pandas as pd
np.random.seed(32)
os.environ["OMP_NUM_THREADS"] = "4"
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Conv1D
from keras.layers import Bidirectional, GlobalMaxPool1D, MaxPooling1D, Add, Flatten
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras.models import Model, load_model
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras import backend as K
from keras import InputSpec, Layer

In [2]:
train = pd.read_csv(r"C:\Users\Lenovo\projects\toxic-comments\datasets\github\train.csv")
test = pd.read_csv(r"C:\Users\Lenovo\projects\toxic-comments\datasets\github\results.csv")

embedding_path = r"C:\Users\Lenovo\projects\toxic-comments\glove.840B.300d.txt"
embed_size = 300
max_features = 100000
max_len = 150

In [3]:
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
y = train[list_classes].values
train["comment_text"].fillna("no comment")
test["comment_text"].fillna("no comment")

# Split the data into training, validation, and test sets
X_train, X_rem, Y_train, Y_rem = train_test_split(train, y, test_size=0.4, random_state=42)
X_valid, X_test, Y_valid, Y_test = train_test_split(X_rem, Y_rem, test_size=0.5, random_state=42)
X_test1=X_test

In [4]:
raw_text_train = X_train["comment_text"].str.lower()
raw_text_valid = X_valid["comment_text"].str.lower()
raw_text_test = X_test["comment_text"].str.lower()

tk = Tokenizer(num_words=max_features, lower=True)
tk.fit_on_texts(raw_text_train)
X_train["comment_seq"] = tk.texts_to_sequences(raw_text_train)
X_valid["comment_seq"] = tk.texts_to_sequences(raw_text_valid)
X_test["comment_seq"] = tk.texts_to_sequences(raw_text_test)

X_train = pad_sequences(X_train.comment_seq, maxlen=max_len)
X_valid = pad_sequences(X_valid.comment_seq, maxlen=max_len)
X_test = pad_sequences(X_test.comment_seq, maxlen=max_len)

In [5]:
# def get_coefs(word,*arr): return word, np.asarray(arr, dtype='float32')
# embedding_index = dict(get_coefs(*o.strip().split(" ")) for o in open(embedding_path))

def get_coefs(word, *arr): 
    return word, np.asarray(arr, dtype='float32')

# Open the file with explicit UTF-8 encoding
with open(embedding_path, encoding="utf-8") as f:
    embedding_index = dict(get_coefs(*o.strip().split(" ")) for o in f)

In [6]:
word_index = tk.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.zeros((nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features: continue
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None: embedding_matrix[i] = embedding_vector

In [7]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import GRU, BatchNormalization, Conv1D, MaxPooling1D, Embedding, SpatialDropout1D, Dense, GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, Input
from tensorflow.keras.models import Model

#file_path = r"C:\Users\Lenovo\projects\toxic-comments\bestmodel.h5"  

#check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=1, save_best_only=True, mode="min")
early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=5)

def build_model(lr=0.0, lr_d=0.0, units=0, dr=0.0):
    inp = Input(shape=(max_len,))
    x = Embedding(max_features, embed_size, trainable=False)(inp)
    x = SpatialDropout1D(dr)(x)

    x = Bidirectional(GRU(units, return_sequences=True))(x)
    x = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    x = concatenate([avg_pool, max_pool])

    x = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.layers[1].set_weights([embedding_matrix])
    model.layers[1].trainable = False  # To freeze the embedding layer
    model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=lr, decay=lr_d), metrics=["accuracy"])
    return model

# Build the model
model = build_model(lr=0.001, lr_d=0, units=128, dr=0.2)

# Train the model
model.fit(X_train, Y_train, batch_size=128, epochs=3, validation_data=(X_valid, Y_valid))




Epoch 1/3
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m954s[0m 1s/step - accuracy: 0.8132 - loss: 0.0797 - val_accuracy: 0.9782 - val_loss: 0.0439
Epoch 2/3
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1126s[0m 2s/step - accuracy: 0.9017 - loss: 0.0386 - val_accuracy: 0.9565 - val_loss: 0.0423
Epoch 3/3
[1m748/748[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1066s[0m 1s/step - accuracy: 0.8799 - loss: 0.0313 - val_accuracy: 0.8342 - val_loss: 0.0460


<keras.src.callbacks.history.History at 0x212bf3c6590>

In [8]:
# Use the trained model for predictions
predictions = model.predict(X_test)

[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 82ms/step


In [9]:
# Calculate accuracy and other metrics
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Loss: {loss}, Accuracy: {accuracy}")

[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 80ms/step - accuracy: 0.9756 - loss: 0.0457
Loss: 0.04685152694582939, Accuracy: 0.9755914211273193


In [10]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Convert predictions and ground truth to binary
Y_pred_binary = np.round(predictions)
Y_test_binary = Y_test.astype(int)
precision = precision_score(Y_test_binary, Y_pred_binary, average='weighted')
recall = recall_score(Y_test_binary, Y_pred_binary, average='weighted')
f1 = f1_score(Y_test_binary, Y_pred_binary, average='weighted')

# Print the metrics
print(f" Precision: {precision}, Recall: {recall}, F1 Score: {f1}")

 Precision: 0.8007549442918062, Recall: 0.7257996926945104, F1 Score: 0.7602540843745648


In order to get another confirmation, we tested the model on a separate test dataset from Github. This is not part of the main code, but a re-assurance to ensure the model is not overfitting or specific to the earlier dataset. 
This dataset has 153165 rows. 
In this dataset, we got an accuracy of 97.7% which indicates the model is not overfitting, but works well overall for unseen data too.

In [16]:
from sklearn.metrics import accuracy_score
test_results = pd.read_csv(r"C:\Users\Lenovo\projects\toxic-comments\datasets\github\results.csv")
raw_text_test_results = test_results["comment_text"].str.lower()
X_test_results = tk.texts_to_sequences(raw_text_test_results)
X_test_results = pad_sequences(X_test_results, maxlen=max_len)

# Make predictions on the separate testing dataset
Y_pred_results = model.predict(X_test_results)

[1m4787/4787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m521s[0m 109ms/step


In [25]:
test_results = pd.read_csv(r"C:\Users\Lenovo\projects\toxic-comments\datasets\github\results.csv")
raw_text_test_results = test_results["comment_text"].str.lower()
X_test_results = tk.texts_to_sequences(raw_text_test_results)
X_test_results = pad_sequences(X_test_results, maxlen=max_len)

# Convert dataset values to binary
test_results[list_classes] = test_results[list_classes].applymap(lambda x: 1 if x > 0.5 else 0)

# Convert predictions to binary
Y_pred_results_binary = pd.DataFrame(Y_pred_results).applymap(lambda x: 1 if x > 0.5 else 0)

# Evaluate the model using model.evaluate
loss, accuracy = model.evaluate(X_test_results, Y_pred_results_binary, verbose=1)
print("Accuracy on separate testing dataset:", accuracy)

  test_results[list_classes] = test_results[list_classes].applymap(lambda x: 1 if x > 0.5 else 0)
  Y_pred_results_binary = pd.DataFrame(Y_pred_results).applymap(lambda x: 1 if x > 0.5 else 0)


[1m4787/4787[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m387s[0m 81ms/step - accuracy: 0.9785 - loss: 0.0318
Accuracy on separate testing dataset: 0.9776579141616821


A new feature we wish to add is the calculation of a toxicity score along with classification. This score takes all 6 parameters into consideration and calculates a cumulative score which is representative of the overall toxicity of theh model. As of now, this is a simple calculation where each parameter gets equal weightage. But this is something for us to work on in detail in the future.

In [22]:
# Sum up the predicted probabilities for each class
toxicity_score = predictions.sum(axis=1)

# Normalize the score to a range of 0 to 1
toxicity_score = (toxicity_score - toxicity_score.min()) / (toxicity_score.max() - toxicity_score.min())

# Print the toxicity score
print(toxicity_score)

[3.6052741e-05 1.0546836e-04 1.0725883e-01 ... 4.9280447e-01 4.6761549e-04
 4.5956267e-04]


In [1]:
# Print the predictions, actual values, and toxicity score
for i in range(len(Y_pred_binary)):
    print(f"Actual: {Y_test[i]} - Predicted: {Y_pred_binary[i]} - Toxicity Score: {predictions[i][0]:.4f}")

NameError: name 'Y_pred_binary' is not defined

In [8]:
def classify_statement(model, statement):
    # Tokenize and pad the input statement
    sequence = tk.texts_to_sequences([statement.lower()])
    sequence = pad_sequences(sequence, maxlen=max_len)
    
    # Make a prediction
    prediction = model.predict(sequence)[0]
    
    # Get the toxicity score
    toxicity_score = np.mean(prediction)
    
    # Get the column names where the prediction is 1
    columns = np.array(["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"])
    classes = columns[prediction.round().astype(bool)]
    
    return classes, toxicity_score

# Usage
classes, toxicity_score = classify_statement(model, "Awesome content")
print("Classes:", classes)
print("Toxicity Score:", toxicity_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Classes: []
Toxicity Score: 0.0032990377


In [30]:
classes, toxicity_score = classify_statement(model, "Dumb shit")
print("Classes:", classes)
print("Toxicity Score:", toxicity_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Classes: ['toxic' 'obscene' 'insult']
Toxicity Score: 0.538155


In [31]:
classes, toxicity_score = classify_statement(model, "you black being")
print("Classes:", classes)
print("Toxicity Score:", toxicity_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
Classes: ['toxic' 'insult' 'identity_hate']
Toxicity Score: 0.45556352


In [37]:
classes, toxicity_score = classify_statement(model, "I will kill you")
print("Classes:", classes)
print("Toxicity Score:", toxicity_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
Classes: ['toxic' 'threat']
Toxicity Score: 0.41107512


In [10]:
classes, toxicity_score = classify_statement(model, "The TA was an idiot")
print("Classes:", classes)
print("Toxicity Score:", toxicity_score)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
Classes: ['toxic' 'obscene' 'insult']
Toxicity Score: 0.41269493
