<a href="https://colab.research.google.com/github/2003UJAN/Cyber-Bullying-Sivdutt-Ajew-Minor-Project/blob/main/Cyber_Bullying_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Location of the annotated corpus inside the Colab runtime.
dataset_path = "/content/Dataset.json"

# Read the file as UTF-8 text and parse it in one go; the result is the
# collection of records that the following cells iterate over.
with open(dataset_path, encoding="utf-8") as dataset_file:
    data = json.loads(dataset_file.read())

In [3]:
# Raw message text of every record, in dataset order.
texts = [record["content"] for record in data]

# Class of every record: the first element found under annotation -> label,
# cast to int. (Presumably "label" is a list holding a single digit string —
# confirm against the dataset file.)
labels = [
    int(record["annotation"]["label"][0])
    for record in data
]

In [4]:
# Turn the Python list of integer labels into a NumPy array so it can be
# split and fed to Keras alongside the padded sequences.
labels = np.asarray(labels)

In [5]:
# Vocabulary cap handed to the tokenizer; the same value is reused as the
# embedding layer's input_dim when the model is built.
MAX_VOCAB = 10_000
# Every sequence is padded or truncated to exactly this many token ids.
MAX_LENGTH = 100

# Words the tokenizer cannot map are replaced by the "<OOV>" token.
tokenizer = Tokenizer(
    num_words=MAX_VOCAB,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(texts)

# Map each text to a list of integer ids, then make the lists rectangular:
# padding and truncation both happen at the end of a sequence ("post").
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(
    sequences,
    maxlen=MAX_LENGTH,
    padding="post",
    truncating="post",
)

In [6]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling are repeatable across
# "Restart & Run All" (some GPU kernels may still be nondeterministic).
# 42 matches the random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Binary text classifier: token ids -> embeddings -> two stacked
# bidirectional LSTMs -> dense head with a single sigmoid output.
model = tf.keras.Sequential([
    # Declare the fixed sequence length with an explicit Input layer.
    # Embedding's `input_length` argument is deprecated in Keras 3 and only
    # triggers a warning there; an Input layer works on old and new Keras
    # and still builds the model up front.
    tf.keras.Input(shape=(MAX_LENGTH,)),
    tf.keras.layers.Embedding(input_dim=MAX_VOCAB, output_dim=64),
    # return_sequences=True keeps the per-timestep outputs so the second
    # recurrent layer receives a sequence rather than a single vector.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(32, activation="relu"),
    # One sigmoid unit; paired with binary cross-entropy below.
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])



In [8]:
# Train for five epochs in mini-batches of 32. The held-out test split is
# passed as validation data, so the val_* metrics reported per epoch are
# computed on the same samples that the later classification report uses.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
)

Epoch 1/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 201ms/step - accuracy: 0.7317 - loss: 0.5024 - val_accuracy: 0.8480 - val_loss: 0.3280
Epoch 2/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 187ms/step - accuracy: 0.9047 - loss: 0.2355 - val_accuracy: 0.8828 - val_loss: 0.2860
Epoch 3/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 175ms/step - accuracy: 0.9496 - loss: 0.1410 - val_accuracy: 0.8940 - val_loss: 0.3017
Epoch 4/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 188ms/step - accuracy: 0.9640 - loss: 0.1032 - val_accuracy: 0.8843 - val_loss: 0.3501
Epoch 5/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 174ms/step - accuracy: 0.9768 - loss: 0.0680 - val_accuracy: 0.9095 - val_loss: 0.3191


In [9]:
# Score the held-out samples, then threshold the sigmoid outputs at 0.5 to
# obtain hard 0/1 predictions for the per-class report.
predicted_probabilities = model.predict(X_test)
y_pred = (predicted_probabilities > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 56ms/step
              precision    recall  f1-score   support

           0       0.95      0.90      0.92      2424
           1       0.86      0.92      0.89      1577

    accuracy                           0.91      4001
   macro avg       0.90      0.91      0.91      4001
weighted avg       0.91      0.91      0.91      4001



In [10]:
# Persist the trained network. The HDF5 file name is kept unchanged so that
# anything already loading "cyberbullying_lstm.h5" keeps working.
model.save("cyberbullying_lstm.h5")

# The network only accepts the integer ids produced by the fitted tokenizer,
# so store its vocabulary/config next to the model; without it the saved
# model cannot be applied to new text. Restore it later with
# tensorflow.keras.preprocessing.text.tokenizer_from_json.
with open("cyberbullying_tokenizer.json", "w", encoding="utf-8") as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())

print("Model saved!")



Model saved!
