<a href="https://colab.research.google.com/github/meghan-a1/CyberBullyingDetection/blob/main/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import warnings
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Loading the dataset
data = pd.read_csv('/content/drive/MyDrive/Dataset.csv')


In [None]:
# Extract tweets and labels
tweets = data['Comment'].values
labels = data['label'].values  # 1 for cyberbullying, 0 for not

In [None]:
# Initialize the tokenizer and fit it on the tweets
tweets=data['Comment'].astype(str).fillna('')
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(tweets)

# Convert texts to sequences of integers
sequences = tokenizer.texts_to_sequences(tweets)

# Pad sequences to ensure uniform input size with post-padding
max_length = 100
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='pre')

# Assign X and y
X = padded_sequences
y = labels

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.25, random_state=42)

In [None]:
#RNN model
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=X.shape[1]))
model.add(SimpleRNN(32))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
 #Train the model on the training data
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 59ms/step - accuracy: 0.7132 - loss: 0.5670 - val_accuracy: 0.8388 - val_loss: 0.3603
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 51ms/step - accuracy: 0.9210 - loss: 0.2081 - val_accuracy: 0.8080 - val_loss: 0.4312
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 58ms/step - accuracy: 0.9777 - loss: 0.0714 - val_accuracy: 0.8238 - val_loss: 0.5131
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 55ms/step - accuracy: 0.9927 - loss: 0.0296 - val_accuracy: 0.8268 - val_loss: 0.5948
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 55ms/step - accuracy: 0.9961 - loss: 0.0157 - val_accuracy: 0.8218 - val_loss: 0.6775
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 58ms/step - accuracy: 0.9972 - loss: 0.0117 - val_accuracy: 0.8028 - val_loss: 0.7659
Epoch 7/10
[1m4

In [None]:
# Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8248 - loss: 0.9507
Test Accuracy: 0.8232


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predictions on the test set
y_pred = (model.predict(X_test) > 0.5).astype("int32")

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
Confusion Matrix:
[[1143  385]
 [ 499 2973]]

Classification Report:
              precision    recall  f1-score   support

           0       0.70      0.75      0.72      1528
           1       0.89      0.86      0.87      3472

    accuracy                           0.82      5000
   macro avg       0.79      0.80      0.80      5000
weighted avg       0.83      0.82      0.82      5000

