<a href="https://colab.research.google.com/github/ATANU0023/spam_comment_detection/blob/main/spamComment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, preprocessing, Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

#  Load and preprocess the data

In [7]:

data = pd.read_csv('/content/Youtube-Spam-Dataset.csv')  # Load your dataset
comments = data['CONTENT']  # Use the 'CONTENT' column as input
labels = data['CLASS']  # Use the 'CLASS' column as labels

#  Tokenize and pad the sequences (for comments)

In [8]:

tokenizer = preprocessing.text.Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(comments)
sequences = tokenizer.texts_to_sequences(comments)
padded_sequences = preprocessing.sequence.pad_sequences(sequences, padding='post', maxlen=100)

#  Encode the labels (assuming 'CLASS' is binary; 0 for non-spam, 1 for spam)

In [9]:

label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

#  Train-test split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

#  Build the model

In [11]:

model = Sequential([
    layers.Embedding(input_dim=5000, output_dim=32, input_length=100),
    layers.Conv1D(128, 5, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification (spam or not spam)
])



#  Compile the model

In [12]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

#  Train the model

In [13]:

history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.6433 - loss: 0.6667 - val_accuracy: 0.8495 - val_loss: 0.4390
Epoch 2/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.8835 - loss: 0.3412 - val_accuracy: 0.9286 - val_loss: 0.1825
Epoch 3/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9620 - loss: 0.1264 - val_accuracy: 0.9439 - val_loss: 0.1451
Epoch 4/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.9881 - loss: 0.0639 - val_accuracy: 0.9439 - val_loss: 0.1576
Epoch 5/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.9962 - loss: 0.0304 - val_accuracy: 0.9413 - val_loss: 0.1572
Epoch 6/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - accuracy: 0.9949 - loss: 0.0207 - val_accuracy: 0.9439 - val_loss: 0.1717
Epoch 7/10
[1m49/49[0m [32m━━━━

#  Evaluate the model


In [14]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9389 - loss: 0.2328
Test Accuracy: 0.9438775777816772


In [15]:

# 9. Add an input section for spam detection
while True:
    # Accept a comment input from the user
    user_comment = input("\nEnter a comment to check (or type 'exit' to quit): ")

    # Check if the user wants to exit
    if user_comment.lower() == 'exit':
        break

    # Preprocess the input comment (similar to training data)
    user_comment_seq = tokenizer.texts_to_sequences([user_comment])
    user_comment_padded = preprocessing.sequence.pad_sequences(user_comment_seq, maxlen=100, padding='post')

    # Predict whether it's spam or not
    prediction = model.predict(user_comment_padded)

    # Output the result
    if prediction > 0.5:
        print("Prediction: Spam")
    else:
        print("Prediction: Not Spam")


Enter a comment to check (or type 'exit' to quit): This is a great product! Prediction
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
Prediction: Not Spam

Enter a comment to check (or type 'exit' to quit): Click here to win a free iPhone! Prediction
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Prediction: Spam

Enter a comment to check (or type 'exit' to quit): win a lottery of 1000000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Prediction: Not Spam

Enter a comment to check (or type 'exit' to quit): Congratulations! You've won a FREE iPhone! Click the link to claim your prize now: [spam-link]. Don’t miss out!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Prediction: Spam

Enter a comment to check (or type 'exit' to quit): exit
