In [None]:
!pip install tensorflow keras scikit-learn pandas numpy matplotlib



In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

# Read the raw corpus; the code below uses its 'text' and 'label' columns.
df = pd.read_csv("combined_data.csv")

# Replace the label column with integer class ids.
# (Original note: Spam = 1, Not Spam = 0 -- this depends on the raw label
# values in the CSV; TODO confirm via label_encoder.classes_.)
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Build the vocabulary from the training texts only, keeping 10000 words and
# mapping everything else to the "<OOV>" token.
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Encode both splits the same way: integer sequences padded with maxlen=200.
X_train_seq, X_test_seq = (
    pad_sequences(tokenizer.texts_to_sequences(split), maxlen=200)
    for split in (X_train, X_test)
)

# Persist the fitted tokenizer so it can be reloaded at prediction time.
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

print("Dataset Processed Successfully!")


Dataset Processed Successfully!


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and
# batch shuffling are as repeatable as the backend allows (the train/test
# split above is already seeded with random_state=42).
keras.utils.set_random_seed(42)

# Define model: embedding -> two stacked LSTMs with dropout -> sigmoid output.
model = Sequential([
    # `input_length` is intentionally not passed: Keras 3 deprecates the
    # argument and the sequence length is taken from the input batches.
    Embedding(input_dim=10000, output_dim=128),
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(64, return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dropout(0.3),
    # Single sigmoid unit: one score per input for the binary label.
    Dense(1, activation='sigmoid')
])

# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model. NOTE(review): the held-out test split doubles as the validation
# set here, so the reported val_* metrics are the only evaluation available.
history = model.fit(
    X_train_seq,
    y_train,
    validation_data=(X_test_seq, y_test),
    epochs=5,
    batch_size=64,
)

# Save model. The file name is kept as-is because predict_email_spam loads
# "spam_detection_lstm.h5".
model.save("spam_detection_lstm.h5")

print("Model Training Completed and Saved Successfully!")




Epoch 1/5
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 431ms/step - accuracy: 0.9308 - loss: 0.1926 - val_accuracy: 0.9783 - val_loss: 0.0660
Epoch 2/5
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m440s[0m 422ms/step - accuracy: 0.9829 - loss: 0.0596 - val_accuracy: 0.9834 - val_loss: 0.0506
Epoch 3/5
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m420s[0m 400ms/step - accuracy: 0.9896 - loss: 0.0346 - val_accuracy: 0.9834 - val_loss: 0.0600
Epoch 4/5
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m431s[0m 413ms/step - accuracy: 0.9929 - loss: 0.0221 - val_accuracy: 0.9873 - val_loss: 0.0405
Epoch 5/5
[1m1044/1044[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m443s[0m 414ms/step - accuracy: 0.9957 - loss: 0.0138 - val_accuracy: 0.9857 - val_loss: 0.0477




Model Training Completed and Saved Successfully!


In [None]:
def predict_email_spam(email_text):
    """Classify a single email with the saved LSTM model.

    On every call this reloads the pickled tokenizer from "tokenizer.pkl" and
    the model from "spam_detection_lstm.h5", encodes the text as a sequence
    padded with maxlen=200, and thresholds the model output at 0.5.

    Args:
        email_text: The email text to classify.

    Returns:
        "Spam" if the predicted score is greater than 0.5, otherwise
        "Not Spam".
    """
    # NOTE: pickle.load can execute arbitrary code; only load a tokenizer
    # file that this notebook produced.
    with open("tokenizer.pkl", "rb") as tokenizer_file:
        fitted_tokenizer = pickle.load(tokenizer_file)
    classifier = keras.models.load_model("spam_detection_lstm.h5")

    # Preprocess: wrap the text in a one-element batch, then encode and pad.
    token_ids = fitted_tokenizer.texts_to_sequences([email_text])
    padded_ids = pad_sequences(token_ids, maxlen=200)

    # Predict: take the single score of the single batch element.
    spam_score = classifier.predict(padded_ids)[0][0]
    if spam_score > 0.5:
        return "Spam"
    return "Not Spam"

# Example usage: classify one hard-coded sample message and print the label
# returned by predict_email_spam.
email_text = "You have won a lottery! Claim your prize now."
print("Spam Detection Result:", predict_email_spam(email_text))




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 392ms/step
Spam Detection Result: Spam


In [None]:
!pip install openai-whisper

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none

In [None]:
import whisper
import google.generativeai as genai

import os

# Load Whisper Model
whisper_model = whisper.load_model("base")

# Configure Gemini API.
# The key is read from the GOOGLE_API_KEY environment variable instead of
# being written into the notebook. The key that used to be hardcoded here has
# been exposed and should be revoked.
gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export your Gemini API key before running this cell."
    )
genai.configure(api_key=gemini_api_key)

def transcribe_audio(file_path):
    """Run the module-level Whisper model on an audio file.

    Args:
        file_path: Path of the audio file passed to ``whisper_model.transcribe``.

    Returns:
        The value stored under the "text" key of the transcription result.
    """
    transcription = whisper_model.transcribe(file_path)
    text = transcription["text"]
    return text

def enhance_prompt_with_gemini(initial_prompt):
    """Ask Gemini to rewrite a hand-written spam detection prompt.

    Args:
        initial_prompt: The prompt text to be improved.

    Returns:
        The text of Gemini's reply with surrounding whitespace removed.
    """
    gemini = genai.GenerativeModel("gemini-1.5-pro-latest")
    request = f"Improve this spam detection prompt:\n{initial_prompt}"
    improved = gemini.generate_content(request).text
    return improved.strip()

def detect_spam_from_audio(audio_path):
    """Transcribe an audio file and ask Gemini whether the transcript is spam.

    The hand-written prompt (including the transcript) is first rewritten by
    ``enhance_prompt_with_gemini``; the rewritten prompt is then sent to
    Gemini and its reply is returned.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The text of Gemini's reply to the rewritten prompt, stripped of
        surrounding whitespace.
    """
    transcript = transcribe_audio(audio_path)

    # Hand-written starting prompt.
    # NOTE(review): the transcript is embedded before the rewriting call, so
    # the final request contains only whatever the rewrite kept of it.
    base_prompt = f"""
    You are analyzing a transcribed audio message for spam detection.
    Classify each message as either "Spam" or "Not Spam".
    Provide only one word per message (Spam or Not Spam).

    Message: {transcript}
    """

    # First Gemini call: rewrite the prompt.
    refined_prompt = enhance_prompt_with_gemini(base_prompt)

    # Second Gemini call: run the rewritten prompt to get the classification.
    gemini = genai.GenerativeModel("gemini-1.5-pro-latest")
    verdict = gemini.generate_content(refined_prompt).text
    return verdict.strip()

# Example usage: run the audio pipeline on "Conference.wav" (a relative path,
# so the file is looked up from the current working directory).
audio_file = "Conference.wav"
print("🔊 Spam Detection from Audio:", detect_spam_from_audio(audio_file))




🔊 Spam Detection from Audio: Not Spam


In [None]:
import whisper
import google.generativeai as genai

import os

# Load Whisper Model
# NOTE(review): the earlier Whisper cell already loads the same "base" model;
# this reload only matters when this cell is run on its own.
whisper_model = whisper.load_model("base")

# Configure Gemini API.
# The key is read from the GOOGLE_API_KEY environment variable instead of
# being written into the notebook. The key that used to be hardcoded here has
# been exposed and should be revoked.
gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export your Gemini API key before running this cell."
    )
genai.configure(api_key=gemini_api_key)

def transcribe_audio(file_path):
    """Return the Whisper transcript of an audio file.

    NOTE(review): same body as the ``transcribe_audio`` defined in the earlier
    Whisper cell; this definition replaces it once this cell runs.

    Args:
        file_path: Path of the audio file passed to ``whisper_model.transcribe``.

    Returns:
        The "text" entry of the transcription result.
    """
    return whisper_model.transcribe(file_path)["text"]

def enhance_prompt(prompt):
    """Ask Gemini for a better version of a spam classification prompt.

    The request tells Gemini not to add extra explanation.

    Args:
        prompt: The prompt text to be improved.

    Returns:
        The text of Gemini's reply with surrounding whitespace removed.
    """
    gemini = genai.GenerativeModel("gemini-1.5-pro-latest")
    request = (
        "Make this a better spam classification prompt without adding extra explanation:\n"
        f"{prompt}"
    )
    rewritten = gemini.generate_content(request).text
    return rewritten.strip()


def detect_spam_from_audio(audio_path):
    """Transcribes audio and detects if it's spam using Gemini AI.

    Only the task instructions are passed through ``enhance_prompt``. The
    transcript and a fixed output-format rule are appended afterwards, so the
    rewriting step can neither paraphrase or drop the message nor remove the
    label constraint. Previously the whole prompt was rewritten, and the
    final reply came back as free-form prose instead of a label.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        Gemini's reply with surrounding whitespace removed; the prompt asks
        for exactly "Spam" or "Not Spam".
    """
    transcribed_text = transcribe_audio(audio_path)

    # Manual prompt: instructions only. The input is a transcribed audio
    # message, not an email, so the wording says so.
    initial_prompt = "Classify this transcribed audio message as Spam or Not Spam:"

    # Use Gemini AI to enhance the instructions
    enhanced_prompt = enhance_prompt(initial_prompt)

    # Append the output rule and the verbatim transcript after enhancement.
    final_prompt = (
        f"{enhanced_prompt}\n\n"
        "Answer with exactly one label and nothing else: Spam or Not Spam.\n\n"
        f"Message:\n{transcribed_text}"
    )

    # Get final spam classification from Gemini
    model = genai.GenerativeModel("gemini-1.5-pro-latest")
    response = model.generate_content(final_prompt)

    return response.text.strip()

# Example usage.
# NOTE(review): this is an absolute "/content/..." path (presumably a Colab
# upload location -- confirm); the earlier audio cell uses the relative
# "Conference.wav".
audio_file = "/content/Conference.wav"
print("Spam Detection from Audio:", detect_spam_from_audio(audio_file))




Spam Detection from Audio: Likely, yes.  The informal, abrupt style, the multiple first names without context, and the lack of a clear purpose all point to a potential spam or phishing attempt.  It's best to be cautious and not interact with it further.


In [None]:
import time

def classify_bulk_messages(messages):
    """Classifies multiple messages as Spam or Not Spam with enhanced prompt engineering.

    Only the instructions are passed through ``enhance_prompt_with_gemini``.
    The messages and a fixed output-format rule are appended afterwards, so
    the rewriting step cannot alter, drop, or add messages. Previously the
    messages were embedded in the prompt being rewritten, and the reply could
    contain a different number of labels than there were messages.

    Args:
        messages: Either a single string with one message per line, or an
            iterable of messages (joined with newlines here).

    Returns:
        Gemini's reply with surrounding whitespace removed; the prompt asks
        for one "Spam" / "Not Spam" line per message, in input order.
    """
    # Accept a list/iterable as well as the pre-joined string callers pass.
    if not isinstance(messages, str):
        messages = "\n".join(str(message) for message in messages)

    # Manually define the initial prompt (instructions only)
    initial_prompt = """
    You are an advanced spam detection system.
    Classify each message below as either "Spam" or "Not Spam".
    Only return the classification for each message without additional explanation.
    """

    # Enhance the instructions using Gemini AI
    enhanced_prompt = enhance_prompt_with_gemini(initial_prompt)

    time.sleep(2)  # Wait 2 seconds before making the request

    # Append the output rule and the verbatim messages after enhancement.
    final_prompt = (
        f"{enhanced_prompt}\n\n"
        "Return exactly one line per message, in the same order as the messages. "
        "Each line must be either Spam or Not Spam and nothing else.\n\n"
        f"Messages (one per line):\n{messages}"
    )

    # Get spam classification from Gemini
    model = genai.GenerativeModel("gemini-1.5-pro-latest")
    response = model.generate_content(final_prompt)

    return response.text.strip()

# Example usage: three sample messages to classify in one request.
messages = [
    "Win a free iPhone now! Click this link.",
    "Hey, are we meeting at 5 PM?",
    "Your account has been compromised. Reset your password now!"
]

# The list is joined with newlines, so the function receives a single string
# with one message per line.
print("📩 Bulk Spam Detection:", classify_bulk_messages("\n".join(messages)))


📩 Bulk Spam Detection: Spam
Not Spam
Spam
Spam
Not Spam
Spam


In [None]:
def detect_spam_from_chat(chat_message):
    """Detects spam in a chat message using Gemini AI after prompt enhancement.

    Only the task instructions are passed through ``enhance_prompt``. The
    chat message and a fixed output-format rule are appended afterwards, so
    the rewriting step can neither change the message nor remove the label
    constraint. Previously the whole prompt was rewritten, and the reply came
    back as an explanatory paragraph instead of a label.

    Args:
        chat_message: The chat message text to classify.

    Returns:
        Gemini's reply with surrounding whitespace removed; the prompt asks
        for exactly "Spam" or "Not Spam".
    """
    # Manual prompt (instructions only)
    initial_prompt = "Analyze this chat message and classify it as Spam or Not Spam:"

    # Enhance the instructions using Gemini AI
    enhanced_prompt = enhance_prompt(initial_prompt)

    # Append the output rule and the verbatim message after enhancement.
    final_prompt = (
        f"{enhanced_prompt}\n\n"
        "Answer with exactly one label and nothing else: Spam or Not Spam.\n\n"
        f"Chat message:\n{chat_message}"
    )

    # Get spam classification
    model = genai.GenerativeModel("gemini-1.5-pro-latest")
    response = model.generate_content(final_prompt)

    return response.text.strip()

# Example chat input: a single hard-coded message passed to
# detect_spam_from_chat, whose reply is printed.
chat_message = "Click this link to get free money now!"
print("Chat Spam Detection:", detect_spam_from_chat(chat_message))


Chat Spam Detection: Yes, this message is highly likely to be spam.  Offers of "free money" with a prompt to click a link are classic spam tactics.  The link likely leads to a phishing site, malware download, or some other scam.  It's best to avoid clicking such links.
