In [1]:
import re

# Simulated email content
email_subject = "URGENT: Your account needs verification"
email_body = "Dear customer, your account requires immediate verification. Click the link to proceed: http://fakephishingsite.com"

# Phishing indicators
phishing_keywords = ["URGENT", "account verification", "click the link", "verify now", "action required"]
# Matches http(s) URLs whose host is the known-bad domain or any subdomain of
# it. The earlier pattern demanded at least two extra characters between
# "://" and the domain, so a bare "http://fakephishingsite.com" never matched.
phishing_url_pattern = r"https?://(?:[\w-]+\.)*fakephishingsite\.com\b"

# Content analysis
def is_phishing(email_subject, email_body, keywords=None, url_pattern=None):
    """Return True when an email looks like a phishing attempt.

    The email is flagged when any keyword occurs in the subject or the body
    (plain substring test, compared case-insensitively), or when the body
    contains text matching the phishing URL regular expression.

    Args:
        email_subject: Subject line of the email.
        email_body: Body text of the email.
        keywords: Optional iterable of indicator phrases. When omitted, the
            module-level ``phishing_keywords`` is used.
        url_pattern: Optional regular expression for known-bad URLs. When
            omitted, the module-level ``phishing_url_pattern`` is used.

    Returns:
        bool: True if a keyword or a matching URL was found, else False.
    """
    # Resolve defaults at call time so existing two-argument calls keep working.
    if keywords is None:
        keywords = phishing_keywords
    if url_pattern is None:
        url_pattern = phishing_url_pattern

    # Fold case once: a keyword such as "click the link" must also catch
    # "Click the link", which a case-sensitive `in` check silently misses.
    subject_folded = email_subject.casefold()
    body_folded = email_body.casefold()
    for keyword in keywords:
        needle = keyword.casefold()
        if needle in subject_folded or needle in body_folded:
            return True

    # URL schemes and host names are case-insensitive, so match accordingly.
    return re.search(url_pattern, email_body, re.IGNORECASE) is not None

# Classify the sample email and report the verdict
print(
    "This email appears to be a phishing attempt."
    if is_phishing(email_subject, email_body)
    else "This email seems legitimate."
)


This email appears to be a phishing attempt.


In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Simulate email feature vectors. Both classes are uniform noise in [0, 1);
# the phishing class is shifted upward so the two distributions only
# partially overlap.
SEED = 42  # one seed for data generation, the split and the model
num_samples = 1000
num_features = 5
phishing_shift = 0.5  # scalar offset, broadcasts to any num_features

# A seeded generator makes the simulated data identical on every run.
rng = np.random.default_rng(SEED)
phishing_data = rng.random((num_samples, num_features)) + phishing_shift
legitimate_data = rng.random((num_samples, num_features))

# Combine data (phishing rows first, then legitimate rows)
all_data = np.vstack((phishing_data, legitimate_data))

# Create balanced labels in the same row order: 1 = phishing, 0 = legitimate
phishing_labels = np.ones(num_samples)
legitimate_labels = np.zeros(num_samples)
labels = np.concatenate((phishing_labels, legitimate_labels))

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(all_data, labels, test_size=0.2, random_state=SEED)

# Create and train Random Forest classifier (seeded for reproducible trees)
classifier = RandomForestClassifier(random_state=SEED)
classifier.fit(X_train, y_train)

# Make predictions
y_pred = classifier.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=['Legitimate', 'Phishing'])

print(f"Accuracy: {accuracy:.2f}")
print(report)


Accuracy: 0.99
              precision    recall  f1-score   support

  Legitimate       0.99      0.99      0.99       201
    Phishing       0.99      0.99      0.99       199

    accuracy                           0.99       400
   macro avg       0.99      0.99      0.99       400
weighted avg       0.99      0.99      0.99       400



In [3]:
# Generative AI for phishing: keyword-based detection for a single email

import re

# Define a list of phishing keywords
phishing_keywords = [
    "urgent",
    "verify",
    "account",
    "password",
    "login",
    "security",
    "suspicious",
    "confirm",
    "bank",
    "credit card"
]

# Example email subject and body
email_subject = "Urgent: Verify Your Account Information"
email_body = "Dear customer, your account has been compromised. Click the link to verify your details."

# Convert text to lowercase for case-insensitive matching
email_subject = email_subject.lower()
email_body = email_body.lower()

# Flag the email when any keyword occurs as a whole word/phrase in the
# subject or the body. The flag is deliberately not called `is_phishing`:
# that name is the detector function defined in the first cell, and rebinding
# it to a bool would make any later call to that function fail.
phishing_detected = any(
    re.search(r'\b' + re.escape(keyword) + r'\b', text)
    for text in (email_subject, email_body)
    for keyword in phishing_keywords
)

# Print the result
if phishing_detected:
    print("This email might be a phishing attempt.")
else:
    print("This email seems legitimate.")


This email might be a phishing attempt.


In [4]:
# Check multiple emails (subject and body) for phishing keywords

import re

# Define a list of phishing keywords
phishing_keywords = [
    "urgent",
    "verify",
    "account",
    "password",
    "login",
    "security",
    "suspicious",
    "confirm",
    "bank",
    "credit card"
]

# Build one whole-word alternation over all keywords up front, instead of
# assembling a fresh regex per keyword for every email.
keyword_pattern = re.compile(
    r'\b(?:' + '|'.join(re.escape(keyword) for keyword in phishing_keywords) + r')\b'
)

# List of example email subjects and bodies
email_data = [
    ("Urgent: Verify Your Account Information", "Dear customer, your account has been compromised."),
    ("Important Security Update", "We noticed suspicious activity on your account."),
    ("Greetings from Your Bank", "Please log in to confirm your details."),
    ("Meeting Invitation", "You're invited to a meeting tomorrow."),
    ("Win a Free Vacation!", "Congratulations, you've won a free vacation!")
]

# Iterate through each email and perform phishing detection
for email_subject, email_body in email_data:
    # Lowercase both parts so matching is case-insensitive
    email_subject = email_subject.lower()
    email_body = email_body.lower()

    # A hit in either the subject or the body flags the email. The flag is
    # not named `is_phishing` so it cannot clobber the detector function of
    # that name defined in the first cell.
    phishing_detected = bool(
        keyword_pattern.search(email_subject) or keyword_pattern.search(email_body)
    )

    # Print the result for each email
    if phishing_detected:
        print(f"Email Subject: {email_subject} | This email might be a phishing attempt.")
    else:
        print(f"Email Subject: {email_subject} | This email seems legitimate.")


Email Subject: urgent: verify your account information | This email might be a phishing attempt.
Email Subject: important security update | This email might be a phishing attempt.
Email Subject: greetings from your bank | This email might be a phishing attempt.
Email Subject: meeting invitation | This email seems legitimate.
Email Subject: win a free vacation! | This email seems legitimate.
