<a href="https://colab.research.google.com/github/Octaxx/DLI/blob/main/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Attach Google Drive to the Colab filesystem; later cells write the pickled
# artifacts underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
import pandas as pd
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Read CEAS_08.csv straight from the Octaxx/DLI GitHub repository into a
# DataFrame (no local copy is kept).
url = "https://raw.githubusercontent.com/Octaxx/DLI/main/CEAS_08.csv"
df = pd.read_csv(filepath_or_buffer=url)

In [None]:
# Build one text field per email: subject and body joined by a single space.
# Missing values are replaced with '' first so the concatenation never yields NaN.
subject_text = df['subject'].fillna('')
body_text = df['body'].fillna('')
df['text'] = subject_text + ' ' + body_text

# Model input (combined text) and target (the 'label' column).
X, y = df['text'], df['label']

In [None]:
# Sets how many times to train & test the model
num_runs = 10
run_accuracies = []

for run in range(num_runs):
    print(f"\n=== Run {run + 1}/{num_runs} ===")

    # Train-Test Split on the RAW text. Splitting before vectorizing keeps the
    # held-out emails from influencing the TF-IDF vocabulary and IDF weights
    # (fitting on the full corpus leaks test data into the features).
    # random_state=run gives a different, but reproducible, split per run.
    text_train, text_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=run, stratify=y
    )

    # TF-IDF Vectorization: learn vocabulary/IDF from the training split only,
    # then apply that fitted transform unchanged to the test split.
    vectorizer = TfidfVectorizer(stop_words='english', max_features=5000, ngram_range=(1, 2), min_df=2)
    X_train = vectorizer.fit_transform(text_train)
    X_test = vectorizer.transform(text_test)

    # Naive Bayes Model
    model = MultinomialNB(alpha=1.0)
    model.fit(X_train, y_train)

    # Predictions & Evaluation
    y_pred = model.predict(X_test)

    # Accuracy for this run
    accuracy = accuracy_score(y_test, y_pred)
    run_accuracies.append(accuracy)

    print("Accuracy:", accuracy)

# NOTE: after the loop, `vectorizer` and `model` are the pair fitted during the
# final run (random_state = num_runs - 1); later cells use that matched pair.
total_accuracy = sum(run_accuracies)
avg_accuracy = total_accuracy / num_runs
print(f"\n✅ Average Accuracy over {num_runs} runs: {avg_accuracy:.4f}")

In [None]:
# Save the trained classifier and its fitted TF-IDF vectorizer to Google Drive.
# Both are needed at inference time: the vectorizer turns raw text into the
# feature space the classifier was trained on.
import pickle

# Persist the classifier itself. (Previously the vectorizer was dumped here by
# mistake, so model.pkl never contained the trained model.)
with open('/content/drive/My Drive/Colab Notebooks/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Persist the matching fitted vectorizer.
with open('/content/drive/My Drive/Colab Notebooks/vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

In [None]:
print("\n======== Email Spam Detection ========")

# Read the email text to classify from the user
user_input = input("Enter your email text: ")

# Vectorize the single email with the already-fitted vectorizer (it expects an
# iterable of documents, hence the one-element list), then classify it.
user_input_tfidf = vectorizer.transform([user_input])
prediction = model.predict(user_input_tfidf)

# Report the verdict; label 1 is reported as phishing/spam, anything else as safe.
is_phishing = prediction[0] == 1
if not is_phishing:
    print("🟢 This email is classified as: Safe / Not Phishing")
else:
    print("🔴 This email is classified as: Phishing / Spam")