<a href="https://colab.research.google.com/github/NimraAkram81/DEPT1/blob/main/DENT2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
# The CSV is read relative to the notebook's working directory.
file_path = 'emails.csv'
emails_df = pd.read_csv(file_path)

# Data Cleaning and Preprocessing
# Drop the "Email No." column as it's unnecessary for classification.
# `drop` returns a new frame, so the raw `emails_df` is left untouched and this
# step can be re-run safely.
emails_df_cleaned = emails_df.drop(columns=["Email No."])

# Count missing cells across every remaining column (features and label).
missing_values = emails_df_cleaned.isnull().sum().sum()
if missing_values > 0:
    # Fail fast: this frame is passed straight to the estimators fitted further
    # down, and those reject NaN input. Stopping here gives a clear message
    # instead of printing a warning and failing later inside `fit`.
    raise ValueError(
        f"Missing values in the dataset: {missing_values}. Please handle them before proceeding."
    )
print("No missing values found.")

# Features and target
# NOTE: no TF-IDF transformation is applied in this step. Every column other
# than "Prediction" is used as a feature exactly as it appears in the CSV; the
# columns are assumed to already be per-email word counts (TODO confirm against
# the dataset description). Raw text would need tokenizing/vectorizing first.

# Target (y) is the "Prediction" column; features (X) are all remaining columns.
y = emails_df_cleaned["Prediction"]
X = emails_df_cleaned.drop("Prediction", axis="columns")

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model 1: Multinomial Naive Bayes
# `fit` returns the fitted estimator, so construction and training are chained.
nb_model = MultinomialNB().fit(X_train, y_train)
y_pred_nb = nb_model.predict(X_test)

# Model 2: Support Vector Machine with a linear kernel
svm_model = SVC(kernel='linear').fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# Model Evaluation
# Each entry pairs the heading to print with that model's test-set predictions.
# The leading "\n" on the second heading keeps a blank line between reports.
evaluation_runs = (
    ("Naive Bayes Model Evaluation:", y_pred_nb),
    ("\nSVM Model Evaluation:", y_pred_svm),
)

for heading, predicted_labels in evaluation_runs:
    print(heading)
    accuracy_pct = accuracy_score(y_test, predicted_labels) * 100
    print(f"Accuracy: {accuracy_pct:.2f}%")
    print("Classification Report:")
    print(classification_report(y_test, predicted_labels))


No missing values found.
Naive Bayes Model Evaluation:
Accuracy: 95.46%
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.97       739
           1       0.89      0.96      0.92       296

    accuracy                           0.95      1035
   macro avg       0.94      0.96      0.95      1035
weighted avg       0.96      0.95      0.96      1035


SVM Model Evaluation:
Accuracy: 95.94%
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.97      0.97       739
           1       0.92      0.94      0.93       296

    accuracy                           0.96      1035
   macro avg       0.95      0.95      0.95      1035
weighted avg       0.96      0.96      0.96      1035

