In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw email table from disk and discard the 'Email No.' column,
# which the analysis does not use as a feature.
emails_df = pd.read_csv('emails.csv').drop(columns=['Email No.'])

# Step 1: 'Prediction' is the target; every other remaining column is a feature.
y = emails_df['Prediction']
X = emails_df.drop(columns=['Prediction'])

# Step 2: Hold out 20% of the rows for testing. The fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3a: Multinomial Naive Bayes is the estimator tuned by both searches.
nb_model = MultinomialNB()

# Step 3b: Candidate values for the `alpha` parameter.
param_grid = {'alpha': [0.1, 0.5, 1.0, 2.0, 5.0]}  # fixed list tried exhaustively by Grid Search
param_dist = {'alpha': np.linspace(0.1, 5.0, 50)}  # 50 evenly spaced values sampled by Random Search

# Settings common to both searches: 5-fold cross-validation, F1 as the
# selection metric, and all available CPU cores.
shared_search_settings = {'cv': 5, 'scoring': 'f1', 'n_jobs': -1}

# Step 4a: Grid Search evaluates every alpha in param_grid.
grid_search = GridSearchCV(
    estimator=nb_model,
    param_grid=param_grid,
    **shared_search_settings,
)
grid_search.fit(X_train, y_train)
best_grid_model = grid_search.best_estimator_

print("Best parameters from Grid Search:", grid_search.best_params_)

# Step 4b: Random Search draws 10 alphas from param_dist; the seed fixes
# which candidates are drawn.
random_search = RandomizedSearchCV(
    estimator=nb_model,
    param_distributions=param_dist,
    n_iter=10,
    random_state=42,
    **shared_search_settings,
)
random_search.fit(X_train, y_train)
best_random_model = random_search.best_estimator_

print("Best parameters from Random Search:", random_search.best_params_)

# Step 5: Score each tuned model on the held-out test set and print a report.
tuned_models = [
    ("Grid Search Model", best_grid_model),
    ("Random Search Model", best_random_model),
]

for model_name, model in tuned_models:
    y_pred = model.predict(X_test)

    # Compute the four metrics in the same order as they are reported below.
    accuracy, precision, recall, f1 = (
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    )

    # One print call, one report line per argument; accuracy is shown as a
    # percentage while the other metrics stay as fractions.
    print(
        f"\n{model_name} Evaluation Metrics:",
        f"Accuracy: {accuracy * 100:.2f}%",
        f"Precision: {precision:.2f}",
        f"Recall: {recall:.2f}",
        f"F1 Score: {f1:.2f}",
        sep="\n",
    )


Best parameters from Grid Search: {'alpha': 0.1}
Best parameters from Random Search: {'alpha': 1.4000000000000001}

Grid Search Model Evaluation Metrics:
Accuracy: 95.75%
Precision: 0.90
Recall: 0.96
F1 Score: 0.93

Random Search Model Evaluation Metrics:
Accuracy: 95.36%
Precision: 0.89
Recall: 0.96
F1 Score: 0.92
