In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the raw e-mail table from disk.
emails = pd.read_csv("emails.csv")

# Labels are taken from the 'Prediction' column (assumed to be the last
# column, which the positional slice below excludes — TODO confirm).
y = emails['Prediction'].values

# Feature matrix: every column strictly between the first and the last.
X = emails.iloc[:, 1:-1].values

# Standardize features. The scaler is fitted on every row of X and then
# applied to that same matrix.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Split data into training (60%), evaluation (20%), and test (20%) sets.
# The *unscaled* matrix X is split so that the scaler can be fitted on the
# training rows only: standardizing with statistics computed over the whole
# dataset would leak information from the evaluation/test rows into training.
# Both splits are stratified on the labels and seeded for reproducibility.
X_train_raw, X_temp_raw, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y)
X_eval_raw, X_test_raw, y_eval, y_test = train_test_split(
    X_temp_raw, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Learn mean/std from the training split, then apply that same transform to
# every split. `scaler` is rebound to the training-fitted instance so that it
# matches the data the models below are trained on.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_temp = scaler.transform(X_temp_raw)  # eval + test rows together
X_eval = scaler.transform(X_eval_raw)
X_test = scaler.transform(X_test_raw)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV

# 3. Model Implementation
# L2-penalised logistic regression using the liblinear solver, with a high
# iteration cap and a fixed seed.
log_reg = LogisticRegression(
    penalty='l2', solver='liblinear', max_iter=10000, random_state=42
)

# 4. Model Training
# Untuned baseline (default C) fitted on the training split; this also leaves
# `log_reg` as a fitted model.
log_reg.fit(X_train, y_train)

# 5. Model Tuning using Grid Search
# The search is run on the TRAINING split with 5-fold cross-validation, using
# the estimator configured above as the template (GridSearchCV clones it, so
# the fitted baseline is not modified). Fitting the search on the evaluation
# split would train the final model on that small split alone and leave the
# training data unused.
param_grid = {'C': [0.1, 1, 10, 100]}
grid_search = GridSearchCV(
    log_reg, param_grid, cv=5, scoring='accuracy',
)
grid_search.fit(X_train, y_train)

# Best parameters and best score (mean cross-validated accuracy)
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Score the tuned model on the held-out evaluation split
y_eval_pred = grid_search.best_estimator_.predict(X_eval)
eval_accuracy = accuracy_score(y_eval, y_eval_pred)

# Evaluate on test set
y_pred = grid_search.best_estimator_.predict(X_test)
print(f"Best Parameters: {best_params}")
print(f"Best Evaluation Score: {best_score}")
print(f"Held-out Evaluation Accuracy: {eval_accuracy}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.svm import SVC

# 3. Model Implementation
svm_clf = SVC()

# 4. Model Training
# Untuned baseline with default hyper-parameters, fitted on the training
# split; this also leaves `svm_clf` as a fitted model.
svm_clf.fit(X_train, y_train)

# 5. Model Tuning using Grid Search
# NOTE(review): 'probability': True makes every fit run an extra internal
# cross-validation; only `predict` is used in this cell, so consider dropping
# it if `predict_proba` is not needed elsewhere.
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'gamma': [1, 0.1, 0.01, 0.001],  # Kernel coefficient
    'kernel': ['rbf'],  # Kernel type
    'class_weight': ['balanced', None],  # Optionally reweight classes
    'probability': [True],  # Enable probability estimates
}
# The search is run on the TRAINING split with 5-fold cross-validation.
# Fitting it on the evaluation split would train the final model on that
# small split alone and leave the training data unused. The seed only
# influences the shuffling used for the probability estimates.
grid_search = GridSearchCV(
    SVC(random_state=42), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Best parameters and best score (mean cross-validated accuracy)
best_params = grid_search.best_params_
best_score = grid_search.best_score_

# Score the tuned model on the held-out evaluation split
y_eval_pred = grid_search.best_estimator_.predict(X_eval)
eval_accuracy = accuracy_score(y_eval, y_eval_pred)

# Evaluate on test set
y_pred = grid_search.best_estimator_.predict(X_test)
print(f"Best Parameters: {best_params}")
print(f"Best Evaluation Score: {best_score}")
print(f"Held-out Evaluation Accuracy: {eval_accuracy}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))