In [8]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler


# Load the dataset from the given URL
def load_dataset(url, column_names):
    """
    Read a header-less CSV into a DataFrame with the supplied column names.

    Args:
        url (str): Location of the CSV data; handed straight to
            ``pandas.read_csv``.
        column_names (list): Names to assign to the columns, in order.

    Returns:
        pd.DataFrame: The parsed dataset, labelled with ``column_names``.
    """
    # header=None: the first row is data rather than column titles, so the
    # names have to be provided explicitly.
    read_options = {"header": None, "names": column_names}
    dataset = pd.read_csv(url, **read_options)
    return dataset


# Preprocess the data
def preprocess_data(data, label_column, *, test_size=0.2, random_state=42):
    """
    Split data into features and labels, then train-test split and normalize.

    Args:
        data (pd.DataFrame): The dataset to preprocess.
        label_column (str): The name of the label column.
        test_size (float or int): Size of the held-out test split, forwarded
            to ``train_test_split``. Defaults to 0.2.
        random_state (int or None): Seed forwarded to ``train_test_split`` so
            the split is reproducible. Defaults to 42.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` where the feature sets
        have been standardized and the labels are the matching slices of
        ``data[label_column]``.

    Raises:
        KeyError: If ``label_column`` is not a column of ``data``.
    """
    # Features are every column except the label
    features = data.drop(columns=label_column)
    labels = data[label_column]

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training split only, then reuse its statistics
    # for the test split, so no information from the test set leaks into
    # the preprocessing step.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test


# Train and evaluate classifiers
def evaluate_classifiers(classifiers, X_train, X_test, y_train, y_test):
    """
    Fit every classifier on the training split and print its test metrics.

    Args:
        classifiers (dict): Maps a display name to a classifier instance
            exposing ``fit`` and ``predict``.
        X_train (array-like): Training features.
        X_test (array-like): Testing features.
        y_train (array-like): Training labels.
        y_test (array-like): Testing labels.

    Returns:
        None. Accuracy, precision, recall and F1 are printed per classifier.
    """
    # Report label -> scoring function, in the order the report is printed.
    scorers = (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    )

    for label, model in classifiers.items():
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        # Compute every metric before printing anything for this model.
        report = [
            (title, score_fn(y_test, predictions)) for title, score_fn in scorers
        ]

        print(f"{label} Classifier Results:")
        for title, value in report:
            print(f"{title}: {value:.4f}")
        print("-" * 30)


# Main function
def main():
    """
    Run the full pipeline: fetch the data, preprocess it, and compare models.
    """
    # Dataset location and schema: 57 feature columns followed by the label
    label_column = 'spam_label'
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data"
    header = [f'feature_{idx}' for idx in range(1, 58)]
    header.append(label_column)

    # Fetch the raw data
    print("Loading dataset...")
    frame = load_dataset(source_url, header)
    print("Dataset loaded successfully.")

    # Split and scale
    print("Preprocessing dataset...")
    train_X, test_X, train_y, test_y = preprocess_data(frame, label_column)
    print("Preprocessing complete.")

    # Models to compare; register additional classifiers here as needed
    models = {}
    models['Random Forest'] = RandomForestClassifier(n_estimators=100, random_state=42)
    models['Naive Bayes'] = GaussianNB()

    # Fit each model and print its metrics
    print("Evaluating classifiers...")
    evaluate_classifiers(models, train_X, test_X, train_y, test_y)


# Entry point
if __name__ == "__main__":
    # Run the pipeline only when this code is executed directly, not when it
    # is imported as a module.
    print("Starting the program...")
    main()


Starting the program...
Loading dataset...
Dataset loaded successfully.
Preprocessing dataset...
Preprocessing complete.
Evaluating classifiers...
Random Forest Classifier Results:
Accuracy: 0.9555
Precision: 0.9755
Recall: 0.9179
F1 Score: 0.9458
------------------------------
Naive Bayes Classifier Results:
Accuracy: 0.8219
Precision: 0.7233
Recall: 0.9385
F1 Score: 0.8170
------------------------------
