In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score, recall_score
import warnings

warnings.filterwarnings("ignore")


def load_data():
    # Load Color Data
    data_col = pd.read_csv("Colored_Features_cleaned.csv")
    data_col.drop(labels=["Unnamed: 0", "Expert 1", "Expert 2", "Expert 3", "Expert 4"], axis=1, inplace=True)

    # Load Greyscale data
    data_gray = pd.read_csv("Grayscale_Features_Cleaned.csv")
    data_gray.drop(labels=["Unnamed: 0"], axis=1, inplace=True)

    return data_col, data_gray


def preprocess_data(data_col, data_gray):
    features = pd.concat([data_gray, data_col], axis=1, ignore_index=False)
    features = features.dropna()

    X = features.iloc[:, 4:]
    y = features.iloc[:, :4]

    y = np.round((y["Expert 1"] + y["Expert 2"] + y["Expert 3"] + y["Expert 4"]) / 4, 0)

    y = y.apply(lambda x: 1 if x >= 3 else 0)

    return X, y


def train_classifiers(classifiers, X_train, y_train, X_test, y_test):
    # Initialize lists for storing metrics
    classifier_names = []
    accuracy_scores = []
    f1_scores = []
    auc_scores = []
    recall_scores = []

    # Iterate through classifiers and collect metrics
    for clf in classifiers:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        classifier_names.append(clf.__class__.__name__)
        accuracy_scores.append(accuracy_score(y_pred, y_test))
        f1_scores.append(f1_score(y_pred, y_test))
        auc_scores.append(roc_auc_score(y_pred, y_test))
        recall_scores.append(recall_score(y_pred, y_test))

    # Create a DataFrame with the results
    results_df = pd.DataFrame({
        'Classifier': classifier_names,
        'Accuracy': accuracy_scores,
        'F1': f1_scores,
        'Recall': recall_scores,
        'AUC': auc_scores
    })

    # Print the results
    print("Classification Results:")
    print(results_df.to_string(index=False))


def main():
    data_col, data_gray = load_data()
    X, y = preprocess_data(data_col, data_gray)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    classifiers = [
        LogisticRegression(penalty='l2', C=1.0, fit_intercept=True, solver='lbfgs', max_iter=100,random_state=0),
        RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=6, min_samples_split=2),
        MLPClassifier(hidden_layer_sizes=(1000,), activation="logistic"),
        AdaBoostClassifier(n_estimators=50, learning_rate=1.0, random_state=0)
    ]

    train_classifiers(classifiers, X_train, y_train, X_test, y_test)


if __name__ == "__main__":
    main()

Classification Results:
            Classifier  Accuracy       F1   Recall      AUC
    LogisticRegression  0.949495 0.897959 0.916667 0.938333
RandomForestClassifier  0.959596 0.916667 0.956522 0.958524
         MLPClassifier  0.954545 0.901099 1.000000 0.971338
    AdaBoostClassifier  0.964646 0.927835 0.957447 0.962167
