In [1]:
# Run the evaluation of all models on all datasets through the project's Evaluate module.
# Per the original note, this stores the confusion matrix of every dataset/model pair in
# the results folder — TODO confirm against Evaluate.
# NOTE(review): `evaulate_models` looks like a typo of "evaluate_models"; presumably it
# matches the definition in Evaluate, so rename it there first before changing this call.

import Evaluate
Evaluate.evaulate_models()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_x["Sex"] = data_x["Sex"].map({'I': 0, 'F': 1, 'M': 2})


AttributeError: module 'ml_dtypes' has no attribute 'float8_e3m4'




In [2]:
from Helper import load_results
from Helper import is_evaluated
from Helper import calculate_accuracy

def get_model_accuracies(models, datasets):
    """Collect the accuracy of every evaluated (model, dataset) pair.

    Pairs for which ``is_evaluated(model, dataset)`` is falsy are skipped, so
    an inner dict may hold fewer entries than ``datasets``.

    Args:
        models: iterable of model identifiers.
        datasets: re-iterable collection of dataset identifiers.

    Returns:
        dict mapping each model to ``{dataset: accuracy}``, where accuracy is
        ``calculate_accuracy(load_results(model, dataset))``.
    """
    return {
        model_name: {
            dataset_name: calculate_accuracy(load_results(model_name, dataset_name))
            for dataset_name in datasets
            if is_evaluated(model_name, dataset_name)
        }
        for model_name in models
    }

def _macro_f1(cm):
    """Return the macro-averaged F1 score of a square confusion matrix.

    For class ``i`` the diagonal entry ``cm[i][i]`` is the true-positive count,
    the rest of column ``i`` counts as false positives and the rest of row
    ``i`` as false negatives (i.e. rows are treated as true labels and columns
    as predictions — confirm against how the matrices are produced).

    Raises:
        ZeroDivisionError: if ``cm`` has no rows.
    """
    # Smoothing term added to every denominator so a class with no predictions
    # or no samples yields 0 instead of dividing by zero. It also pulls every
    # score slightly below its exact value (a perfect class scores just under 1).
    eps = 0.0001
    num_classes = len(cm)
    f1_per_class = []
    for i in range(num_classes):
        tp = cm[i][i]
        fp = sum(cm[j][i] for j in range(num_classes)) - tp
        fn = sum(cm[i]) - tp
        precision = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        f1_per_class.append(2 * precision * recall / (precision + recall + eps))
    return sum(f1_per_class) / num_classes


def get_model_scores(models, datasets):
    """Compute the macro F1 score of every (model, dataset) pair.

    Unlike ``get_model_accuracies`` this does not consult ``is_evaluated``:
    ``load_results`` is called for every pair, so every requested dataset gets
    an entry (or the call fails).

    Args:
        models: iterable of model identifiers.
        datasets: re-iterable collection of dataset identifiers.

    Returns:
        dict mapping each model to ``{dataset: macro_f1}``.
    """
    scores = {}
    for model in models:
        scores[model] = {}
        for dataset in datasets:
            # Only the confusion matrix is needed here; the accuracy that used
            # to be computed alongside it was never used.
            scores[model][dataset] = _macro_f1(load_results(model, dataset))
    return scores

def display_model_accuracies(accuracies):
    """Print one block per model: a header, one line per dataset, then a blank line.

    Args:
        accuracies: mapping of model -> {dataset -> accuracy}, e.g. the value
            returned by ``get_model_accuracies``.
    """
    for model_name, per_dataset in accuracies.items():
        report_lines = [f"Model: {model_name}"]
        report_lines.extend(
            f"Dataset: {dataset_name}, Accuracy: {value}"
            for dataset_name, value in per_dataset.items()
        )
        # The trailing empty entry reproduces the blank separator line.
        report_lines.append("")
        print("\n".join(report_lines))

def _confusion_matrix_metrics(cm):
    """Return per-class ``(precision, recall, f1)`` lists for a square confusion matrix.

    For class ``i`` the diagonal entry ``cm[i][i]`` is the true-positive count,
    the rest of column ``i`` counts as false positives and the rest of row
    ``i`` as false negatives (i.e. rows are treated as true labels and columns
    as predictions — confirm against how the matrices are produced).
    """
    # Smoothing term added to every denominator so an empty row/column yields 0
    # instead of dividing by zero; it also biases every score slightly downward.
    eps = 0.0001
    num_classes = len(cm)
    precision, recall, f1 = [], [], []
    for i in range(num_classes):
        tp = cm[i][i]
        fp = sum(cm[j][i] for j in range(num_classes)) - tp
        fn = sum(cm[i]) - tp
        p = tp / (tp + fp + eps)
        r = tp / (tp + fn + eps)
        precision.append(p)
        recall.append(r)
        f1.append(2 * p * r / (p + r + eps))
    return precision, recall, f1


def display_model_accuracies_and_scores(models, datasets):
    """Print a per-model table of accuracy and macro precision/recall/F1.

    For every model, one row per dataset is printed followed by an "Average"
    row holding the unweighted mean over ``datasets``. When ``datasets`` is
    empty the average is undefined, so the "Average" row (and the "average"
    entry of the returned dict) is omitted instead of dividing by zero.

    Args:
        models: iterable of model identifiers.
        datasets: sequence of dataset identifiers (must support ``len``).

    Returns:
        dict mapping each model to ``{dataset: metrics}`` plus, when there is
        at least one dataset, an ``"average"`` entry. Earlier versions
        returned nothing even though callers assigned the result.

    Raises:
        ZeroDivisionError: if a loaded confusion matrix has no rows.
    """
    from tabulate import tabulate

    # Keys averaged across datasets, in the order of the "average" entry.
    averaged_keys = (
        "accuracy", "macro_precision", "macro_recall",
        "macro_f1", "classes", "data_count",
    )
    n_datasets = len(datasets)

    scores = {}
    for model in models:
        scores[model] = {}
        for dataset in datasets:
            cm = load_results(model, dataset)
            num_classes = len(cm)
            precision, recall, f1 = _confusion_matrix_metrics(cm)
            scores[model][dataset] = {
                "accuracy": calculate_accuracy(cm),
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "macro_precision": sum(precision) / num_classes,
                "macro_recall": sum(recall) / num_classes,
                "macro_f1": sum(f1) / num_classes,
                "classes": num_classes,
                # Total number of samples counted in the confusion matrix.
                "data_count": sum(sum(row) for row in cm),
            }
        if n_datasets:
            scores[model]["average"] = {
                key: sum(scores[model][dataset][key] for dataset in datasets) / n_datasets
                for key in averaged_keys
            }

    headers = ["Dataset", "Classes", "Test Data", "Accuracy", "Macro Precision", "Macro Recall", "Macro F1"]
    # Column order of the table, matching `headers` after the first column.
    columns = ("classes", "data_count", "accuracy", "macro_precision", "macro_recall", "macro_f1")
    for model in scores:
        print(f"Model: {model}")
        table = [
            [dataset] + [scores[model][dataset][column] for column in columns]
            for dataset in datasets
        ]
        if n_datasets:
            table.append(["Average"] + [scores[model]["average"][column] for column in columns])
        print(tabulate(table, headers, tablefmt="grid"))
        print()

    return scores

    

In [3]:
import glob
import os


def _mean(values):
    """Return the arithmetic mean of ``values``, or NaN when there are none.

    Returning NaN (instead of raising ZeroDivisionError) lets the summary keep
    printing when a model has no evaluated datasets in some group.
    """
    values = list(values)
    return sum(values) / len(values) if values else float("nan")


# Show the score tables of the SOM and GMM models on the cifar10 and mnist datasets.
display_model_accuracies_and_scores(["som"], ["cifar10", "mnist"])
display_model_accuracies_and_scores(["gmm"], ["cifar10", "mnist"])

# Every entry of the results/rf folder names one dataset. The path is built with
# os.path so the lookup works with any path separator, the text from the first
# "." on is dropped (e.g. the .npy extension), and the names are sorted because
# glob returns entries in an unspecified order.
datasets = sorted(
    os.path.basename(path).split(".")[0]
    for path in glob.glob(os.path.join("results", "rf", "*"))
)

image_datasets = ["cifar10", "cifar100", "mnist", "fashion_mnist", "malaria", "mnist_corrupted", "rock_paper_scissors", "skin_segmentation", "tf_flowers"]
tabular_datasets = [dataset for dataset in datasets if dataset not in image_datasets]

# Get the accuracies of the random-forest model and show its full score table
models = ["rf"]
accuracies = get_model_accuracies(models, datasets)
display_model_accuracies_and_scores(models, datasets)

# Compare all models: supervised first, then unsupervised
supervised_models = ["rf", "dt", "ann", "svm"]
unsupervised_models = ["knn", "som", "gmm"]
models = supervised_models + unsupervised_models
accuracies = get_model_accuracies(models, datasets)
# NOTE(review): get_model_accuracies skips pairs that are not evaluated, while
# get_model_scores loads results for every pair — confirm all pairs exist.
scores = get_model_scores(models, datasets)

# Print the average accuracy of each model
average_accuracies = {model: _mean(accuracies[model].values()) for model in accuracies}
# The original, misspelled name is kept as an alias in case later cells use it.
avearage_accuracies = average_accuracies
for model, accuracy in average_accuracies.items():
    print(f"Model: {model}, Average Accuracy: {accuracy}")

# Print the average accuracy of each model on image vs tabular datasets
image_accuracies = get_model_accuracies(models, image_datasets)
tabular_accuracies = get_model_accuracies(models, tabular_datasets)
average_image_accuracies = {model: _mean(image_accuracies[model].values()) for model in image_accuracies}
average_tabular_accuracies = {model: _mean(tabular_accuracies[model].values()) for model in tabular_accuracies}
for model in average_accuracies:
    print(f"Model: {model}, Average Image Accuracy: {average_image_accuracies[model]}, Average Tabular Accuracy: {average_tabular_accuracies[model]}")

# Print the F1 scores of all the models. Only datasets that actually have a
# score are averaged, so a hard-coded image dataset without results no longer
# raises KeyError (the accuracy averages above already skip such datasets).
average_f1_scores = {
    model: _mean(scores[model][dataset] for dataset in datasets if dataset in scores[model])
    for model in scores
}
for model, f1 in average_f1_scores.items():
    print(f"Model: {model}, Average F1 Score: {f1}")
    print(f"Average Image Score: {_mean(scores[model][dataset] for dataset in image_datasets if dataset in scores[model])}")
    print(f"Average Tabular Score: {_mean(scores[model][dataset] for dataset in tabular_datasets if dataset in scores[model])}")

Model: som
+-----------+-----------+-------------+------------+-------------------+----------------+------------+
| Dataset   |   Classes |   Test Data |   Accuracy |   Macro Precision |   Macro Recall |   Macro F1 |
| cifar10   |        10 |        2000 |    0.321   |          0.319399 |       0.317251 |   0.309427 |
+-----------+-----------+-------------+------------+-------------------+----------------+------------+
| mnist     |        10 |        2000 |    0.8255  |          0.830943 |       0.826743 |   0.826712 |
+-----------+-----------+-------------+------------+-------------------+----------------+------------+
| Average   |        10 |        2000 |    0.57325 |          0.575171 |       0.571997 |   0.568069 |
+-----------+-----------+-------------+------------+-------------------+----------------+------------+

Model: gmm
+-----------+-----------+-------------+------------+-------------------+----------------+------------+
| Dataset   |   Classes |   Test Data |   Accuracy