# test_analysis
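Analyzes the results obtained on the test set: for each model, the accuracy, top-3 accuracy, top-5 accuracy and balanced accuracy are computed per run, summarized as mean and standard deviation over the runs, and written to a CSV file.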

In [9]:
import pandas as pd
import numpy as np
import sklearn.metrics as mt
import os

In [10]:
# Settings to adjust per experiment.

# Directory that is scanned for the per-run "*predictions.csv" / "*confusion.csv" files.
source_dir = "/home/vincent/Documenten/BachelorsProject/ft_artist/"
# File the summary table is written to.
target_csv = "/home/vincent/Documenten/BachelorsProject/GitHub_Repo/results/ft_artist/test-stats.csv"

# One entry per evaluated model, keyed by the name that must occur in its
# result file names. The "vit" flag presumably marks transformer-based
# architectures (True) versus convolutional ones (False) -- TODO confirm.
models = {
    name: {"vit": is_vit}
    for is_vit, names in (
        (True, ("vit_b_16", "swin_b", "beit_b_16", "deit_b_16")),
        (False, ("vgg19", "resnet50", "efficientnetv2_m", "convnext_b")),
    )
    for name in names
}

In [11]:
def getArrays(csv_predictions: str, csv_confusion: str):
    """Load one run's prediction and confusion CSVs as numpy arrays.

    The class columns are every column of the confusion CSV except its first
    one; the columns with those same names are selected from the predictions
    CSV, in that order.

    Args:
        csv_predictions: Path to a CSV with an "actual_idx" column and one
            column per class column of the confusion CSV.
        csv_confusion: Path to the confusion-matrix CSV.

    Returns:
        A tuple ``(true_y, pred_y, conf)``: the "actual_idx" column, the
        per-class prediction columns as a 2-D array, and the transposed
        confusion matrix.
    """
    predictions = pd.read_csv(csv_predictions)
    confusion = pd.read_csv(csv_confusion)

    # Everything after the first column of the confusion CSV is a class column.
    class_columns = confusion.columns[1:]

    # Transposed so that, per the original author's note, rows give the
    # actual class and columns the predicted class.
    conf = confusion.loc[:, class_columns].to_numpy().transpose()

    pred_y = predictions.loc[:, class_columns].to_numpy()
    true_y = predictions["actual_idx"].to_numpy()

    return true_y, pred_y, conf

In [12]:
# Give every model fresh, empty lists of result-file paths.
for name in models:
    models[name].update(preds=[], confs=[])

# File-name marker -> per-model list that collects files carrying that marker.
markers = (("predictions.csv", "preds"), ("confusion.csv", "confs"))

for entry in os.scandir(source_dir):
    if not entry.is_file():
        continue
    for marker, key in markers:
        if marker not in entry.name:
            continue
        # A file goes to the first model (in dict order) whose name occurs in
        # the file name; files matching no model are ignored.
        owner = next((name for name in models if name in entry.name), None)
        if owner is not None:
            models[owner][key].append(entry.path)

# Sort both lists of every model by path.
for name in models:
    for key in ("preds", "confs"):
        models[name][key].sort()

In [13]:
# For every model: compute the test metrics of each run, then store the mean
# and standard deviation over that model's runs in `models[model]`.
for model in models:
    acc = []   # top-1 accuracy per run
    acc3 = []  # top-3 accuracy per run
    acc5 = []  # top-5 accuracy per run
    accB = []  # balanced accuracy per run

    # The two sorted path lists are paired positionally; zip() stops at the
    # shorter list, so surplus files of either kind are ignored.
    for pred_csv, conf_csv in zip(models[model]["preds"], models[model]["confs"]):
        true_y, pred_y, conf = getArrays(pred_csv, conf_csv)

        # Class labels are the column indices of the score matrix -- the same
        # convention the argmax-based metrics below rely on. Passing them
        # explicitly keeps top-k accuracy working when a run's true labels do
        # not contain every class.
        labels = np.arange(pred_y.shape[1])
        pred_y_max = np.argmax(pred_y, axis=1)

        acc.append(mt.accuracy_score(true_y, pred_y_max))
        acc3.append(mt.top_k_accuracy_score(true_y, pred_y, k=3, labels=labels))
        acc5.append(mt.top_k_accuracy_score(true_y, pred_y, k=5, labels=labels))
        accB.append(mt.balanced_accuracy_score(true_y, pred_y_max))

    # np.std is called with its default ddof=0 (population standard deviation).
    # A model without any paired result files ends up with NaN statistics.
    models[model]["acc_mean"] = np.mean(acc)
    models[model]["acc_std"] = np.std(acc)

    models[model]["acc3_mean"] = np.mean(acc3)
    models[model]["acc3_std"] = np.std(acc3)

    models[model]["acc5_mean"] = np.mean(acc5)
    models[model]["acc5_std"] = np.std(acc5)

    models[model]["accB_mean"] = np.mean(accB)
    models[model]["accB_std"] = np.std(accB)

    # The file lists are no longer needed once the statistics are stored.
    models[model].pop("preds")
    models[model].pop("confs")

In [14]:
# Collect one row per model: its name followed by the mean and standard
# deviation of each metric stored in `models`.
table = []

for model in models:
    table.append([
        model,
        models[model]["acc_mean"],
        models[model]["acc_std"],
        models[model]["acc3_mean"],
        models[model]["acc3_std"],
        models[model]["acc5_mean"],
        models[model]["acc5_std"],
        models[model]["accB_mean"],
        models[model]["accB_std"]
    ])

# Build the DataFrame once, after all rows are collected. Doing this inside
# the loop rebuilt it on every iteration and left `df` undefined when
# `models` was empty.
df = pd.DataFrame(table, columns=[
    "model",
    "accuracy_mean",
    "accuracy_std",
    "top3_accuracy_mean",
    "top3_accuracy_std",
    "top5_accuracy_mean",
    "top5_accuracy_std",
    "balanced_accuracy_mean",
    "balanced_accuracy_std",
])

In [15]:
# Show the summary table as this cell's output.
df

Unnamed: 0,model,accuracy_mean,accuracy_std,top3_accuracy_mean,top3_accuracy_std,top5_accuracy_mean,top5_accuracy_std,balanced_accuracy_mean,balanced_accuracy_std
0,vit_b_16,0.92054,0.004447,0.977511,0.004547,0.986807,0.004386,0.897748,0.003825
1,swin_b,0.950525,0.004741,0.987106,0.002244,0.992504,0.001642,0.939386,0.008104
2,beit_b_16,0.912744,0.011315,0.978411,0.005967,0.986807,0.005228,0.888334,0.016307
3,deit_b_16,0.933733,0.011473,0.985607,0.003623,0.990705,0.002203,0.916696,0.015477
4,vgg19,0.922039,0.004927,0.977811,0.003471,0.986507,0.001896,0.901841,0.007167
5,resnet50,0.947226,0.00743,0.984108,0.003745,0.989205,0.003471,0.934111,0.010504
6,efficientnetv2_m,0.926537,0.005364,0.978411,0.00489,0.987706,0.003471,0.908445,0.005054
7,convnext_b,0.946027,0.005447,0.988006,0.004133,0.991604,0.002244,0.931281,0.006115


In [16]:
# Write the summary table to target_csv; index=False leaves the DataFrame's
# row index out of the file.
df.to_csv(target_csv, index=False)