# test_analysis
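Analyzes the results obtained on the test set: for each model, it computes the mean and standard deviation (across runs) of the top-1, top-3, top-5 and balanced accuracy, and exports the summary table to a CSV file.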

In [3]:
import pandas as pd
import numpy as np
import sklearn.metrics as mt
import os

In [1]:
# Settings to adjust per experiment:

# Directory scanned for the per-run "*predictions.csv" / "*confusion.csv" files.
source_dir = "/home/vincent/Documenten/BachelorsProject/ots_artist/"
# File the final summary table is written to.
target_csv = "/home/vincent/Documenten/BachelorsProject/GitHub_Repo/results/ots_artist/test-stats.csv"

# Model name -> per-model record. Each model gets its own dict because later
# cells store file lists and metrics in it. The "vit" flag presumably marks
# transformer-based architectures (as opposed to convolutional ones) — it is
# not read by any cell visible in this notebook.
models = {
    name: {"vit": is_vit}
    for name, is_vit in (
        ("vit_b_16", True),
        ("swin_b", True),
        ("beit_b_16", True),
        ("deit_b_16", True),
        ("vgg19", False),
        ("resnet50", False),
        ("efficientnetv2_m", False),
        ("convnext_b", False),
    )
}

In [2]:
def getArrays(csv_predictions: str, csv_confusion: str):
    """Load one run's prediction and confusion CSVs as numpy arrays.

    Args:
        csv_predictions: Path to the predictions CSV. It must contain an
            "actual_idx" column plus one column for every column name of the
            confusion CSV (its first column excluded).
        csv_confusion: Path to the confusion CSV. Its first column is skipped;
            the remaining column names are used to select the prediction
            columns.

    Returns:
        A tuple (true_y, pred_y, conf):
            true_y: values of the "actual_idx" column.
            pred_y: 2-D array holding the selected prediction columns, one
                row per row of the predictions CSV.
            conf: the confusion values (first column dropped), transposed.
    """
    predictions = pd.read_csv(csv_predictions)
    confusion = pd.read_csv(csv_confusion)

    # The confusion header (minus its first column) doubles as the list of
    # columns to pull out of the predictions file.
    class_columns = confusion.columns[1:]

    true_y = predictions["actual_idx"].to_numpy()
    pred_y = predictions.loc[:, class_columns].to_numpy()

    # Transposed so that, per the original author's note, rows give the
    # actual class and columns the predicted class.
    conf = confusion.loc[:, class_columns].to_numpy().T

    return true_y, pred_y, conf

In [4]:
# Start every model with empty path lists (also resets them on a re-run).
for record in models.values():
    record["preds"] = []
    record["confs"] = []

# Assign each result file in source_dir to the first model whose name occurs
# in the file name. A file is tested against both suffixes independently.
for entry in os.scandir(source_dir):
    if not entry.is_file():
        continue
    for suffix, key in (("predictions.csv", "preds"), ("confusion.csv", "confs")):
        if suffix not in entry.name:
            continue
        owner = next((name for name in models if name in entry.name), None)
        if owner is not None:
            models[owner][key].append(entry.path)

# Sort both lists so that the i-th predictions file lines up with the i-th
# confusion file when they are paired later (this relies on the two kinds of
# file sharing a naming scheme — TODO confirm).
for record in models.values():
    record["preds"].sort()
    record["confs"].sort()

In [5]:
# For every model: score each run, then store mean and standard deviation of
# each metric across runs in the model's record.
for record in models.values():
    top1_scores = []
    top3_scores = []
    top5_scores = []
    balanced_scores = []

    # zip pairs the i-th prediction file with the i-th confusion file; if the
    # two lists differ in length, the surplus files are ignored.
    for pred_path, conf_path in zip(record["preds"], record["confs"]):
        # The confusion matrix is returned as well but not needed here.
        true_y, pred_y, _confusion = getArrays(pred_path, conf_path)

        # Column index of the highest score per row; compared directly with
        # true_y, so actual_idx is assumed to index the score columns in
        # order — TODO confirm.
        top1_labels = np.argmax(pred_y, axis=1)

        top1_scores.append(mt.accuracy_score(true_y, top1_labels))
        top3_scores.append(mt.top_k_accuracy_score(true_y, pred_y, k=3))
        top5_scores.append(mt.top_k_accuracy_score(true_y, pred_y, k=5))
        balanced_scores.append(mt.balanced_accuracy_score(true_y, top1_labels))

    # np.std is called with its defaults, i.e. the population standard
    # deviation (ddof=0).
    record.update({
        "acc_mean": np.mean(top1_scores),
        "acc_std": np.std(top1_scores),
        "acc3_mean": np.mean(top3_scores),
        "acc3_std": np.std(top3_scores),
        "acc5_mean": np.mean(top5_scores),
        "acc5_std": np.std(top5_scores),
        "accB_mean": np.mean(balanced_scores),
        "accB_std": np.std(balanced_scores),
    })

    # The file lists are no longer needed once the metrics are stored.
    # NOTE: this makes the cell single-shot — re-running it without first
    # re-running the file discovery cell raises KeyError.
    del record["preds"]
    del record["confs"]

In [6]:
# One row per model: the model name followed by mean/std of each metric, in
# the same order as the column names given to the DataFrame below.
table = [
    [
        model,
        models[model]["acc_mean"],
        models[model]["acc_std"],
        models[model]["acc3_mean"],
        models[model]["acc3_std"],
        models[model]["acc5_mean"],
        models[model]["acc5_std"],
        models[model]["accB_mean"],
        models[model]["accB_std"],
    ]
    for model in models
]

# Build the frame once, after all rows have been collected. Constructing it
# inside the row loop rebuilt the whole frame on every iteration and left
# `df` undefined when `models` was empty; here an empty `models` yields an
# empty frame that still has the expected columns.
df = pd.DataFrame(table, columns=[
    "model",
    "accuracy_mean",
    "accuracy_std",
    "top3_accuracy_mean",
    "top3_accuracy_std",
    "top5_accuracy_mean",
    "top5_accuracy_std",
    "balanced_accuracy_mean",
    "balanced_accuracy_std",
])

In [7]:
# Show the summary table (one row per model) as this cell's output.
df

Unnamed: 0,model,accuracy_mean,accuracy_std,top3_accuracy_mean,top3_accuracy_std,top5_accuracy_mean,top5_accuracy_std,balanced_accuracy_mean,balanced_accuracy_std
0,vit_b_16,0.848876,0.004587,0.962519,0.005364,0.97901,0.006501,0.814171,0.004213
1,swin_b,0.904048,0.006501,0.98051,0.002509,0.989505,0.001896,0.886403,0.007795
2,beit_b_16,0.797001,0.006942,0.928336,0.005228,0.95952,0.004741,0.753545,0.010924
3,deit_b_16,0.881259,0.00755,0.969115,0.006332,0.985607,0.004198,0.856185,0.008697
4,vgg19,0.82009,0.006569,0.943328,0.003202,0.972114,0.003863,0.78102,0.007697
5,resnet50,0.877061,0.010559,0.966117,0.004798,0.97931,0.003598,0.851237,0.013435
6,efficientnetv2_m,0.786207,0.010669,0.933433,0.003229,0.961619,0.005159,0.739243,0.009623
7,convnext_b,0.901349,0.009358,0.976012,0.005106,0.988606,0.002034,0.878425,0.010734


In [8]:
# Make sure the destination directory exists; otherwise to_csv fails with an
# OSError on a fresh checkout where the results folder has not been created.
# The guard skips makedirs when target_csv has no directory component,
# because os.makedirs("") raises.
target_dir = os.path.dirname(target_csv)
if target_dir:
    os.makedirs(target_dir, exist_ok=True)

# Write the summary table without the numeric row index.
df.to_csv(target_csv, index=False)