# test_analysis
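Analyzes the results obtained on the test set: for every model, the per-run accuracy, top-3 accuracy, top-5 accuracy and balanced accuracy are computed, summarized as mean and standard deviation, and written to a CSV file.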

In [9]:
import pandas as pd
import numpy as np
import sklearn.metrics as mt
import os

In [10]:
# Settings to adapt per experiment.

# Folder that is scanned for "*predictions.csv" / "*confusion.csv" files.
source_dir = "/home/vincent/Documenten/BachelorsProject/ft_artist/"
# File the summary statistics table is written to.
target_csv = "/home/vincent/Documenten/BachelorsProject/GitHub_Repo/results/ft_artist/test-stats.csv"

# Model names are matched as substrings of the result file names.
# The "vit" flag presumably marks vision-transformer architectures
# (NOTE(review): the flag is not read anywhere in this notebook - confirm).
_vit_flagged = ("vit_t_16", "swin_t", "beit_b_16", "deit_t_16")
_not_vit_flagged = ("vgg19", "resnet50", "efficientnetv2_t", "convnext_t")

# Each model gets its own dict because later cells add per-model entries.
models = {
    **{name: {"vit": True} for name in _vit_flagged},
    **{name: {"vit": False} for name in _not_vit_flagged},
}

In [11]:
def getArrays(csv_predictions: str, csv_confusion: str):
    """Load one run's result files as numpy arrays usable with scikit-learn.

    Args:
        csv_predictions: Path to a CSV with an "actual_idx" column and one
            column per name found in the confusion CSV header.
        csv_confusion: Path to a CSV whose first column is skipped and whose
            remaining column names select the score columns.

    Returns:
        A tuple ``(true_y, pred_y, conf)``:
        the "actual_idx" values, the selected score columns as a 2-D array,
        and the transposed confusion values.
    """
    predictions = pd.read_csv(csv_predictions)
    confusion = pd.read_csv(csv_confusion)

    # Every header of the confusion file except the first; the same names
    # pick the score columns out of the predictions file.
    class_columns = confusion.columns[1:]

    labels = predictions["actual_idx"].to_numpy()
    scores = predictions[class_columns].to_numpy()

    # Transposed so that (per the original author's note) rows give the
    # actual class and columns the predicted class.
    matrix = confusion[class_columns].to_numpy().T

    return labels, scores, matrix

In [12]:
# Give every model fresh, empty lists of result-file paths.
for info in models.values():
    info["preds"] = []
    info["confs"] = []

# Which file-name fragment feeds which per-model list.
_file_kinds = (("predictions.csv", "preds"), ("confusion.csv", "confs"))

for entry in os.scandir(source_dir):
    if not entry.is_file():
        continue
    for fragment, key in _file_kinds:
        if fragment not in entry.name:
            continue
        # The file belongs to the first model (in dict order) whose name
        # occurs in the file name; files matching no model are ignored.
        owner = next((name for name in models if name in entry.name), None)
        if owner is not None:
            models[owner][key].append(entry.path)

# Sort both lists so they can later be paired by position.
for info in models.values():
    info["preds"].sort()
    info["confs"].sort()

In [13]:
# For every model, score each run and store the mean / standard deviation of
# the scores in the model's dict.
for model in models:
    # One entry per run (one predictions/confusion file pair).
    acc = []   # plain accuracy
    acc3 = []  # top-3 accuracy
    acc5 = []  # top-5 accuracy
    accB = []  # balanced accuracy

    # Files are paired by position in the two sorted lists; zip stops at the
    # shorter list, so unpaired files are not scored.
    for pred_path, conf_path in zip(models[model]["preds"], models[model]["confs"]):
        # The confusion matrix is not needed here; the confusion file only
        # supplies the score column names inside getArrays.
        true_y, pred_y, _ = getArrays(pred_path, conf_path)

        # Hard prediction = position of the highest score in each row.
        pred_y_max = np.argmax(pred_y, axis=1)

        # Pass the class set explicitly. Otherwise scikit-learn infers it from
        # true_y and raises when a class is missing from this run's test set.
        # Column position is used as class id, as the argmax comparison above
        # already assumes.
        class_ids = np.arange(pred_y.shape[1])

        acc.append(mt.accuracy_score(true_y, pred_y_max))
        acc3.append(mt.top_k_accuracy_score(true_y, pred_y, k=3, labels=class_ids))
        acc5.append(mt.top_k_accuracy_score(true_y, pred_y, k=5, labels=class_ids))
        accB.append(mt.balanced_accuracy_score(true_y, pred_y_max))

    # np.std uses its default ddof=0 (population standard deviation).
    models[model]["acc_mean"] = np.mean(acc)
    models[model]["acc_std"] = np.std(acc)

    models[model]["acc3_mean"] = np.mean(acc3)
    models[model]["acc3_std"] = np.std(acc3)

    models[model]["acc5_mean"] = np.mean(acc5)
    models[model]["acc5_std"] = np.std(acc5)

    models[model]["accB_mean"] = np.mean(accB)
    models[model]["accB_std"] = np.std(accB)

    # The file lists are no longer needed once the statistics are stored.
    # Removing them means this cell needs the file-collection cell to be run
    # again before it can be re-executed.
    models[model].pop("preds")
    models[model].pop("confs")

In [14]:
# Collect one row of summary statistics per model.
table = []

for model in models:
    stats = models[model]
    table.append([
        model,
        stats["acc_mean"],
        stats["acc_std"],
        stats["acc3_mean"],
        stats["acc3_std"],
        stats["acc5_mean"],
        stats["acc5_std"],
        stats["accB_mean"],
        stats["accB_std"],
    ])

# Build the frame once, after all rows are collected. Doing this inside the
# loop rebuilt it on every iteration and left `df` undefined when `models`
# was empty.
df = pd.DataFrame(table, columns=[
    "model",
    "accuracy_mean",
    "accuracy_std",
    "top3_accuracy_mean",
    "top3_accuracy_std",
    "top5_accuracy_mean",
    "top5_accuracy_std",
    "balanced_accuracy_mean",
    "balanced_accuracy_std",
])

In [15]:
# Show the summary table (one row per model) as the cell output.
df

Unnamed: 0,model,accuracy_mean,accuracy_std,top3_accuracy_mean,top3_accuracy_std,top5_accuracy_mean,top5_accuracy_std,balanced_accuracy_mean,balanced_accuracy_std
0,vit_t_16,0.926537,0.002509,0.983208,0.002579,0.990705,0.003339,0.906251,0.00325
1,swin_t,0.950225,0.004176,0.988006,0.001642,0.990705,0.001748,0.939903,0.007396
2,beit_b_16,0.911844,0.013457,0.976912,0.003863,0.985607,0.00489,0.887448,0.015311
3,deit_t_16,0.925937,0.007502,0.975712,0.003721,0.984708,0.002203,0.906793,0.01011
4,vgg19,0.915442,0.017525,0.97961,0.005414,0.989205,0.002579,0.896158,0.021996
5,resnet50,0.945427,0.004981,0.986507,0.00212,0.992504,0.001896,0.929697,0.007553
6,efficientnetv2_t,0.929235,0.006798,0.98021,0.004779,0.986507,0.001896,0.912441,0.006618
7,convnext_t,0.946927,0.005159,0.984408,0.001529,0.989805,0.003058,0.934277,0.006667


In [16]:
# Create the destination folder if needed, so the save cannot fail on a
# missing directory after all metrics have been computed.
target_dir = os.path.dirname(target_csv)
if target_dir:  # empty when target_csv is a bare file name
    os.makedirs(target_dir, exist_ok=True)

# Write the summary table without the positional index column.
df.to_csv(target_csv, index=False)