In [1]:
import json
from glob import glob
from pathlib import Path

import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import rand_score, adjusted_rand_score

In [2]:
def evaluate_cm(
    tp: int,
    fp: int,
    fn: int,
    tn: int,
) -> dict:
    """Evaluate the performance of a model on a dataset using confusion matrix.
    
    Args:
        tp (int): true positives
        fp (int): false positives
        fn (int): false negatives
        tn (int): true negatives
        
    Returns:
        dict: dictionary containing the following metrics:
            - accuracy
            - precision
            - recall
            - f1
    """
    res = {}
    res['accuracy'] = (tp + tn) / (tp + fp + fn + tn)
    res['precision'] = tp / (tp + fp)
    res['recall'] = tp / (tp + fn)
    res['f1'] = (2 * tp) / (2 * tp + fp + fn)
    return res

In [3]:
def evaluate(preds: list, labels: list):
    """Evaluate the performance of a model on a dataset.
    
    Args:
        preds (list): list of predicted values
        labels (list): list of true values
    
    Returns:
        dict: dictionary containing the following metrics:
            - accuracy
            - precision
            - recall
            - f1
    """
    tn, fp, fn, tp = confusion_matrix(preds, labels).ravel()
    return evaluate_cm(tp, fp, fn, tn)

In [4]:
def evaluate_clusters(preds: list, labels: list):
    """Evaluate the performance of clustering algorithms.
    
    Args:
        preds (list): list of predicted values
        labels (list): list of true values
    
    Returns:
        dict: dictionary containing the following metrics:
            - rand_score
            - adjusted_rand_score
    """
    res = {}
    res['rand_score'] = rand_score(preds, labels)
    res['adjusted_rand_score'] = adjusted_rand_score(preds, labels)
    return res

In [5]:
def average_results(results: list[dict]) -> dict:
    """Calculate the average of a list of results from evaluate().
    
    Args:
        results (list[dict]): list of results from evaluate()
        
    Returns:
        dict: dictionary containing the average results
    """
    res = {}
    for key in results[0]:
        # calculate average
        res[key] = sum([r[key] for r in results]) / len(results)
        
        # round to 3 decimals
        res[key] = round(res[key], 2)
    return res

In [6]:
cnn_exps = [
    'resnet18_not_pt',
    'resnet18_pt',
    'resnet50_not_pt',
    'resnet50_pt',
    'resnet101_not_pt',
    'resnet101_pt',
    'vgg11_not_pt',
    'vgg13_not_pt',
    'vgg16_not_pt',
    'vgg19_not_pt',
]
drp_exps = [
    'drp_multi_not_pt',
    'drp_multi_pt',
    'drp_single_not_pt',
    'drp_single_pt',
    'drp_single_aux_not_pt',
    'drp_single_aux_pt',
]
kviews_exps = [
    'kviews_15',
    'kviews_17',
    'kviews_19',
    'kviews_25',
]
kmeans_exp = [
    'kmeans',
]
exps = kmeans_exp + kviews_exps + cnn_exps + drp_exps

results_df = pd.DataFrame(
    data=None,
    columns=['model', 'accuracy', 'precision', 'recall', 'f1'],
)
cluster_df = pd.DataFrame(
    data=None,
    columns=['model', 'rand_score', 'adjusted_rand_score'],
)

base_path = Path('./results')
for exp in exps:
    res = []
    cluster_res = []
    path = base_path / exp
    if exp in cnn_exps:
        # aggregate results from results.csv files
        # results.csv already has tn,fp,fn,tp
        data = pd.read_csv(Path(path) / 'results.csv')
        for row in data.itertuples():
            res.append(evaluate_cm(row.tn, row.fp, row.fn, row.tp))
        
    elif exp in drp_exps:
        # aggregate results from results.json files
        folders = glob(str(path / '*'))
        for folder in folders:
            with open(Path(folder) / 'results.json') as fin:
                data = json.load(fin)
            res.append(evaluate(data['pred_labels'], data['gt_labels']))
                
    elif exp in kviews_exps:
        if exp == 'kviews_17' or exp == 'kviews_19':
            # kviews 17 and 19 has all results in one csv file
            data = pd.read_csv(Path(path) / 'results.csv')
            preds = data['predict'].tolist()
            labels = data['label'].tolist()
            
            # each trial has 300 samples, of 6 trials
            for i in range(6):
                res.append(evaluate(
                    preds[i::6],
                    labels[i::6],
                ))
                cluster_res.append(evaluate_clusters(
                    preds[i::6],
                    labels[i::6],
                ))
                
        else:
            # aggregate results from results.csv files
            folders = glob(str(path / '*'))
            res = []
            for folder in folders:
                data = pd.read_csv(Path(folder) / 'results.csv')
                res.append(evaluate(
                    data['predict'].tolist(),
                    data['label'].tolist(),
                ))
                cluster_res.append(evaluate_clusters(
                    data['predict'].tolist(),
                    data['label'].tolist(),
                ))
        
    elif exp in kmeans_exp:
        # aggregate results from results.csv files
        folders = glob(str(path / '*'))
        res = []
        for folder in folders:
            data = pd.read_csv(Path(folder) / 'results.csv')
            res.append(evaluate(
                data['predict'].tolist(),
                data['label'].tolist(),
            ))
            cluster_res.append(evaluate_clusters(
                data['predict'].tolist(),
                data['label'].tolist(),
            ))

    # average results
    avg_res = None
    if res != []:
        avg_res = average_results(res)
        avg_res['model'] = exp
        results_df = results_df.append(
            pd.Series(
                data=avg_res,
                index=['model', 'accuracy', 'precision', 'recall', 'f1'],
            ),
            ignore_index=True,
        )
        
    if cluster_res != []:
        avg_res = average_results(cluster_res)
        avg_res['model'] = exp
        cluster_df = cluster_df.append(
            pd.Series(
                data=avg_res,
                index=['model', 'rand_score', 'adjusted_rand_score'],
            ),
            ignore_index=True,
        )


  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(


In [7]:
# rename models and metrics for the paper
# rename columns
results_df.rename(
    columns={
        'model': 'Model',
        'accuracy': 'Accuracy',
        'recall': 'Recall',
        'precision': 'Precision',
        'f1': 'F1',
    },
    inplace=True,
)

# rename models
name_map = {
    'resnet18_pt': 'ResNet18 (fine-tuned)',
    'resnet18_not_pt': 'ResNet18',
    'resnet50_pt': 'ResNet50 (fine-tuned)',
    'resnet50_not_pt': 'ResNet50',
    'resnet101_pt': 'ResNet101 (fine-tuned)',
    'resnet101_not_pt': 'ResNet101',
    
    'vgg11_not_pt': 'VGG11',
    'vgg13_not_pt': 'VGG13',
    'vgg16_not_pt': 'VGG16',
    'vgg19_not_pt': 'VGG19',
    
    'drp_multi_not_pt': 'DRP Multi-layer',
    'drp_multi_pt': 'DRP Multi-layer (fine-tuned)',
    'drp_single_not_pt': 'DRP Single-layer',
    'drp_single_pt': 'DRP Single-layer (fine-tuned)',
    'drp_single_aux_not_pt': 'DRP Single-layer Auxiliary',
    'drp_single_aux_pt': 'DRP Single-layer Auxiliary (fine-tuned)',
    
    'kviews_15': 'K-Views (15x15)',
    'kviews_17': 'K-Views (17x17)',
    'kviews_19': 'K-Views (19x19)',
    'kviews_25': 'K-Views (25x25)',
    'kmeans': 'K-Means',
}
results_df.replace(name_map, inplace=True)

In [8]:
# sort by f1
# results_df = results_df.sort_values(by='F1', ascending=False)
results_df.to_csv(base_path / 'results.csv')
results_df.to_latex(base_path / 'results.tex', index=False)
results_df

  results_df.to_latex(base_path / 'results.tex', index=False)


Unnamed: 0,Model,Accuracy,Precision,Recall,F1
0,K-Means,0.47,0.07,0.35,0.11
1,K-Views (15x15),0.56,0.58,0.56,0.57
2,K-Views (17x17),0.62,0.79,0.59,0.68
3,K-Views (19x19),0.62,0.8,0.59,0.68
4,K-Views (25x25),0.52,0.71,0.51,0.59
5,ResNet18,0.8,0.72,0.67,0.69
6,ResNet18 (fine-tuned),0.88,0.87,0.77,0.82
7,ResNet50,0.78,0.59,0.67,0.62
8,ResNet50 (fine-tuned),0.87,0.83,0.78,0.8
9,ResNet101,0.77,0.63,0.62,0.62


In [9]:
cluster_df.to_csv(base_path / 'cluster_results.csv')
cluster_df.to_latex(base_path / 'cluster_results.tex')
cluster_df

  cluster_df.to_latex(base_path / 'cluster_results.tex')


Unnamed: 0,model,rand_score,adjusted_rand_score
0,kmeans,0.5,0.0
1,kviews_15,0.51,0.01
2,kviews_17,0.53,0.06
3,kviews_19,0.53,0.06
4,kviews_25,0.5,-0.0
