In [11]:
import json
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import auc, confusion_matrix, roc_curve
from sklearn.metrics import rand_score, adjusted_rand_score, silhouette_score

In [12]:
def evaluate_cm(
    tp: int,
    fp: int,
    fn: int,
    tn: int,
) -> dict:
    """Evaluate the performance of a model on a dataset using confusion matrix.
    
    Args:
        tp (int): true positives
        fp (int): false positives
        fn (int): false negatives
        tn (int): true negatives
        
    Returns:
        dict: dictionary containing the following metrics:
            - accuracy
            - precision
            - recall
            - f1
    """
    res = {}
    res['accuracy'] = (tp + tn) / (tp + fp + fn + tn)
    res['precision'] = tp / (tp + fp)
    res['recall'] = tp / (tp + fn)
    res['f1'] = (2 * tp) / (2 * tp + fp + fn)
    return res

In [13]:
def evaluate(preds: list, labels: list):
    """Evaluate the performance of a model on a dataset.
    
    Args:
        preds (list): list of predicted values
        labels (list): list of true values
    
    Returns:
        dict: dictionary containing the following metrics:
            - accuracy
            - precision
            - recall
            - f1
    """
    tn, fp, fn, tp = confusion_matrix(preds, labels).ravel()
    res = evaluate_cm(tp, fp, fn, tn)
    
    fpr, tpr, _ = roc_curve(labels, preds)
    res['fpr'] = fpr
    res['tpr'] = tpr
    
    return res

In [14]:
def evaluate_clusters(preds: list, labels: list):
    """Evaluate the performance of clustering algorithms.
    
    Args:
        preds (list): list of predicted values
        labels (list): list of true values
    
    Returns:
        dict: dictionary containing the following metrics:
            - rand_score
            - adjusted_rand_score
    """
    res = {}
    res['rand_score'] = rand_score(preds, labels)
    res['adjusted_rand_score'] = adjusted_rand_score(preds, labels)
    return res

In [15]:
def average_results(results: list[dict]) -> dict:
    """Calculate the average of a list of results from evaluate().
    
    Args:
        results (list[dict]): list of results from evaluate()
        
    Returns:
        dict: dictionary containing the following metrics:
            - accuracy
            - precision
            - recall
            - f1
    """
    res = {}
    for key in results[0]:
        if key in [
            'accuracy',
            'precision',
            'recall', 
            'f1',
            'rand_score',
            'adjusted_rand_score',
        ]:
            # calculate average
            res[key] = sum([r[key] for r in results]) / len(results)
            
            # round to 2 decimals
            res[key] = round(res[key], 2)
        elif key in ['fpr', 'tpr']:
            # calculate average
            res[key] = np.mean([r[key] for r in results], axis=0)
    return res

In [16]:
def make_plot_fpr_tpr(
    fpr: float,
    tpr: float,
    path: str,
):
    """Make a plot of the ROC curve.
    
    Args:
        fpr (list): false positive rate
        tpr (list): true positive rate
        path (str): path to save the plot
    """
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.savefig(path)
    plt.close()
    

In [17]:
def make_plot(results: dict, path: str):
    """Make a plot of the ROC curve.
    
    Args:
        results (list[dict]): dictionary containing the results metrics.
        path: path to save the plot.
    """
    make_plot_fpr_tpr(results['fpr'], results['tpr'], path)

In [18]:
deep_nets = [
    'resnet18_pt',
]
drp_exps = [
    'drp_multi_not_pt',
    'drp_multi_pt',
    'drp_single_not_pt',
    'drp_single_pt',
    'drp_single_aux_not_pt',
    'drp_single_aux_pt',
]
kviews_exps = [
    'kviews_17',
    'kviews_19',
    'kviews_25',
]
kmeans_exp = [
    'kmeans',
]
exps = drp_exps + kviews_exps + kmeans_exp

results_df = pd.DataFrame(
    data=None,
    columns=['model', 'accuracy', 'precision', 'recall', 'f1'],
)
cluster_df = pd.DataFrame(
    data=None,
    columns=['model', 'rand_score', 'adjusted_rand_score'],
)

base_path = Path('./results')
for exp in exps:
    res = []
    cluster_res = []
    path = base_path / exp
    if exp in drp_exps:
        # aggregate results from results.json files
        folders = glob(str(path / '*'))
        for folder in folders:
            with open(Path(folder) / 'results.json') as fin:
                data = json.load(fin)
            res.append(evaluate(data['pred_labels'], data['gt_labels']))
                
    elif exp in kviews_exps:
        if exp == 'kviews_17' or exp == 'kviews_19':
            # kviews 17 and 19 has all results in one csv file
            data = pd.read_csv(Path(path) / 'results.csv')
            preds = data['predict'].tolist()
            labels = data['label'].tolist()
            
            # each trial has 300 samples, of 6 trials
            for i in range(6):
                res.append(evaluate(
                    preds[i::6],
                    labels[i::6],
                ))
                cluster_res.append(evaluate_clusters(
                    preds[i::6],
                    labels[i::6],
                ))
                
        else:
            # aggregate results from results.csv files
            folders = glob(str(path / '*'))
            res = []
            for folder in folders:
                data = pd.read_csv(Path(folder) / 'results.csv')
                res.append(evaluate(
                    data['predict'].tolist(),
                    data['label'].tolist(),
                ))
                cluster_res.append(evaluate_clusters(
                    data['predict'].tolist(),
                    data['label'].tolist(),
                ))
        
    elif exp in kmeans_exp:
        # aggregate results from results.csv files
        folders = glob(str(path / '*'))
        res = []
        for folder in folders:
            data = pd.read_csv(Path(folder) / 'results.csv')
            res.append(evaluate(
                data['predict'].tolist(),
                data['label'].tolist(),
            ))
            cluster_res.append(evaluate_clusters(
                data['predict'].tolist(),
                data['label'].tolist(),
            ))

    # average results
    avg_res = None
    if res != []:
        avg_res = average_results(res)
        avg_res['model'] = exp
        results_df = results_df.append(
            pd.Series(
                data=avg_res,
                index=['model', 'accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
            ),
            ignore_index=True,
        )
        
        # make plots
        plot_path = path.parent / 'roc_plots' / '{0}_roc.png'.format(exp)
        plot_path.parent.mkdir(parents=True, exist_ok=True)
        make_plot(avg_res, str(plot_path))
        
    if cluster_res != []:
        avg_res = average_results(cluster_res)
        avg_res['model'] = exp
        cluster_df = cluster_df.append(
            pd.Series(
                data=avg_res,
                index=['model', 'rand_score', 'adjusted_rand_score'],
            ),
            ignore_index=True,
        )


  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(
  results_df = results_df.append(
  cluster_df = cluster_df.append(


In [19]:
results_df.to_csv(base_path / 'results.csv')
results_df.to_latex(base_path / 'results.tex')
results_df

  results_df.to_latex(base_path / 'results.tex')


Unnamed: 0,model,accuracy,precision,recall,f1,roc_auc
0,drp_multi_not_pt,0.87,0.89,0.92,0.91,0.86
1,drp_multi_pt,0.88,0.89,0.93,0.91,0.87
2,drp_single_not_pt,0.88,0.9,0.94,0.92,0.88
3,drp_single_pt,0.88,0.9,0.93,0.91,0.87
4,drp_single_aux_not_pt,0.87,0.89,0.92,0.9,0.86
5,drp_single_aux_pt,0.89,0.9,0.94,0.92,0.89
6,kviews_17,0.62,0.79,0.59,0.68,0.62
7,kviews_19,0.62,0.8,0.59,0.68,0.62
8,kviews_25,0.52,0.71,0.51,0.59,0.52
9,kmeans,0.47,0.07,0.35,0.11,0.47


In [20]:
cluster_df.to_csv(base_path / 'cluster_results.csv')
cluster_df.to_latex(base_path / 'cluster_results.tex')
cluster_df

  cluster_df.to_latex(base_path / 'cluster_results.tex')


Unnamed: 0,model,rand_score,adjusted_rand_score
0,kviews_17,0.53,0.06
1,kviews_19,0.53,0.06
2,kviews_25,0.5,-0.0
3,kmeans,0.5,0.0
