#### Load Hashes and Scores

- Load in a set of hashes and distance scores that have already been calculated

In [None]:
import os
import pandas as pd
import numpy as np
from prettytable import PrettyTable
from joblib import load
from phaser.utils import load_labelencoders


# Directory holding the pre-computed hashes, distances and label encoders
hash_dist_dir = r"demo_outputs"

# Encoders used to translate names into the codes stored in the dataframes
le = load_labelencoders(filename="LabelEncoders.bz2", path=hash_dist_dir)

# Names available for constructing (algorithm, transform, metric) triplets
TRANSFORMS, METRICS, ALGORITHMS = (le[key].classes_ for key in ("t", "m", "a"))

# The serialised dataframes are the better option for larger datasets.
# Alternatively the same directory holds CSV exports that can be read with
# pd.read_csv ("Hashes.csv.bz2" and "Distances.csv.bz2").
hashes_path = os.path.join(hash_dist_dir, "Hashes.df.bz2")
distances_path = os.path.join(hash_dist_dir, "Distances.df.bz2")
df_h = load(hashes_path)
df_d = load(distances_path)

# Split the scores by comparison class: Intra (1) and Inter (0)
intra_df = df_d.loc[df_d["class"] == 1]
inter_df = df_d.loc[df_d["class"] == 0]



#### Inter-Score Averages

Get a quick idea of the raw inter-distance / inter-similarity score distributions for each algorithm and metric.

This gives us an idea of whether or not they are biased somehow. 
A good metric should be about **0.5** for both mean and median (normally distributed around 0.5), ideally with a narrow spread. Though, in practice, anything is potentially fine as long as the inter-score and intra-score distributions are separable.


Note that values going forward are for SIMILARITY rather than distance unless otherwise specified, as that's the default used in the scientific libraries. We can convert from one to the other by subtracting values from 1.

In [None]:
def _inter_score_table(as_distance=False):
    """Summarise the raw inter-scores for every algorithm/metric pair.

    Scores are read from the "orig" column of ``inter_df``. They are
    similarities; when ``as_distance`` is true each score ``s`` is reported as
    the distance ``1 - s`` instead.

    The work is done inside a function so that the loop variables stay local
    and cannot overwrite notebook globals (e.g. the stats dataframe ``m``) if
    this cell is re-run after later cells.

    Returns:
        PrettyTable with one row per (algorithm, metric) pair holding the
        mean, median and standard deviation rounded to 4 decimals.
    """
    table = PrettyTable()
    table.field_names = ["Hash", "Metric", "Mean", "Median", "STD"]
    # Set all columns to left alignment
    for field in table.field_names:
        table.align[field] = "l"

    for algo in ALGORITHMS:
        for metric in METRICS:
            # Translate the names into the codes used in the dataframe
            m_label = le["m"].transform(np.array(metric).ravel())
            a_label = le["a"].transform(np.array(algo).ravel())
            scores = inter_df.query(f"algo=={a_label} and metric == {m_label}")["orig"].values
            if as_distance:
                # distance = 1 - similarity
                scores = 1 - scores
            table.add_row([
                algo,
                metric,
                round(np.mean(scores), 4),
                round(np.median(scores), 4),
                round(np.std(scores), 4),
            ])
    return table


print("====SIMILARITY Values====")
print(_inter_score_table())

print("\n====DISTANCE Values====")
print(_inter_score_table(as_distance=True))

#### Summary Metrics

- Generate triplet permutations: **Perceptual Algorithm** -- **Transform** -- **Distance Metric**
- Generate additional stats (ComputeMetrics class), save to **CSV**

In [None]:
from phaser.evaluation import ComputeMetrics

# Build every (algorithm, transform, metric) combination to evaluate.
# 'orig' is left out of the transform axis.
eval_transforms = [t for t in TRANSFORMS if t != 'orig']
grid = np.meshgrid(ALGORITHMS, eval_transforms, METRICS)
triplets = np.array(grid).T.reshape(-1, 3)

# Compute the summary statistics for all triplets; `m` is the stats
# dataframe used by the cells below.
cm = ComputeMetrics(le, df_d, pd.DataFrame(), analyse_bits=False, n_jobs=4)
m, b = cm.fit(triplets, weighted=False)



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Save stats to file, print some here

print("Saving stat files.")

# Full per-triplet statistics
all_stats_path = os.path.join(hash_dist_dir, "all_stats.csv")
m.to_csv(all_stats_path)

# Mean/STD across all transforms for each algorithm/metric pair
per_pair = m.groupby(['Algorithm', "Metric"])
aggregate_path = os.path.join(hash_dist_dir, "aggregate_AUC.csv")
per_pair[['AUC', "FP", "FN", "TP", "TN"]].agg(['mean', 'std']).to_csv(aggregate_path)

# Show the AUC part of the aggregation here
print(per_pair[['AUC']].agg(['mean', 'std']))


#### AUC Heatmaps
- Visualise Area under the ROC Curve (AUC) for each Triplet as a heatmap.
- If there is more than one distance metric, compare each of them against a chosen baseline (defaults to Hamming) in a second heatmap.
    - Note this is presented as percentage point (%pt) change vs. baseline, so 3.5 would mean 0.035 change in the AUC value from the first heatmap.

In [None]:
from ipywidgets import interactive
import ipywidgets as widgets



### Interactive Heatmap Function
# Baseline selector: "Hamming" when it is available, else the first metric
default_baseline = "Hamming" if "Hamming" in METRICS else METRICS[0]
mselect = widgets.Dropdown(
    options=METRICS,
    value=default_baseline,
    description='Compare to:',
)

# Toggle passed to the heatmaps as their `square` option
checkbox = widgets.Checkbox(
    description='Square heatmap',
    value=False,
    disabled=False,
)

def heatmap_all(square, width=12, height=10):
    """Draw the AUC of every triplet as an annotated heatmap.

    Rows are (Metric, Algorithm) pairs and columns are transforms, pivoted
    from the stats dataframe ``m``. ``square`` is forwarded to seaborn and
    ``width``/``height`` set the figure size.
    """
    auc_grid = m.pivot_table(
        values='AUC',
        index=['Metric', 'Algorithm'],
        columns='Transform',
    )
    _, ax = plt.subplots(figsize=(width, height))
    ax.set_title("AUC of all Triplets")
    sns.heatmap(
        auc_grid,
        ax=ax,
        annot=True,
        cmap='RdBu',
        center=0,
        vmin=0,
        vmax=1,
        square=square,
    )

def heatmap_vs_baseline(baseline_metric, square, width=12, height=10):
    """Draw the AUC difference of every triplet to a baseline metric.

    For each (Transform, Algorithm) pair the AUC obtained with
    ``baseline_metric`` is subtracted from the AUC of every other metric. The
    result is shown in percentage points (difference * 100, rounded to 3
    decimals). Rows of the baseline metric itself are left out, as their
    difference is always zero.
    """
    pair_keys = ['Transform', 'Algorithm']
    baseline_col = f'{baseline_metric} AUC'
    diff_col = 'Diff to baseline_metric'

    # Work on a copy: the stats can take a while to recompute
    stats = m.copy()

    # AUC of the baseline metric per (Transform, Algorithm) pair
    is_baseline = stats['Metric'] == baseline_metric
    baseline_aucs = stats[is_baseline].set_index(pair_keys)['AUC'].rename(baseline_col)

    # Attach the baseline AUC to every row of the same pair
    stats = stats.set_index(pair_keys).join(baseline_aucs, on=pair_keys)

    # Difference to the baseline in percentage points
    stats[diff_col] = ((stats['AUC'] - stats[baseline_col]) * 100).round(3)

    # Keep only the metrics that are being compared against the baseline
    compared = stats[stats['Metric'] != baseline_metric]
    diff_grid = compared.pivot_table(
        values=diff_col,
        index=['Metric', 'Algorithm'],
        columns='Transform',
    )

    _, ax = plt.subplots(figsize=(width, height))
    ax.set_title(f"%pt AUC change vs. {baseline_metric}")
    if baseline_metric != 'Select':
        sns.heatmap(
            diff_grid,
            ax=ax,
            annot=True,
            cmap='RdBu',
            center=0,
            vmin=-1,
            vmax=1,
            square=square,
        )
        

# Interactive heatmap of the AUC of every triplet
map1 = interactive(
    heatmap_all,
    square=checkbox,
)
display(map1)

# The baseline comparison is only shown when there are several metrics;
# with a single metric there is nothing to compare against.
if len(METRICS) > 1:
    map2 = interactive(
        heatmap_vs_baseline,
        baseline_metric=mselect,
        square=checkbox,
    )
    display(map2)


#### Prepare Plots

- Run this before the plot segments below.
- Allows for some configurability and interactivity of several plots

In [None]:
from phaser.evaluation import MetricMaker
from phaser.plotting import  hist_fig, kde_ax, eer_ax, roc_ax
import matplotlib.pyplot as plt
from ipywidgets import interactive
import ipywidgets as widgets
import warnings
import seaborn as sns
warnings.simplefilter(action='ignore', category=FutureWarning) # Ignore Seaborn warnings due to underlying package using future deprecated calls


# Widgets shared by the interactive plots below

# Every transform, including 'orig'
tselect = widgets.Dropdown(
        options=TRANSFORMS,
        description='Transform'
    )

# Every transform except 'orig'. 'orig' is removed by name, matching how the
# triplets are built: its position within TRANSFORMS is not guaranteed, so
# dropping the last entry could remove a real transform instead.
tselect_no_orig = widgets.Dropdown(
        options=[t for t in TRANSFORMS if t != 'orig'],
        description='Transform'
    )

# Distance metric selector
mselect = widgets.Dropdown(
        options=METRICS,
        description='Metric'
    )

# Hashing algorithm selector
aselect = widgets.Dropdown(
        options=ALGORITHMS,
        description='Algorithm'
    )

# Which class of comparisons to plot
modeselect = widgets.Dropdown(
        options=["inter", "intra"],
        description='Comparison Mode'
    )


### Hist plots, separate for intra/inter
def plot_image(transform, mode, bins=25,  width=8, height=6):
    """Plot the similarity histograms of one transform for one comparison mode.

    Args:
        transform: Name of the transform to plot. The placeholder 'Select'
            draws nothing.
        mode: "inter" or "intra", choosing which class of scores is plotted.
        bins: Number of histogram bins; nothing is drawn unless it is over 1.
        width: Figure width passed to ``hist_fig``.
        height: Figure height passed to ``hist_fig``.

    Raises:
        ValueError: If ``mode`` is neither "inter" nor "intra".
    """
    # Nothing to draw for the placeholder transform or a degenerate bin count
    if transform == 'Select' or not bins > 1:
        return

    if mode == "inter":
        scores = inter_df
    elif mode == "intra":
        scores = intra_df
    else:
        # Fail with a clear message instead of an unbound local further down
        raise ValueError(f"Unknown comparison mode: {mode!r}")

    fig = hist_fig(scores, label_encoding=le, transform=transform, interactive=True, bins=bins, figsize=(width,height))
    fig.suptitle(f"Similarity Histograms - {transform}")
        

### KDE multi plot
def kde_plot_multi(transform, width=8, height=6):
    """Draw a grid of inter/intra KDE plots for one transform.

    The grid has one row per algorithm and one column per metric; each cell
    is drawn by ``kde_ax`` from the matching subset of ``df_d``. Nothing is
    drawn for the placeholder transform 'Select'.
    """
    if transform == 'Select':
        return

    fig, axes = plt.subplots(
        nrows=len(ALGORITHMS),
        ncols=len(METRICS),
        figsize=(width, height),
        constrained_layout=False,
        sharex=True,
        sharey=False,
        squeeze=False,
    )

    for col, metric_name in enumerate(METRICS):
        for row, algo_name in enumerate(ALGORITHMS):
            # Translate the names into the codes used in the dataframe
            m_label = le["m"].transform(np.array(metric_name).ravel())
            a_label = le["a"].transform(np.array(algo_name).ravel())

            # Scores of this algorithm/metric pair
            pair_scores = df_d.query(f"algo=={a_label} and metric == {m_label}")

            kde_ax(
                pair_scores,
                transform,
                label_encoding=le,
                fill=True,
                title=f"{algo_name}-{metric_name}",
                ax=axes[row, col],
            )

    fig.suptitle(f"Inter/Intra-Score KDE Plots - {transform}")
        

### EER multi plot
def eer_plot_multi(transform, width=8, height=6):
    """Draw a grid of Equal Error Rate plots for one transform.

    The grid has one row per algorithm and one column per metric. For each
    cell a ``MetricMaker`` is built from the class labels and the scores of
    the chosen transform, and ``eer_ax`` is drawn at its EER threshold.
    Nothing is drawn for the placeholder transform 'Select'.
    """
    if transform == 'Select':
        return

    fig, axes = plt.subplots(
        nrows=len(ALGORITHMS),
        ncols=len(METRICS),
        figsize=(width, height),
        constrained_layout=True,
        sharex=True,
        sharey=False,
        squeeze=False,
    )

    for col, metric_name in enumerate(METRICS):
        for row, algo_name in enumerate(ALGORITHMS):
            # Translate the names into the codes used in the dataframe
            m_label = le["m"].transform(np.array(metric_name).ravel())
            a_label = le["a"].transform(np.array(algo_name).ravel())

            # Scores of this algorithm/metric pair
            pair_scores = df_d.query(f"algo=={a_label} and metric == {m_label}")

            # True class labels and the similarities for the chosen transform
            mm = MetricMaker(
                y_true=pair_scores["class"],
                y_similarity=pair_scores[transform],
                weighted=False,
            )

            # Mark the plot at the EER threshold
            eer_threshold = mm.eer_thresh
            eer_ax(
                mm.fpr,
                mm.tpr,
                mm.thresholds,
                threshold=eer_threshold,
                legend="",
                title=f"{algo_name}-{metric_name}",
                ax=axes[row, col],
            )

    fig.suptitle(f"EER Plots - {transform}")
        

### ROC multi plot
def roc_plot_multi(transform, width=8, height=6):
    """Draw a grid of ROC plots for one transform.

    The grid has one row per algorithm and one column per metric. For each
    cell a ``MetricMaker`` is built from the class labels and the scores of
    the chosen transform, and its FPR, TPR and AUC are passed to ``roc_ax``.
    Nothing is drawn for the placeholder transform 'Select'.
    """
    if transform == 'Select':
        return

    fig, axes = plt.subplots(
        nrows=len(ALGORITHMS),
        ncols=len(METRICS),
        figsize=(width, height),
        constrained_layout=True,
        sharex=True,
        sharey=False,
        squeeze=False,
    )

    for col, metric_name in enumerate(METRICS):
        for row, algo_name in enumerate(ALGORITHMS):
            # Translate the names into the codes used in the dataframe
            m_label = le["m"].transform(np.array(metric_name).ravel())
            a_label = le["a"].transform(np.array(algo_name).ravel())

            # Scores of this algorithm/metric pair
            pair_scores = df_d.query(f"algo=={a_label} and metric == {m_label}")

            # True class labels and the similarities for the chosen transform
            mm = MetricMaker(
                y_true=pair_scores["class"],
                y_similarity=pair_scores[transform],
                weighted=False,
            )

            roc_ax(
                mm.fpr,
                mm.tpr,
                mm.auc,
                title=f"{algo_name}-{metric_name}",
                ax=axes[row, col],
            )

    fig.suptitle(f"ROC Plots - {transform}")
        
        
def kde_plot(transform, algorithm, metric, mode, width=8, height=6):
    """Draw a single KDE plot of the scores for one triplet and mode.

    Args:
        transform: Name of the transform column to plot. The placeholder
            'Select' draws nothing.
        algorithm: Name of the hashing algorithm.
        metric: Name of the distance metric.
        mode: "inter" plots the inter-scores; any other value plots the
            intra-scores.
        width: Figure width.
        height: Figure height.
    """
    if transform == 'Select':
        return

    # Translate the names into the codes used in the dataframe
    m_label = le["m"].transform(np.array(metric).ravel())
    a_label = le["a"].transform(np.array(algorithm).ravel())

    scores_df = inter_df if mode == "inter" else intra_df
    _X = scores_df.query(f"algo=={a_label} and metric == {m_label}")[transform].values

    # Draw on an explicitly sized figure so width/height take effect
    # (previously they were accepted but ignored).
    _, ax = plt.subplots(figsize=(width, height))
    sns.kdeplot(_X, fill=True, ax=ax)
    ax.set_title(f"Similarity Histograms - {transform} - {algorithm} - {metric}")
        

#### Similarity Score Histograms

- Normalised counts of scores for each hash/transform
- Same data as the KDE plots, but allows for a better understanding of any gaps in the distributions.
- Updates as long as the number of bins is over 1.
- Ideally: Inter distribution is normally distributed around 0.5, while the intra similarity is as high as possible.

In [None]:
# Interactive single-triplet plot: pick transform, algorithm, metric and mode
h1 = interactive(
    kde_plot,
    transform=tselect,
    algorithm=aselect,
    metric=mselect,
    mode=modeselect,
)
display(h1)

In [None]:
# Interactive similarity score histograms for a chosen transform and mode
h = interactive(
    plot_image,
    transform=tselect,
    mode=modeselect,
)
display(h)

#### Kernel Density Estimation (KDE)

- Combined plot for Inter/Intra scores.
- More or less the same as Histograms, but estimates probability density.
- Ideally, both classes should be completely non-overlapping. Overlap is expected for difficult transforms and indicates difficulty in setting a threshold to separate them, resulting in False Positives / False Negatives.

In [None]:
# Interactive inter/intra KDE grid; 'orig' is not offered as a transform
k = interactive(
    kde_plot_multi,
    transform=tselect_no_orig,
)
display(k)

#### Error Rate

- Visualise the False Positive Rate (FPR) and False Negative Rate (FNR) trade-offs across the similarity score spectrum.
- The vertical line represents the score at which FPR == FNR, i.e. the Equal Error Rate threshold (EERt).

In [None]:
# Interactive Equal Error Rate (EER) grid for a chosen transform
eer = interactive(
    eer_plot_multi,
    transform=tselect,
)
display(eer)


#### Receiver Operating Characteristic (ROC)

- Plot TPR vs FPR to visualise trade-offs.
- Provides Area Under the Curve (AUC) as a means of summarising overall performance. Larger AUC (up to 1.0) is better.

In [None]:
# Interactive Receiver Operating Characteristic (ROC) grid for a chosen transform
roc = interactive(
    roc_plot_multi,
    transform=tselect,
)
display(roc)
