In [1]:
import os
from itertools import combinations, product

import numpy as np
import pandas as pd
from tqdm import tqdm

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express.colors as px_colors
import plotly.io as pio
from scipy.spatial.distance import squareform

import SpAM_Simulations.metrics as metrics
from SpAM_Simulations.simulation import load_latest_simulation, create_simulation

# Use Plotly's 'browser' renderer for every `fig.show()` call in this notebook.
# NOTE(review): figures shown this way are presumably not stored in the notebook's cell outputs - confirm.
pio.renderers.default = 'browser'

## Load or Make Simulation

In [2]:
try:
    # Try to load an existing simulation, looked up from the current working directory.
    sim = load_latest_simulation(os.getcwd())
except FileNotFoundError:
    # Nothing could be loaded, so build the simulation from the full parameter grid.
    #
    # A much smaller grid that is handy for quick experiments:
    #     n_images=50, n_dims=5,
    #     num_subjects=[20, 30], trials_per_subject=[8, 10], images_per_trial=[16, 20],
    #     subjects_noise_scale=[0.15, 0.3], subjects_noise_df=[1, 3],
    #     reps=3, seed=42, verbose=True
    full_grid = {
        "n_images": 754,
        "n_dims": 10,
        "num_subjects": [20, 30, 50, 100, 200],
        "trials_per_subject": [8, 10, 15, 20],
        "images_per_trial": [16, 20, 25],
        "subjects_noise_scale": [0.15, 0.3, 0.5],
        "subjects_noise_df": [1, 3, 5],
        "reps": 5,
        "seed": 42,
        "verbose": True,
    }
    sim = create_simulation(**full_grid)
    del full_grid

## Evaluate Simulation
### Coverage

In [None]:
# One row per (parameter combination, repetition); one column per metric returned by `metrics.coverage`.
coverage = pd.concat(
    [
        pd.Series(metrics.coverage(result)).rename((*param, rep))
        for param, results in sim._results.items()
        for rep, result in enumerate(results)
    ],
    axis=1,
).T
coverage.index.names = [
    'n_subjects', 'trials_per_subject', 'images_per_trial', 'subjects_noise_scale', 'subjects_noise_df', 'rep',
]
# A run is "connected" when its observation graph has exactly one connected component.
coverage["is_connected"] = coverage["num_connected_components"].eq(1.0)

# coverage scores are not affected by subject noise, so we average across those parameters
# (i.e. the noise parameters and the repetitions are pooled within each group).
coverage_summary = (
    coverage.groupby(["n_subjects", "trials_per_subject", "images_per_trial"])
    .agg(**{
        "num_reps": ("average_obs_count", "size"),
        # For every summarised metric emit `<prefix>_mean` followed by `<prefix>_sem`.
        **{
            f"{prefix}_{stat}": (column, stat)
            for prefix, column in (
                ("obs_count", "average_obs_count"),
                ("percent_coverage", "coverage"),
                ("percent_coverage5x", "coverage@5"),
                ("p_is_connected", "is_connected"),
            )
            for stat in ("mean", "sem")
        },
    })
    .sort_index()
    .reset_index()
)

In [None]:
# Grid of line plots: one row per coverage metric, one column per images-per-trial value,
# one line per trials-per-subject level, number of subjects on the x-axis.

# Row (y-axis) title -> column prefix in `coverage_summary`; each prefix has a `_mean` and a `_sem` column.
ROW_TITLES = {
    "# of Ratings": "obs_count", "% COVERAGE": "percent_coverage", "% COVERAGE@5": "percent_coverage5x"
}
# images-per-trial value -> subplot column title.
COL_TITLES = {k: f"{k} Images per Trial" for k in coverage_summary["images_per_trial"].unique()}
coverage_fig = make_subplots(
    rows=len(ROW_TITLES), cols=len(COL_TITLES),
    column_titles=list(COL_TITLES.values()),
    shared_xaxes=True, shared_yaxes=True,
    x_title="Number of Subjects",
    vertical_spacing=0.05, horizontal_spacing=0.025,
)

# Loop invariants: the colour cycle and the trials-per-subject levels (one line per level).
palette = px_colors.qualitative.Plotly
trial_levels = coverage_summary["trials_per_subject"].unique()

for c, images_per_trial in enumerate(COL_TITLES):
    in_column = coverage_summary["images_per_trial"] == images_per_trial
    for r, (row_title, prefix) in enumerate(ROW_TITLES.items()):
        for i, tr in enumerate(trial_levels):
            # Cycle through the palette so that having more levels than colours cannot raise an IndexError.
            color = palette[i % len(palette)]
            df = coverage_summary.loc[in_column & (coverage_summary["trials_per_subject"] == tr)]
            coverage_fig.add_trace(
                row=r + 1, col=c + 1, trace=go.Scatter(
                    x=df["n_subjects"],
                    y=df[f"{prefix}_mean"],
                    error_y=dict(type="data", array=df[f"{prefix}_sem"], visible=True),
                    # One legend entry per level (taken from the top-left panel); the shared
                    # legendgroup makes that entry toggle the level's lines in every panel.
                    name=str(tr), legendgroup=str(tr), showlegend=c == 0 and r == 0,
                    mode="lines+markers", line=dict(color=color),
                )
            )
        if c == 0:
            # Only the left-most panel of each row carries the row's y-axis title.
            coverage_fig.update_yaxes(
                row=r + 1, col=c + 1,
                title=dict(text=row_title, font=dict(size=14, color='black'))
            )
del c, r, i, images_per_trial, in_column, row_title, prefix, tr, color, df, palette, trial_levels

coverage_fig.update_layout(
    height=650, width=1500,
    title=dict(
        text="Coverage by Experimental Configuration", font=dict(size=20, color='black')
    ),
    legend=dict(
        title=dict(text="Trials per Subject"),
        orientation="h",
        x=1.0, xanchor="right", xref="paper",
        y=1.075, yanchor="bottom", yref="paper",
    ),
)
coverage_fig.show()

### Connected Components
The `MDS` algorithm requires a fully connected graph, so we first want to check the probability of getting a fully connected graph for various experimental configurations.

In [None]:
# Single row of line plots: probability that the observation graph is fully connected,
# one column per images-per-trial value, one line per trials-per-subject level.

# images-per-trial value -> subplot column title.
COL_TITLES = {k: f"{k} Images per Trial" for k in coverage_summary["images_per_trial"].unique()}
# Column prefix in `coverage_summary`; the `_mean` and `_sem` columns are plotted.
PREFIX = "p_is_connected"
cc_fig = make_subplots(
    rows=1, cols=len(COL_TITLES),
    column_titles=list(COL_TITLES.values()),
    shared_xaxes=True, shared_yaxes=True,
    x_title="Number of Subjects",
    vertical_spacing=0.05, horizontal_spacing=0.025,
)

# Loop invariants: the colour cycle and the trials-per-subject levels (one line per level).
palette = px_colors.qualitative.Plotly
trial_levels = coverage_summary["trials_per_subject"].unique()

for c, images_per_trial in enumerate(COL_TITLES):
    in_column = coverage_summary["images_per_trial"] == images_per_trial
    for i, tr in enumerate(trial_levels):
        # Cycle through the palette so that having more levels than colours cannot raise an IndexError.
        color = palette[i % len(palette)]
        df = coverage_summary.loc[in_column & (coverage_summary["trials_per_subject"] == tr)]
        cc_fig.add_trace(
            row=1, col=c + 1, trace=go.Scatter(
                x=df["n_subjects"],
                y=df[f"{PREFIX}_mean"],
                error_y=dict(type="data", array=df[f"{PREFIX}_sem"], visible=True),
                # One legend entry per level (taken from the first panel); the shared
                # legendgroup makes that entry toggle the level's line in every panel.
                name=str(tr), legendgroup=str(tr), showlegend=c == 0,
                mode="lines+markers", line=dict(color=color),
            )
        )
del c, i, images_per_trial, in_column, tr, color, df, palette, trial_levels

# Only the left-most panel carries the y-axis title.
cc_fig.update_yaxes(
    row=1, col=1,
    title=dict(text="P[Fully Connected]", font=dict(size=14, color='black'))
)

cc_fig.update_layout(
    height=350, width=800,
    title=dict(
        text="Probability of Full Connectivity by Experimental Configuration", font=dict(size=20, color='black')
    ),
    legend=dict(
        title=dict(text="Trials per Subject"),
        orientation="h",
        x=1.0, xanchor="right", xref="paper",
        y=1.075, yanchor="bottom", yref="paper",
    ),
)
cc_fig.show()

### Stability
We check the Spearman (rank) correlation between different iterations of the same experimental configuration to see how stable the results are.

In [None]:
def _pairwise_stability(results):
    """Summarise the Spearman correlations between every pair of repetitions.

    Parameters
    ----------
    results : sequence
        The repetitions of one experimental configuration (one entry of `sim._results`).

    Returns
    -------
    pd.Series
        `r_mean` and `r_sem` (mean and standard error of the pairwise correlations) and
        `n_pairs` (how many pairs actually contributed). `r_mean` is NaN when no pair
        could be correlated, and `r_sem` is NaN with fewer than two contributing pairs.
    """
    pair_corrs = []
    for res1, res2 in combinations(results, 2):
        try:
            pair_corrs.append(metrics.spearman_correlation(res1, res2))
        except ValueError:
            # if we don't have mutual observations we can't compute a correlation, so we skip those cases
            continue
    # Explicit float dtype so an empty list still yields a numeric Series (mean/sem -> NaN).
    pair_corrs = pd.Series(pair_corrs, dtype=float)
    return pd.Series({
        "r_mean": pair_corrs.mean(),
        "r_sem": pair_corrs.sem(),
        # Skipped pairs are otherwise invisible; record how many pairs back the mean.
        "n_pairs": float(len(pair_corrs)),
    })


# One row per parameter combination. The helper and the comprehension keep all loop
# temporaries local, so nothing has to be `del`-ed afterwards (a `del` of a name that
# was never bound, e.g. with a single repetition, would raise a NameError).
correlations = pd.DataFrame(
    {param: _pairwise_stability(results) for param, results in sim._results.items()}
).T
correlations.index.names = ['n_subjects', 'trials_per_subject', 'images_per_trial', 'subjects_noise_scale', 'subjects_noise_df']
correlations

In [None]:
# Grid of line plots of the stability scores: one row per trials-per-subject level, one column
# per images-per-trial value, one line per (noise scale, noise DFs) combination.

# trials-per-subject value -> row (y-axis) title.
ROW_TITLES = {tr: f"{tr} Trials per Subject" for tr in coverage_summary["trials_per_subject"].unique()}
# images-per-trial value -> subplot column title.
COL_TITLES = {k: f"{k} Images per Trial" for k in coverage_summary["images_per_trial"].unique()}
corr_fig = make_subplots(
    rows=len(ROW_TITLES), cols=len(COL_TITLES),
    column_titles=list(COL_TITLES.values()),
    shared_xaxes=True, shared_yaxes=True,
    x_title="Number of Subjects",
    vertical_spacing=0.05, horizontal_spacing=0.025,
)

# Loop invariants. The noise levels are taken from the whole table (not from each panel's
# subset) so that a given (scale, DFs) pair gets the same colour and legend entry in every panel.
palette = px_colors.qualitative.Plotly
noise_scales = correlations.index.get_level_values("subjects_noise_scale").unique()
noise_dfs = correlations.index.get_level_values("subjects_noise_df").unique()

for c, images_per_trial in enumerate(COL_TITLES):
    for r, (trials_per_subject, row_title) in enumerate(ROW_TITLES.items()):
        subset = correlations.loc[
            (correlations.index.get_level_values("images_per_trial") == images_per_trial) &
            (correlations.index.get_level_values("trials_per_subject") == trials_per_subject)
        ]
        for i, (noise_scale, noise_df) in enumerate(product(noise_scales, noise_dfs)):
            name = f"Scale={noise_scale}<br>DFs={noise_df}"
            # Cycle through the palette so that having more noise combinations than colours
            # cannot raise an IndexError.
            color = palette[i % len(palette)]
            df = subset.loc[
                (subset.index.get_level_values("subjects_noise_scale") == noise_scale) &
                (subset.index.get_level_values("subjects_noise_df") == noise_df)
            ]
            corr_fig.add_trace(
                row=r + 1, col=c + 1, trace=go.Scatter(
                    x=df.index.get_level_values("n_subjects"),
                    y=df["r_mean"],
                    error_y=dict(type="data", array=df["r_sem"], visible=True),
                    # One legend entry per combination (taken from the top-left panel); the shared
                    # legendgroup makes that entry toggle the combination's lines in every panel.
                    name=name, legendgroup=name, showlegend=c == 0 and r == 0,
                    mode="lines+markers", line=dict(color=color),
                )
            )
        if c == 0:
            # Only the left-most panel of each row carries the row's y-axis title.
            corr_fig.update_yaxes(
                row=r + 1, col=c + 1,
                title=dict(text=row_title, font=dict(size=10, color='black'))
            )
del c, r, i, images_per_trial, trials_per_subject, row_title, noise_scale, noise_df, name, color, df
del subset, noise_scales, noise_dfs, palette

corr_fig.update_layout(
    height=650, width=1500,
    title=dict(
        text="Stability (Spearman Correlation) by Experimental Configuration",
        font=dict(size=20, color='black')
    ),
    legend=dict(
        title=dict(text="Subject Noise Parameters"),
    ),
)
corr_fig.show()

## Run MDS
In the real world, we don't know the true dimensionality of the data (unlike here, where the simulation was generated with `D=5` or `D=10`). Instead, we run the MDS algorithm on the same distance matrix with increasing target dimensionalities (`ndim=2, 3, ..., D`).

In [None]:
from SpAM_Simulations.multi_dimensional_scaling import run_mds

# Quick manual check of `run_mds` on a single simulation result (uncomment to use):
# res = list(sim._results.values())[0][0]
# run_mds(res.distances, res.num_obs, 3, True)

In [None]:
def _try_mds(result, ndim):
    """Run MDS on one simulation result, returning the raised exception instead of propagating it.

    Parameters
    ----------
    result
        A simulation result; its `distances` and `num_obs` attributes are passed to
        `run_mds` as `dists` and `weights`.
    ndim : int
        Target dimensionality passed to `run_mds`.

    Returns
    -------
    Whatever `run_mds` returns, or the `Exception` instance it raised.
    """
    try:
        return run_mds(
            dists=result.distances,
            weights=result.num_obs,
            ndim=ndim,
            verbose=False,
        )
    except Exception as e:
        # Failures are kept (not re-raised) so they can be counted and inspected afterwards.
        # NOTE: a stored exception keeps its traceback, and with it the frames it references, alive.
        return e


# Number of columns of the ground-truth embeddings; the largest target dimensionality we try.
GT_DIMS = sim.gt_embeddings.shape[1]

# Key: tuple of (name, value) pairs - the parameter fields followed by ("rep", rep) and ("ndim", ndim).
# Ordering: target dimensionality, then parameter combination, then repetition.
mds_inputs = {
    tuple({**(param._asdict()), "rep": rep, "ndim": ndim}.items()): result
    for ndim in range(2, GT_DIMS + 1)
    for param, results in sim._results.items()
    for rep, result in enumerate(results)
}

# Same keys as `mds_inputs`; each value is either the MDS output or the exception that was raised.
# `ndim` is looked up by name rather than by its position in the key. The comprehensions keep
# the loop temporaries local, so nothing leaks into the notebook namespace.
mds_results = {
    exp_param: _try_mds(exp_result, dict(exp_param)["ndim"])
    for exp_param, exp_result in tqdm(mds_inputs.items(), desc="Running MDS")
}

### Percent Failures
We want to check the percentage of MDS runs that failed for each experimental configuration and target dimensionality.<br>
We checked the number of connected components before, but here we check for other causes as well, for example:
- a RuntimeError with the words "connected component" indicates that there were more than 1 CC.
- a RuntimeError with the words "max_iter" indicates that the MDS algorithm failed to converge within the maximum number of iterations.

### Scree Plot
We plot the **Scree Plot** showing Stress vs Dimensionality, and look for the "elbow" in the plot to determine the optimal dimensionality.<br>
We should see if this is stable across iterations of the same experimental configuration.

## Evaluate Embeddings
### Stability
We want to check how "stable" our embeddings are across different iterations of the same experimental configuration. To do this, we calculate the Spearman (rank) correlation between the pairwise distances of the embeddings from different iterations of the same experimental configuration.<br>
<br>
_Why use the Spearman correlation?_<br>
- _Cosine Similarity_: We don't care about the angle between embeddings, just that we maintain their distances relative to each other. We should allow for scaling and rotation of the embeddings, which would change the cosine similarity but not the relative distances between points.<br>
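- _Pearson Correlation_: We don't care about the exact distances between points, just that they maintain their relative distances to each other. Pearson measures _linear_ agreement, so a monotonic but non-linear distortion of the distances (e.g. one run compressing large distances more than another) would lower the Pearson correlation even when the rank order of the distances is fully preserved; the Spearman correlation depends only on that rank order.<br>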
- _Procrustes Analysis_: This is a more complex method that involves finding the optimal scaling and rotation to align two embeddings before calculating their similarity. This could be a good option, but it is more computationally intensive and may not be necessary for our purposes. The Spearman correlation is a simpler and more interpretable metric that still captures the stability of the embeddings across iterations.<br>