In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pathlib import Path
import plotly.express as px

# Seven-colour hex palette; not referenced by any other cell visible in this notebook.
OKABE_ITO_SCALE = ["#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7"]
# Use seaborn's "colorblind" palette for every matplotlib/seaborn figure below.
sns.set_palette(sns.color_palette("colorblind"))
# NOTE(review): this call also passes palette="colorblind", so the set_palette call above looks redundant - confirm.
sns.set(style="ticks", font_scale=1.1, palette="colorblind")

%load_ext autoreload
%autoreload 2

# Input CSVs are read from RESULTS_DIR; every figure/report below is written under FIG_DIR.
# Both paths are relative to the working directory of the kernel.
RESULTS_DIR = Path("../results")
FIG_DIR = Path("../figures")
FIG_DIR.mkdir(exist_ok=True)
# Names double as file stems: cells below read f"{modality}.csv" for each entry.
DATA_MODALITIES = ["graph","numerical", "binary", "nominal", "timeseries", "NLP", "cv", "multiomics", "seq_of_sets"]


### Pairs strategy comparison

In [None]:
# Mean AUC for every (dataset, pairs strategy, distance, dataset type)
# combination, ordered by distance name.
group_keys = ["dataset_name", "pairs_strategy", "distance", "dataset_type"]
df = (
    pd.read_csv(RESULTS_DIR / "pairs_strategy.csv")
    .groupby(group_keys)["auc"]
    .mean()
    .reset_index()
    .sort_values("distance")
)

In [None]:
from collections import defaultdict

# For every (dataset, distance) pair the strategies are ordered by descending
# mean AUC; a strategy collects its 0-based place, so a lower total is better.
rankings = defaultdict(int)
# Running sum of AUC differences between neighbouring places (current minus
# previous place), divided by n_measurements when printed.
average_distance_between_two_places = 0
n_measurements = 0
for dataset_name in df.dataset_name.unique():
    df_dataset = df[df.dataset_name == dataset_name]
    for distance in df_dataset.distance.unique():
        ranked = (
            df_dataset[df_dataset.distance == distance]
            .sort_values("auc", ascending=False)
            .reset_index()
        )
        previous_auc = None
        for place, (strategy, auc) in enumerate(zip(ranked.pairs_strategy, ranked.auc)):
            rankings[strategy] += place
            if previous_auc is not None:
                average_distance_between_two_places += auc - previous_auc
                n_measurements += 1
            previous_auc = auc

print("Rankings of pair strategies, the lower the better:")
for strategy, total_place in sorted(rankings.items(), key=lambda item: item[1]):
    print(strategy, total_place)
print(f"Average difference in auc between two consecutive ranks: {average_distance_between_two_places / n_measurements}")

In [None]:
# One interactive strip plot per dataset type, faceted by dataset.
for dtype in df.dataset_type.unique():
    df_plot = df[df.dataset_type == dtype]
    # Figure width grows with the number of rows being plotted.
    plot_width = len(df_plot) // 4 * 150
    fig = px.strip(
        df_plot,
        x="distance",
        y="auc",
        color="pairs_strategy",
        facet_col="dataset_name",
        facet_col_wrap=3,
        title="AUC vs Distance",
        width=plot_width,
    )
    fig.show()

Visualize how the results on numerical datasets change as we vary the size of the vector passed to RSIF (`max_n`).

In [None]:
def plot_max_n():
    """Plot AUC against ``max_n`` for each distance, with and without a fixed projection.

    For the manhattan, chebyshev and cosine distances the files
    ``max_n/{dist}_projection-fixed.csv`` and
    ``max_n/{dist}_projection-not_fixed.csv`` are read from ``RESULTS_DIR``.
    The column named after the file stem is dropped and a ``type`` column
    (``"{dist}_fixed"`` / ``"{dist}_not_fixed"``) is added. One figure per
    distance plus one figure with all distances together is saved to
    ``FIG_DIR / "max_n"``.
    """
    MAX_N_DIR = FIG_DIR / "max_n"
    MAX_N_DIR.mkdir(exist_ok=True)

    def plot(data, path, subject):
        """Draw one AUC-vs-max_n line plot per dataset and save it to ``path``."""
        g = sns.relplot(data=data, x="max_n", y="auc", hue="type", marker="o", col="dataset_name", col_wrap=2,
                        facet_kws={'sharey': False, 'sharex': False}, kind="line")
        g.set(ylim=(0, 1))
        # The subject is passed in explicitly; reading the loop variable here
        # would label the combined figure with the last distance only.
        g.fig.suptitle(f'influence of max_n on AUC for {subject}')
        g.set(xscale="log")
        g.fig.subplots_adjust(top=0.9)
        g.fig.set_size_inches(20, 10)
        # `path` already contains the output directory.
        g.fig.savefig(path)
        plt.close(g.fig)

    dfs = []
    for dist in ["manhattan", "chebyshev", "cosine"]:
        variants = []
        for variant in ["fixed", "not_fixed"]:
            stem = f"{dist}_projection-{variant}"
            variant_df = pd.read_csv(RESULTS_DIR / f"max_n/{stem}.csv").drop(columns=[stem])
            variant_df["type"] = f"{dist}_{variant}"
            variants.append(variant_df)

        df = pd.concat(variants, ignore_index=True)
        plot(df, MAX_N_DIR / f"max_n_{dist}.png", f"{dist} distance")
        dfs.append(df)

    plot(pd.concat(dfs, ignore_index=True), MAX_N_DIR / "max_n_together.png", "all distances")


plot_max_n()

Visualize results of changing hyperparameters

In [5]:
HYPER_DIR = FIG_DIR / "hyperparameters"
HYPER_DIR.mkdir(exist_ok=True)

def sensitivity_plot(csv_path, parameter_name, col_wrap=4):
    """Plot RSIF AUC against one hyperparameter, one line per dataset.

    Args:
        csv_path: CSV with at least ``clf``, ``auc``, ``dataset_name`` and
            ``parameter_name`` columns.
        parameter_name: Column used for the x axis and for the output file name.
        col_wrap: Accepted for compatibility; not used by the plot.

    The figure is saved as both SVG and PNG in ``HYPER_DIR``.
    """
    # Raw dataset identifiers -> short names shown in the legend.
    display_names = {
        '21_Lymphography': 'lymphography',
        '36_speech': 'speech',
        '6_cardio': 'cardio',
        '26_optdigits': 'optdigits',
        'COX2': 'cox2',
        'BZR': 'bzr',
        'DHFR': 'dhfr',
        'ad_nominal': 'ad',
        'TwoLeadECG': 'twoleadecg',
        'agnews_1': 'agnews',
    }

    rsif_scores = pd.read_csv(csv_path)
    rsif_scores = rsif_scores[rsif_scores.clf == "RSIF"]
    rsif_scores = rsif_scores.rename(columns={"auc": "AUC", "dataset_name": "Dataset"})
    rsif_scores = rsif_scores.replace(list(display_names.keys()), list(display_names.values()))

    axis = sns.lineplot(
        data=rsif_scores,
        x=parameter_name,
        y="AUC",
        hue="Dataset",
        marker="o",
        err_style="bars",
        palette="colorblind",
    )
    axis.set(ylim=(0.0, 1.02))
    sns.despine()

    for extension in ("svg", "png"):
        plt.savefig(HYPER_DIR / f"{parameter_name}.{extension}")
    plt.close()

# (result file stem, swept parameter column) for every sensitivity experiment.
for csv_stem, parameter_name in [
    ("max_samples", "max_samples"),
    ("n_estimators", "n_estimators"),
    ("selected_obj_ratio", "obj_ratio"),
    ("pairs_strategy", "pairs_strategy"),
]:
    sensitivity_plot(f"../results/{csv_stem}.csv", parameter_name, 5)

Visualizing the results of trying different combinations of distance functions

In [5]:
TOP_DIR = FIG_DIR / "top_n_distances"
TOP_DIR.mkdir(exist_ok=True)
def plot_top_n_dists(data_type, n=7):
    """Bar-plot the ``n`` best distance combinations for every dataset of a modality.

    Reads ``RESULTS_DIR / "selected_distances" / f"{data_type}.csv"`` and
    silently returns when that file does not exist. For each dataset the
    combinations are ranked by mean AUC and the top ``n`` are drawn, best
    first. The mean AUC from ``if_default.csv`` is added as a horizontal
    reference line. The figure is saved to ``TOP_DIR``.

    Args:
        data_type: Modality name, used as the CSV file stem.
        n: Number of best combinations shown per dataset.
    """
    path = RESULTS_DIR / "selected_distances"  / f"{data_type}.csv"
    print(path)
    if not path.exists():
        return

    results = pd.read_csv(path)
    if_default = pd.read_csv(RESULTS_DIR / "if_default.csv")
    grouped_mean = results.groupby(['dataset_name', 'distances']).auc.mean().reset_index()
    datasets = grouped_mean.dataset_name.unique()
    if len(datasets) == 0:
        # Nothing to draw; plt.subplots cannot create a grid with zero columns.
        return

    # squeeze=False always yields a 2-D array of axes, even for one dataset.
    fig, axes = plt.subplots(1, len(datasets), figsize=(10 * len(datasets), 10), squeeze=False)

    for axis, dataset_name in zip(axes[0], datasets):
        dataset_means = grouped_mean[grouped_mean.dataset_name == dataset_name]
        top_n_dists = list(dataset_means.sort_values(by=['auc'], ascending=False).head(n).distances)
        rank_of = {name: rank for rank, name in enumerate(top_n_dists)}

        in_top_n = (results.dataset_name == dataset_name) & results.distances.isin(top_n_dists)
        # .assign builds a new frame, so no column is written onto a filtered
        # view of `results` (the source of SettingWithCopyWarning).
        to_plot = (
            results[in_top_n]
            .assign(index=lambda frame: frame["distances"].map(rank_of))
            .sort_values(by=['index'])
        )

        sns.barplot(data=to_plot, x="distances", y="auc", ax=axis, hue="distances")
        axis.set_title(dataset_name)
        axis.tick_params(labelrotation=45)

        if_score = if_default[if_default.dataset_name == dataset_name].auc.values
        if if_score.size:
            # Skip the reference line when there is no IF score for this dataset
            # (the mean of an empty array is NaN and emits a RuntimeWarning).
            axis.axhline(if_score.mean(), ls='-', color='red', lw=3, label='IF default')

    fig.suptitle(f"Different distance functions combinations comparisons - {data_type}")
    fig.savefig(TOP_DIR / f"top_n_dists_{data_type}.png")
    plt.close(fig)
    

In [6]:
# Produce the top-n distance figure for every modality; each call prints the
# CSV path it looks for and returns early when that file is missing.
for modality in DATA_MODALITIES:
    plot_top_n_dists(modality)

..\results\selected_distances\graph.csv
..\results\selected_distances\numerical.csv
..\results\selected_distances\binary.csv
..\results\selected_distances\nominal.csv
..\results\selected_distances\timeseries.csv
..\results\selected_distances\NLP.csv
..\results\selected_distances\cv.csv
..\results\selected_distances\multiomics.csv
..\results\selected_distances\seq_of_sets.csv


Save scores obtained by different distance functions

In [None]:
from collections import defaultdict
def generate_ranking(data_type, file):
    """Write a normalised score for every distance function of one modality.

    Reads ``RESULTS_DIR / "selected_distances" / f"{data_type}.csv"`` and
    returns without writing anything when that file does not exist. Within
    each dataset the rows are ordered by descending AUC and the row at place
    ``k`` (1-based) is worth ``1 / k`` points. A row's ``distances`` value is
    split on ``"_"`` and its points are shared equally between the resulting
    names. Scores are divided by their total before being written.

    Args:
        data_type: Modality name, used as the CSV file stem and section header.
        file: Writable text stream receiving the report.
    """
    path =  RESULTS_DIR / "selected_distances"  / f"{data_type}.csv"
    if not path.exists():
        return
    # Write to the stream passed in, not to a global that happens to be open.
    file.write(f"{data_type.upper()}===========\n")

    num_df = pd.read_csv(path)
    algorithms_rank = defaultdict(float)
    for dataset_name in np.unique(num_df.dataset_name):
        scores = num_df[num_df.dataset_name == dataset_name]
        scores = scores.sort_values(by="auc", ascending=False)
        n_combinations = scores.shape[0]

        # Harmonic weights: 1, 1/2, 1/3, ... for places 1, 2, 3, ...
        points_achieved = 1 / np.arange(1, n_combinations+1)
        for points, candidate in zip(points_achieved, scores.distances):
            candidates = candidate.split("_")
            for c in candidates:
                algorithms_rank[c] += points / len(candidates)

    normalizer = sum(algorithms_rank.values())
    for name, score in sorted(algorithms_rank.items(), key=lambda x: x[1], reverse=True):
        file.write(f"{name} -> {score / normalizer}\n")

# Collect the per-modality distance scores into a single text report.
# The open stream is handed to generate_ranking for each modality in turn.
with open(TOP_DIR / "distances_ranking.txt", "w") as f:
    f.write("SCORE OF DISTANCES FOR EACH MODALITY TESTED IN SENSITIVITY ANALYSIS\n")
    for modality in DATA_MODALITIES:
        generate_ranking(modality, f)

Visualizing results of final experiments

In [None]:
EXPERIMENT_DIR = FIG_DIR / "experiments"
EXPERIMENT_DIR.mkdir(exist_ok=True)

# One grouped bar chart (dataset on x, classifier as hue) per modality.
experiment_modalities = (
    "graph", "numerical", "binary", "nominal", "timeseries",
    "NLP", "cv", "multiomics", "seq_of_sets",
)
for dtype in experiment_modalities:
    df = pd.read_csv(f'../results/experiments/{dtype}.csv')
    axis = sns.barplot(data=df, x="dataset_name", y="auc", hue="clf")
    fig = axis.figure
    fig.set_size_inches(15, 8)
    fig.savefig(EXPERIMENT_DIR / f"final_results_{dtype}.png")
    plt.close()

Performing statistical analysis

In [None]:
from scipy.stats import friedmanchisquare
import scikit_posthocs as sp

def read_all_data(subset=("binary", "nominal", "timeseries", "cv", "multiomics", "graph", "NLP", "numerical", "seq_of_sets"), table_form=False):
    """Load the final experiment results of the requested modalities.

    Args:
        subset: Iterable of modality names; ``../results/experiments/{name}.csv``
            is read for each. The default spells ``"NLP"`` exactly like the
            other cells of this notebook so the same file is found on
            case-sensitive file systems.
        table_form: When true, return a dataset x classifier table of mean AUC
            instead of the raw rows.

    Returns:
        The concatenated raw rows (original per-file indices are kept), or the
        pivoted mean-AUC table when ``table_form`` is true.

    Raises:
        ValueError: From ``pd.concat`` when ``subset`` is empty.
    """
    frames = [pd.read_csv(f'../results/experiments/{dtype}.csv') for dtype in subset]
    df = pd.concat(frames)

    if table_form:
        df = df.groupby(['dataset_name', 'clf']).mean(numeric_only=True).reset_index()
        df = pd.pivot_table(df, values='auc', index=['dataset_name'], columns=['clf'])
    return df

Visualizing the standard deviation of the scores obtained by different algorithms on different datasets

In [None]:
DEVIATION_DIR = FIG_DIR / "deviation_analysis"
DEVIATION_DIR.mkdir(exist_ok=True)

# Standard deviation of AUC per (dataset, classifier), reshaped to a
# dataset x classifier table for plotting.
std_per_group = (
    read_all_data()
    .groupby(['dataset_name', 'clf'])
    .std(numeric_only=True)
    .reset_index()
)
std_table = pd.pivot_table(std_per_group, values='auc', index=['dataset_name'], columns=['clf'])

std_table.plot(kind='bar', figsize=(25, 10))
plt.xticks(rotation=45)
plt.title("std of AUC for each dataset and classifier")
plt.savefig(DEVIATION_DIR / "std_auc.png")
plt.close()

Saving results of Friedman, Posthoc and mean rank of every algorithm

In [None]:
# Dataset x classifier table of mean AUC over all modalities (read_all_data default subset).
df_table = read_all_data(table_form=True)

In [None]:
def calculate_mean_rank(df):
    """Return the mean rank of every column of ``df``, best (lowest) first.

    Columns are ranked within each row with the highest value receiving rank 1
    (ties get the average rank). The ranks are averaged over the rows and the
    result is sorted ascending.

    Args:
        df: Table with one row per dataset and one column per classifier.
    """
    # Rank the table that was passed in, not the notebook-level whole-data table.
    return df.rank(axis=1, ascending=False).mean(axis=0).sort_values()

def calculate_posthoc(df):
    """Run the scikit-posthocs Wilcoxon post-hoc test on the columns of ``df``.

    Each column of ``df`` is passed to ``sp.posthoc_wilcoxon`` as one group.
    The result is relabelled with the column names of ``df`` on both axes
    (presumably a square matrix of pairwise p-values - confirm against the
    scikit-posthocs documentation).
    """
    groups = df.to_numpy().T
    pairwise = sp.posthoc_wilcoxon(groups)
    pairwise.index = df.columns.values
    pairwise.columns = df.columns
    return pairwise

def calculate_friedman(df):
    """Run the Friedman chi-square test with every column of ``df`` as one group.

    Returns the result object of ``scipy.stats.friedmanchisquare``.
    """
    groups = [df[column].to_numpy() for column in df.columns]
    return friedmanchisquare(*groups)

#Analysis for entire data
friedman = calculate_friedman(df_table)
posthoc = calculate_posthoc(df_table)
mean_rank = calculate_mean_rank(df_table)

#Every subset separately: all modalities, each modality alone, and everything
#except "numerical" (labelled "complex" below).
#A list comprehension keeps the modality order deterministic.
complex_modalities = [m for m in DATA_MODALITIES if m != "numerical"]
subsets = [DATA_MODALITIES, *[[x] for x in DATA_MODALITIES], complex_modalities]
df_tests = []
posthoc_idx = posthoc.index + "_post"
for set_of_modalities in subsets:
    df = read_all_data(set_of_modalities, table_form=True)
    try:
        # Number of significant (p < 0.05) pairwise differences per classifier.
        # Kept in its own variable so the whole-data `posthoc` matrix above is
        # still intact when the report is written.
        subset_posthoc = (calculate_posthoc(df) < 0.05).sum()
        subset_posthoc.index = posthoc_idx
    except Exception:
        # Best effort: a subset on which the post-hoc test cannot be computed
        # is reported with zero significant differences.
        subset_posthoc = pd.Series([0] * len(posthoc_idx), index=posthoc_idx)

    row = {**calculate_mean_rank(df).to_dict(), **subset_posthoc.to_dict()}
    row["fried_passed"] = calculate_friedman(df).pvalue < 0.05

    if set_of_modalities == DATA_MODALITIES:
        row["idx"] = "all"
    elif len(set_of_modalities) == len(DATA_MODALITIES) - 1:
        row["idx"] = "complex"
    else:
        row["idx"] = "_".join(set_of_modalities)
    df_tests.append(row)

separately = pd.DataFrame(df_tests).set_index("idx")

with open(FIG_DIR / "statistical_tests.txt", "w") as f:
    f.write(f"p-value whole data: {friedman.pvalue}\n\n\n")
    f.write(f"statistic whole data: {friedman.statistic}\n\n\n")
    f.write(f"mean rank whole data: {mean_rank}\n\n\n")
    f.write(f"posthoc whole data:\n{posthoc}\n\n\n")
    f.write(f"separately:\n{separately}\n\n\n")

Comparison of the results of the original IF implementation and RSIF in "IF" mode. RSIF behaves like IF when `dummy_projection` is set.

In [None]:
# Mean AUC per dataset for each classifier side by side, plus the
# IForest-minus-RSIF difference, saved as CSV.
mean_auc = (
    pd.read_csv("../results/numerical_dummy_dist.csv")
    .groupby(["clf", "dataset_name"])["auc"]
    .mean()
    .reset_index()
)
df = mean_auc.pivot(index="dataset_name", columns="clf", values="auc")
df['diff'] = df['IForest'] - df['RSIF']
df.to_csv(DEVIATION_DIR / "IF_implementation_comparison.csv")

Visualization of number of holdouts needed for the mean and standard deviation to converge

In [None]:
# Build, for every (dataset, classifier) pair, the running mean and standard
# deviation of the score over the first 1, 2, ..., N hold-outs.
df = pd.read_csv(RESULTS_DIR / "20holdouts_sensitivity_datasets.csv")
df = df.set_index(["dataset_name", "clf"]).sort_index()
data_to_plot = []
# The MultiIndex converts to an array of (dataset_name, clf) tuples; np.unique
# removes the repeats so each pair is visited once.
for dataset_name, clf in np.unique(df.index.to_numpy()):
    results = df.loc[(dataset_name, clf)].reset_index()
    for n_holdouts in range(1, len(results)+1):
        # NOTE(review): the score is taken by position (column 3 after
        # reset_index, i.e. the second non-index CSV column) - presumably "auc";
        # confirm against the CSV schema.
        samples = results.iloc[:n_holdouts, 3].to_numpy()
        data_to_plot.append({
            "dataset_name": dataset_name,
            "clf": clf,
            "n_holdouts": n_holdouts,
            # ndarray.std() uses NumPy's default ddof=0 (population std).
            "auc_std": samples.std(),
            "auc_mean": samples.mean(),
            "samples": samples
        })
df_plt = pd.DataFrame(data_to_plot)

In [None]:
plt.rcParams["figure.figsize"] = (25,5)

# Upper y-axis limit for each statistic that is plotted.
y_upper_limit = {"mean": 1, "std": 0.2}
for stat in y_upper_limit:
    g = sns.relplot(
        data=df_plt,
        x="n_holdouts",
        y=f"auc_{stat}",
        hue="clf",
        marker="o",
        col="dataset_name",
        col_wrap=5,
        facet_kws={'sharey': False, 'sharex': False},
        kind="line",
    )
    g.set(ylim=(0, y_upper_limit[stat]))
    g.fig.suptitle(f'{stat.upper()} score vs number of repeated holdouts fore every dataset and classifier', fontsize=16)
    g.fig.subplots_adjust(top=0.9)
    fig = plt.gcf()
    fig.savefig(DEVIATION_DIR / f"{stat}_holdouts_convergence.png")
    plt.close()

Visualizing results of hyperparameters search.

In [None]:
# Raw (un-pivoted) final experiment rows for all modalities.
df_final_results = read_all_data()

In [None]:
# Number the repeated measurements ("folds") inside every consecutive run of
# rows that share the same dataset/classifier combination: 0, 1, 2, ...
# The run length is taken from the data, so this works for any number of
# repetitions per combination, not only 10.
combination = (df_final_results["dataset_name"] + df_final_results["clf"]).to_numpy()
positions = np.arange(len(combination))
starts_run = np.ones(len(combination), dtype=bool)
starts_run[1:] = combination[1:] != combination[:-1]
# Position of the first row of the run each row belongs to.
run_start = np.maximum.accumulate(np.where(starts_run, positions, 0))
fold_ids = (positions - run_start).tolist()

df_final_results['fold'] = fold_ids
# One row per (dataset, dataset type, fold) with one AUC column per classifier.
df_final_results = pd.pivot_table(df_final_results, columns="clf", values="auc", index=['dataset_name', 'dataset_type', 'fold']).reset_index()

In [None]:
import pickle
from pathlib import Path
from rsif.distance_functions import GraphDist
from rsif.distance import SelectiveDistance
def get_shortcut(dist):
    """Return a short display name for a distance object.

    * ``SelectiveDistance``: the name of its ``projection_func``.
    * ``GraphDist``: the class name of the wrapped ``distance``.
    * anything else: the object's own class name.
    """
    if isinstance(dist, SelectiveDistance):
        return dist.projection_func.__name__
    named_object = dist.distance if isinstance(dist, GraphDist) else dist
    return named_object.__class__.__name__



def get_single_result(file, all_entries = None):
    """Read one pickled distance-search result and summarise it per fold.

    The file stem is expected to look like ``"<clf>_<dataset>"``. The pickle is
    read as a mapping ``fold_id -> sequence of (auc, distances, ...)`` entries;
    the first entry supplies the fold's ``auc`` and at most the first four
    entries supply the distance names (joined with ``"++"``).

    Args:
        file: ``pathlib.Path`` of the pickle file.
        all_entries: Optional list; when given, one dict per fold is appended
            to it and ``None`` is returned.

    Returns:
        ``None`` when ``all_entries`` is given (or the pickle holds no folds);
        otherwise the dict of the first fold only.
    """
    name_parts = file.stem.split("_")
    clf = name_parts[0] + "_tuned"
    dataset = "_".join(name_parts[1:])

    # SECURITY: pickle.load can execute arbitrary code - only read files
    # produced by this project.
    with open(file, "rb") as f:
        # Load through the handle managed by `with`, so it is closed on exit
        # and the file is not opened a second time.
        best_distances = pickle.load(f)

    for fold_id, distances in best_distances.items():
        result = {"clf": clf}
        n_dist = min(len(distances), 4)
        if n_dist > 0:
            result["auc"] = distances[0][0]
        distances_list = [
            "++".join(get_shortcut(d) for d in distances[n][1])
            for n in range(n_dist)
        ]
        result["fold"] = fold_id
        result[f'distances_{clf}'] = distances_list
        result["dataset_name"] = dataset
        if all_entries is None:
            return result
        all_entries.append(result)
            

# Result files are split into three groups by file stem:
#   * RSIF results for the mixed datasets below -> `separate_results`
#   * every other non-LOF pickle                -> `entries` (forest_df)
#   * LOF CSV files                             -> `lof_entries` (lof_df)
MIXED_DATASET_MARKERS = ("ovarian", "breast", "rosmap")

entries = []
separate_results = []
lof_entries = []

# Sorted so the row order does not depend on the OS directory listing order.
for file in sorted(Path("../best_distances/").iterdir()):
    stem = file.stem
    is_lof = "LOF" in stem
    is_mixed_rsif = "RSIF" in stem and any(marker in stem for marker in MIXED_DATASET_MARKERS)

    if is_mixed_rsif:
        get_single_result(file, separate_results)
    elif not is_lof:
        get_single_result(file, entries)

    if is_lof:
        df = pd.read_csv(file)
        distances = []
        for i, row in df.iterrows():
            distances.append((row['auc'], row["metric"]))
            # NOTE(review): assumes rows come in consecutive groups of three
            # per fold and a default 0..n-1 index - confirm against the files.
            if (i+1) % 3 == 0:
                distances.sort(key=lambda x: x[0], reverse=True)
                lof_entries.append({
                    'clf': "LOF_tuned",
                    'distances_LOF_tuned': [d[1] for d in distances],
                    'auc': distances[0][0],
                    'fold': row['fold_id'],
                    'dataset_name': row['dataset'],
                })
                distances = []

forest_df = pd.DataFrame(entries)
# Rows accumulate across all LOF files; they are kept apart from the forest
# entries so lof_df never aliases them.
lof_df = pd.DataFrame(lof_entries)



In [None]:
# FOR MIXED DATA
# The last character of each dataset name is dropped (presumably a per-view
# suffix - confirm), then the best AUC per (fold, dataset) is kept.
sep = (
    pd.DataFrame(separate_results)
    .assign(dataset_name=lambda frame: frame["dataset_name"].map(lambda name: name[:-1]))
    .groupby(['fold', 'dataset_name'])["auc"]
    .max()
    .reset_index()
    .assign(**{"clf": "RSIF_tuned", "distances_RSIF_tuned": "not implemented"})
)

forest_df = pd.concat([forest_df, sep])

In [None]:
# Stack forest and LOF search results, then reshape to one row per
# (dataset, fold) with one validation-AUC column per tuned classifier.
df_hyper = pd.concat([forest_df, lof_df], axis=0)
df_hyper_pivot = pd.pivot_table(df_hyper, columns=["clf"], values="auc", index=['dataset_name', 'fold']).reset_index()
for dist_name in [ "distances_LOF_tuned", 'distances_ISF_tuned', "distances_RSIF_tuned"]:
    # NOTE(review): the distance lists are attached by position - rows with a
    # value are sorted by (dataset_name, fold) and re-indexed 0..n-1. This lines
    # up with the pivot only if every classifier has exactly the same set of
    # (dataset, fold) pairs - confirm.
    df_hyper_pivot[dist_name] = df_hyper[~df_hyper[dist_name].isna()].sort_values(by=['dataset_name', 'fold'])[dist_name].reset_index(drop=True)

In [None]:
# Put test-set results and validation-set (tuned) results side by side.
# NOTE(review): axis=1 concat aligns on the row index of both frames, so this
# relies on both being in the same (dataset, fold) order - confirm. Rows with
# any missing value are dropped.
df_final = pd.concat([df_final_results, df_hyper_pivot.loc[:, ["distances_LOF_tuned","distances_ISF_tuned", "ISF_tuned", "LOF_tuned", "RSIF_tuned", "distances_RSIF_tuned"]]], axis=1).dropna()
# Facet label: first 10 characters of the dataset name plus the dataset type.
df_final['dataset_name'] = df_final['dataset_name'].str.slice(stop=10) + "_" + df_final['dataset_type']

In [None]:
DISTANCE_SEARCH_DIR = FIG_DIR / "distance_search"
DISTANCE_SEARCH_DIR.mkdir(exist_ok=True)
# One interactive scatter per algorithm: validation-set AUC (x) against
# test-set AUC (y), one facet per dataset.
for algorithm in ["RSIF", "ISF", "LOF"]:
    # Overwritten on every iteration: test AUC of this algorithm minus IForest.
    df_final["diff_to_if"] = df_final[algorithm] - df_final['IForest']
    # The colour scale has a hard step at the midpoint of range_color (-1, 1):
    # negative differences are red, positive ones green.
    fig = px.scatter(df_final, x = f"{algorithm}_tuned", y = algorithm , color = "diff_to_if", hover_data= [f"distances_{algorithm}_tuned", "fold"], facet_col="dataset_name", 
                facet_col_wrap=5, height=2000, title = f"Performance on validation vs test set for {algorithm}",
                labels = {"diff_to_if": "Difference to IForest", f"{algorithm}_tuned": "Performance on validation set", algorithm: "Performance on test set"},
                range_color = (-1,1),
                color_continuous_scale = [(0, "red"), (0.5, "red"), (0.5,"green"), (1, "green")])

    # Diagonal from (0, 0) to (1, 1) in every facet, where validation and test AUC are equal.
    fig = fig.add_shape(type= 'line',
                yref= 'y', y0=0, y1= 1,
                xref= 'x', x0=0, x1= 1, row="all", col="all",
                line=dict(
                    width=1,
                    dash="dashdot",
                )
            )

    fig.write_html(DISTANCE_SEARCH_DIR / f"{algorithm}_distance_search.html")

Visualizing how the projection changes as we play with pairs that create it.

In [None]:
from rsif.distance_functions import EuclideanDist, ProperEuclideanDist
from rsif.distance import TrainDistanceMixin

def generate_data(n_dim, proper_euclidean=False):
    """Sample random points and list all pairwise distances in ascending order.

    Args:
        n_dim: Number of features of the 100 generated points (normal, mean 0, std 3).
        proper_euclidean: Use ``ProperEuclideanDist`` instead of ``EuclideanDist``.

    Returns:
        ``(X, entire_distance, pairs, n_pairs)`` where ``entire_distance`` is
        the ``TrainDistanceMixin`` with precomputed distances, ``pairs`` is a
        frame with columns ``x``, ``y`` (point indices) and ``V`` (distance)
        excluding ``x == y`` and sorted by ``V``, and ``n_pairs`` is its length.
    """
    X = np.random.normal(0, 3, size=(100, n_dim))

    metric = ProperEuclideanDist() if proper_euclidean else EuclideanDist()
    entire_distance = TrainDistanceMixin(metric)
    entire_distance.precompute_distances(X, n_jobs=-1)

    # Long format: one row per ordered pair of points.
    pairs = pd.DataFrame(entire_distance.distance_matrix).unstack().reset_index()
    pairs.columns = ['x', 'y', 'V']
    distinct_pairs = pairs[pairs.x != pairs.y]
    ordered_pairs = distinct_pairs.sort_values(by=['V'], ascending=True)

    return X, entire_distance, ordered_pairs, ordered_pairs.shape[0]

In [None]:
# Output directory for the projection visualisations produced in this cell.
PROJECTION_PLOTS_PATH = FIG_DIR / "projection_plots"
PROJECTION_PLOTS_PATH.mkdir(exist_ok=True)

def rand_jitter(arr):
    """Return ``arr`` with Gaussian noise added to every element.

    The noise standard deviation is 5.5% of the value range of ``arr``
    (``max(arr) - min(arr)``). Noise is drawn from NumPy's global random state.
    """
    value_range = max(arr) - min(arr)
    noise_scale = .055 * value_range
    noise = np.random.randn(len(arr))
    return arr + noise * noise_scale

def generate_plot(X, projection, x, y, n_dim = 1, old_df_to_plot = None, v = None):
    """Combine points and their projection values; draw them when ``n_dim == 1``.

    Args:
        X: Data points, one row per point with ``n_dim`` columns.
        projection: Projection value of every point.
        x, y: Indices of the two points marked "blue"; ``.item()`` is called
            on them, so they must be NumPy scalars or 0-d arrays.
        n_dim: Number of feature columns in ``X``.
        old_df_to_plot: Frame returned by a previous call, or ``None``.
        v: Stored in the ``distance_p_q`` column when ``n_dim > 1``.

    Returns:
        ``None`` when ``n_dim == 1`` (the scatter is drawn as a side effect);
        otherwise the new frame, appended to ``old_df_to_plot`` when given.

    NOTE(review): for ``n_dim == 1`` this reads the module-level names ``ax``,
    ``col`` and ``row``, which must be set by the caller before each call.
    """
    columns = [f"x{i}" for i in range(n_dim)]
    df_to_plot = pd.concat([pd.DataFrame(X, columns=columns), pd.DataFrame(projection, columns = ["projection"])], axis=1)
    df_to_plot["used"] = "red"
    # Column position n_dim + 1 is "used": n_dim feature columns, then "projection", then "used".
    df_to_plot.iloc[x.item(), n_dim + 1] = "blue"
    df_to_plot.iloc[y.item(), n_dim + 1] = "blue"

    if n_dim == 1:
        # `col` is used as the first (row) index of the axes grid and `row` as the second.
        ax[col][row].scatter(df_to_plot.x0, df_to_plot.projection, c=df_to_plot.used, alpha=0.4)
        

    else:
        df_to_plot['distance_p_q'] = v
        if old_df_to_plot is not None:
            return pd.concat([old_df_to_plot, df_to_plot], axis = 0)
        return df_to_plot


# For both Euclidean variants and for 1-, 2- and 3-dimensional data, show how
# the projection changes with the pair (p, q) that defines it. Pairs are taken
# at 21 evenly spaced positions of the distance-sorted pair list, from the
# closest pair to the farthest one.
for proper_euclidean in [True, False]:
    # Defined once per variant so every n_dim branch can use it without
    # depending on the n_dim == 1 branch having run first.
    # (The file-name spelling is kept as is so existing outputs keep their names.)
    prefix = "proper_eulicdean" if proper_euclidean else "approx_euclidean"
    for n_dim in [1, 2, 3]:
        df_to_plot = None
        X, entire_distance, df, n_rows = generate_data(n_dim, proper_euclidean)
        if n_dim == 1:
            fig, ax = plt.subplots(7, 3, figsize=(35, 25))
        # generate_plot reads `ax`, `col` and `row` from module scope;
        # `col` indexes the grid rows and `row` the grid columns.
        row = 0
        col = 0
        for idx in range(0, n_rows + 1, n_rows//20):
            if idx == n_rows:
                # The last step lands one past the end; use the farthest pair instead.
                idx = n_rows - 1
            x, y, v = df.iloc[idx]
            x, y = np.array(int(x)), np.array(int(y))

            projection = entire_distance.project(np.arange(X.shape[0]), x, y)
            df_to_plot = generate_plot(X, projection, x, y, n_dim, df_to_plot, v)

            if n_dim == 1:
                ax[col][row].set_title(f"Projection value when distance between p and q  is {v}")
                ax[col][row].set_xlabel("x")
                ax[col][row].set_ylabel('projection value')

            row += 1
            if row == 3:
                row = 0
                col += 1

        if n_dim == 1:
            fig.tight_layout()
            plt.savefig(PROJECTION_PLOTS_PATH / f"{n_dim}_dim_{prefix}.png")
            plt.close()
            continue

        # Interactive plots: recode the colour flag into a marker size/symbol
        # value (0.5 for ordinary points, 3 for the two points of the pair).
        df_to_plot['used'] = [0.5 if flag == "red" else 3 for flag in df_to_plot['used']]

        if n_dim == 2:
            fig = px.scatter(df_to_plot, x = "x0", y = "x1" , color = "projection", facet_col="distance_p_q", facet_col_wrap=5, height=2000, title = f"Projection values depending on p and q selected", size="used", symbol="used", range_color=(-100,100))
            fig.write_html(PROJECTION_PLOTS_PATH / f"2_dim_{prefix}.html")

        if n_dim == 3:
            for i, v in enumerate(df_to_plot.distance_p_q.unique()):
                # Separate name so the pair table `df` is not rebound.
                df_single_pair = df_to_plot[df_to_plot.distance_p_q == v]
                fig = px.scatter_3d(df_single_pair, x = "x0", y = "x1", z="x2", title = f"Projection when distance between p and q is equal to {v}", color='projection', symbol="used", size='used', range_color=(-100,100))
                fig.write_html(PROJECTION_PLOTS_PATH / f"3_dim_{i}_{prefix}.html")
