## Directory parameters

In [1]:
# Update working directory

%cd /home/hugo/Bureau/PoincareMSA/

/home/hugo/Bureau/PoincareMSA


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import sys

# Add the project root to Python path
project_root = "/home/hugo/Bureau/PoincareMSA"
if project_root not in sys.path:
    sys.path.append(project_root)

## Librairies

In [22]:
import os
import itertools
import pandas as pd
import torch

from scripts.visualize_projection.pplots_new import (
    read_embeddings,
    plot_embedding_interactive
)

from scripts.visualize_projection.pplots_new import read_embeddings, plot_embedding_interactive
from scripts.build_poincare_map.poincare_maps import PoincareMaps
from scripts.build_poincare_map.embedding_quality_score import get_quality_metrics
from sklearn.cluster import AgglomerativeClustering, SpectralClustering
from sklearn.metrics import adjusted_rand_score, fowlkes_mallows_score
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


## Parameters

In [4]:
data_type = "pssm"       # "plm", "pssm"                       # Ne fonctionnent pas pour l'instant : "RFA_matrix", "plm_aae"

# Annotation CSV
path_annotation_csv = "examples/kinases/kinase_group_new.csv"

# Embeddings or fasta folder depending on type
embedding_paths = {
    "plm":       "embeddings/ankh_base_kinases/",
    "plm_aae":   "embeddings/aae_embeddings/ankh_base_kinases/",
    "pssm":      "examples/kinases/glob.mfasta",
    "RFA_matrix": None
}

# Output bases
output_base = "grid_search_results/" + data_type
os.makedirs(output_base, exist_ok=True)

In [5]:
grid = {
    "knn":       [5],
    "gamma":     [0.5, 1],
    "sigma":     [1],
    "cospca":    [0],
    "epochs":    [10],
    "seed":      [0,1,2],
    "distance":  ["cosine"],
}

## Usefull functions

In [6]:
def get_scores(true_labels, pred_labels):
    ARI = adjusted_rand_score(true_labels, pred_labels)
    FMS = fowlkes_mallows_score(true_labels, pred_labels)
    return ARI, FMS

def detect_cluster(distances, n_clusters=2, clustering_name='agglomerative'):
    if clustering_name == 'agglomerative':
        clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage='average').fit(distances)
        labels = clustering.labels_
    return labels


In [7]:
def build_command(params, folder_output, project_root=project_root):

    if data_type in ["plm", "plm_aae"]:
        input_path = embedding_paths[data_type]
        plm_flag = "True"
        mid_output = folder_output + "/matrices/"
        os.makedirs(mid_output, exist_ok=True)
    elif data_type == "pssm":
        # Prepare data with pssm preprocessing
        pssm_prep_path = "kinases_data/fasta0.9"
        if not os.path.exists(pssm_prep_path):
            os.system(f"bash scripts/prepare_data/create_projection.sh scripts/prepare_data examples/kinases/glob.mfasta kinases_data 0.9")

        input_path = pssm_prep_path
        plm_flag = "False"
        mid_output = folder_output + "/matrices/"
        os.makedirs(mid_output, exist_ok=True)
    elif data_type == "RFA_matrix":
        input_path = "None"
        plm_flag = "True"
        mid_output = "RFA_matrix/kinases/with_plm_embeddings/"

    cmd = (
        f"PYTHONPATH={project_root}:$PYTHONPATH python scripts/build_poincare_map/main.py "
        f"--input_path {input_path} "
        f"--output_path {folder_output} "
        f"--plm_embedding {plm_flag} "
        f"--matrices_output_path {mid_output} "
        f"--distlocal {params['distance']} "
        f"--gamma {params['gamma']} "
        f"--pca {params['cospca']} "
        f"--epochs {params['epochs']} "
        f"--seed {params['seed']} "
        f"--knn {params['knn']} "
    )
    return cmd

In [8]:
def load_projection(folder_output, params):
    possible_files = [
        f"{folder_output}/PM{params['knn']}sigma={params['sigma']:.2f}gamma={params['gamma']:.2f}{params['distance']}pca={params['cospca']}_seed{params['seed']}.csv",
        f"{folder_output}/matrices/PM{params['knn']}sigma={params['sigma']:.2f}gamma={params['gamma']:.2f}{params['distance']}pca={params['cospca']}_seed{params['seed']}.csv"
    ]

    for file in possible_files:
        if os.path.exists(file):
            return file

    return possible_files[0]

In [None]:
def compare_projections(results):
    # Convert results to DataFrame
    df_results = pd.DataFrame(results)

    # Create a new column that excludes the seed from the parameter string
    df_results["params_base"] = df_results["params"].apply(
        lambda x: "_".join([f"{k}{v}" for k, v in x.items() if k != "seed"])
    )

    # Add seed as a separate column for visualization
    df_results["seed"] = df_results["params"].apply(lambda x: x["seed"])

    # Calculate mean values for each parameter combination
    df_means = df_results.groupby("params_base").agg({
        "ARI": "mean",
        "FMS": "mean",
        "Qlocal": "mean",
        "Qglobal": "mean"
    }).reset_index()

    # Print available metrics
    print("\n=== Comparison of Projections ===")
    available_cols = ["params_base", "seed", "ARI", "FMS"]
    if "Qlocal" in df_results.columns and df_results["Qlocal"].notna().any():
        available_cols.append("Qlocal")
    if "Qglobal" in df_results.columns and df_results["Qglobal"].notna().any():
        available_cols.append("Qglobal")
    print(df_results[available_cols])

    # Print mean values
    print("\n=== Mean Values by Parameter Combination ===")
    mean_cols = ["params_base", "ARI", "FMS"]
    if "Qlocal" in df_means.columns:
        mean_cols.append("Qlocal")
    if "Qglobal" in df_means.columns:
        mean_cols.append("Qglobal")
    print(df_means[mean_cols])

    # Get unique parameter combinations and their indices
    unique_params = df_results["params_base"].unique()
    param_to_index = {param: idx for idx, param in enumerate(unique_params)}

    # Plot ARI if available
    if "ARI" in df_results.columns and df_results["ARI"].notna().any():
        fig_ari = px.strip(
            df_results,
            x="params_base",
            y="ARI",
            color="seed",
            title="ARI by parameter combination (colored by seed)"
        )

        # Add mean lines (horizontal dotted lines)
        for param_base, mean_val in zip(df_means["params_base"], df_means["ARI"]):
            if param_base in param_to_index:
                x_pos = param_to_index[param_base]

                # Add a horizontal dotted line for the mean
                fig_ari.add_shape(
                    type="line",
                    xref="x", yref="y",
                    x0=x_pos - 0.4, y0=mean_val,
                    x1=x_pos + 0.4, y1=mean_val,
                    line=dict(color="black", width=2, dash="dot"),
                    name=f"Mean {param_base}"
                )

                # Add a small text label for the mean value
                fig_ari.add_annotation(
                    x=x_pos + 0.45,
                    y=mean_val + 0.01,
                    text=f"{mean_val:.3f}",
                    showarrow=False,
                    font=dict(size=10)
                )

        fig_ari.update_layout(
            xaxis_title="Parameter combination (excluding seed)",
            yaxis_title="ARI",
            legend_title="Seed",
            showlegend=True
        )
        fig_ari.show()

    # Plot FMS if available
    if "FMS" in df_results.columns and df_results["FMS"].notna().any():
        fig_fms = px.strip(
            df_results,
            x="params_base",
            y="FMS",
            color="seed",
            title="FMS by parameter combination (colored by seed)"
        )

        # Add mean lines (horizontal dotted lines)
        for param_base, mean_val in zip(df_means["params_base"], df_means["FMS"]):
            if param_base in param_to_index:
                x_pos = param_to_index[param_base]

                # Add a horizontal dotted line for the mean
                fig_fms.add_shape(
                    type="line",
                    xref="x", yref="y",
                    x0=x_pos - 0.4, y0=mean_val,
                    x1=x_pos + 0.4, y1=mean_val,
                    line=dict(color="black", width=2, dash="dot"),
                    name=f"Mean {param_base}"
                )

                # Add a small text label for the mean value
                fig_fms.add_annotation(
                    x=x_pos + 0.45,
                    y=mean_val + 0.01,
                    text=f"{mean_val:.3f}",
                    showarrow=False,
                    font=dict(size=10)
                )

        fig_fms.update_layout(
            xaxis_title="Parameter combination (excluding seed)",
            yaxis_title="FMS",
            legend_title="Seed",
            showlegend=True
        )
        fig_fms.show()

    # Plot Qnx if available
    if "Qnx" in df_results.columns and any(len(qnx) > 0 for qnx in df_results["Qnx"]):
        # Create a palette for parameter combinations
        param_palette = dict(zip(
            df_results["params_base"].unique(),
            sns.color_palette("husl", n_colors=len(df_results["params_base"].unique())).as_hex()
        ))

        plt.figure(figsize=(14, 7))

        # Group by parameter combination
        for param_base, param_group in df_results.groupby("params_base"):
            # For each seed in this parameter group
            for _, row in param_group.iterrows():
                if isinstance(row["Qnx"], (list, np.ndarray)) and len(row["Qnx"]) > 0:
                    seed = row["seed"]
                    # Use a different line style for each seed
                    linestyle = '-' if seed == param_group["seed"].min() else '--'
                    sns.lineplot(
                        x=range(1, len(row["Qnx"]) + 1),
                        y=row["Qnx"],
                        color=param_palette[param_base],
                        linestyle=linestyle,
                        label=f"{param_base} (seed={seed})",
                        linewidth=2 if seed == param_group["seed"].min() else 1
                    )

        # Add mean lines for Qnx (average across seeds for each k value)
        qnx_means = {}
        max_k = 0
        for param_base, param_group in df_results.groupby("params_base"):
            for _, row in param_group.iterrows():
                if isinstance(row["Qnx"], (list, np.ndarray)):
                    max_k = max(max_k, len(row["Qnx"]))

        for k in range(1, max_k + 1):
            for param_base, param_group in df_results.groupby("params_base"):
                # Get all Qnx values for this k position
                qnx_vals = []
                for _, row in param_group.iterrows():
                    if isinstance(row["Qnx"], (list, np.ndarray)) and len(row["Qnx"]) >= k:
                        qnx_vals.append(row["Qnx"][k-1])
                if qnx_vals:
                    qnx_means[(param_base, k)] = np.mean(qnx_vals)

        # Plot the mean lines with horizontal dotted lines
        for (param_base, k), mean_val in qnx_means.items():
            plt.axhline(
                y=mean_val,
                color=param_palette[param_base],
                linestyle=':',
                linewidth=2
            )
            # Add a text label
            plt.text(
                x=max_k + 0.5,
                y=mean_val,
                s=f"{mean_val:.3f}",
                color=param_palette[param_base],
                va='center',
                fontsize=9
            )

        plt.xlabel("K")
        plt.ylabel("Qnx")
        plt.ylim([0, 1.1])
        plt.title("Qnx by parameter combination (different seeds)")
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        plt.show()


## Grid search & plot

In [18]:
print("=== Starting grid search ===")
df_annotation = pd.read_csv(path_annotation_csv)

# List to store the results of projections
results = []

# Define a custom color palette for visualization
kinase_palette = {-1 : "#c7c7c7", "OTHER": "#c7c7c7", "None" :"#c7c7c7", "NA" : "#c7c7c7", "Uncharacterized" : "#c7c7c7", "root": "#000000",
                  "TYR": "#bd065f", "CMGC": "#d5c203", "TKL": "#997e73","STE": "#80b412", # kinase groups 
                  "CK1": "#0dbae9", "AGC": "#00bba1", "CAMK":  "#1f6ed4", "NEK": "#8ce4fa", "RGC":"#f59a62"}

# Generate all parameter combinations
keys = list(grid.keys())
values = list(grid.values())
combinations = list(itertools.product(*values))

for combo in combinations:
    params = dict(zip(keys, combo))

    # Create a folder for each combination
    folder_name = "_".join([f"{k}{v}" for k, v in params.items()])
    folder_output = os.path.join(output_base, folder_name)
    os.makedirs(folder_output, exist_ok=True)
    print(f"\n--- Running projection for {folder_name} ---")

    # 1. Run Poincaré map
    cmd = build_command(params, folder_output + "/")
    print("CMD:", cmd)
    os.system(cmd)

    # 2. Load projection
    projection_file = load_projection(folder_output, params)
    if not os.path.exists(projection_file):
        print(f"Projection file not found: {projection_file}")
        continue

    df_emb = read_embeddings(projection_file, path_annotation_csv, withroot=False)

    # 3. Generate and display interactive plot for each parameter combination
    if data_type == "plm":
        title = f"Poincaré projection with embeddings: {folder_name}"
    elif data_type == "plm_aae":
        title = f"Poincaré projection with AAE embeddings: {folder_name}"
    elif data_type == "pssm":
        title = f"Poincaré projection without embeddings: {folder_name}"
    else:
        title = f"Poincaré projection with RFA matrix: {folder_name}"

    # Create and display the interactive plot
    fig = plot_embedding_interactive(
        df_emb,
        labels_name="1_Group",
        color_palette=kinase_palette,
        title=title,
        fontsize=11
    )
    fig.show()

    # Save the plot to HTML file
    output_html = os.path.join(folder_output, f"projection_{folder_name}.html")
    fig.write_html(output_html)
    print(f"✔ Saved projection to → {output_html}")

    # 4. Calculate quality metrics
    true_labels = df_emb["1_Group"].values
    coord_low = df_emb[["pm1", "pm2"]].values

    # Load original high-dimensional data
    if data_type in ["plm", "plm_aae"]:
        embedding_files = [f for f in os.listdir(embedding_paths[data_type]) if f.endswith('.pt')]
        features = []
        for file in sorted(embedding_files):
            embedding = torch.load(os.path.join(embedding_paths[data_type], file))['embedding']
            features.append(np.mean(embedding, axis=0))
        features = np.array(features)
    elif data_type == "pssm":
        pssm_files = [f for f in os.listdir("examples/kinases/") if f.endswith('.aamtx')]
        features = []
        for file in sorted(pssm_files):
            pssm = np.loadtxt(os.path.join("examples/kinases/", file)).reshape(-1)
            features.append(pssm)
        features = np.array(features)

    # Calculate all quality metrics
    try:
        model = PoincareMaps(coord_low)
        model.get_distances()
        D_proj = model.distances

        # Detect clusters
        clusters = detect_cluster(D_proj, n_clusters=len(np.unique(true_labels)))
        ARI, FMS = get_scores(true_labels, clusters)

        # Calculate Q metrics
        Qlocal, Qglobal, Kmax, df_Q = get_quality_metrics(
            coord_high=features,
            coord_low=coord_low,
            distance="poincare",
            setting="manifold",
            k_neighbours=5
        )

        # Store all results
        result = {
            "params": params,
            "ARI": ARI,
            "FMS": FMS,
            "Qlocal": Qlocal,
            "Qglobal": Qglobal,
            "Qnx": df_Q.Qnx.to_numpy() if hasattr(df_Q, 'Qnx') else np.zeros(5),
            "projection_file": projection_file,
        }
    except Exception as e:
        print(f"Error calculating metrics: {e}")
        # Store partial results if metrics calculation fails
        result = {
            "params": params,
            "ARI": ARI if 'ARI' in locals() else None,
            "FMS": FMS if 'FMS' in locals() else None,
            "Qlocal": None,
            "Qglobal": None,
            "Qnx": np.zeros(5),
            "projection_file": projection_file,
        }

    results.append(result)

print("\n=== Grid search complete ===")

=== Starting grid search ===

--- Running projection for knn5_gamma0.5_sigma1_cospca0_epochs10_seed0_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed0_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed0_distancecosine//matrices/ --distlocal cosine --gamma 0.5 --pca 0 --epochs 10 --seed 0 --knn 5 
CUDA: True
Random seed set as 0
497 proteins found in folder kinases_data/fasta0.9.
No root detected
['74.aamtx', '358.aamtx', '433.aamtx', '445.aamtx', '128.aamtx', '246.aamtx', '57.aamtx', '421.aamtx', '62.aamtx', '61.aamtx', '320.aamtx', '136.aamtx', '337.aamtx', '223.aamtx', '200.aamtx', '446.aamtx', '50.aamtx', '190.aamtx', '217.aamtx', '289.aamtx']
74.aamtx
Prepare data: tensor construction
Prepare data: successfully terminated

loss: 6.57537: 100%|████████████████████████████| 10/10 [00:02<00:00,  3.45it/s]


PM computed in 2.90 sec

loss = 6.575e+00
time = 0.055 min


Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


✔ Saved projection to → grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed0_distancecosine/projection_knn5_gamma0.5_sigma1_cospca0_epochs10_seed0_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--- Running projection for knn5_gamma0.5_sigma1_cospca0_epochs10_seed1_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed1_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed1_distancecosine//matrices/ --distlocal cosine --gamma 0.5 --pca 0 --epochs 10 --seed 1 --knn 5 
CUDA: True
Random seed set as 1
497 proteins found in folder ki

loss: 6.58757: 100%|████████████████████████████| 10/10 [00:01<00:00,  5.26it/s]
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


PM computed in 1.90 sec

loss = 6.588e+00
time = 0.038 min


✔ Saved projection to → grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed1_distancecosine/projection_knn5_gamma0.5_sigma1_cospca0_epochs10_seed1_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--- Running projection for knn5_gamma0.5_sigma1_cospca0_epochs10_seed2_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed2_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed2_distancecosine//matrices/ --distlocal cosine --gamma 0.5 --pca 0 --epochs 10 --seed 2 --knn 5 
CUDA: True
Random seed set as 2
497 proteins found in folder ki

loss: 6.56583: 100%|████████████████████████████| 10/10 [00:01<00:00,  6.54it/s]


PM computed in 1.53 sec

loss = 6.566e+00
time = 0.032 min


Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


✔ Saved projection to → grid_search_results/pssm/knn5_gamma0.5_sigma1_cospca0_epochs10_seed2_distancecosine/projection_knn5_gamma0.5_sigma1_cospca0_epochs10_seed2_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--- Running projection for knn5_gamma1_sigma1_cospca0_epochs10_seed0_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed0_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed0_distancecosine//matrices/ --distlocal cosine --gamma 1 --pca 0 --epochs 10 --seed 0 --knn 5 
CUDA: True
Random seed set as 0
497 proteins found in folder kinases_da

loss: 6.24138: 100%|████████████████████████████| 10/10 [00:03<00:00,  2.99it/s]
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


PM computed in 3.34 sec

loss = 6.241e+00
time = 0.063 min


✔ Saved projection to → grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed0_distancecosine/projection_knn5_gamma1_sigma1_cospca0_epochs10_seed0_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--- Running projection for knn5_gamma1_sigma1_cospca0_epochs10_seed1_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed1_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed1_distancecosine//matrices/ --distlocal cosine --gamma 1 --pca 0 --epochs 10 --seed 1 --knn 5 
CUDA: True
Random seed set as 1
497 proteins found in folder kinases_data/f

loss: 6.24974: 100%|████████████████████████████| 10/10 [00:01<00:00,  7.50it/s]


PM computed in 1.33 sec

loss = 6.250e+00
time = 0.029 min


Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


✔ Saved projection to → grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed1_distancecosine/projection_knn5_gamma1_sigma1_cospca0_epochs10_seed1_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

--- Running projection for knn5_gamma1_sigma1_cospca0_epochs10_seed2_distancecosine ---
CMD: PYTHONPATH=/home/hugo/Bureau/PoincareMSA:$PYTHONPATH python scripts/build_poincare_map/main.py --input_path kinases_data/fasta0.9 --output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed2_distancecosine/ --plm_embedding False --matrices_output_path grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed2_distancecosine//matrices/ --distlocal cosine --gamma 1 --pca 0 --epochs 10 --seed 2 --knn 5 
CUDA: True
Random seed set as 2
497 proteins found in folder kinases_data/f

loss: 6.21986: 100%|████████████████████████████| 10/10 [00:01<00:00,  6.52it/s]


PM computed in 1.53 sec

loss = 6.220e+00
time = 0.033 min


Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.


✔ Saved projection to → grid_search_results/pssm/knn5_gamma1_sigma1_cospca0_epochs10_seed2_distancecosine/projection_knn5_gamma1_sigma1_cospca0_epochs10_seed2_distancecosine.html
Error calculating metrics: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

=== Grid search complete ===


## Results

In [29]:
# Call comparison function
compare_projections(results)


=== Comparison of Projections ===
                                         params_base  seed       ARI       FMS
0  knn5_gamma0.5_sigma1_cospca0_epochs10_distance...     0  0.362651  0.519171
1  knn5_gamma0.5_sigma1_cospca0_epochs10_distance...     1  0.406580  0.558860
2  knn5_gamma0.5_sigma1_cospca0_epochs10_distance...     2  0.388165  0.535729
3  knn5_gamma1_sigma1_cospca0_epochs10_distanceco...     0  0.401368  0.554389
4  knn5_gamma1_sigma1_cospca0_epochs10_distanceco...     1  0.405478  0.558559
5  knn5_gamma1_sigma1_cospca0_epochs10_distanceco...     2  0.396241  0.541197

=== Mean Values by Parameter Combination ===
                                         params_base       ARI       FMS  \
0  knn5_gamma0.5_sigma1_cospca0_epochs10_distance...  0.385799  0.537920   
1  knn5_gamma1_sigma1_cospca0_epochs10_distanceco...  0.401029  0.551382   

  Qlocal Qglobal  
0    NaN     NaN  
1    NaN     NaN  
