In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from apopfail.model import clean

# Seed NumPy's legacy global RNG before anything else runs.
np.random.seed(0)

# Ask scikit-learn transformers to return pandas objects rather than arrays.
sklearn.set_config(transform_output="pandas")

# Seaborn plot styling.
sns.set_style("whitegrid")

In [None]:
# Training features, read from parquet.
X = pd.read_parquet("../data/train_set_p53mutant.parquet")

# Training labels: the first line of the CSV is skipped, the first column is
# used as the index and the remaining column is named "target".
label_frame = pd.read_csv(
    "../data/train_labels_p53mutant.csv",
    index_col=0,
    skiprows=1,
    names=["target"],
)

# Encode the string classes as integers; any label not listed here maps to NaN.
label_codes = {"inactive": 0, "active": 1}
y = label_frame["target"].map(label_codes)

In [None]:
# Run the project's `clean` helper on features and labels together. Both names
# are rebound, so re-running this cell feeds already-cleaned data back in.
X, y = clean(X, y)

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 1: replace missing values using the "mean" strategy.
mean_imputer = SimpleImputer(strategy="mean")
X = mean_imputer.fit_transform(X)

# Step 2: standard-scale the imputed features.
standardizer = StandardScaler()
X = standardizer.fit_transform(X)

In [None]:
from sklearn.manifold import Isomap, trustworthiness

In [None]:
# `Isomap` here is the estimator imported from sklearn.manifold in the cell
# above. A later cell defines a wrapper class that reuses the name `Isomap`, so
# running this cell after that one would build the wrapper instead.
embedding = Isomap(n_components=15, n_neighbors=5)
X_transformed = embedding.fit_transform(X)

In [None]:
# Reconstruction error reported by the Isomap model fitted above.
embedding.reconstruction_error()

In [None]:
# Show the first five rows of the embedding; `.head` relies on the transform
# output having been configured as pandas in the setup cell.
X_transformed.head(5)

In [None]:
# Trustworthiness of the embedding relative to the input features, using the
# same neighbourhood size (5) as the Isomap fit.
trustworthiness(X, X_transformed, n_neighbors=5)

In [None]:
from drcomp import DimensionalityReducer
from sklearn.manifold import Isomap as _Isomap
from umap import UMAP


class Isomap(DimensionalityReducer):
    """Isomap exposed through the ``DimensionalityReducer`` interface.

    A thin adapter: the hyperparameters are stored on the instance, the
    underlying scikit-learn estimator is created inside :meth:`fit`, and
    :meth:`transform` delegates to it.

    Note:
        This class reuses the name ``Isomap`` and therefore rebinds the name
        that an earlier cell imported from ``sklearn.manifold``. From this cell
        onward the scikit-learn estimator is reachable as ``_Isomap``.

    Args:
        n_components: Number of dimensions of the embedding.
        n_neighbors: ``n_neighbors`` value handed to the scikit-learn estimator.
    """

    def __init__(self, n_components=2, n_neighbors=5):
        """Store the hyperparameters; the model itself is built in ``fit``."""
        super().__init__(n_components)
        self.n_neighbors = n_neighbors
        self.n_components = n_components

    def fit(self, X, y=None):
        """Fit a fresh scikit-learn Isomap model on ``X``.

        Args:
            X: Training data, forwarded unchanged to the estimator's ``fit``.
            y: Ignored. Accepted only so the method follows the scikit-learn
                ``fit(X, y=None)`` convention and can be called as
                ``fit(X, y)`` (for example from a pipeline) without raising
                ``TypeError``.

        Returns:
            This instance, with the fitted estimator stored in ``self.model``.
        """
        # A new estimator is created on every call, so refitting never reuses
        # state from a previous fit.
        self.model = _Isomap(
            n_components=self.n_components, n_neighbors=self.n_neighbors
        )
        self.model.fit(X)
        return self

    def transform(self, X):
        """Project ``X`` with the model created by the last ``fit`` call.

        Raises:
            AttributeError: If ``fit`` has not been called yet, because
                ``self.model`` does not exist until then.
        """
        return self.model.transform(X)


class Umap(DimensionalityReducer):
    """UMAP exposed through the ``DimensionalityReducer`` interface.

    A thin adapter: the hyperparameters are stored on the instance, the
    underlying ``UMAP`` estimator is created inside :meth:`fit`, and
    :meth:`transform` delegates to it.

    Args:
        n_components: Number of dimensions of the embedding.
        n_neighbors: ``n_neighbors`` value handed to the ``UMAP`` estimator.
    """

    def __init__(self, n_components=2, n_neighbors=5):
        """Store the hyperparameters; the model itself is built in ``fit``."""
        super().__init__(n_components)
        self.n_neighbors = n_neighbors
        self.n_components = n_components

    def fit(self, X, y=None):
        """Fit a fresh UMAP model on ``X``.

        Args:
            X: Training data, forwarded unchanged to the estimator's ``fit``.
            y: Ignored. Accepted only so the method follows the scikit-learn
                ``fit(X, y=None)`` convention and can be called as
                ``fit(X, y)`` without raising ``TypeError``. It is deliberately
                not forwarded to ``UMAP.fit``, so the embedding is fitted on
                ``X`` alone, exactly as before.

        Returns:
            This instance, with the fitted estimator stored in ``self.model``.
        """
        # A new estimator is created on every call, so refitting never reuses
        # state from a previous fit.
        self.model = UMAP(n_components=self.n_components, n_neighbors=self.n_neighbors)
        self.model.fit(X)
        return self

    def transform(self, X):
        """Project ``X`` with the model created by the last ``fit`` call.

        Raises:
            AttributeError: If ``fit`` has not been called yet, because
                ``self.model`` does not exist until then.
        """
        return self.model.transform(X)

In [None]:
from drcomp import estimate_intrinsic_dimension
from drcomp.reducers import PCA

In [None]:
# Estimate the intrinsic dimension once and use it as the target dimension for
# all three reducers.
intrinsic_dim = estimate_intrinsic_dimension(X)
print(f"{intrinsic_dim=}")
# `Isomap` and `Umap` are the wrapper classes defined in the notebook (not the
# library estimators); both are left at their default n_neighbors=5.
pca = PCA(n_components=intrinsic_dim)
isomap = Isomap(n_components=intrinsic_dim)
umap = Umap(n_components=intrinsic_dim)

# NOTE(review): these fits may draw from the global NumPy RNG seeded in the
# setup cell, in which case their order affects the results; confirm before
# reordering.
X_pca = pca.fit_transform(X)
X_umap = umap.fit_transform(X)
X_isomap = isomap.fit_transform(X)

In [None]:
# One shared `max_K` so the three evaluations cannot drift apart.
MAX_K = 30

metrics_pca = pca.evaluate(X, max_K=MAX_K)
metrics_umap = umap.evaluate(X, max_K=MAX_K)
metrics_isomap = isomap.evaluate(X, max_K=MAX_K)

In [None]:
from drcomp import compare_metrics

In [None]:
# Gather the metrics keyed by method name, then hand them to the comparison
# helper. The call stays the cell's last expression so its result is displayed.
metrics_by_method = {
    "PCA": metrics_pca,
    "UMAP": metrics_umap,
    "Isomap": metrics_isomap,
}
compare_metrics(metrics_by_method)