In [None]:
import pandas as pd
from apopfail.model import clean

In [None]:
# Feature matrix, read from the training parquet file.
X = pd.read_parquet("../data/train_set_p53mutant.parquet")

# Labels: the first line of the CSV is skipped and the single named column is
# called "target"; the first column of the file becomes the index.
label_frame = pd.read_csv(
    "../data/train_labels_p53mutant.csv",
    index_col=0,
    skiprows=1,
    names=["target"],
)

# Encode the string labels as integers. Any value other than "inactive" or
# "active" is turned into NaN by Series.map.
y = label_frame["target"].map({"inactive": 0, "active": 1})

In [None]:
# Pass the loaded features and labels through the `clean` helper imported from
# apopfail.model and rebind both names to whatever it returns.
# NOTE(review): the body of `clean` is not visible here — confirm which rows or
# columns it removes and whether X and y stay aligned afterwards.
# Because X and y are overwritten, re-running this cell on its own feeds the
# already-cleaned data back into `clean`.
X, y = clean(X, y)

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

In [None]:
# Two preprocessing steps, both fitted on every sample currently in X:
#   1. replace missing values using the "mean" strategy,
#   2. standardize the imputed features.
# NOTE(review): the statistics are fitted on all rows, not only on the y == 0
# subset that is modelled further down — confirm this is intended.
imputer = SimpleImputer(strategy="mean")
scaler = StandardScaler()
X = scaler.fit_transform(imputer.fit_transform(X))

In [None]:
# Boolean masks selecting the rows labelled 0 and the rows labelled 1.
inactive_mask = y == 0
active_mask = y == 1

# Rows labelled 1 keep whatever dtype X has.
X_abnormal = X[active_mask]

# Rows labelled 0 are additionally cast to float32.
X_normal = X[inactive_mask].astype("float32")

In [None]:
# PCA and kernel PCA (poly / rbf / sigmoid kernels) fitted on the y == 0 samples
from drcomp import estimate_intrinsic_dimension
from drcomp.reducers import PCA, KernelPCA

# Estimate the intrinsic dimension of the data and show it; the same value is
# handed to every reducer through its `intrinsic_dim` argument.
intrinsic_dim = estimate_intrinsic_dimension(X_normal)
print(f"{intrinsic_dim=}")

# One KernelPCA reducer per kernel, followed by a PCA reducer.
kpca, kpca_rbf, kpca_sig = (
    KernelPCA(intrinsic_dim=intrinsic_dim, kernel=kernel_name)
    for kernel_name in ("poly", "rbf", "sigmoid")
)
pca = PCA(intrinsic_dim=intrinsic_dim)

# Fit each reducer on X_normal and keep its embedding
# (fit order: poly, rbf, sigmoid, PCA).
X_kpca, X_kpca_rbf, X_kpca_sig, X_pca = (
    reducer.fit_transform(X_normal)
    for reducer in (kpca, kpca_rbf, kpca_sig, pca)
)

In [None]:
# Evaluate every fitted reducer on X_normal together with the embedding it
# produced. All calls share max_K=30.
# NOTE(review): max_K is presumably the largest neighbourhood size used by the
# quality metrics — confirm against the drcomp documentation.
metrics_pca, metrics_kpca, metrics_kpca_rbf, metrics_kpca_sig = (
    reducer.evaluate(X_normal, embedding, max_K=30)
    for reducer, embedding in (
        (pca, X_pca),
        (kpca, X_kpca),
        (kpca_rbf, X_kpca_rbf),
        (kpca_sig, X_kpca_sig),
    )
)

In [None]:
from drcomp.plotting import compare_metrics

In [None]:
# Collect the evaluation results under the labels passed to the comparison,
# then hand the whole mapping to drcomp's compare_metrics.
metrics_by_reducer = {
    "PCA": metrics_pca,
    "KernelPCA poly": metrics_kpca,
    "KernelPCA rbf": metrics_kpca_rbf,
    "KernelPCA sigmoid": metrics_kpca_sig,
}
compare_metrics(metrics_by_reducer)