In [6]:
import os

import pandas as pd
import gtda

from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.cluster import DBSCAN

# Load data

In [2]:
# Location of the transformed CellDyn feather export. Kept in one named
# constant so the path can be changed without touching the loading logic.
CELLDYN_FEATHER_PATH = "L:/laupodteam/AIOS/Bram/data/CellDyn/artifacts/celldyn_FULL_transformed_df_updated.feather"

celldyn_full = pd.read_feather(CELLDYN_FEATHER_PATH)

# Column groups, selected purely by a substring of the column name as stored
# in the feather file (i.e. before any column is dropped or renamed below).
meas_columns = [c for c in celldyn_full.columns if 'c_b' in c or 'COMBO' in c]
mode_columns = [c for c in celldyn_full.columns if 'c_m' in c]
alrt_columns = [c for c in celldyn_full.columns if 'alrt' in c.lower()]
c_s_columns = [c for c in celldyn_full.columns if 'c_s_' in c.lower()]

# Build the analysis frame in a single non-mutating chain (no inplace=True):
#   1. drop the leftover 'index' column,
#   2. encode gender as M -> 0, F -> 1 (any other value becomes NaN),
#   3. drop rows lacking an encoded gender or a draw_hour,
#   4. rename the id / timestamp columns and use them as a MultiIndex.
celldyn_full = (
    celldyn_full
    .drop(columns=['index'])
    .assign(gender=lambda frame: frame['gender'].map({'M': 0, 'F': 1}))
    .dropna(subset=['gender', 'draw_hour'])
    .rename(columns={'studyid_alle_celldyn': 'study_id', 'afname_dt': 'sample_dt'})
    .set_index(['study_id', 'sample_dt'])
)

In [4]:
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter
)


In [7]:
# Mapper building blocks: filter (lens), cover of the filter space, and the
# clusterer that is run inside each cover element.

# Filter: feature-wise Yeo-Johnson power transform with standardisation.
# NOTE(review): this transform keeps the input dimensionality, and CubicalCover
# places n_intervals intervals along every filter dimension -- confirm that is
# tractable for the number of columns fed in, or project to a few dimensions.
filter_func = PowerTransformer(standardize=True, method='yeo-johnson')

# Cover: 10 intervals per filter dimension, neighbouring intervals overlap by 30%.
cover = CubicalCover(overlap_frac=0.3, n_intervals=10)

# Clustering: density-based, neighbourhood radius 0.5, at least 3 samples.
clusterer = DBSCAN(min_samples=3, eps=0.5)

# Assemble the pipeline; single job, with per-step progress printed.
mapper_steps = {
    'filter_func': filter_func,
    'cover': cover,
    'clusterer': clusterer,
}
pipe = make_mapper_pipeline(**mapper_steps, verbose=True, n_jobs=1)

In [8]:
# Draw a fixed-size random subsample of the measurement columns as a plain
# NumPy array. The seed is fixed so that re-running the notebook selects the
# same rows and therefore yields the same downstream result.
N_SAMPLES = 1000
SAMPLE_SEED = 42
data = celldyn_full[meas_columns].sample(n=N_SAMPLES, random_state=SAMPLE_SEED).values

In [9]:
# Fit the Mapper pipeline on the sampled array and render the resulting graph
# as a static figure; scroll-wheel zoom is switched on in the figure config.
plot_config = {'scrollZoom': True}
fig = plot_static_mapper_graph(pipe, data)
fig.show(config=plot_config)

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   0.3s
