In [5]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
import kmapper as km

# ---------------------------------------------------------------------------
# Step 1 - synthetic X10-like gene expression matrix
# 500 samples drawn from 3 blob centres in a 50-feature space; the seed is
# fixed so the cell reproduces the same data on every run.
# ---------------------------------------------------------------------------
X, y = make_blobs(
    n_samples=500,
    n_features=50,
    centers=3,
    random_state=42,
)
X = StandardScaler().fit_transform(X)  # rescale every feature column

# Wrap the matrix in a DataFrame with one "Gene_<k>" column per feature.
gene_names = [f"Gene_{i}" for i in range(X.shape[1])]
df = pd.DataFrame(data=X, columns=gene_names)

# ---------------------------------------------------------------------------
# Step 2 - KeplerMapper instance (verbose=1 prints progress for each stage)
# ---------------------------------------------------------------------------
mapper = km.KeplerMapper(verbose=1)

# ---------------------------------------------------------------------------
# Step 3 - lens: project the data onto its first two principal components
# ---------------------------------------------------------------------------
lens_projection = PCA(n_components=2)
lens = mapper.fit_transform(df, projection=lens_projection)

# ---------------------------------------------------------------------------
# Step 4 - Mapper graph: cover the lens with overlapping cubes and cluster
# the original feature vectors that fall inside each cube with DBSCAN.
# ---------------------------------------------------------------------------
node_clusterer = DBSCAN(eps=2.0, min_samples=3)
lens_cover = km.Cover(n_cubes=15, perc_overlap=0.5)
graph = mapper.map(
    lens,
    df.values,
    clusterer=node_clusterer,
    cover=lens_cover,
)

# ---------------------------------------------------------------------------
# Step 5 - write the interactive visualisation to an HTML file
# ---------------------------------------------------------------------------
html_path = "mapper_x10_synthetic.html"
mapper.visualize(graph, path_html=html_path, title="Mapper on Synthetic X10 Genomic Data")

# Last expression of the cell: display the path of the generated HTML file.
html_path


KeplerMapper(verbose=1)
..Composing projection pipeline of length 1:
	Projections: PCA(n_components=2)
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (500, 50)

..Projecting data using: 
	PCA(n_components=2)


..Scaling with: MinMaxScaler()

Mapping on data shaped (500, 50) using lens shaped (500, 2)

Creating 225 hypercubes.

Created 41 edges and 26 nodes in 0:00:00.209950.
Wrote visualization to: mapper_x10_synthetic.html


'mapper_x10_synthetic.html'

In [6]:
# Display the standardized feature matrix (one Gene_* column per feature) as the cell's output.
df

Unnamed: 0,Gene_0,Gene_1,Gene_2,Gene_3,Gene_4,Gene_5,Gene_6,Gene_7,Gene_8,Gene_9,...,Gene_40,Gene_41,Gene_42,Gene_43,Gene_44,Gene_45,Gene_46,Gene_47,Gene_48,Gene_49
0,-1.147224,-1.452935,-1.016859,-0.721764,1.602568,-0.627106,1.114487,0.474411,0.033320,-1.181629,...,1.367168,-1.057165,0.562260,-1.274233,-0.616584,-1.150649,1.319668,0.011211,-0.730346,0.992782
1,-0.192786,0.847463,0.163051,-0.448194,-1.010894,-1.128545,-0.424171,0.713204,1.030910,1.500514,...,-0.609227,-0.047581,-1.609535,1.210367,-1.224569,1.027609,-1.659961,0.666356,1.650527,-0.540809
2,1.273027,0.262067,0.634208,1.002502,0.176940,1.426578,-0.878181,-1.341809,-1.171974,-0.224440,...,-0.800182,1.195936,0.940103,-0.114162,1.522292,0.330304,0.156625,-0.747570,-0.589807,-1.545485
3,-1.092837,-0.874085,-1.350900,-1.293996,1.237749,-0.487851,1.394277,0.679458,-0.253042,-0.901923,...,1.512506,-1.823730,0.218799,-1.213391,-0.549166,-1.380758,0.928139,-0.406883,-0.476491,2.110183
4,-1.066415,-1.476575,-1.407039,-1.206360,1.138917,-0.645082,1.564296,0.611700,-0.455523,-1.112755,...,1.375842,-1.032456,0.176881,-0.830986,-0.774519,-1.256434,0.775703,0.047374,-0.709109,1.567848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,-1.222115,-0.751030,-1.598743,-0.347741,1.149814,-0.712615,0.961931,0.283385,-0.229678,-1.113528,...,1.302547,-1.292113,0.339165,-1.360114,-0.726468,-1.234871,1.290960,0.116555,-0.708937,0.348335
496,-1.183265,-1.500088,-1.444774,-0.549085,0.921593,-0.652198,1.696634,0.581643,-0.221442,-1.235076,...,1.649904,-1.105555,0.329048,-1.363909,-0.488745,-1.144136,1.056102,0.259886,-0.806802,1.002800
497,-0.268844,0.781252,-0.008777,-0.057385,-1.343885,-0.778708,-0.800975,0.951206,1.338712,1.034253,...,-0.550008,-0.213108,-1.193239,1.033857,-0.866102,1.315330,-1.362053,0.708582,1.560447,0.313988
498,-1.042026,-1.591497,-1.352185,-0.984153,1.059024,-0.356189,1.017637,0.551667,-0.005835,-1.040107,...,1.237776,-1.466766,0.200747,-1.002209,-0.452402,-1.047712,0.818055,0.167638,-0.835020,0.388785


array([2, 0, 1, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2,
       0, 0, 2, 2, 1, 1, 2, 2, 0, 2, 0, 2, 2, 0, 1, 1, 0, 2, 2, 1, 0, 0,
       1, 2, 2, 2, 1, 1, 0, 1, 1, 1, 2, 1, 0, 2, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 2, 1, 1, 0, 1, 2, 2, 0, 2, 1, 1, 0, 2, 1, 0, 2, 2,
       2, 1, 0, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 2, 2, 2, 0, 1, 1, 2, 1, 0,
       0, 2, 0, 2, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0,
       1, 1, 2, 2, 1, 1, 1, 2, 2, 0, 1, 2, 0, 0, 2, 0, 1, 2, 1, 1, 0, 0,
       2, 0, 0, 2, 1, 1, 1, 2, 0, 2, 1, 0, 1, 0, 0, 2, 0, 1, 1, 1, 2, 0,
       2, 1, 2, 0, 2, 0, 2, 2, 0, 0, 0, 1, 2, 0, 1, 1, 2, 1, 2, 0, 0, 1,
       2, 0, 0, 1, 1, 1, 1, 0, 0, 0, 2, 1, 0, 2, 1, 1, 2, 1, 1, 0, 0, 0,
       2, 2, 2, 2, 1, 1, 1, 1, 0, 1, 1, 2, 0, 1, 0, 0, 1, 0, 2, 1, 0, 2,
       0, 2, 2, 0, 2, 2, 2, 0, 1, 0, 0, 0, 1, 2, 0, 2, 2, 2, 1, 2, 1, 2,
       2, 1, 2, 0, 2, 2, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 2, 0,
       0, 0, 1, 1, 1, 1, 0, 2, 1, 0, 0, 2, 2, 0, 0,