In [1]:
import os
import sys
import warnings

import kmapper as km
import matplotlib
import networkx as nx
import numpy as np
import pandas as pd
import sklearn
import sklearn.cluster  # loaded explicitly: `sklearn.cluster.KMeans` is used below
from sklearn import ensemble

# Put the parent directory on the import path before importing coal_mapper.
sys.path.append("../")
from coal_mapper.mapper import CoalMapper

warnings.filterwarnings("ignore")

# Testing with Breast Cancer Data Set

In [None]:
# Location of the breast cancer CSV. Set the COAL_MAPPER_DATA environment
# variable to point at your own copy; the fallback is the path the notebook
# originally hard-coded, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    "COAL_MAPPER_DATA", "/Users/jeremy.wayland/Downloads/data.csv"
)

df = pd.read_csv(DATA_PATH)

# Every column except the identifier and the label is treated as a feature.
feature_names = [c for c in df.columns if c not in ["id", "diagnosis"]]

# Encode the label: 1 where diagnosis == "M", 0 for everything else.
df["diagnosis"] = (df["diagnosis"] == "M").astype("int64")

X = np.array(df[feature_names].fillna(0))  # quick and dirty imputation: NaN -> 0
y = np.array(df["diagnosis"])

In [None]:
# Create the CoalMapper object from the feature array X built in the previous cell.
test = CoalMapper(X=X)

In [None]:
# Check the data held by the object (displayed as the cell output).
# NOTE(review): presumably this is the X passed to the constructor — confirm in CoalMapper.
test.data

In [None]:
# Build the two lens components, as in the KeplerMapper example.

# Lens 1: IsolationForest decision-function score, shaped as a single column.
model = ensemble.IsolationForest(random_state=1729).fit(X)
lens1 = model.decision_function(X).reshape(-1, 1)

# Lens 2: KeplerMapper's "l2norm" projection of X.
mapper = km.KeplerMapper(verbose=3)
lens2 = mapper.fit_transform(X, projection="l2norm")

In [None]:
# Give the CoalMapper object its lens (lens1 and lens2 side by side as columns)
# and the clustering algorithm it should use.
test.lens = np.column_stack((lens1, lens2))
test.clusterer = sklearn.cluster.KMeans(
    n_clusters=2,
    random_state=1618033,
)

In [None]:
# Compute the mapper through the CoalMapper object, using the same cover
# settings (2 cubes, 0.4 overlap) as the reference KeplerMapper call further down.
test.compute_mapper(n_cubes=2, perc_overlap=0.4)

In [None]:
# Inspect the result: compute_mapper's output is stored on the `mapper` attribute.
test.mapper

In [None]:
# Reference run: the original KeplerMapper example code, used by the sanity
# checks that compare `graph` against `test.mapper`.
mapper = km.KeplerMapper(verbose=3)
lens2 = mapper.fit_transform(X, projection="l2norm")
lens = np.column_stack((lens1, lens2))

graph = mapper.map(
    lens,
    X,
    clusterer=sklearn.cluster.KMeans(n_clusters=2, random_state=1618033),
    cover=km.Cover(n_cubes=2, perc_overlap=0.4),
)

In [None]:
# Sanity check: the CoalMapper result must have the same nodes as the reference
# KeplerMapper graph. Assert so a mismatch stops "Run All" instead of silently
# displaying False; the boolean is still shown as the cell output.
nodes_match = graph["nodes"] == test.mapper["nodes"]
assert nodes_match, "CoalMapper nodes differ from the reference KeplerMapper graph"
nodes_match

In [None]:
# Sanity check: the CoalMapper result must have the same links as the reference
# KeplerMapper graph. Assert so a mismatch stops "Run All" instead of silently
# displaying False; the boolean is still shown as the cell output.
links_match = graph["links"] == test.mapper["links"]
assert links_match, "CoalMapper links differ from the reference KeplerMapper graph"
links_match

In [None]:
# Sanity check: the CoalMapper result must have the same simplices as the
# reference KeplerMapper graph. Assert so a mismatch stops "Run All" instead of
# silently displaying False; the boolean is still shown as the cell output.
simplices_match = graph["simplices"] == test.mapper["simplices"]
assert simplices_match, "CoalMapper simplices differ from the reference KeplerMapper graph"
simplices_match

In [None]:
# Convert the mapper output to NetworkX; the result is passed to nx.draw in the next cell.
G = test.mapper_to_networkx()

In [None]:
# Draw the NetworkX graph obtained from the mapper output.
nx.draw(G)

In [None]:
# Generate Connected Components
# The result is unpacked into exactly two names, so this cell raises a
# ValueError unless connected_components() yields exactly two items.
a,b = test.connected_components()

In [None]:
# Draw `a`, the first item unpacked from test.connected_components().
nx.draw(a)

In [None]:
# Draw `b`, the second item unpacked from test.connected_components().
nx.draw(b)

In [None]:
# Look up an item, here by index: which clusters is item `0` in?
# Element [0] of the lookup result is a mapping; its keys are displayed.
item_clusters = test.item_lookup(0)[0]
item_clusters.keys()

In [None]:
# Subgraph generated by the clusters that contain item `0`, i.e. what `0`
# contributes to in the Mapper output. Element [1] of the lookup result is drawn.
item_subgraph = test.item_lookup(0)[1]
nx.draw(item_subgraph)