### Imports

In [4]:
import os
import pickle
import IPython
import sklearn
import numpy as np
import kmapper as km
import pandas as pd
import seaborn as sns
from umap import UMAP
import tqdm

from sklearn import cluster, preprocessing, manifold, decomposition
from sklearn.model_selection import StratifiedKFold, KFold
from scipy.spatial import distance
from scipy.sparse import issparse
from datetime import datetime
from collections import defaultdict
from datetime import datetime
import inspect
import itertools
import sys
import warnings

from sklearn.manifold import t_sne, isomap

from sklearn import datasets
from sklearn.cluster import DBSCAN

from sklearn.decomposition import TruncatedSVD
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

%matplotlib inline
sns.set(style='white', rc={'figure.figsize':(12,8)})

ModuleNotFoundError: No module named 'umap'

### Some tweaks to kmapper (no longer force an index column onto the original data)

In [2]:
class CustomMapper(km.KeplerMapper):
    """KeplerMapper variant whose ``map`` leaves the original data untouched.

    Only the lens (``projected_X``) is prefixed with a row-id column; the
    original data (``inverse_X``) is indexed with those ids and is never
    copied with an extra column.
    """

    def map(self,
            projected_X,
            inverse_X=None,
            clusterer=cluster.DBSCAN(eps=0.5, min_samples=3),
            nr_cubes=None,
            overlap_perc=None,
            coverer=km.Cover(nr_cubes=10, overlap_perc=0.1),
            nerve=km.GraphNerve()):
        """Apply Mapper algorithm on this projection and build a simplicial complex.

        Parameters
        ----------
        projected_X : Numpy Array
            Output from fit_transform (the lens). One row per sample.
        inverse_X : Numpy Array
            Original data. If `None`, then use `projected_X` for clustering.
            Must be indexable with an integer array of row ids.
        clusterer:
            Scikit-learn API compatible clustering algorithm. Default: DBSCAN.
            It is re-fitted in place for every non-empty hypercube.
        nr_cubes : Int
            The number of intervals/hypercubes to create. Default = 10. (DeprecationWarning: define Cover explicitly in future versions)
        overlap_perc : Float
            The percentage of overlap "between" the intervals/hypercubes. Default = 0.1. (DeprecationWarning: define Cover explicitly in future versions)
        coverer : kmapper.Cover
            Cover scheme for lens. Instance of kmapper.cover providing methods `define_bins` and `find_entries`.
            Ignored when `nr_cubes` or `overlap_perc` is given.
        nerve : kmapper.Nerve
            Nerve builder implementing `__call__(nodes)` API

        Returns
        =======
        simplicial_complex : dict
            A dictionary with the keys "nodes", "links", "simplices",
            "meta_data" and "meta_nodes".

        Example
        =======
        >>> simplicial_complex = mapper.map(projected_X, inverse_X=None, clusterer=cluster.DBSCAN(eps=0.5,min_samples=3), coverer=km.Cover(nr_cubes=10, overlap_perc=0.1))
        >>> print(simplicial_complex["nodes"])
        >>> print(simplicial_complex["links"])
        >>> print(simplicial_complex["meta_data"])
        """

        start = datetime.now()

        nodes = defaultdict(list)
        meta = defaultdict(list)
        graph = {}

        # If inverse image is not provided, we use the projection as the inverse image (suffer projection loss)
        if inverse_X is None:
            inverse_X = projected_X

        if nr_cubes is not None or overlap_perc is not None:
            # If user supplied nr_cubes or overlap_perc,
            # use old defaults instead of new Cover
            nr_cubes = nr_cubes if nr_cubes else 10
            overlap_perc = overlap_perc if overlap_perc else 0.1
            self.coverer = km.Cover(nr_cubes=nr_cubes,
                                    overlap_perc=overlap_perc)

            warnings.warn(
                "Explicitly passing in nr_cubes and overlap_perc will be deprecated in future releases. Please supply Cover object.", DeprecationWarning)
        else:
            self.coverer = coverer

        if self.verbose > 0:
            print("Mapping on data shaped %s using lens shaped %s\n" %
                  (str(inverse_X.shape), str(projected_X.shape)))

        # Prefix the lens (and only the lens) with row ids; inverse_X is
        # looked up through these ids further down.
        ids = np.arange(projected_X.shape[0])
        projected_X = np.c_[ids, projected_X]

        # Cover scheme defines a list of elements. Materialise it up front so
        # the total is known regardless of the verbosity level.
        bins = list(self.coverer.define_bins(projected_X))
        total_bins = len(bins)

        # Algo's like K-Means, have a set number of clusters. We need this number
        # to adjust for the minimal number of samples inside an interval before
        # we consider clustering or skipping it.
        cluster_params = clusterer.get_params()
        min_cluster_samples = cluster_params.get("n_clusters", 1)

        if self.verbose > 1:
            print("Minimal points in hypercube before clustering: %d" %
                  (min_cluster_samples))

        # Subdivide the projected data X in intervals/hypercubes with overlap
        if self.verbose > 0:
            print("Creating %s hypercubes." % total_bins)

        for i, cube in enumerate(bins):
            # Slice the hypercube:
            #  gather all entries in this element of the cover
            hypercube = self.coverer.find_entries(projected_X, cube)
            cube_size = hypercube.shape[0]

            if self.verbose > 1:
                print("There are %s points in cube_%s / %s" %
                      (cube_size, i, total_bins))

            # Too few samples inside the hypercube: nothing to cluster.
            if cube_size < min_cluster_samples:
                if self.verbose > 1:
                    print("Cube_%s is empty.\n" % (i))
                continue

            # Column 0 of the lens holds the row ids (stored as floats by
            # np.c_); convert them in one vectorised step.
            member_ids = hypercube[:, 0].astype(int)

            # Note that we apply clustering on the inverse image (original
            # data samples) that fall inside the cube.
            clusterer.fit(inverse_X[member_ids])
            cube_labels = np.asarray(clusterer.labels_)

            if self.verbose > 1:
                print("Found %s clusters in cube_%s\n" % (
                    np.unique(cube_labels[cube_labels > -1]).shape[0], i))

            # TODO: I think this loop could be improved by turning inside out:
            #           - partition points according to each cluster
            # Now for every (sample id in cube, predicted cluster label)
            for member_id, label in zip(member_ids.tolist(), cube_labels.tolist()):
                if label == -1:  # predicted as noise
                    continue

                # TODO: allow user supplied label
                #   - where all those extra values necessary?
                cluster_id = "cube{}_cluster{}".format(i, int(label))

                # Append the member id's as integers
                nodes[cluster_id].append(member_id)

                # The meta entry is identical for every member of a cluster,
                # so it is created only once per cluster.
                if cluster_id not in meta:
                    meta[cluster_id] = {
                        "size": cube_size, "coordinates": cube}

        links, simplices = nerve(nodes)

        graph["nodes"] = nodes
        graph["links"] = links
        graph["simplices"] = simplices
        graph["meta_data"] = {
            "projection": self.projection if self.projection else "custom",
            "nr_cubes": self.coverer.nr_cubes,
            "overlap_perc": self.coverer.overlap_perc,
            "clusterer": str(clusterer),
            "scaler": str(self.scaler)
        }
        graph["meta_nodes"] = meta

        # Reporting
        if self.verbose > 0:
            self._summary(graph, str(datetime.now() - start))

        return graph

    def _summary(self, graph, time):
        """Print how many edges and nodes `graph` holds and the elapsed `time` string."""
        # TODO: this summary is relevant to the type of Nerve being built.
        links = graph["links"]
        nodes = graph["nodes"]
        nr_links = sum(len(v) for k, v in links.items())

        print("\nCreated %s edges and %s nodes in %s." %
              (nr_links, len(nodes), time))

### MetaKMeans estimates n_clusters from the number of observations

In [3]:
class MetaKMeans(sklearn.base.BaseEstimator, sklearn.base.ClusterMixin, sklearn.base.TransformerMixin):
    """K-means wrapper that derives ``n_clusters`` from the sample count at fit time.

    Every call to ``fit`` builds a fresh ``sklearn.cluster.KMeans`` with
    ``n_clusters = max(1, int(log1p(n_samples)) // 2)`` and the remaining
    keyword arguments taken from ``get_params()``.

    ``get_params()`` does not report ``n_clusters``, so a caller that looks
    that key up with a fallback of 1 (as ``CustomMapper.map`` does) ends up
    with 1.

    Attributes set by ``fit``
    -------------------------
    labels_ : array
        Copy of the fitted KMeans ``labels_``.
    """

    def __init__(self,
                 init='k-means++',
                 n_init=10,
                 max_iter=300,
                 tol=0.0001,
                 precompute_distances='auto',
                 verbose=0,
                 random_state=None,
                 copy_x=True,
                 n_jobs=1,
                 algorithm='auto'):
        # All arguments are stored verbatim and forwarded to KMeans in fit().
        # NOTE(review): some of these keywords (e.g. precompute_distances,
        # n_jobs) may not be accepted by newer scikit-learn KMeans releases —
        # confirm against the installed version.
        self.init = init
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.precompute_distances = precompute_distances
        self.verbose = verbose
        self.random_state = random_state
        self.copy_x = copy_x
        self.n_jobs = n_jobs
        self.algorithm = algorithm
        # Holds the KMeans instance created by the most recent fit().
        self._temp_clusterer = None

    def get_params(self, deep=True):
        """Return the KMeans keyword arguments as a dict (``deep`` is not used)."""
        params = {
            'init': self.init,
            'n_init': self.n_init,
            'max_iter': self.max_iter,
            'tol': self.tol,
            'precompute_distances': self.precompute_distances,
            'verbose': self.verbose,
            'random_state': self.random_state,
            'copy_x': self.copy_x,
            'n_jobs': self.n_jobs,
            'algorithm': self.algorithm
        }
        return params

    def _estimate_n_clusters(self, n):
        """Return ``max(1, int(log1p(n)) // 2)`` for a sample count ``n``."""
        return max(1, int(np.log1p(n)) // 2)

    def fit(self, X, y=None):
        """Fit a KMeans sized for ``len(X)`` samples and store its labels.

        Parameters
        ----------
        X : array-like
            Samples to cluster; ``len(X)`` drives the number of clusters.
        y : ignored
            Passed through to ``KMeans.fit``.

        Returns
        -------
        self : MetaKMeans
            The fitted estimator, so calls can be chained.
        """
        n_clusters = self._estimate_n_clusters(len(X))
        self._temp_clusterer = sklearn.cluster.KMeans(n_clusters=n_clusters, **self.get_params())
        self._temp_clusterer.fit(X, y)
        self.labels_ = self._temp_clusterer.labels_.copy()
        return self

    def fit_predict(self, X, y=None):
        """Fit on ``X`` and return the resulting ``labels_``."""
        self.fit(X, y)
        return self.labels_

### Displaying html inside notebook cell

In [6]:
def display(path_html):
    """ Displays a html file inside a Jupyter Notebook output cell.

    The file is embedded through an ``<iframe>`` that is 100% wide and
    800 pixels high.

    Parameters
    ----------
    path_html : str
        Path to html. Use file name for file inside current working
        directory. Use `file://` browser url-format for path to local file.
        Use `https://` urls for externally hosted resources.
        The value is HTML-escaped and quoted before it is placed in the
        ``src`` attribute, so paths containing spaces or quotes are safe.

    Notes
    -----
    Thanks to https://github.com/smartinsightsfromdata for the issue:
    https://github.com/MLWave/kepler-mapper/issues/10

    """
    # Local imports: the IPython helper is aliased because this function's
    # own name is ``display`` and would otherwise call itself.
    import html
    from IPython.display import HTML, display as _ipython_display

    # A plain "%" is correct here: the string is never %-formatted, so "%%"
    # would be emitted literally.
    iframe = ('<iframe src="' + html.escape(path_html, quote=True)
              + '" width="100%" height="800" frameBorder="0"></iframe>')
    _ipython_display(HTML(iframe))

### Read serialized dataset
* graph_trans_df --- dataframe that contains fraud label information for each transaction
* baseamount_agg_mean_values --- aggregated features (the data on which clustering takes place)
* pca_mean_transformed --- 2d pca of aggregated features
* isof_scores_off_ops --- isolation forest scores for each sample
* mean_colnames_selected --- names for aggregated features

In [5]:
# Load the serialized sample dictionary from the current working directory.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load this file if it comes from a trusted source.
with open('tda_isof_pca_dataset_sample.pkl', 'rb') as handle:
    tda_isof_pca_dataset_sample = pickle.load(handle)

In [12]:
# Pull the individual artefacts out of the loaded sample dictionary in one go.
(
    graph_trans_df,
    baseamount_agg_mean_values,
    pca_mean_transformed,
    isof_scores_off_ops,
    mean_colnames_selected,
) = (
    tda_isof_pca_dataset_sample[entry]
    for entry in (
        'graph_trans_df',
        'baseamount_agg_mean_values',
        'pca_mean_transformed',
        'isof_scores_off_ops',
        'mean_colnames_selected',
    )
)

# Values of the ('all', 'target') column, kept as a plain array.
labels = graph_trans_df[('all', 'target')].values

### Concatenate PCA output and isolation forest scores into a 3D projection

In [8]:
# Append the isolation forest scores to the PCA output column-wise; the
# result is the lens handed to mapper.map further down.
proj_3d = np.c_[pca_mean_transformed, isof_scores_off_ops]

In [None]:
# Initialize
# With verbose > 0, CustomMapper.map prints the data/lens shapes, the number
# of hypercubes and a final summary; with verbose > 1 it additionally prints
# the point count (and cluster count or "empty" notice) for every hypercube.
mapper = CustomMapper(verbose=2)

### Create graph with Mapper algorithm

In [10]:
# Create dictionary called 'graph' with nodes, edges and meta-information.
# The cover is passed explicitly as a km.Cover: handing nr_cubes/overlap_perc
# straight to map() builds the very same Cover but raises a DeprecationWarning.
graph = mapper.map(proj_3d, baseamount_agg_mean_values, clusterer=MetaKMeans(n_jobs=10),
                   coverer=km.Cover(nr_cubes=15, overlap_perc=0.2))



Mapping on data shaped (821659, 782) using lens shaped (821659, 3)

Minimal points in hypercube before clustering: 1
Creating 3375 hypercubes.
There are 0 points in cube_0 / 3375
Cube_0 is empty.

There are 0 points in cube_1 / 3375
Cube_1 is empty.

There are 0 points in cube_2 / 3375
Cube_2 is empty.

There are 0 points in cube_3 / 3375
Cube_3 is empty.

There are 0 points in cube_4 / 3375
Cube_4 is empty.

There are 0 points in cube_5 / 3375
Cube_5 is empty.

There are 0 points in cube_6 / 3375
Cube_6 is empty.

There are 0 points in cube_7 / 3375
Cube_7 is empty.

There are 0 points in cube_8 / 3375
Cube_8 is empty.

There are 0 points in cube_9 / 3375
Cube_9 is empty.

There are 0 points in cube_10 / 3375
Cube_10 is empty.

There are 0 points in cube_11 / 3375
Cube_11 is empty.

There are 0 points in cube_12 / 3375
Cube_12 is empty.

There are 0 points in cube_13 / 3375
Cube_13 is empty.

There are 0 points in cube_14 / 3375
Cube_14 is empty.

There are 0 points in cube_15 / 3375


There are 0 points in cube_140 / 3375
Cube_140 is empty.

There are 0 points in cube_141 / 3375
Cube_141 is empty.

There are 1 points in cube_142 / 3375
Found 1 clusters in cube_142

There are 0 points in cube_143 / 3375
Cube_143 is empty.

There are 0 points in cube_144 / 3375
Cube_144 is empty.

There are 0 points in cube_145 / 3375
Cube_145 is empty.

There are 0 points in cube_146 / 3375
Cube_146 is empty.

There are 0 points in cube_147 / 3375
Cube_147 is empty.

There are 0 points in cube_148 / 3375
Cube_148 is empty.

There are 0 points in cube_149 / 3375
Cube_149 is empty.

There are 0 points in cube_150 / 3375
Cube_150 is empty.

There are 0 points in cube_151 / 3375
Cube_151 is empty.

There are 0 points in cube_152 / 3375
Cube_152 is empty.

There are 0 points in cube_153 / 3375
Cube_153 is empty.

There are 0 points in cube_154 / 3375
Cube_154 is empty.

There are 0 points in cube_155 / 3375
Cube_155 is empty.

There are 0 points in cube_156 / 3375
Cube_156 is empty.

Ther

There are 0 points in cube_285 / 3375
Cube_285 is empty.

There are 0 points in cube_286 / 3375
Cube_286 is empty.

There are 0 points in cube_287 / 3375
Cube_287 is empty.

There are 0 points in cube_288 / 3375
Cube_288 is empty.

There are 0 points in cube_289 / 3375
Cube_289 is empty.

There are 0 points in cube_290 / 3375
Cube_290 is empty.

There are 0 points in cube_291 / 3375
Cube_291 is empty.

There are 0 points in cube_292 / 3375
Cube_292 is empty.

There are 0 points in cube_293 / 3375
Cube_293 is empty.

There are 0 points in cube_294 / 3375
Cube_294 is empty.

There are 0 points in cube_295 / 3375
Cube_295 is empty.

There are 0 points in cube_296 / 3375
Cube_296 is empty.

There are 0 points in cube_297 / 3375
Cube_297 is empty.

There are 0 points in cube_298 / 3375
Cube_298 is empty.

There are 0 points in cube_299 / 3375
Cube_299 is empty.

There are 2 points in cube_300 / 3375
Found 1 clusters in cube_300

There are 52 points in cube_301 / 3375
Found 1 clusters in cub

There are 0 points in cube_417 / 3375
Cube_417 is empty.

There are 0 points in cube_418 / 3375
Cube_418 is empty.

There are 0 points in cube_419 / 3375
Cube_419 is empty.

There are 0 points in cube_420 / 3375
Cube_420 is empty.

There are 0 points in cube_421 / 3375
Cube_421 is empty.

There are 0 points in cube_422 / 3375
Cube_422 is empty.

There are 0 points in cube_423 / 3375
Cube_423 is empty.

There are 0 points in cube_424 / 3375
Cube_424 is empty.

There are 0 points in cube_425 / 3375
Cube_425 is empty.

There are 0 points in cube_426 / 3375
Cube_426 is empty.

There are 0 points in cube_427 / 3375
Cube_427 is empty.

There are 0 points in cube_428 / 3375
Cube_428 is empty.

There are 0 points in cube_429 / 3375
Cube_429 is empty.

There are 0 points in cube_430 / 3375
Cube_430 is empty.

There are 0 points in cube_431 / 3375
Cube_431 is empty.

There are 0 points in cube_432 / 3375
Cube_432 is empty.

There are 0 points in cube_433 / 3375
Cube_433 is empty.

There are 0 po

Found 1 clusters in cube_554

There are 0 points in cube_555 / 3375
Cube_555 is empty.

There are 233 points in cube_556 / 3375
Found 2 clusters in cube_556

There are 1031 points in cube_557 / 3375
Found 3 clusters in cube_557

There are 829 points in cube_558 / 3375
Found 3 clusters in cube_558

There are 333 points in cube_559 / 3375
Found 2 clusters in cube_559

There are 582 points in cube_560 / 3375
Found 3 clusters in cube_560

There are 1890 points in cube_561 / 3375
Found 3 clusters in cube_561

There are 4877 points in cube_562 / 3375
Found 4 clusters in cube_562

There are 9119 points in cube_563 / 3375
Found 4 clusters in cube_563

There are 16810 points in cube_564 / 3375
Found 4 clusters in cube_564

There are 32494 points in cube_565 / 3375
Found 5 clusters in cube_565

There are 30840 points in cube_566 / 3375
Found 5 clusters in cube_566

There are 9167 points in cube_567 / 3375
Found 4 clusters in cube_567

There are 195 points in cube_568 / 3375
Found 2 clusters in c

There are 0 points in cube_695 / 3375
Cube_695 is empty.

There are 0 points in cube_696 / 3375
Cube_696 is empty.

There are 0 points in cube_697 / 3375
Cube_697 is empty.

There are 0 points in cube_698 / 3375
Cube_698 is empty.

There are 0 points in cube_699 / 3375
Cube_699 is empty.

There are 0 points in cube_700 / 3375
Cube_700 is empty.

There are 0 points in cube_701 / 3375
Cube_701 is empty.

There are 0 points in cube_702 / 3375
Cube_702 is empty.

There are 0 points in cube_703 / 3375
Cube_703 is empty.

There are 0 points in cube_704 / 3375
Cube_704 is empty.

There are 0 points in cube_705 / 3375
Cube_705 is empty.

There are 0 points in cube_706 / 3375
Cube_706 is empty.

There are 0 points in cube_707 / 3375
Cube_707 is empty.

There are 0 points in cube_708 / 3375
Cube_708 is empty.

There are 0 points in cube_709 / 3375
Cube_709 is empty.

There are 0 points in cube_710 / 3375
Cube_710 is empty.

There are 0 points in cube_711 / 3375
Cube_711 is empty.

There are 0 po

There are 0 points in cube_829 / 3375
Cube_829 is empty.

There are 0 points in cube_830 / 3375
Cube_830 is empty.

There are 0 points in cube_831 / 3375
Cube_831 is empty.

There are 0 points in cube_832 / 3375
Cube_832 is empty.

There are 0 points in cube_833 / 3375
Cube_833 is empty.

There are 14 points in cube_834 / 3375
Found 1 clusters in cube_834

There are 0 points in cube_835 / 3375
Cube_835 is empty.

There are 0 points in cube_836 / 3375
Cube_836 is empty.

There are 0 points in cube_837 / 3375
Cube_837 is empty.

There are 0 points in cube_838 / 3375
Cube_838 is empty.

There are 0 points in cube_839 / 3375
Cube_839 is empty.

There are 0 points in cube_840 / 3375
Cube_840 is empty.

There are 0 points in cube_841 / 3375
Cube_841 is empty.

There are 0 points in cube_842 / 3375
Cube_842 is empty.

There are 0 points in cube_843 / 3375
Cube_843 is empty.

There are 0 points in cube_844 / 3375
Cube_844 is empty.

There are 0 points in cube_845 / 3375
Cube_845 is empty.

The

There are 0 points in cube_976 / 3375
Cube_976 is empty.

There are 0 points in cube_977 / 3375
Cube_977 is empty.

There are 0 points in cube_978 / 3375
Cube_978 is empty.

There are 0 points in cube_979 / 3375
Cube_979 is empty.

There are 0 points in cube_980 / 3375
Cube_980 is empty.

There are 0 points in cube_981 / 3375
Cube_981 is empty.

There are 0 points in cube_982 / 3375
Cube_982 is empty.

There are 0 points in cube_983 / 3375
Cube_983 is empty.

There are 0 points in cube_984 / 3375
Cube_984 is empty.

There are 0 points in cube_985 / 3375
Cube_985 is empty.

There are 1 points in cube_986 / 3375
Found 1 clusters in cube_986

There are 3 points in cube_987 / 3375
Found 1 clusters in cube_987

There are 1 points in cube_988 / 3375
Found 1 clusters in cube_988

There are 1 points in cube_989 / 3375
Found 1 clusters in cube_989

There are 0 points in cube_990 / 3375
Cube_990 is empty.

There are 0 points in cube_991 / 3375
Cube_991 is empty.

There are 0 points in cube_992 /

There are 0 points in cube_1109 / 3375
Cube_1109 is empty.

There are 0 points in cube_1110 / 3375
Cube_1110 is empty.

There are 0 points in cube_1111 / 3375
Cube_1111 is empty.

There are 0 points in cube_1112 / 3375
Cube_1112 is empty.

There are 0 points in cube_1113 / 3375
Cube_1113 is empty.

There are 0 points in cube_1114 / 3375
Cube_1114 is empty.

There are 0 points in cube_1115 / 3375
Cube_1115 is empty.

There are 0 points in cube_1116 / 3375
Cube_1116 is empty.

There are 0 points in cube_1117 / 3375
Cube_1117 is empty.

There are 0 points in cube_1118 / 3375
Cube_1118 is empty.

There are 0 points in cube_1119 / 3375
Cube_1119 is empty.

There are 0 points in cube_1120 / 3375
Cube_1120 is empty.

There are 0 points in cube_1121 / 3375
Cube_1121 is empty.

There are 0 points in cube_1122 / 3375
Cube_1122 is empty.

There are 0 points in cube_1123 / 3375
Cube_1123 is empty.

There are 0 points in cube_1124 / 3375
Cube_1124 is empty.

There are 0 points in cube_1125 / 3375
C

There are 0 points in cube_1249 / 3375
Cube_1249 is empty.

There are 0 points in cube_1250 / 3375
Cube_1250 is empty.

There are 0 points in cube_1251 / 3375
Cube_1251 is empty.

There are 0 points in cube_1252 / 3375
Cube_1252 is empty.

There are 0 points in cube_1253 / 3375
Cube_1253 is empty.

There are 3 points in cube_1254 / 3375
Found 1 clusters in cube_1254

There are 4 points in cube_1255 / 3375
Found 1 clusters in cube_1255

There are 0 points in cube_1256 / 3375
Cube_1256 is empty.

There are 0 points in cube_1257 / 3375
Cube_1257 is empty.

There are 0 points in cube_1258 / 3375
Cube_1258 is empty.

There are 0 points in cube_1259 / 3375
Cube_1259 is empty.

There are 0 points in cube_1260 / 3375
Cube_1260 is empty.

There are 0 points in cube_1261 / 3375
Cube_1261 is empty.

There are 0 points in cube_1262 / 3375
Cube_1262 is empty.

There are 0 points in cube_1263 / 3375
Cube_1263 is empty.

There are 0 points in cube_1264 / 3375
Cube_1264 is empty.

There are 827 points

There are 0 points in cube_1385 / 3375
Cube_1385 is empty.

There are 0 points in cube_1386 / 3375
Cube_1386 is empty.

There are 0 points in cube_1387 / 3375
Cube_1387 is empty.

There are 0 points in cube_1388 / 3375
Cube_1388 is empty.

There are 0 points in cube_1389 / 3375
Cube_1389 is empty.

There are 0 points in cube_1390 / 3375
Cube_1390 is empty.

There are 0 points in cube_1391 / 3375
Cube_1391 is empty.

There are 0 points in cube_1392 / 3375
Cube_1392 is empty.

There are 0 points in cube_1393 / 3375
Cube_1393 is empty.

There are 0 points in cube_1394 / 3375
Cube_1394 is empty.

There are 0 points in cube_1395 / 3375
Cube_1395 is empty.

There are 0 points in cube_1396 / 3375
Cube_1396 is empty.

There are 0 points in cube_1397 / 3375
Cube_1397 is empty.

There are 0 points in cube_1398 / 3375
Cube_1398 is empty.

There are 0 points in cube_1399 / 3375
Cube_1399 is empty.

There are 0 points in cube_1400 / 3375
Cube_1400 is empty.

There are 0 points in cube_1401 / 3375
C

There are 0 points in cube_1525 / 3375
Cube_1525 is empty.

There are 0 points in cube_1526 / 3375
Cube_1526 is empty.

There are 0 points in cube_1527 / 3375
Cube_1527 is empty.

There are 0 points in cube_1528 / 3375
Cube_1528 is empty.

There are 0 points in cube_1529 / 3375
Cube_1529 is empty.

There are 0 points in cube_1530 / 3375
Cube_1530 is empty.

There are 0 points in cube_1531 / 3375
Cube_1531 is empty.

There are 0 points in cube_1532 / 3375
Cube_1532 is empty.

There are 0 points in cube_1533 / 3375
Cube_1533 is empty.

There are 0 points in cube_1534 / 3375
Cube_1534 is empty.

There are 0 points in cube_1535 / 3375
Cube_1535 is empty.

There are 0 points in cube_1536 / 3375
Cube_1536 is empty.

There are 0 points in cube_1537 / 3375
Cube_1537 is empty.

There are 0 points in cube_1538 / 3375
Cube_1538 is empty.

There are 0 points in cube_1539 / 3375
Cube_1539 is empty.

There are 0 points in cube_1540 / 3375
Cube_1540 is empty.

There are 0 points in cube_1541 / 3375
C

There are 0 points in cube_1663 / 3375
Cube_1663 is empty.

There are 0 points in cube_1664 / 3375
Cube_1664 is empty.

There are 0 points in cube_1665 / 3375
Cube_1665 is empty.

There are 0 points in cube_1666 / 3375
Cube_1666 is empty.

There are 0 points in cube_1667 / 3375
Cube_1667 is empty.

There are 0 points in cube_1668 / 3375
Cube_1668 is empty.

There are 0 points in cube_1669 / 3375
Cube_1669 is empty.

There are 0 points in cube_1670 / 3375
Cube_1670 is empty.

There are 0 points in cube_1671 / 3375
Cube_1671 is empty.

There are 0 points in cube_1672 / 3375
Cube_1672 is empty.

There are 0 points in cube_1673 / 3375
Cube_1673 is empty.

There are 0 points in cube_1674 / 3375
Cube_1674 is empty.

There are 0 points in cube_1675 / 3375
Cube_1675 is empty.

There are 0 points in cube_1676 / 3375
Cube_1676 is empty.

There are 0 points in cube_1677 / 3375
Cube_1677 is empty.

There are 0 points in cube_1678 / 3375
Cube_1678 is empty.

There are 0 points in cube_1679 / 3375
C

There are 0 points in cube_1801 / 3375
Cube_1801 is empty.

There are 0 points in cube_1802 / 3375
Cube_1802 is empty.

There are 0 points in cube_1803 / 3375
Cube_1803 is empty.

There are 0 points in cube_1804 / 3375
Cube_1804 is empty.

There are 0 points in cube_1805 / 3375
Cube_1805 is empty.

There are 0 points in cube_1806 / 3375
Cube_1806 is empty.

There are 0 points in cube_1807 / 3375
Cube_1807 is empty.

There are 0 points in cube_1808 / 3375
Cube_1808 is empty.

There are 0 points in cube_1809 / 3375
Cube_1809 is empty.

There are 0 points in cube_1810 / 3375
Cube_1810 is empty.

There are 0 points in cube_1811 / 3375
Cube_1811 is empty.

There are 0 points in cube_1812 / 3375
Cube_1812 is empty.

There are 0 points in cube_1813 / 3375
Cube_1813 is empty.

There are 0 points in cube_1814 / 3375
Cube_1814 is empty.

There are 0 points in cube_1815 / 3375
Cube_1815 is empty.

There are 0 points in cube_1816 / 3375
Cube_1816 is empty.

There are 0 points in cube_1817 / 3375
C

There are 0 points in cube_1939 / 3375
Cube_1939 is empty.

There are 0 points in cube_1940 / 3375
Cube_1940 is empty.

There are 0 points in cube_1941 / 3375
Cube_1941 is empty.

There are 0 points in cube_1942 / 3375
Cube_1942 is empty.

There are 0 points in cube_1943 / 3375
Cube_1943 is empty.

There are 0 points in cube_1944 / 3375
Cube_1944 is empty.

There are 0 points in cube_1945 / 3375
Cube_1945 is empty.

There are 0 points in cube_1946 / 3375
Cube_1946 is empty.

There are 0 points in cube_1947 / 3375
Cube_1947 is empty.

There are 0 points in cube_1948 / 3375
Cube_1948 is empty.

There are 0 points in cube_1949 / 3375
Cube_1949 is empty.

There are 0 points in cube_1950 / 3375
Cube_1950 is empty.

There are 0 points in cube_1951 / 3375
Cube_1951 is empty.

There are 0 points in cube_1952 / 3375
Cube_1952 is empty.

There are 0 points in cube_1953 / 3375
Cube_1953 is empty.

There are 0 points in cube_1954 / 3375
Cube_1954 is empty.

There are 0 points in cube_1955 / 3375
C

There are 0 points in cube_2079 / 3375
Cube_2079 is empty.

There are 0 points in cube_2080 / 3375
Cube_2080 is empty.

There are 0 points in cube_2081 / 3375
Cube_2081 is empty.

There are 0 points in cube_2082 / 3375
Cube_2082 is empty.

There are 0 points in cube_2083 / 3375
Cube_2083 is empty.

There are 0 points in cube_2084 / 3375
Cube_2084 is empty.

There are 0 points in cube_2085 / 3375
Cube_2085 is empty.

There are 0 points in cube_2086 / 3375
Cube_2086 is empty.

There are 0 points in cube_2087 / 3375
Cube_2087 is empty.

There are 0 points in cube_2088 / 3375
Cube_2088 is empty.

There are 0 points in cube_2089 / 3375
Cube_2089 is empty.

There are 0 points in cube_2090 / 3375
Cube_2090 is empty.

There are 0 points in cube_2091 / 3375
Cube_2091 is empty.

There are 0 points in cube_2092 / 3375
Cube_2092 is empty.

There are 0 points in cube_2093 / 3375
Cube_2093 is empty.

There are 0 points in cube_2094 / 3375
Cube_2094 is empty.

There are 0 points in cube_2095 / 3375
C

There are 0 points in cube_2219 / 3375
Cube_2219 is empty.

There are 0 points in cube_2220 / 3375
Cube_2220 is empty.

There are 0 points in cube_2221 / 3375
Cube_2221 is empty.

There are 0 points in cube_2222 / 3375
Cube_2222 is empty.

There are 0 points in cube_2223 / 3375
Cube_2223 is empty.

There are 0 points in cube_2224 / 3375
Cube_2224 is empty.

There are 0 points in cube_2225 / 3375
Cube_2225 is empty.

There are 0 points in cube_2226 / 3375
Cube_2226 is empty.

There are 0 points in cube_2227 / 3375
Cube_2227 is empty.

There are 0 points in cube_2228 / 3375
Cube_2228 is empty.

There are 0 points in cube_2229 / 3375
Cube_2229 is empty.

There are 0 points in cube_2230 / 3375
Cube_2230 is empty.

There are 0 points in cube_2231 / 3375
Cube_2231 is empty.

There are 0 points in cube_2232 / 3375
Cube_2232 is empty.

There are 0 points in cube_2233 / 3375
Cube_2233 is empty.

There are 0 points in cube_2234 / 3375
Cube_2234 is empty.

There are 0 points in cube_2235 / 3375
C

There are 0 points in cube_2359 / 3375
Cube_2359 is empty.

There are 0 points in cube_2360 / 3375
Cube_2360 is empty.

There are 0 points in cube_2361 / 3375
Cube_2361 is empty.

There are 0 points in cube_2362 / 3375
Cube_2362 is empty.

There are 0 points in cube_2363 / 3375
Cube_2363 is empty.

There are 0 points in cube_2364 / 3375
Cube_2364 is empty.

There are 0 points in cube_2365 / 3375
Cube_2365 is empty.

There are 0 points in cube_2366 / 3375
Cube_2366 is empty.

There are 0 points in cube_2367 / 3375
Cube_2367 is empty.

There are 0 points in cube_2368 / 3375
Cube_2368 is empty.

There are 0 points in cube_2369 / 3375
Cube_2369 is empty.

There are 0 points in cube_2370 / 3375
Cube_2370 is empty.

There are 0 points in cube_2371 / 3375
Cube_2371 is empty.

There are 0 points in cube_2372 / 3375
Cube_2372 is empty.

There are 0 points in cube_2373 / 3375
Cube_2373 is empty.

There are 0 points in cube_2374 / 3375
Cube_2374 is empty.

There are 0 points in cube_2375 / 3375
C

There are 0 points in cube_2499 / 3375
Cube_2499 is empty.

There are 0 points in cube_2500 / 3375
Cube_2500 is empty.

There are 0 points in cube_2501 / 3375
Cube_2501 is empty.

There are 0 points in cube_2502 / 3375
Cube_2502 is empty.

There are 0 points in cube_2503 / 3375
Cube_2503 is empty.

There are 0 points in cube_2504 / 3375
Cube_2504 is empty.

There are 0 points in cube_2505 / 3375
Cube_2505 is empty.

There are 0 points in cube_2506 / 3375
Cube_2506 is empty.

There are 0 points in cube_2507 / 3375
Cube_2507 is empty.

There are 0 points in cube_2508 / 3375
Cube_2508 is empty.

There are 0 points in cube_2509 / 3375
Cube_2509 is empty.

There are 0 points in cube_2510 / 3375
Cube_2510 is empty.

There are 0 points in cube_2511 / 3375
Cube_2511 is empty.

There are 0 points in cube_2512 / 3375
Cube_2512 is empty.

There are 0 points in cube_2513 / 3375
Cube_2513 is empty.

There are 0 points in cube_2514 / 3375
Cube_2514 is empty.

There are 0 points in cube_2515 / 3375
C

There are 0 points in cube_2639 / 3375
Cube_2639 is empty.

There are 0 points in cube_2640 / 3375
Cube_2640 is empty.

There are 0 points in cube_2641 / 3375
Cube_2641 is empty.

There are 0 points in cube_2642 / 3375
Cube_2642 is empty.

There are 0 points in cube_2643 / 3375
Cube_2643 is empty.

There are 0 points in cube_2644 / 3375
Cube_2644 is empty.

There are 0 points in cube_2645 / 3375
Cube_2645 is empty.

There are 0 points in cube_2646 / 3375
Cube_2646 is empty.

There are 0 points in cube_2647 / 3375
Cube_2647 is empty.

There are 0 points in cube_2648 / 3375
Cube_2648 is empty.

There are 0 points in cube_2649 / 3375
Cube_2649 is empty.

There are 0 points in cube_2650 / 3375
Cube_2650 is empty.

There are 0 points in cube_2651 / 3375
Cube_2651 is empty.

There are 0 points in cube_2652 / 3375
Cube_2652 is empty.

There are 0 points in cube_2653 / 3375
Cube_2653 is empty.

There are 0 points in cube_2654 / 3375
Cube_2654 is empty.

There are 0 points in cube_2655 / 3375
C

There are 0 points in cube_2779 / 3375
Cube_2779 is empty.

There are 0 points in cube_2780 / 3375
Cube_2780 is empty.

There are 0 points in cube_2781 / 3375
Cube_2781 is empty.

There are 0 points in cube_2782 / 3375
Cube_2782 is empty.

There are 0 points in cube_2783 / 3375
Cube_2783 is empty.

There are 0 points in cube_2784 / 3375
Cube_2784 is empty.

There are 0 points in cube_2785 / 3375
Cube_2785 is empty.

There are 0 points in cube_2786 / 3375
Cube_2786 is empty.

There are 0 points in cube_2787 / 3375
Cube_2787 is empty.

There are 0 points in cube_2788 / 3375
Cube_2788 is empty.

There are 0 points in cube_2789 / 3375
Cube_2789 is empty.

There are 0 points in cube_2790 / 3375
Cube_2790 is empty.

There are 0 points in cube_2791 / 3375
Cube_2791 is empty.

There are 0 points in cube_2792 / 3375
Cube_2792 is empty.

There are 0 points in cube_2793 / 3375
Cube_2793 is empty.

There are 0 points in cube_2794 / 3375
Cube_2794 is empty.

There are 0 points in cube_2795 / 3375
C

There are 0 points in cube_2918 / 3375
Cube_2918 is empty.

There are 0 points in cube_2919 / 3375
Cube_2919 is empty.

There are 0 points in cube_2920 / 3375
Cube_2920 is empty.

There are 0 points in cube_2921 / 3375
Cube_2921 is empty.

There are 0 points in cube_2922 / 3375
Cube_2922 is empty.

There are 0 points in cube_2923 / 3375
Cube_2923 is empty.

There are 0 points in cube_2924 / 3375
Cube_2924 is empty.

There are 0 points in cube_2925 / 3375
Cube_2925 is empty.

There are 0 points in cube_2926 / 3375
Cube_2926 is empty.

There are 0 points in cube_2927 / 3375
Cube_2927 is empty.

There are 0 points in cube_2928 / 3375
Cube_2928 is empty.

There are 0 points in cube_2929 / 3375
Cube_2929 is empty.

There are 0 points in cube_2930 / 3375
Cube_2930 is empty.

There are 0 points in cube_2931 / 3375
Cube_2931 is empty.

There are 0 points in cube_2932 / 3375
Cube_2932 is empty.

There are 0 points in cube_2933 / 3375
Cube_2933 is empty.

There are 0 points in cube_2934 / 3375
C

There are 0 points in cube_3056 / 3375
Cube_3056 is empty.

There are 0 points in cube_3057 / 3375
Cube_3057 is empty.

There are 0 points in cube_3058 / 3375
Cube_3058 is empty.

There are 0 points in cube_3059 / 3375
Cube_3059 is empty.

There are 0 points in cube_3060 / 3375
Cube_3060 is empty.

There are 0 points in cube_3061 / 3375
Cube_3061 is empty.

There are 0 points in cube_3062 / 3375
Cube_3062 is empty.

There are 0 points in cube_3063 / 3375
Cube_3063 is empty.

There are 0 points in cube_3064 / 3375
Cube_3064 is empty.

There are 0 points in cube_3065 / 3375
Cube_3065 is empty.

There are 0 points in cube_3066 / 3375
Cube_3066 is empty.

There are 0 points in cube_3067 / 3375
Cube_3067 is empty.

There are 0 points in cube_3068 / 3375
Cube_3068 is empty.

There are 0 points in cube_3069 / 3375
Cube_3069 is empty.

There are 0 points in cube_3070 / 3375
Cube_3070 is empty.

There are 0 points in cube_3071 / 3375
Cube_3071 is empty.

There are 0 points in cube_3072 / 3375
C

There are 0 points in cube_3194 / 3375
Cube_3194 is empty.

There are 0 points in cube_3195 / 3375
Cube_3195 is empty.

There are 0 points in cube_3196 / 3375
Cube_3196 is empty.

There are 0 points in cube_3197 / 3375
Cube_3197 is empty.

There are 0 points in cube_3198 / 3375
Cube_3198 is empty.

There are 0 points in cube_3199 / 3375
Cube_3199 is empty.

There are 0 points in cube_3200 / 3375
Cube_3200 is empty.

There are 0 points in cube_3201 / 3375
Cube_3201 is empty.

There are 0 points in cube_3202 / 3375
Cube_3202 is empty.

There are 0 points in cube_3203 / 3375
Cube_3203 is empty.

There are 0 points in cube_3204 / 3375
Cube_3204 is empty.

There are 0 points in cube_3205 / 3375
Cube_3205 is empty.

There are 0 points in cube_3206 / 3375
Cube_3206 is empty.

There are 0 points in cube_3207 / 3375
Cube_3207 is empty.

There are 0 points in cube_3208 / 3375
Cube_3208 is empty.

There are 0 points in cube_3209 / 3375
Cube_3209 is empty.

There are 0 points in cube_3210 / 3375
C

There are 0 points in cube_3331 / 3375
Cube_3331 is empty.

There are 0 points in cube_3332 / 3375
Cube_3332 is empty.

There are 0 points in cube_3333 / 3375
Cube_3333 is empty.

There are 0 points in cube_3334 / 3375
Cube_3334 is empty.

There are 0 points in cube_3335 / 3375
Cube_3335 is empty.

There are 0 points in cube_3336 / 3375
Cube_3336 is empty.

There are 0 points in cube_3337 / 3375
Cube_3337 is empty.

There are 0 points in cube_3338 / 3375
Cube_3338 is empty.

There are 0 points in cube_3339 / 3375
Cube_3339 is empty.

There are 0 points in cube_3340 / 3375
Cube_3340 is empty.

There are 0 points in cube_3341 / 3375
Cube_3341 is empty.

There are 0 points in cube_3342 / 3375
Cube_3342 is empty.

There are 0 points in cube_3343 / 3375
Cube_3343 is empty.

There are 0 points in cube_3344 / 3375
Cube_3344 is empty.

There are 0 points in cube_3345 / 3375
Cube_3345 is empty.

There are 0 points in cube_3346 / 3375
Cube_3346 is empty.

There are 0 points in cube_3347 / 3375
C

### The standard algorithm will most likely fill a node's color with red if there is any fraud transaction in it

In [14]:
# Per-sample colour value handed to the visualisation: 1 by default, 0 for
# every sample that is a member of at least one node containing a positive
# (label > 0) entry.
colormap = np.ones_like(labels)
for node, indices in graph['nodes'].items():
    node_labels = mapper.data_from_cluster_id(node, graph, labels)
    # Only ever write zeros. A sample can be a member of several nodes, and
    # unconditionally assigning 1 for a fraud-free node would undo the flag
    # set by an earlier fraud-containing node, making the result depend on
    # the iteration order of graph['nodes'].
    if np.any(node_labels > 0):
        colormap[indices] = 0

### Tooltips will indicate if there are any false positives or true fraud

In [20]:
# One tooltip string per sample: empty by default, 'fp' where the label is 0
# and 'FRAUD' where the label is 1. Object dtype keeps the strings untruncated.
custom_tooltips = np.full(len(labels), '', dtype=object)
is_false_positive = (labels == 0)
is_fraud = (labels == 1)
custom_tooltips[is_false_positive] = 'fp'
custom_tooltips[is_fraud] = 'FRAUD'

In [22]:
# Write the Mapper graph to "mean_pca_with_isof.html"; the value returned by
# visualize() is kept in `html`.
html = mapper.visualize(
    graph,
    path_html="mean_pca_with_isof.html",
    title="mean_pca_with_isof(n_samples={})".format(len(baseamount_agg_mean_values)),
    projected_X=proj_3d,
    projected_X_names=['pca_mean_1', 'pca_mean_2', 'isof_score'],
    color_function=colormap,
    custom_tooltips=custom_tooltips,
)



Wrote visualization to: mean_pca_with_isof.html


In [13]:
# Passing a bare path string to display() only echoes the string itself.
# Wrap the saved page in an IFrame so the HTML visualisation is embedded in
# the cell output instead.
display(IPython.display.IFrame(src="mean_pca_with_isof.html", width="100%", height=800))

In [12]:
# Shell escape: print the current working directory of the kernel process.
# NOTE(review): presumably used to locate the relative "mean_pca_with_isof.html"
# output file — confirm; the printed absolute path is machine-specific, so
# consider clearing this cell before sharing the notebook.
!pwd

/home/yanina/TDA
