In [1]:
import gtda
gtda.__version__

'0.5.1'

In [1]:
# Data wrangling
import numpy as np
import pandas as pd  # Not a requirement of giotto-tda, but is compatible with the gtda.mapper module

# Data viz
from gtda.plotting import plot_point_cloud

# TDA magic
from gtda.mapper import (
    CubicalCover,
    #OneDimensionalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter
)

# ML tools
from sklearn import datasets
from sklearn.cluster import DBSCAN

from sklearn.decomposition import KernelPCA
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import normalize

In [2]:
from numpy import genfromtxt

# Parse the comma-separated expression table into a single NumPy array and
# display its dimensions as the cell result.
my_data = genfromtxt(
    'GSE76705_normalize_data_364samples.csv',
    delimiter=',',
)
my_data.shape

(54677, 365)

In [8]:
# Discard the first two rows and the first column of the raw table, then
# preview the top-left 3x3 corner of what is left.
data_1 = my_data[2:, 1:]
print(data_1[:3, :3])

# Swap the axes so that the former columns become rows
# (presumably one row per sample -- confirm against the CSV layout).
data__1 = data_1.T
print(data__1.shape)

# Keep only the first 229 rows, then standardise every column to zero mean
# and unit standard deviation.
data1 = data__1[:229]
data1 = (data1 - data1.mean(axis=0)) / data1.std(axis=0)
print(data1[1:5, 1:5])

[[ 7.49439165  6.00978303  6.12110651]
 [ 7.31419368  8.43868735  8.6332645 ]
 [10.46600306 10.26927119 11.37452286]]
(364, 54675)
[[ 0.11492694 -1.21454137  1.84462947  2.06964858]
 [ 0.94042854  1.70016183  0.18950863 -1.10528408]
 [-0.66342108  0.73525204  1.81365421  0.39682449]
 [-0.41760275  0.65962765 -0.59171974 -0.77666268]]


In [9]:
from numpy import genfromtxt

# Parse the metadata CSV, then drop its first row and its first two columns.
meta = genfromtxt('metadata.csv', delimiter=',')
mdata = meta[1:, 2:]

# Preview the first five rows/columns and report the overall dimensions.
print(mdata[:5, :5])
print(mdata.shape)

# Keep the first 229 rows -- the same row count that is retained for data1.
metadata = mdata[:229]

[[ 37.6  34.   60.    1.   17. ]
 [ 91.8  78.   60.    0.   29. ]
 [ 52.2  36.   70.    0.  100. ]
 [ 36.5  29.   62.    1.   45. ]
 [ 33.2  35.   70.    1.   28. ]]
(364, 5)


In [10]:
# Build a Mapper pipeline over data1 and draw the resulting graph in 2D,
# with nodes coloured by the metadata columns.
#
# NOTE: these statements used to sit at an eight-space indent below a
# disabled parameter sweep (`for x in range(10,15)` /
# `for y in np.arange(0.05,1,0.05)`). With the loop headers commented out
# that indent is an IndentationError, so the body now lives at top level.

# Filter function -- can be any scikit-learn transformer. None makes
# make_mapper_pipeline use its default filter (per the giotto-tda docs, a
# 2-component PCA). Alternative tried: Projection(columns=[0,1,2,3,4,5]).
filter_func = None

# Cover of the filter values.
cover = CubicalCover(n_intervals=12, overlap_frac=0.4)

# Clustering algorithm applied to the pulled-back cover sets.
clusterer = DBSCAN()

# Parallelism of the clustering step.
n_jobs = 1

# Assemble the pipeline; verbose=True prints the timing of every step.
pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    verbose=True,
    n_jobs=n_jobs,
)

# Passing the array by keyword makes it explicit that metadata is only used
# for colouring; the former `metadata[:,:]` slice selected the whole array.
fig = plot_static_mapper_graph(pipe, data1, color_data=metadata)
fig.show(config={'scrollZoom': True})

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   1.5s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   1.6s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=   0.8s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.1s


In [12]:

# 3D version of the 2D Mapper graph: same pipeline settings, but the graph
# layout is computed in three dimensions (layout_dim=3).
#
# NOTE: these statements used to sit at an eight-space indent below a
# disabled parameter sweep (`for x in range(10,15)` /
# `for y in np.arange(0.05,1,0.05)`). With the loop headers commented out
# that indent is an IndentationError, so the body now lives at top level.

# Filter function -- can be any scikit-learn transformer. None makes
# make_mapper_pipeline use its default filter (per the giotto-tda docs, a
# 2-component PCA). Alternative tried: Projection(columns=[0,1,2,3,4,5]).
filter_func = None

# Cover of the filter values.
cover = CubicalCover(n_intervals=12, overlap_frac=0.4)

# Clustering algorithm applied to the pulled-back cover sets.
clusterer = DBSCAN()

# Parallelism of the clustering step.
n_jobs = 1

# Assemble the pipeline; verbose=True prints the timing of every step.
pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    verbose=True,
    n_jobs=n_jobs,
)

# Passing the array by keyword makes it explicit that metadata is only used
# for colouring; the former `metadata[:,:]` slice selected the whole array.
fig = plot_static_mapper_graph(
    pipe, data1, color_data=metadata, layout_dim=3
)
fig.show(config={'scrollZoom': True})
        

        

[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   1.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   1.4s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=   0.8s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.1s


In [13]:
# Colour the Mapper graph by PCA features of the data itself: the estimator
# is handed to giotto-tda as `color_features` and applied to `color_data`.
# random_state is pinned because, for an array this wide, scikit-learn's
# automatic solver choice may be the randomized SVD, which would otherwise
# make the node colours vary from run to run.
pca = PCA(n_components=10, random_state=0)

# Reuses the `pipe` object built by the preceding Mapper cell.
fig = plot_static_mapper_graph(
    pipe, data1, color_data=data1, color_features=pca
)
fig.show(config={'scrollZoom': True})


[Pipeline] ............ (step 1 of 3) Processing scaler, total=   0.0s
[Pipeline] ....... (step 2 of 3) Processing filter_func, total=   1.4s
[Pipeline] ............. (step 3 of 3) Processing cover, total=   0.0s
[Pipeline] .... (step 1 of 3) Processing pullback_cover, total=   1.4s
[Pipeline] ........ (step 2 of 3) Processing clustering, total=   0.9s
[Pipeline] ............. (step 3 of 3) Processing nerve, total=   0.1s
