The baseline of this code was taken from https://github.com/giotto-ai/giotto-tda/blob/master/examples/MNIST_classification.ipynb, and I edited some of the lines to tailor it to my specific needs, such as the binarization parameters and the metrics.

In [1]:
import pandas as pd

# Load the image table from disk (absolute path on the author's machine).
data = pd.read_csv(r"C:\Users\ASUS\Downloads\MNIST sign language.csv")

# Column 0 becomes the label vector Y; columns 1..784 become the matrix X of
# flattened images (one row per sample).
Y = data.iloc[:, 0].to_numpy()
X = data.iloc[:, 1:785].to_numpy()

In [2]:
# Sanity check: report the shapes of the pixel matrix X and the label vector Y.
print(f"X shape: {X.shape}, Y shape: {Y.shape}")

X shape: (34627, 784), Y shape: (34627,)


In [3]:
# Turn every flattened row back into a 28x28 image; -1 lets numpy infer the
# number of samples.
X = X.reshape(-1, 28, 28)

In [54]:
import numpy as np
from gtda.images import Binarizer

# Index of the first sample whose label is 0.  The `im8` names below are
# presumably a leftover from the upstream digit-8 example; they are kept
# because the following cells refer to them.
im8_idx = np.flatnonzero(Y == 0)[0]
# Invert the grey levels (255 - pixel) and add a leading axis so the single
# image has the (n_samples, n_pixels_x, n_pixels_y) layout.
im8 = (255 - X[im8_idx])[np.newaxis, :, :]
# Binarize the inverted image with a threshold of 0.6.
binarizer = Binarizer(threshold=0.6)
im8_binarized = binarizer.fit_transform(im8)

binarizer.plot(im8_binarized)

In [55]:
from gtda.images import HeightFiltration

# Apply a height filtration along direction (1, 1) to the binarized sample
# image and keep the result for the following cells.
height_filtration = HeightFiltration(direction=np.array([1,1]))
im8_filtration = height_filtration.fit_transform(im8_binarized)

# Last expression of the cell: shows the filtration plot.
height_filtration.plot(im8_filtration)

In [62]:

# Binarize the height-filtration output itself, this time with threshold 0.3.
# NOTE: this rebinds both `binarizer` and `im8_binarized`, replacing the
# objects created in the first binarization cell.
binarizer = Binarizer(threshold=0.3)
im8_binarized = binarizer.fit_transform(im8_filtration)



# Last expression of the cell: shows the binarized filtration.
binarizer.plot(im8_binarized)

In [13]:
from gtda.homology import CubicalPersistence

# Compute the cubical persistence diagram of the height-filtered sample image.
# n_jobs=-1 is passed through to the transformer (presumably "use all
# available cores", as in joblib -- confirm against the giotto-tda docs).
cubical_persistence = CubicalPersistence(n_jobs=-1)
im8_cubical = cubical_persistence.fit_transform(im8_filtration)

# Last expression of the cell: shows the persistence diagram.
cubical_persistence.plot(im8_cubical)

In [3]:
##### Imports the necessary modules #######
# numpy is imported here too: this cell re-imports everything else it needs,
# and without this line it raises NameError on `np` whenever the exploratory
# cells above have not been run first.
import numpy as np
from sklearn.pipeline import make_pipeline, make_union
from gtda.diagrams import Amplitude, PersistenceEntropy, Scaler
from gtda.homology import CubicalPersistence
from gtda.images import Binarizer, HeightFiltration, RadialFiltration

##### Directions list for Height Filtration (8 directions) #######
direction_list = [[1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0], [-1, -1], [0, -1], [1, -1]]

######## Center list for radial filtration ########
# The 9 centers form a 3x3 grid over the coordinates {6, 13, 20}.
center_list = [
    [13, 6],
    [6, 13],
    [13, 13],
    [20, 13],
    [13, 20],
    [6, 6],
    [6, 20],
    [20, 6],
    [20, 20],
]

###### List of filtration methods #########
# 8 height filtrations followed by 9 radial filtrations = 17 filtrations.
# The order matters: it fixes the column order of the final feature matrix.
filtration_list = (
    [
        HeightFiltration(direction=np.array(direction), n_jobs=-1)
        for direction in direction_list
    ]
    + [RadialFiltration(center=np.array(center), n_jobs=-1) for center in center_list]
)

###### Persistence diagram acquisition ########
# One step list per filtration: binarize -> filter -> cubical persistence
# -> rescale the resulting diagrams.
diagram_steps = [
    [
        Binarizer(threshold=0.5, n_jobs=-1),
        filtration,
        CubicalPersistence(n_jobs=-1),
        Scaler(n_jobs=-1),
    ]
    for filtration in filtration_list
]

####### List of metrics used to obtain the amplitudes of the diagrams ########
# NOTE(review): the two "heat" entries with p=1 are identical, and so are the
# two with p=2, so each pair produces the same amplitude columns twice.  They
# are kept as-is so the feature matrix keeps its 476 columns; presumably the
# upstream example used two different sigma values -- confirm, then either
# vary sigma or drop the duplicates (which changes the column count).
metric_list = [
    {"metric": "bottleneck", "metric_params": {}},
    {"metric": "wasserstein", "metric_params": {"p": 1}},
    {"metric": "wasserstein", "metric_params": {"p": 2}},
    {"metric": "landscape", "metric_params": {"p": 1, "n_layers": 1, "n_bins": 50}},
    {"metric": "landscape", "metric_params": {"p": 1, "n_layers": 2, "n_bins": 50}},
    {"metric": "landscape", "metric_params": {"p": 2, "n_layers": 1, "n_bins": 50}},
    {"metric": "landscape", "metric_params": {"p": 2, "n_layers": 2, "n_bins": 50}},
    {"metric": "betti", "metric_params": {"p": 1, "n_bins": 50}},
    {"metric": "betti", "metric_params": {"p": 2, "n_bins": 50}},
    {"metric": "heat", "metric_params": {"p": 1, "sigma": 0.15, "n_bins": 50}},
    {"metric": "heat", "metric_params": {"p": 1, "sigma": 0.15, "n_bins": 50}},
    {"metric": "heat", "metric_params": {"p": 2, "sigma": 0.15, "n_bins": 50}},
    {"metric": "heat", "metric_params": {"p": 2, "sigma": 0.15, "n_bins": 50}},
]

####### Union of all features as an ordered n-tuple, i.e. an n-dimensional vector #######
# Persistence entropy first, then one Amplitude per entry of metric_list
# (14 transformers in total).  With 17 filtrations the resulting feature
# vector has 476 dimensions, i.e. 2 values per transformer (presumably one
# per homology dimension -- confirm).
feature_union = make_union(
    PersistenceEntropy(nan_fill_value=-1),
    *[Amplitude(**metric, n_jobs=-1) for metric in metric_list]
)

####### Pipeline union: maps every data point (image) to its feature vector ########
# Each filtration gets its own pipeline ending in feature_union; the outer
# union concatenates the outputs of all pipelines.
tda_union = make_union(
    *[make_pipeline(*diagram_step, feature_union) for diagram_step in diagram_steps],
    n_jobs=-1
)
     

In [4]:
#### Displays the pipeline diagram #####
from sklearn import set_config

# Ask scikit-learn to render estimators as a diagram instead of plain text.
set_config(display="diagram")

# Leaving the union as the cell's last expression makes the notebook show it.
tda_union

In [None]:
# Greyscale inversion: the radial filtration otherwise treats the image
# background as the feature, so every grey level is flipped (255 - pixel).
# X is rebuilt from `data` instead of from its own current value: applying
# `255 - X` twice gives back the original images, so re-running the old form
# of this cell silently undid the inversion.  Rebuilding also makes the result
# independent of whether X had already been reshaped to 28x28 images.
X = 255 - data.iloc[:, 1:785].to_numpy().reshape((-1, 28, 28))

In [None]:
##### Full-blown feature extraction on the whole dataset #####
##### This will take a very long time #####

# Fit every pipeline of tda_union on X and collect the resulting features.
X_tda = tda_union.fit_transform(X)
X_tda.shape  # displayed as the cell output, signalling that the run has finished

In [None]:
#### Checkpoint: export the extracted features to CSV ####
# X_tda is rebound to its DataFrame form; index=False leaves the row index
# out of the file.
X_tda = pd.DataFrame(data=X_tda)
X_tda.to_csv("Raw Feature.csv", index=False)