In [2]:
import numpy as np
import pandas as pd
from PIL import Image
import os

In [3]:
os.getcwd()

'c:\\promise\\word'

In [6]:
train= pd.read_csv('train/train_data.csv')
train

Unnamed: 0,file_name,label
0,train0001.png,8
1,train0002.png,8
2,train0003.png,8
3,train0004.png,8
4,train0005.png,8
...,...,...
4995,train4996.png,6
4996,train4997.png,6
4997,train4998.png,6
4998,train4999.png,6


In [11]:
def png_to_numpy(file_names):
    images=[]
    if file_names[0][:5] =='train':
        for file in file_names:
            images.append(Image.open('train/'+file))
    else:
        for file in file_names:
            images.append(Image.open('test/'+file))
    image2number=  np.array([np.array(image) for image in images])
    return image2number

In [14]:
X_train =  png_to_numpy(train['file_name'])
y_train= train['label']


In [17]:
from gtda.plotting import plot_heatmap
sample_idx= np.random.randint(5000)
sample = X_train[sample_idx]

plot_heatmap(sample)

In [20]:
from gtda.images import Binarizer


im8_idx = np.flatnonzero(y_train == 8)[3]
im8 = X_train[im8_idx][None, :, :]

binarizer= Binarizer(threshold=0.4)
im8_binarized= binarizer.fit_transform(im8)

binarizer.plot(im8_binarized)

In [22]:
from gtda.images import RadialFiltration

radial_filtration = RadialFiltration(center=np.array([20,6]))

im8_filtration= radial_filtration.fit_transform(im8_binarized)

radial_filtration.plot(im8_filtration, colorscale='jet')

In [23]:
from gtda.homology import CubicalPersistence

cubical_persistence = CubicalPersistence(homology_dimensions=[0,1],reduced_homology = True, n_jobs=-1)
im8_cubical = cubical_persistence.fit_transform(im8_filtration)

cubical_persistence.plot(im8_cubical)

In [24]:
from gtda.diagrams import Scaler

scaler = Scaler()
im8_scaled = scaler.fit_transform(im8_cubical)

scaler.plot(im8_scaled)

In [26]:
from gtda.diagrams import HeatKernel

heat = HeatKernel(sigma=.15, n_bins=60, n_jobs=-1)
im8_heat = heat.fit_transform(im8_scaled)
# Visualize the heat kernel for H1
heat.plot(im8_heat, homology_dimension_idx=1, colorscale='jet')

In [27]:
# giotto-tda의 변환기들은 sklearn을 기반으로 만들어졌기에, sklearn의 pipeline 기능과도 호환이 됩니다.

from sklearn.pipeline import Pipeline
from gtda.diagrams import Amplitude

steps = [
    ("binarizer", Binarizer(threshold=0.4)),
    ("filtration", RadialFiltration(center=np.array([20, 6]))),
    ("diagram", CubicalPersistence()),
    ("rescaling", Scaler()),
    ("amplitude", Amplitude(metric="heat", metric_params={'sigma':0.15, 'n_bins':60}))
]

heat_pipeline = Pipeline(steps)

In [28]:
im8_pipeline = heat_pipeline.fit_transform(im8)
im8_pipeline

array([[1.34528408e-04, 2.49730871e+00]])

In [31]:
from sklearn.pipeline import make_pipeline, make_union
from gtda.diagrams import PersistenceEntropy
from gtda.images import HeightFiltration

direction_list =[[1,0],[1,1],[0,1],[-1,1],[-1,0],[-1,-1],[0,-1],[1,-1]]

center_list=[
    [13,6],
    [6,13],
    [13,13],
    [20,13],
    [13,20],
    [6,6],
    [6,20],
    [20,6],
    [20,20],
]

# Creating a list of all filtration transformer
filtration_list=(
    [HeightFiltration(direction=np.array(direction), n_jobs=-1)
     for direction in direction_list
    ]
    +[RadialFiltration(center=np.array(center), n_jobs=-1) for center in center_list]
)

#Creating the diagram generation pipeline
diagram_steps = [[
    Binarizer(threshold=0.4, n_jobs=-1),
    filtration,
    CubicalPersistence(n_jobs=-1),
    Scaler(n_jobs=-1),
]
for filtration in filtration_list
]

metric_list = [
    {"metric": "bottleneck", "metric_params": {}},
    {"metric": "wasserstein", "metric_params": {"p": 1}},
    {"metric": "wasserstein", "metric_params": {"p": 2}},
    {"metric": "landscape", "metric_params": {"p": 1, "n_layers": 1, "n_bins": 100}},
    {"metric": "landscape", "metric_params": {"p": 1, "n_layers": 2, "n_bins": 100}},
    {"metric": "landscape", "metric_params": {"p": 2, "n_layers": 1, "n_bins": 100}},
    {"metric": "landscape", "metric_params": {"p": 2, "n_layers": 2, "n_bins": 100}},
    {"metric": "betti", "metric_params": {"p": 1, "n_bins": 100}},
    {"metric": "betti", "metric_params": {"p": 2, "n_bins": 100}},
    {"metric": "heat", "metric_params": {"p": 1, "sigma": 1.6, "n_bins": 100}},
    {"metric": "heat", "metric_params": {"p": 1, "sigma": 3.2, "n_bins": 100}},
    {"metric": "heat", "metric_params": {"p": 2, "sigma": 1.6, "n_bins": 100}},
    {"metric": "heat", "metric_params": {"p": 2, "sigma": 3.2, "n_bins": 100}},
]

feature_union =make_union(*[PersistenceEntropy(nan_fill_value=-1)]
+[Amplitude(**metric, n_jobs=-1) for metric in metric_list]
)

tda_union=make_union(
    *[make_pipeline(*diagram_step, feature_union) for diagram_step in diagram_steps],
    n_jobs=-1
)

In [32]:
from sklearn import set_config
set_config(display='diagram')

tda_union

In [34]:
X_train_tda = tda_union.fit_transform(X_train)
X_train_tda.shape

(5000, 476)

In [35]:
import pickle

# save
with open('X_train_tda.pickle', 'wb') as f:
    pickle.dump(X_train_tda, f)