In [1]:
import pandas as pd
import numpy as np
import gudhi as gd
import matplotlib.pyplot as plt
from matplotlib.animation import ArtistAnimation
import diagram2vec
from collections import defaultdict
from mpl_toolkits.axes_grid1 import make_axes_locatable
import imageio
import time
from sklearn.neighbors.kde import KernelDensity
from ripser import Rips
from gudhi.representations.kernel_methods import PersistenceFisherDistance, \
                                                SlicedWassersteinDistance
vr = Rips()
import seaborn as sns
import tensorflow as tf

from sklearn.model_selection import KFold, train_test_split, cross_val_score, GridSearchCV
from tqdm import tqdm

from sklearn.base import BaseEstimator

Rips(maxdim=1, thresh=inf, coeff=2, do_cocycles=False, n_perm = None, verbose=True)


In [2]:
class DirectedFilter:
    def __init__(self, angle, threshold = 0.4, max_val = 10**3, normalized = True):
        self.threshold = threshold
        self.direction = np.array((-np.sin(angle), np.cos(angle)))
        self.normalized = normalized
        self.max_val = max_val
        
    def transform(self, image):
        bin_image = np.array(image > self.threshold, dtype = np.int8).reshape(image.shape)

        l1, l2 = map(list, np.where(bin_image))
        pos = np.array(list(zip(l1,l2)))
        val_filt = pos@self.direction
        filtred_img = bin_image.copy()
        filtred_img[l1,l2] = val_filt
        
        l1, l2 = map(list, np.where(bin_image == 0))
        filtred_img[l1,l2] = self.max_val
        if(self.normalized):
            filtred_img = filtred_img/self.max_val
            
        return filtred_img
    

In [3]:
def get_diagram(x, concat = False):
    if(not concat):
        d = defaultdict(list)
        for ind, inter in x:
            d[ind].append(inter)
        return [np.array(d[c]) for c in d]
    
    res = []
    for _, inter in x:
        if(inter[1] == float('inf')):
            res.append([inter[0], 1000])
        else:
            res.append(inter)
    return np.array(res)

In [4]:
def get_sim_table(image, angles, f_dist, threshold):
    if(type(angles) is int):
        
        angles_ = np.arange(0, 2*np.pi, 2*np.pi/angles)
    else:
        angles_ = angles
        
    sim_mat = np.zeros([angles_.shape[0]]*2)
    diagrams = []
    for ang in angles_:
        filt = DirectedFilter(ang, threshold)
        new_img = filt.transform(image)
        cc_density_crater = gd.CubicalComplex(dimensions = new_img.shape, 
                                          top_dimensional_cells = new_img.reshape(-1))
        pers_density_crater = cc_density_crater.persistence()
        
        diag = get_diagram(pers_density_crater, True)
        #print(diag.shape)
        diagrams.append(diag)
    
    res = f_dist.fit_transform(diagrams)
            
    return res, angles_

In [5]:
I = np.array(pd.read_csv('datasets/mnist_train.csv', header = None))
X = I[:, 1:].reshape((-1,28,28))
y = I[:, 0]

In [11]:
with open('datasets/labels.pkl', 'wb') as f:
    pickle.dump(y, f)

In [18]:
new_X = []
F = PersistenceFisherDistance(0.5)
for img in tqdm(X, position=0, leave=True):
    sim_table, angles = get_sim_table(img, 30, F, 0.4)
    tri_upper_diag = np.triu(sim_table, k=0).reshape((-1,))
    new_X.append(tri_upper_diag)
    
new_X = np.array(new_X)


  0%|                                                                                        | 0/60000 [00:00<?, ?it/s]


ValueError: too many values to unpack (expected 2)

In [6]:
n = 4
F = PersistenceFisherDistance(0.5)
import pickle
for rand_it in range(5):
    angles = np.random.uniform(0, 2*np.pi, n)
    new_X = []
    for img in tqdm(X, position=0, leave=True):
        sim_table = get_sim_table(img, angles, F, 0.4)
        tri_upper_diag = np.triu(sim_table, k=0).reshape((-1,))
        new_X.append(tri_upper_diag)
    new_X = np.array(new_X)
    
    with open('random_angles_'+str(n)+'iter_'+str(rand_it)+'.pkl', 'wb') as f:
        pickle.dump(new_X, f)
    

  0%|                                                                                        | 0/60000 [00:00<?, ?it/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\anubo\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-f5559aae1f3e>", line 9, in <module>
    tri_upper_diag = np.triu(sim_table, k=0).reshape((-1,))
  File "<__array_function__ internals>", line 6, in triu
  File "C:\Users\anubo\AppData\Roaming\Python\Python37\site-packages\numpy\lib\twodim_base.py", line 464, in triu
    m = asanyarray(m)
  File "C:\Users\anubo\AppData\Roaming\Python\Python37\site-packages\numpy\core\_asarray.py", line 138, in asanyarray
    return array(a, dtype, copy=False, order=order, subok=True)
ValueError: could not broadcast input array from shape (4,4) into shape (4)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\anubo\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2040, in showtraceback
    stb = v

ValueError: could not broadcast input array from shape (4,4) into shape (4)

In [14]:
new_X = []
F = SlicedWassersteinDistance()
for img in tqdm(X, position=0, leave=True):
    sim_table = get_sim_table(img, 36, F, 0.4)
    tri_upper_diag = np.triu(sim_table, k=0).reshape((-1,))
    new_X.append(tri_upper_diag)
    
new_X = np.array(new_X)
with open('same_angles_svd4.pkl', 'wb') as f:
    pickle.dump(new_X, f)

  2%|█▍                                                                         | 1102/60000 [01:29<1:18:52, 12.45it/s]

KeyboardInterrupt: 

In [7]:
n = 4
F = SlicedWassersteinDistance()
import pickle
for rand_it in range(4):
    angles = np.random.uniform(0, 2*np.pi, n)
    new_X = []
    for img in tqdm(X, position=0, leave=True):
        sim_table, angles = get_sim_table(img, angles, F, 0.4)
        tri_upper_diag = np.triu(sim_table, k=0).reshape((-1,))
        new_X.append(tri_upper_diag)
    new_X = np.array(new_X)
    new_X = {'X' : new_X,
            'angles' : angles}
    with open('random_angles_svd'+str(n)+'iter_'+str(rand_it)+'.pkl', 'wb') as f:
        pickle.dump(new_X, f)

100%|████████████████████████████████████████████████████████████████████████████| 60000/60000 [26:53<00:00, 37.19it/s]
100%|████████████████████████████████████████████████████████████████████████████| 60000/60000 [27:23<00:00, 36.50it/s]
100%|████████████████████████████████████████████████████████████████████████████| 60000/60000 [25:50<00:00, 38.70it/s]
100%|████████████████████████████████████████████████████████████████████████████| 60000/60000 [27:34<00:00, 36.26it/s]
