In [1]:
import glob
import os
import pickle as pck

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gudhi as gd
import gudhi.representations as sktda
from gudhi.point_cloud.timedelay import TimeDelayEmbedding
from sklearn.impute import SimpleImputer
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Kept last on purpose: any name multipers itself exports (including the aliases
# above, if it defines them) still takes precedence, exactly as before.
from multipers import *

Choose the dataset. The datasets can be downloaded from the UCR Time Series Classification Archive: https://www.cs.ucr.edu/~eamonn/time_series_data_2018/

In [2]:
# Dataset name: used both as the data sub-directory ("./<dataset>/") and as the
# prefix of the "<dataset>_TRAIN.tsv" / "<dataset>_TEST.tsv" files read below.
dataset = "GunPoint"

Define all hyperparameters.

In [3]:
# --- Time-delay embedding (arguments of TimeDelayEmbedding) ---
dimension = 3
delay = 1
skip = 1

# --- Vineyards (forwarded to sublevelsets_multipersistence) ---
nlines = 200   # passed as num_lines
noise = 0.2    # passed as noise

# --- DTM parameters ---
m = 0.1        # third argument of DTM(...)
p = 1          # NOTE(review): not referenced by any cell visible in this notebook

# --- Image / landscape resolution (used as resolution=[res, res]) ---
res = 50

# --- Machine-learning parameters ---
split = 100    # the first `split` samples form the training set, the rest the test set
classifier = XGBClassifier(random_state=101)
cv = 5         # passed as cv= to GridSearchCV

# --- Plotting (forwarded as visu= to sublevelsets_multipersistence) ---
visu = False

Define some global variables.

In [4]:
# Directory that holds the dataset files and every intermediate artifact written below.
path = "./{}/".format(dataset)
# One name per saved decomposition: homology degree 0 and degree 1.
list_filts = ["Alpha-DTM-" + str(degree) for degree in (0, 1)]

Read the data sets, and impute the missing values.

In [5]:
def _read_split(part):
    """Load `<path><dataset>_<part>.tsv` (tab-separated, no header row) as a NumPy array."""
    table = pd.read_csv(path + dataset + "_" + part + ".tsv", sep="\t", header=None)
    return np.array(table)


# Stack the TRAIN rows on top of the TEST rows.
X1 = _read_split("TRAIN")
X2 = _read_split("TEST")
X = np.vstack([X1, X2])

In [6]:
# Column 0 is kept as the label vector (saved later to labels.txt);
# the remaining columns are the time-series values.
L = X[:, 0]
TS = X[:, 1:]

# Fill NaN entries with SimpleImputer's "mean" strategy.
imp = SimpleImputer(missing_values=np.nan, strategy="mean")
TS = imp.fit_transform(TS)
nts = len(TS)

# Callable that maps one series to its time-delay embedding.
tde = TimeDelayEmbedding(dim=dimension, delay=delay, skip=skip)

# Decompositions

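Compute the maximal pairwise distance between embedded points (estimated on the first 30 time series); it is used below as the `max_alpha_square` threshold of the Alpha complexes.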

In [7]:
# Estimate the largest pairwise distance between embedded points on a subsample of
# the series. `maxd` is later passed as max_alpha_square when building Alpha complexes.
# The subsample is capped at `nts` so datasets with fewer than 30 series do not raise
# an IndexError.
nsub = min(30, nts)

ds = []
for tsidx in range(nsub):
    # Use a dedicated name: the raw data matrix `X` must stay intact so that the
    # earlier cells remain re-runnable.
    cloud = tde(TS[tsidx, :])
    ds.append(pairwise_distances(cloud).flatten())
allds = np.concatenate(ds)
maxd = np.max(allds)

Compute bounding rectangle for multiparameter persistence.

In [8]:
# Running bounds of the two filtrations over all series:
#   [mxf, Mxf] -> Alpha filtration values, [myf, Myf] -> DTM values.
mxf, Mxf, myf, Myf = np.inf, -np.inf, np.inf, -np.inf

for tsidx in range(nts):

    # Embedded point cloud of the current series (kept separate from the raw matrix `X`).
    cloud = tde(TS[tsidx, :])

    # Min and max of the first filtration (Alpha)
    st = gd.AlphaComplex(points=cloud).create_simplex_tree(max_alpha_square=maxd)
    st.persistence()  # NOTE(review): result is unused here; kept as in the original
    fs = [f for (_, f) in st.get_filtration()]
    mxf, Mxf = min(mxf, min(fs)), max(Mxf, max(fs))

    # Min and max of the second filtration (lower-star on DTM).
    # np.min / np.max reduce over every entry whatever the shape DTM returns, so the
    # bounds are always plain scalars (the decomposition cell squeezes the same output).
    density = np.squeeze(DTM(cloud, cloud, m))
    myf, Myf = min(myf, float(np.min(density))), max(Myf, float(np.max(density)))

Compute all multipersistence decompositions.

In [None]:
ldgms0, mdgms0 = [], []
ldgms1, mdgms1 = [], []
count = 0

# Arguments shared by the degree-0 and degree-1 vineyard computations.
vine_kwargs = dict(
    num_lines=nlines, corner="dg", extended=False, essential=False,
    epsilon=1e-10, min_bars=1, noise=noise, parallel=False, nproc=None, visu=visu, plot_per_bar=False,
    bnds_filt=[mxf, Mxf, myf, Myf], bnds_visu=[mxf, Mxf, myf, Myf])

for tsidx in range(nts):

    # Time-delay embedding and DTM density of the current series
    cloud = tde(TS[tsidx, :])
    density = np.squeeze(DTM(cloud, cloud, m))

    # Alpha complex on the embedded points
    dcomplex = gd.AlphaComplex(points=cloud)
    st = dcomplex.create_simplex_tree(max_alpha_square=maxd)

    # Second filtration: each simplex gets the largest density among its vertices.
    # Both trees are then passed through barycentric_subdivision.
    list_splxs = []
    st2 = gd.SimplexTree()
    for (s, _) in st.get_filtration():
        fval = max(density[v] for v in s)
        st2.insert(s, fval)
        list_splxs.append((s, fval))
    bary1 = barycentric_subdivision(st, use_sqrt=False)
    bary2 = barycentric_subdivision(st2, list_splx=list_splxs)

    # Write the inputs of the vineyards binary: one simplex per line in the complex
    # file, and one "<f1> <f2>" pair per vertex in the filtration file.
    cname = path + "complex" + str(count) + ".txt"
    fname = path + "filtrations" + str(count) + ".txt"
    with open(cname, "w") as complexfile, open(fname, "w") as filtfile:
        for (s, f) in bary1.get_filtration():
            complexfile.write("".join(str(v) + " " for v in s) + "\n")
            if len(s) == 1:
                filtfile.write(str(f) + " " + str(bary2.filtration(s)) + "\n")

    try:
        # Compute the vineyards in homology degrees 0 and 1
        mdg0, lines0, _, _ = sublevelsets_multipersistence(
            "./bin/vine", cname, fname, homology=0, **vine_kwargs)
        mdg1, lines1, _, _ = sublevelsets_multipersistence(
            "./bin/vine", cname, fname, homology=1, **vine_kwargs)
    finally:
        # Remove the temporary inputs and anything written next to them with the same
        # prefix, even if the computation failed (pure-Python equivalent of `rm <name>*`).
        for prefix in (cname, fname):
            for leftover in glob.glob(glob.escape(prefix) + "*"):
                os.remove(leftover)

    mdgms0.append(mdg0)
    mdgms1.append(mdg1)
    count += 1

Save the data.

In [None]:
# Labels of the processed series
np.savetxt(path + "labels.txt", L[:nts])

# `lines0` / `lines1` are the values left by the last iteration of the decomposition loop.
# NOTE(review): presumably identical for every series (same bounds and nlines) - confirm.
np.save(path + "lines_Alpha-DTM-0", lines0)
np.save(path + "lines_Alpha-DTM-1", lines1)

# Both homology degrees share the same bounding rectangle.
np.save(path + "bnds_Alpha-DTM-0", np.array([mxf,Mxf,myf,Myf]))
np.save(path + "bnds_Alpha-DTM-1", np.array([mxf,Mxf,myf,Myf]))

# The module is imported as `pck`; context managers make sure the files are flushed and closed.
with open(path + "mdgms_Alpha-DTM-0.pkl", "wb") as fh:
    pck.dump(mdgms0, fh)
with open(path + "mdgms_Alpha-DTM-1.pkl", "wb") as fh:
    pck.dump(mdgms1, fh)

# Vectorizations

Read the data.

In [None]:
# Reload, for every filtration name, the lines, the bounding rectangle and the
# decompositions written by the "Save the data" cell.
list_lines, list_bnds, list_mdgms, list_delta = [], [], [], []
for filtname in list_filts:
    # Bind the freshly loaded lines explicitly: `delta` below must be computed from
    # this filtration's lines, not from a variable left over by another cell.
    lines = np.load(path + "lines_" + filtname + ".npy")
    list_lines.append(lines)
    list_bnds.append(np.load(path + "bnds_" + filtname + ".npy"))
    # Pickle files produced by this notebook; do not point this at untrusted files.
    with open(path + "mdgms_" + filtname + ".pkl", "rb") as fh:
        list_mdgms.append(pck.load(fh))
    # NOTE(review): looks like the spacing between consecutive lines - confirm the indices.
    delta = np.abs(lines[0,0]-lines[1,0]) if lines[0,0] != lines[1,0] else np.abs(lines[2,2]-lines[1,2])
    list_delta.append(delta)

Compute Multiparameter Persistence Images.

In [None]:
# One multiparameter persistence image per decomposition, saved per filtration.
# `filtname` is taken from the loop itself; relying on the value left over by the
# reading cell made every filtration write to the same output file.
for filtidx, filtname in enumerate(list_filts):

    bnds  = list_bnds [filtidx]
    mdgms = list_mdgms[filtidx]

    MPI = [multipersistence_image(mdg, bnds, resolution=[res,res], return_raw=True) for mdg in mdgms]
    with open(path + "mpi_" + str(res) + "_" + filtname + ".pkl", "wb") as fh:
        pck.dump(MPI, fh)
    print("MPI done")

Compute Multiparameter Persistence Landscapes.

In [None]:
# One multiparameter persistence landscape per decomposition, saved per filtration.
# `filtname` is taken from the loop itself; relying on the value left over by the
# reading cell made every filtration write to the same output file.
for filtidx, filtname in enumerate(list_filts):

    bnds  = list_bnds [filtidx]
    mdgms = list_mdgms[filtidx]
    delta = list_delta[filtidx]

    MLS = [multipersistence_landscape(mdg,bnds,delta,resolution=[res,res],k=5,return_raw=True) for mdg in mdgms]
    with open(path + "mls_" + str(res) + "_" + filtname + ".pkl", "wb") as fh:
        pck.dump(MLS, fh)
    print("MLS done")

Compute Multiparameter Persistence Kernels.

In [None]:
# One multiparameter persistence kernel matrix per filtration.
# `filtname` is taken from the loop itself; relying on the value left over by the
# reading cell made every filtration write to the same output file.
for filtidx, filtname in enumerate(list_filts):

    lines = list_lines[filtidx]
    bnds  = list_bnds [filtidx]
    mdgms = list_mdgms[filtidx]

    MK  = [extract_diagrams(mdg, bnds, lines) for mdg in mdgms]
    sw = sktda.SlicedWassersteinDistance(num_directions=10)
    # same=True: the matrix is computed between the data set and itself.
    M = multipersistence_kernel(MK, MK, lines, sw, lambda x: 1, same=True, return_raw=False, power=0)
    with open(path + "mk_" + filtname + ".pkl", "wb") as fh:
        pck.dump(M, fh)
    print("MK done")

Collect the diagonal barcodes for 1D persistence.

In [None]:
def _diagonal_line_index(lines):
    """Return the index of the first line whose start point has both the smallest x
    (column 0) and the smallest y (column 1) among all lines; fall back to the middle
    line when no line satisfies both conditions."""
    hits = np.flatnonzero((lines[:, 0] == np.min(lines[:, 0])) & (lines[:, 1] == np.min(lines[:, 1])))
    return int(hits[0]) if len(hits) > 0 else int(len(lines) / 2)


def _fiber_barcode(decomposition, lines, bnds, al):
    """Build the 1D diagram carried by line `al` for one decomposition.

    Rows of the stacked decomposition whose column 4 equals `al` are kept, clipped with
    intersect_boundaries, and each endpoint is replaced by its Euclidean distance to the
    start point of line `al`. When nothing is left at any stage, a single placeholder
    point located at the centre of `bnds` is returned instead.
    """
    placeholder = np.array([[.5*(bnds[0]+bnds[1]), .5*(bnds[2]+bnds[3])]])
    if len(decomposition) == 0:
        return placeholder

    mdgm = np.vstack(decomposition)
    idxs = np.argwhere(mdgm[:, 4] == al)[:, 0]
    if len(idxs) == 0:
        return placeholder

    dg = intersect_boundaries(mdgm[idxs][:, :4], bnds)
    if len(dg) == 0:
        return placeholder

    origin = np.array([[lines[al, 0], lines[al, 1]]])
    # Columns (0, 2) and (1, 3) are used as the two endpoints of each bar.
    starts, ends = dg[:, [0, 2]], dg[:, [1, 3]]
    return np.hstack([np.linalg.norm(starts - origin, axis=1)[:, np.newaxis],
                      np.linalg.norm(ends - origin, axis=1)[:, np.newaxis]])


fibs = []
for filtidx in range(len(list_filts)):

    lines = list_lines[filtidx]
    bnds  = list_bnds [filtidx]
    mdgms = list_mdgms[filtidx]

    # The selected line only depends on `lines`, so it is looked up once per filtration
    # instead of once per decomposition.
    al = _diagonal_line_index(lines)
    ldgms = [_fiber_barcode(decomposition, lines, bnds, al) for decomposition in mdgms]
    fibs.append(ldgms)

Compute persistence landscapes.

In [None]:
# Persistence landscapes of the 1D diagrams, saved per filtration.
# `filtname` is taken from the loop itself; relying on the value left over by the
# reading cell made every filtration write to the same output file.
for filtidx, filtname in enumerate(list_filts):

    fib = fibs[filtidx]

    ldgmsLS = list(fib)
    # Stored under its own name so the label vector `L` defined earlier is not overwritten.
    LS = sktda.Landscape(num_landscapes=5, resolution=2500, sample_range=[np.nan, np.nan]).fit_transform(ldgmsLS)
    with open(path + "ls_" + filtname + ".pkl", "wb") as fh:
        pck.dump(LS, fh)
    print("LS done")

Compute persistence images.

In [None]:
# Persistence images of the 1D diagrams, saved per filtration.
# `filtname` is taken from the loop itself; relying on the value left over by the
# reading cell made every filtration write to the same output file.
for filtidx, filtname in enumerate(list_filts):

    fib = fibs[filtidx]

    # Change coordinates from (column 0, column 1) to (column 0, column 1 - column 0).
    ldgmsPI = [np.hstack([dgm[:,0:1], dgm[:,1:2]-dgm[:,0:1]]) for dgm in fib]

    # Image bounds are taken from the diagrams that are actually rasterized for this
    # filtration (the previous code read a list left over from another cell).
    PXs = np.vstack([dgm[:,0:1] for dgm in ldgmsPI])
    PYs = np.vstack([dgm[:,1:2] for dgm in ldgmsPI])
    bnds = [PXs.min(), PXs.max(), PYs.min(), PYs.max()]

    PI = [persistence_image(dgm=dgm, bnds=bnds, return_raw=True) for dgm in ldgmsPI]
    # The configured resolution is named `res`; it tags the file like the "mpi_"/"mls_" outputs.
    with open(path + "pi_" + str(res) + "_" + filtname + ".pkl", "wb") as fh:
        pck.dump(PI, fh)
    print("PI done")

# Classifications

Read the labels.

In [None]:
# Labels are stored as floats on disk; convert each one to a Python int.
raw_labels = np.loadtxt(path + "labels.txt", dtype=float)
labels = np.array(list(map(int, raw_labels)))
npoints = len(labels)

# The first `split` samples are used for training, the remaining ones for testing.
train_index = np.arange(split)
test_index = np.arange(split, npoints)

Classify the Multiparameter Persistence Images.

In [None]:
# Load the raw multiparameter persistence images of every filtration.
# The files are tagged with the configured resolution `res`.
Xmpi = []
for filt in list_filts:
    with open(path + "mpi_" + str(res) + "_" + filt + ".pkl", "rb") as fh:
        Xmpi.append(pck.load(fh))

params_mpi = {
    "mpi__bdw":     [1e-2, 1e-1, 1, 1e1, 1e2],
    "mpi__power":   [0, 1],
    "mpi__step":    [1, 5],
    "clf":          [classifier],
}
pipe_mpi = Pipeline([("mpi", MultiPersistenceImageWrapper()), ("clf", classifier)])

# Each sample is the list of its images, one per filtration.
X_train = [[Xmpi[nf][n] for nf in range(len(Xmpi))] for n in train_index]
X_test  = [[Xmpi[nf][n] for nf in range(len(Xmpi))] for n in test_index]
y_train, y_test = labels[train_index], labels[test_index]

model = GridSearchCV(estimator=pipe_mpi, param_grid=params_mpi, cv=cv)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("MP-I score = " + str(score))

# The model uses all filtrations jointly. The result file keeps the tag it had when
# the cells were run in order (the last filtration name) without depending on a
# loop variable leaked by another cell.
out_tag = list_filts[-1]
with open(path + "modelMPI_CV" + str(cv) + "_" + out_tag + ".pkl", "wb") as fh:
    pck.dump([model.best_params_, model.cv_results_, score], fh)

Classify the Multiparameter Persistence Landscapes.

In [None]:
# Load the raw multiparameter persistence landscapes of every filtration.
# The files are tagged with the configured resolution `res`.
Xmls = []
for filt in list_filts:
    with open(path + "mls_" + str(res) + "_" + filt + ".pkl", "rb") as fh:
        Xmls.append(pck.load(fh))

params_mls = {
    "mls__power":   [0, 1],
    "mls__step":    [1, 5],
    "mls__k":       [5],
    "clf":          [classifier],
}
pipe_mls = Pipeline([("mls", MultiPersistenceLandscapeWrapper()), ("clf", classifier)])

# Each sample is the list of its landscapes, one per filtration.
X_train = [[Xmls[nf][n] for nf in range(len(Xmls))] for n in train_index]
X_test  = [[Xmls[nf][n] for nf in range(len(Xmls))] for n in test_index]
y_train, y_test = labels[train_index], labels[test_index]

model = GridSearchCV(estimator=pipe_mls, param_grid=params_mls, cv=cv)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("MP-L score = " + str(score))

# The model uses all filtrations jointly. The result file keeps the tag it had when
# the cells were run in order (the last filtration name) without depending on a
# loop variable leaked by another cell.
out_tag = list_filts[-1]
with open(path + "modelMLS_CV" + str(cv) + "_" + out_tag + ".pkl", "wb") as fh:
    pck.dump([model.best_params_, model.cv_results_, score], fh)

Classify the Multiparameter Persistence Kernels.

In [None]:
# Load the matrix saved for each filtration and add them up.
Xmk = []
for filt in list_filts:
    with open(path + "mk_" + filt + ".pkl", "rb") as fh:
        Xmk.append(pck.load(fh))
Xmk = sum([  Xmk[nf] for nf in range(len(Xmk))  ])

# A precomputed-kernel SVC is fit on the train-vs-train block and scored on the
# test-vs-train block.
X_train, X_test = Xmk[train_index,:][:,train_index], Xmk[test_index,:][:,train_index]
y_train, y_test = labels[train_index], labels[test_index]
model = SVC(kernel="precomputed")
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("MP-K score = " + str(score))

# The result file keeps the tag it had when the cells were run in order (the last
# filtration name) without depending on a loop variable leaked by another cell.
out_tag = list_filts[-1]
with open(path + "modelMK_SWK10_CV1_" + out_tag + ".pkl", "wb") as fh:
    pck.dump([0, 0, score], fh)

Classify the persistence landscapes.

In [None]:
# Load the persistence landscapes of every filtration and concatenate them feature-wise.
Xls = []
for filt in list_filts:
    with open(path + "ls_" + filt + ".pkl", "rb") as fh:
        Xls.append(pck.load(fh))
Xls = np.hstack(Xls)

params_ls = {
    "sbs__step":    [1, 25],
    "clf":          [classifier],
}
pipe_ls = Pipeline([("sbs", SubsampleWrapper()), ("clf", classifier)])
X_train, X_test = Xls[train_index,:], Xls[test_index,:]
y_train, y_test = labels[train_index], labels[test_index]

model = GridSearchCV(estimator=pipe_ls, param_grid=params_ls, cv=cv)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("P-L score = " + str(score))

# The result file keeps the tag it had when the cells were run in order (the last
# filtration name) without depending on a loop variable leaked by another cell.
out_tag = list_filts[-1]
with open(path + "modelLS_CV" + str(cv) + "_" + out_tag + ".pkl", "wb") as fh:
    pck.dump([model.best_params_, model.cv_results_, score], fh)

Classify the persistence images.

In [None]:
# Load the raw persistence images of every filtration.
# The files are tagged with the configured resolution `res`.
Xpi = []
for filt in list_filts:
    with open(path + "pi_" + str(res) + "_" + filt + ".pkl", "rb") as fh:
        Xpi.append(pck.load(fh))

params_pi = {
    "pi__bdw":     [1e-2, 1e-1, 1, 1e1, 1e2],
    "pi__power":   [0, 1],
    "pi__step":    [1, 5],
    "clf":         [classifier],
}
pipe_pi = Pipeline([("pi", PersistenceImageWrapper()), ("clf", classifier)])

# Each sample is the list of its images, one per filtration.
X_train = [[Xpi[nf][n] for nf in range(len(Xpi))] for n in train_index]
X_test  = [[Xpi[nf][n] for nf in range(len(Xpi))] for n in test_index]
y_train, y_test = labels[train_index], labels[test_index]

model = GridSearchCV(estimator=pipe_pi, param_grid=params_pi, cv=cv)
model.fit(X_train, y_train)
score = model.score(X_test, y_test)
print("P-I score = " + str(score))

# The model uses all filtrations jointly. The result file keeps the tag it had when
# the cells were run in order (the last filtration name) without depending on a
# loop variable leaked by another cell.
out_tag = list_filts[-1]
with open(path + "modelPI_CV" + str(cv) + "_" + out_tag + ".pkl", "wb") as fh:
    pck.dump([model.best_params_, model.cv_results_, score], fh)