In [1]:
# Enable IPython's autoreload extension in mode 2: all imported modules are reloaded
# before each cell executes, so edits to the local modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Project-local modules. The star import stays ahead of the explicit imports below
# so that any name it re-exports cannot shadow them.
import utils
from models import *

import pickle
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC, SVC
from skorch import NeuralNetClassifier, callbacks
from skorch.dataset import Dataset
from skorch.helper import predefined_split



Gudhi not found--GraphInducedComplex not available


In [3]:
# --- Experiment configuration -------------------------------------------------
TEST_FRACTION = 0.20   # fraction of the data passed to utils.split_train_test as the test share
EPOCHS        = 200    # max_epochs handed to every skorch NeuralNetClassifier in this notebook
SEED          = 42     # single seed for all random number generators seeded below

# Seed Python, NumPy and PyTorch so that a "Restart & Run All" reproduces the same
# scores. The estimators defined in later cells (random forests without an explicit
# random_state, neural nets with dropout) draw from these global generators.
# NOTE(review): utils.split_train_test is assumed to draw from one of these
# generators as well - confirm against its implementation.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Read data
data = utils.read_and_build_features()
# data = utils.read_and_build_features()[:50]; EPOCHS=20  # smaller data for testing purposes

# Split into train-test
dataset = utils.split_train_test(data, TEST_FRACTION)

# Feature-vector lengths, read off the first training sample; they size the input
# layers of the networks defined in later cells.
corr_feature_size = dataset.X_train[0].corr_vector.shape[0]
pi_feature_size   = dataset.X_train[0].persistence_image.shape[0]
pl_feature_size   = dataset.X_train[0].persistence_landscape.shape[0]
# Size passed as the first entry of each persistence-diagram branch spec further down.
pers_input_size   = 50

# Central registry that the rest of the notebook adds models to, trains and evaluates.
# NOTE(review): presumably '../data_processed/' is its working directory and
# overwrite=True replaces previously stored results - confirm in models.ModelManager.
modelManager = ModelManager('../data_processed/', dataset, overwrite=True)

# Lookup table: short feature-set key -> utils helper that is handed to
# modelManager.add_model together with a model. Insertion order is unchanged.
featureExtractors = dict([
    # correlation features on their own
    ('corr', utils.get_corr_features),
    # persistence representation combined with correlation features (per the helper names)
    ('pi_corr', utils.get_pers_img_corr_features),
    ('pl_corr', utils.get_pers_landscape_corr_features),
    ('pd_corr', utils.get_pers_diag_corr_features),
    # persistence representations on their own: image, landscape, diagram
    ('pi', utils.get_pers_img_features),
    ('pl', utils.get_pers_landscape_features),
    ('pd', utils.get_pers_diag_features),
    # input for the PersistenceKernelSVM models
    ('pd_kern', utils.get_pers_diag_kern_features),
    # inputs for the convolutional persistence-image networks
    ('pi_conv0', utils.get_pi_conv0_features),
    ('pi_conv1', utils.get_pi_conv1_features),
    ('pi_conv_dimchannel', utils.get_pi_conv_dimchannel_features),
    ('pi_conv_sum', utils.get_pi_conv_sum_features),
    ('pi_conv_hybrid', utils.get_pi_conv_hybrid_features),
])

### Correlation models

In [4]:
# Three classifiers that all consume the 'corr' feature set:
# a linear-kernel SVM, a 500-tree random forest and a skorch-wrapped NNVec network.
svm_corr = SVC(kernel='linear')
rf_corr  = RandomForestClassifier(n_estimators=500)
nn_corr  = NeuralNetClassifier(
    # presumably layer widths (input -> 100 -> 2) - TODO confirm against models.NNVec
    NNVec([corr_feature_size, 100, 2], dropout_prob=0.5),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)

# Register the models in the order SVM, random forest, neural net.
corr_extractor = featureExtractors['corr']
for corr_name, corr_model in (('svm_corr', svm_corr),
                              ('rf_corr', rf_corr),
                              ('nn_corr', nn_corr)):
    modelManager.add_model(corr_model, corr_name, corr_extractor)

### Persistence image models

In [5]:
# Same trio of classifiers, this time fed the 'pi' (persistence image) feature set.
# Unlike the other NNVec models in this notebook, nn_pi is built without a dropout_prob argument.
svm_pi = SVC(kernel='linear')
rf_pi  = RandomForestClassifier(n_estimators=500)
nn_pi  = NeuralNetClassifier(
    NNVec([pi_feature_size, 10, 2]),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)

# Register the models in the order SVM, random forest, neural net.
pi_extractor = featureExtractors['pi']
for pi_name, pi_model in (('svm_pi', svm_pi),
                          ('rf_pi', rf_pi),
                          ('nn_pi', nn_pi)):
    modelManager.add_model(pi_model, pi_name, pi_extractor)

In [7]:
# Confusion matrix of the registered 'pi_conv_hybrid' entry on the test split.
# NOTE(review): no active cell in this notebook registers 'pi_conv_hybrid' (its add_model
# call is commented out), so this lookup seems to depend on state from an earlier
# session - confirm it survives "Restart & Run All".
hybrid_entry = modelManager.models['pi_conv_hybrid']
hybrid_entry.confusion_matrix(dataset.X_test, dataset.y_test)

array([[  1, 101],
       [  0, 105]])

In [5]:
# Registration and training of the hybrid conv model are currently disabled:
# pi_conv_hybrid = NeuralNetClassifier(NNConvBranched(), max_epochs=EPOCHS, verbose=True)
# modelManager.add_model(pi_conv_hybrid, 'pi_conv_hybrid', featureExtractors['pi_conv_hybrid'])
# modelManager.train_all()

# Score whatever is registered, using accuracy_score as the metric, then show the summary table.
modelManager.evaluate_all(accuracy_score)
hybrid_scores = modelManager.tabulate()
hybrid_scores

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




Unnamed: 0,Model,Train time,Score
0,pi_conv_hybrid,120.804385,0.512077


In [5]:
# Keyword arguments shared by all four convolutional persistence-image classifiers.
conv_settings = dict(max_epochs=EPOCHS, verbose=False, lr=0.001)

# Only pi_conv_dimchannel changes the network itself (dim_channel=True); the other
# three use a default NNConv and differ only in the feature extractor they are paired with.
pi_conv0           = NeuralNetClassifier(NNConv(), **conv_settings)
pi_conv1           = NeuralNetClassifier(NNConv(), **conv_settings)
pi_conv_dimchannel = NeuralNetClassifier(NNConv(dim_channel=True), **conv_settings)
pi_conv_sum        = NeuralNetClassifier(NNConv(), **conv_settings)

# Each model name doubles as its key in featureExtractors.
for conv_name, conv_net in (('pi_conv0', pi_conv0),
                            ('pi_conv1', pi_conv1),
                            ('pi_conv_dimchannel', pi_conv_dimchannel),
                            ('pi_conv_sum', pi_conv_sum)):
    modelManager.add_model(conv_net, conv_name, featureExtractors[conv_name])

In [7]:
# modelManager.train_all()
# modelManager.evaluate_all(accuracy_score)
# modelManager.tabulate()

### Persistence Landscape models

In [8]:
# Linear SVM, 500-tree random forest and NNVec network on the 'pl'
# (persistence landscape) feature set.
svm_pl = SVC(kernel='linear')
rf_pl  = RandomForestClassifier(n_estimators=500)
nn_pl  = NeuralNetClassifier(
    NNVec([pl_feature_size, 10, 2], dropout_prob=0.5),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)

# Register the models in the order SVM, random forest, neural net.
pl_extractor = featureExtractors['pl']
for pl_name, pl_model in (('svm_pl', svm_pl),
                          ('rf_pl', rf_pl),
                          ('nn_pl', nn_pl)):
    modelManager.add_model(pl_model, pl_name, pl_extractor)

### Persistence diagram models

In [5]:
# Two identical [pers_input_size, 25] branch specs followed by a [50, 2] spec.
# NOTE(review): presumably one branch per homology dimension, merged into a
# 50 -> 2 classifier head - confirm against models.NNPersDiag.
pd_branch_specs = [[pers_input_size, 25] for _ in range(2)]
pd_head_spec    = [50, 2]

nn_pd = NeuralNetClassifier(
    NNPersDiag(pd_branch_specs, pd_head_spec, dropout_prob=0.5),
    max_epochs=EPOCHS,
    verbose=False,
    warm_start=True,
)

modelManager.add_model(nn_pd, 'nn_pd', featureExtractors['pd'])

### Hybrid models

In [5]:
# Training options common to the three hybrid (persistence + correlation) networks.
hybrid_settings = dict(max_epochs=EPOCHS, verbose=False, warm_start=True)

# Persistence diagrams + correlation vector.
pd_corr = NeuralNetClassifier(
    NNHybridPers([[pers_input_size, 25], [pers_input_size, 25]],
                 [corr_feature_size, 500, 25],
                 [75, 2],
                 dropout_prob=0.5),
    **hybrid_settings)

# Persistence image + correlation vector.
pi_corr = NeuralNetClassifier(
    NNHybridVec([[pi_feature_size, 10], [corr_feature_size, 10]],
                [20, 10, 2],
                dropout_prob=0.5),
    **hybrid_settings)

# Persistence landscape + correlation vector.
pl_corr = NeuralNetClassifier(
    NNHybridVec([[pl_feature_size, 10], [corr_feature_size, 10]],
                [20, 10, 2],
                dropout_prob=0.5),
    **hybrid_settings)

# Each model name doubles as its key in featureExtractors.
for hybrid_name, hybrid_net in (('pd_corr', pd_corr),
                                ('pi_corr', pi_corr),
                                ('pl_corr', pl_corr)):
    modelManager.add_model(hybrid_net, hybrid_name, featureExtractors[hybrid_name])

### Topological kernels

In [11]:
# One PersistenceKernelSVM per kernel type, built in the order listed.
(svm_scalespace,
 svm_weightedgaussian,
 svm_slicedwasserstein,
 svm_fisher) = [
    PersistenceKernelSVM(kernel_type=kernel_name)
    for kernel_name in ('scale_space', 'weighted_gaussian', 'sliced_wasserstein', 'fisher')
]

# Only the sliced-Wasserstein variant is registered. The other three are built but stay
# out of the manager; to include one, register it the same way, e.g.
#   modelManager.add_model(svm_fisher, 'svm_fisher', featureExtractors['pd_kern'])
modelManager.add_model(svm_slicedwasserstein, 'svm_slicedwasserstein', featureExtractors['pd_kern'])

### Topological kernel + Correlation

In [12]:
# Same four kernel types as the plain kernel SVMs, each built with corr_kernel=True.
(svm_scalespace_corr,
 svm_weightedgaussian_corr,
 svm_slicedwasserstein_corr,
 svm_fisher_corr) = [
    PersistenceKernelSVM(kernel_type=kernel_name, corr_kernel=True)
    for kernel_name in ('scale_space', 'weighted_gaussian', 'sliced_wasserstein', 'fisher')
]

# Only the sliced-Wasserstein variant is registered. The other three are built but stay
# out of the manager; to include one, register it the same way, e.g.
#   modelManager.add_model(svm_fisher_corr, 'svm_fisher_corr', featureExtractors['pd_kern'])
modelManager.add_model(svm_slicedwasserstein_corr, 'svm_slicedwasserstein_corr', featureExtractors['pd_kern'])

In [6]:
# Presumably fits every model currently registered with modelManager (see models.ModelManager).
# NOTE(review): the saved progress bar reports max=4, so the recorded run apparently
# covered four models rather than everything this notebook defines - confirm.
modelManager.train_all()

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




In [7]:
# Evaluate the registered models with sklearn's accuracy_score as the metric.
# NOTE(review): presumably scored on the test split held by the manager - confirm.
modelManager.evaluate_all(accuracy_score)

HBox(children=(IntProgress(value=0, max=4), HTML(value='')))




In [8]:
# Show the results summary; the saved output lists Model, Train time and Score per model.
modelManager.tabulate()

Unnamed: 0,Model,Train time,Score
0,pi_conv0,61.065454,0.512077
1,pi_conv1,67.114612,0.492754
2,pi_conv_dimchannel,64.249,0.512077
3,pi_conv_sum,58.846171,0.458937


In [15]:
# Drop 'svm_slicedwasserstein_corr' from the manager before the manager is pickled.
# NOTE(review): the reason is not shown in this notebook - presumably this model
# cannot be serialized; confirm before relying on the saved file.
modelManager.remove_model('svm_slicedwasserstein_corr')

In [16]:
# Persist the whole ModelManager (with whatever models are still registered) to disk.
model_store = Path('../models/modelManager.pkl')

# Create the target directory first: opening the file for writing raises
# FileNotFoundError when '../models/' is missing, which would discard the results
# of a long training run at the very last step.
model_store.parent.mkdir(parents=True, exist_ok=True)

with model_store.open('wb') as f:
    pickle.dump(modelManager, f)