In [None]:
# Classifier classes (not instances) that the evaluation helpers iterate over.
classifiers = [
    LogisticRegression,
    KNeighborsClassifier,
    SVC,
    MLPClassifier,
    GaussianNB,
    DecisionTreeClassifier,
    RandomForestClassifier,
]

In [1]:
def classification_results(x_dat, y_true, y_pred, mod, title=None, results_out=False):
    """Print a classification report and plot the confusion matrix.

    Parameters
    ----------
    x_dat : object
        Not used by this function; kept so existing callers keep working.
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.
    mod : object
        Not used by this function; kept so existing callers keep working.
    title : str, optional
        When given, printed as a heading before the report.
    results_out : bool, default False
        When true, the report dictionary is returned.

    Returns
    -------
    dict or None
        ``classification_report(..., output_dict=True)`` when ``results_out``
        is true, otherwise ``None``.
    """
    if title is not None:
        print("Results for:", title)
    print(classification_report(y_true, y_pred))

    # Build the dictionary form once and reuse it for every figure below.
    report = classification_report(y_true, y_pred, output_dict=True)
    print("Training Accuracy:", round(report['accuracy'], 4))

    # The recall line refers to the class whose label prints as '1'; it is
    # skipped (rather than raising KeyError) when no such class exists.
    positive_class = report.get('1')
    if positive_class is not None:
        print("Training Recall:", round(positive_class['recall'], 4))

    # Confusion matrix heat map. Note that set_palette changes seaborn's
    # global palette, so it also affects plots drawn after this call.
    sns.set_palette("Paired")
    cm = confusion_matrix(y_true, y_pred)
    f, ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="black",
                cmap="RdBu_r", fmt=".0f", ax=ax)
    plt.xlabel("y_pred_rf")
    plt.ylabel("y_true_rf")
    plt.title('Training Data Confusion Matrix')
    plt.show()

    if results_out:
        return report
    return None

In [2]:
def _plot_permutation_importance(mod, x_dat, y_dat):
    """Plot permutation feature importance of a fitted model (best effort).

    Any failure (for example ``x_dat`` having no ``columns`` attribute) is
    reported with a one-line message instead of interrupting the caller.
    """
    try:
        # n_jobs=-1 means using all processors.
        imp = permutation_importance(mod, x_dat, y_dat, n_jobs=-1)
        # Build the table before opening a figure so that a failure here
        # does not leave an empty figure behind.
        importance_data = pd.DataFrame({'feature': x_dat.columns,
                                        'importance': imp.importances_mean})

        plt.figure(figsize=(12, 8))
        sns.barplot(x='importance', y='feature', data=importance_data)
        plt.title('Permutation Feature Importance')
        # No ``scoring`` argument is passed above, so the model's default
        # score is used, which for scikit-learn classifiers is accuracy.
        plt.xlabel('Mean Decrease in Accuracy')
        plt.ylabel('')
        plt.show()
    except Exception:
        # Exception (not a bare except) so Ctrl-C still interrupts.
        print('No Feature Importance Available')


def _print_test_report(method, mod, x_test, y_test, y_pred_test):
    """Print the test-set report and plot the test confusion matrix."""
    print("Results for {}:".format(method.__name__))
    print(classification_report(y_test, y_pred_test))
    print("Test Accuracy: {}%".format(round(mod.score(x_test, y_test)*100, 2)))

    cm = confusion_matrix(y_test, y_pred_test)
    f, ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="red", fmt=".0f", ax=ax)
    plt.xlabel("y_pred_rf")
    plt.ylabel("y_true_rf")
    plt.title('Test Data Confusion Matrix')
    plt.show()


def classification(method, x_dat, y_dat, model_out=False,
                   feature_importance=False, results=True,
                   test=False, resultOnly=False,
                   **params):
    """Fit ``method(**params)`` on the training data and report its accuracy.

    Parameters
    ----------
    method : class
        Estimator class; it is instantiated with ``**params`` and wrapped in
        a one-step ``Pipeline``.
    x_dat, y_dat : array-like
        Training features and labels.
    model_out : bool, default False
        Return the fitted pipeline. Only reached when no test set is given
        and neither ``results`` nor ``resultOnly`` is enabled.
    feature_importance : bool, default False
        Also plot permutation feature importance. Only honoured on the
        training-report path (``results`` enabled, no test set).
    results : bool, default True
        Print the training report and confusion matrix (no test set only).
    test : sequence or False, default False
        ``(x_test, y_test)``; when given, the model is evaluated on it
        instead of on the training data. ``None`` is treated like ``False``.
    resultOnly : bool, default False
        Return the accuracy without printing or plotting anything.
    **params
        Keyword arguments forwarded to ``method``.

    Returns
    -------
    float, Pipeline or None
        Accuracy rounded to 4 decimals (test accuracy when ``test`` is given,
        training accuracy otherwise); the fitted pipeline on the
        ``model_out`` path; ``None`` when nothing was requested.

    Notes
    -----
    The boolean flags are compared with ``== True``: only ``True`` (or a
    value equal to it, such as ``1``) enables a flag. Other truthy objects,
    e.g. a non-empty list, do not. This matches the historical behaviour.
    """
    # Fit model.
    mod = Pipeline([('classify', method(**params))])
    mod.fit(x_dat, y_dat)

    has_test = test is not False and test is not None
    result_only = resultOnly == True  # noqa: E712 - see Notes
    show_results = results == True  # noqa: E712 - see Notes

    if has_test:
        x_test, y_test = test[0], test[1]
        y_pred_test = mod.predict(x_test)
        report = classification_report(y_test, y_pred_test, output_dict=True)
        if not result_only:
            _print_test_report(method, mod, x_test, y_test, y_pred_test)
        return round(report['accuracy'], 4)

    if result_only:
        y_pred = mod.predict(x_dat)
        return round(classification_report(y_dat, y_pred, output_dict=True)['accuracy'], 4)

    if show_results:
        y_pred = mod.predict(x_dat)
        report = classification_results(x_dat, y_dat, y_pred, mod,
                                        title=method.__name__, results_out=True)
        if feature_importance == True:  # noqa: E712 - see Notes
            _plot_permutation_importance(mod, x_dat, y_dat)
        return round(report['accuracy'], 4)

    if model_out == True:  # noqa: E712 - see Notes
        return mod
    return None
    

In [3]:
def matrice_distance(data):
    """Return the normalised pairwise Euclidean distance matrix of ``data``.

    Distances are computed between every pair of rows of ``data`` and the
    resulting square matrix is passed through ``preprocessing.normalize``
    with its default arguments (in scikit-learn: each row scaled to unit
    L2 norm, so the result is generally no longer symmetric).
    """
    pairwise = euclidean_distances(data, data)
    return preprocessing.normalize(pairwise)

In [4]:
def estimateur(data, d, sigma):
    """Return, for each row of ``d``, the sum of Gaussian kernel values.

    For row ``i`` the result is ``sum_j exp(-d[i][j]**2 / sigma)``, taken
    over every entry of that row.

    Parameters
    ----------
    data : object
        Not used; kept so existing callers keep working.
    d : sequence of sequences of float
        Distance matrix (list of lists or 2-D array). Its size is taken from
        ``d`` itself rather than being fixed at 40 x 40, so any number of
        points is supported.
    sigma : float
        Kernel scale; must be non-zero.

    Returns
    -------
    list of float
        One kernel sum per row of ``d``.
    """
    inv_sigma = sigma ** (-1)  # loop-invariant, computed once
    f = []
    for row in d:
        s = 0
        for dist in row:
            s = s + math.exp(-(dist ** 2) * inv_sigma)
        f.append(s)
    return f

In [5]:
def KL(a, b):
    """Return ``sum(a * log(a / b))`` over the entries where ``a`` is non-zero.

    Both inputs are converted to float64 arrays and broadcast against each
    other. Entries with ``a == 0`` contribute exactly 0.

    Parameters
    ----------
    a, b : array-like
        Numeric inputs of broadcast-compatible shapes.

    Returns
    -------
    numpy.float64
        The summed divergence terms.
    """
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)

    # Where a == 0, substitute 1 for both numerator and denominator so the
    # logarithm sees 1 (-> 0) instead of 0 or 0/0. This gives the same terms
    # as masking afterwards, without divide/invalid RuntimeWarnings.
    nonzero = a != 0
    numerator = np.where(nonzero, a, 1.0)
    denominator = np.where(nonzero, b, 1.0)
    return np.sum(a * np.log(numerator / denominator))

In [6]:
def showResult(classifiers, x_train, y_train, test=False, resultOnly=False):
    """Run ``classification`` for every classifier and tabulate the results.

    Parameters
    ----------
    classifiers : iterable of classes
        Estimator classes; each is passed to ``classification``.
    x_train, y_train : array-like
        Training features and labels.
    test : sequence or False, default False
        Forwarded to ``classification`` as its ``test`` argument.
    resultOnly : bool, default False
        Forwarded to ``classification`` (it was previously accepted but
        ignored). When true, the separator lines are not printed either.

    Returns
    -------
    pandas.DataFrame
        A single row with one column per classifier, named after the class,
        holding the value returned by ``classification``.
    """
    names = []
    scores = []
    for cls in classifiers:
        if not resultOnly:
            # Visual separator between the per-classifier reports.
            print('_' * 50)
            print('-' * 50)
        names.append(cls.__name__)
        scores.append(classification(cls, x_train, y_train,
                                     test=test, resultOnly=resultOnly))
    return pd.DataFrame([scores], columns=names)

In [7]:
def topologicalFeatures(classifiers, x, y, test=False, results=True, resultOnly=True, model_out=True,
                        random_state=7):
    """Score classifiers on persistent-homology features of ``x``.

    Each row of ``x`` is reshaped into a point cloud of shape
    ``(n_columns, 1)``. For every amplitude metric, and finally for
    persistence entropy, a Vietoris-Rips persistence pipeline turns the
    point clouds into features, and every classifier is run on them through
    ``classification``.

    Parameters
    ----------
    classifiers : iterable of classes
        Estimator classes to evaluate.
    x : pandas.DataFrame
        Training samples (must provide ``to_numpy`` and a 2-D ``shape``).
    y : array-like
        Training labels.
    test : sequence or False, default False
        ``(x_test, y_test)`` with ``x_test`` shaped like ``x``. ``None`` is
        treated like ``False``.
    results, resultOnly, model_out : bool
        Forwarded unchanged to ``classification``.
    random_state : int, default 7
        Forwarded to every classifier whose constructor accepts a
        ``random_state`` argument; other classifiers are built without it.

    Returns
    -------
    pandas.DataFrame
        One row per feature type, one column per classifier, holding the
        values returned by ``classification``.
    """
    import inspect

    homology_dimensions = [0, 1, 2]
    amplitude_metrics = ["bottleneck", "wasserstein", "landscape", "betti",
                         "heat", "silhouette", "persistence_image"]

    def to_point_clouds(frame):
        # (n_samples, n_columns) -> (n_samples, n_columns, 1)
        return frame.to_numpy().reshape(frame.shape[0], frame.shape[1], 1)

    def seed_kwargs(cls):
        # Only pass random_state to constructors that declare it; passing it
        # blindly raises TypeError for estimators without that parameter.
        try:
            accepted = inspect.signature(cls).parameters
        except (TypeError, ValueError):
            return {}
        if 'random_state' in accepted:
            return {'random_state': random_state}
        return {}

    has_test = test is not False and test is not None
    train_clouds = to_point_clouds(x)
    test_clouds = to_point_clouds(test[0]) if has_test else None
    classifier_names = [cls.__name__ for cls in classifiers]

    def score_all(feature_step):
        # Evaluate every classifier on the features of one pipeline.
        pipeline = Pipeline([
            ("persistence", VietorisRipsPersistence(metric="euclidean", homology_dimensions=homology_dimensions, n_jobs=6)),
            feature_step,
        ])
        train_features = pipeline.fit_transform(train_clouds)
        test_arg = False
        if has_test:
            # Reuse the pipeline fitted on the training data rather than
            # re-fitting on the test set, so both sets share the same
            # fitted feature parameters.
            test_arg = [pipeline.transform(test_clouds), test[1]]

        scores = []
        for cls in classifiers:
            if not resultOnly:
                print("*" * 40)
            scores.append(classification(cls, train_features, y, test=test_arg,
                                         results=results, resultOnly=resultOnly,
                                         model_out=model_out, **seed_kwargs(cls)))
            if not resultOnly:
                print("+" * 40)
        return scores

    rows = [score_all(("amplitude", Amplitude(metric, n_jobs=-1)))
            for metric in amplitude_metrics]
    rows.append(score_all(("entropy", PersistenceEntropy(normalize=True))))

    return pd.DataFrame(rows, index=amplitude_metrics + ["entropy"],
                        columns=classifier_names)

In [8]:
def topologicalFeaturesComplete(x, y, test=False):
    """Score the module-level ``classifiers`` on persistent-homology features.

    Each row of ``x`` is reshaped into a point cloud of shape
    ``(n_columns, 1)``. For every amplitude metric, and finally for
    persistence entropy, a Vietoris-Rips persistence pipeline produces the
    features, and each classifier in the global ``classifiers`` list is run
    through ``classification`` with ``results=True, resultOnly=True``.

    Parameters
    ----------
    x : pandas.DataFrame
        Training samples (must provide ``to_numpy`` and a 2-D ``shape``).
    y : array-like
        Training labels.
    test : sequence or False, default False
        ``(x_test, y_test)`` with ``x_test`` shaped like ``x``. ``None`` is
        treated like ``False``.

    Returns
    -------
    pandas.DataFrame
        One row per feature type, one column per classifier (named after the
        classes in ``classifiers``), holding the returned accuracies.
    """
    homology_dimensions = [0, 1, 2]
    amplitude_metrics = ["bottleneck", "wasserstein", "landscape", "betti",
                         "heat", "silhouette", "persistence_image"]

    def to_point_clouds(frame):
        # (n_samples, n_columns) -> (n_samples, n_columns, 1)
        return frame.to_numpy().reshape(frame.shape[0], frame.shape[1], 1)

    has_test = test is not False and test is not None
    train_clouds = to_point_clouds(x)
    test_clouds = to_point_clouds(test[0]) if has_test else None
    # Derive the column names from the list that is actually iterated so the
    # labels cannot drift out of step with it.
    classifier_names = [cls.__name__ for cls in classifiers]

    def score_all(feature_step):
        # Evaluate every classifier on the features of one pipeline.
        pipeline = Pipeline([
            ("persistence", VietorisRipsPersistence(metric="euclidean", homology_dimensions=homology_dimensions, n_jobs=6)),
            feature_step,
        ])
        train_features = pipeline.fit_transform(train_clouds)
        test_arg = False
        if has_test:
            # Reuse the pipeline fitted on the training data rather than
            # re-fitting on the test set, so both sets share the same
            # fitted feature parameters.
            test_arg = [pipeline.transform(test_clouds), test[1]]
        return [classification(cls, train_features, y, test=test_arg,
                               results=True, resultOnly=True)
                for cls in classifiers]

    rows = [score_all(("amplitude", Amplitude(metric, n_jobs=-1)))
            for metric in amplitude_metrics]
    rows.append(score_all(("entropy", PersistenceEntropy(normalize=True))))

    return pd.DataFrame(rows, index=amplitude_metrics + ["entropy"],
                        columns=classifier_names)