In [1]:
from DatasetGeneration import generate_data_clouds
from ML_pipeline_final import ML_pipeline
from quantile_filtration import quantile
import itertools
import gudhi as gd
from build_complex import build_complex
from run_complex import run_complex
from compute_persistence import compute_persistence
from measure_time_memory import measure_time_memory
import matplotlib.pyplot as plt
import numpy as np
from compute_vectorisation import compute_vectorisation
from memory_profiler import profile
from sklearn.preprocessing   import MinMaxScaler
from sklearn.pipeline        import Pipeline
from sklearn.svm             import SVC
from sklearn.ensemble        import RandomForestClassifier
from sklearn.neighbors       import KNeighborsClassifier
from launch_benchmark import launch_benchmark
import random
import time


In [2]:
# Dataset configuration: number of points per cloud, number of clouds
# (diagrams) per class, and the list of parameter values passed to the
# generator.
points_for_cloud=800
num_diag_per_class=15
# Alternative, shorter list of values:
#r_values=[3, 3.5, 4]
r_values=[3, 3.5, 4, 4.1, 4.3, 4.5,5]

points,labels=generate_data_clouds(points_for_cloud,num_diag_per_class,r_values)

# The name `quantile` is rebound below to the averaged threshold array (later
# cells read `quantile[-1]`).  Calling the helper through a private alias keeps
# this cell re-runnable: without it, a second execution would try to call the
# array left over from the first run instead of the imported function.
from quantile_filtration import quantile as _quantile_per_cloud

# One threshold result per point cloud, stacked into an array and averaged
# across clouds (axis 0).
thresh = [_quantile_per_cloud(element) for element in points]
thresh_array = np.array(thresh)
quantile = np.mean(thresh_array, axis=0)


Next, we test which combination of ML algorithm and TDA representation performs best on this dataset.

In [3]:
# Benchmark for the 'alpha' complex type: for every parameter combination,
# build one complex per point cloud, compute its persistence diagram, then
# hand the diagrams to ML_pipeline / launch_benchmark and keep the
# best-scoring configuration.
complex_alpha_params = {
    'complex_type': ['alpha'],
    'precision': ['safe']}


best_accuracy=0
best_complex_parameters = {}
# Initialise the name that the selection branch below and the summary cell
# actually use.  Previously only `best_grid_search_parameters` was initialised,
# so when no run scored above 0 the summary cell raised a NameError (or showed
# a stale value left in the kernel by another cell).
best_grid_parameters = {}
best_grid_search_parameters = best_grid_parameters  # old name kept as an alias
whole_time,whole_memory=[], []
for complex_alpha_values in itertools.product(*complex_alpha_params.values()):

    # Map each parameter name to the value chosen for this combination.
    complex_alpha_parameters = dict(zip(complex_alpha_params.keys(), complex_alpha_values))
    dgms=[]  # persistence diagrams, one per point cloud

    for i,X in enumerate(points):
        # measure_time_memory returns the result of build_complex together
        # with the time and memory figures it measured for that call.
        alpha_result,timing,memory=measure_time_memory(build_complex,X,**complex_alpha_parameters)
        print("ST {} computation time: {:.2f} seconds".format(i, timing))
        print("ST Memory used: {:.2f} MB".format(memory))
        # perf_counter is the clock intended for measuring short durations.
        start_time = time.perf_counter()
        dgm=compute_persistence(alpha_result) #PD
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print("persistence computation time: :{:.2f} seconds".format(elapsed_time))
        print(" \n\n")
        dgms.append(dgm)
        # Only the complex-construction figures are accumulated; the
        # persistence time is reported per cloud by the print above.
        whole_time.append(timing)
        whole_memory.append(memory)
    print("Time used for diagrams computation and persistence: ", np.sum(whole_time),"seconds")
    print("Menory used for diagrams computation and persistence: {:.2f} MB".format(np.sum(whole_memory)))
    model,train_dgms,test_dgms,train_labs,test_labs=ML_pipeline(dgms,labels)
    start_time2 = time.perf_counter()
    test_accuracy,best_par=launch_benchmark(model,train_dgms,test_dgms,train_labs,test_labs)
    end_time2 = time.perf_counter()
    elapsed_time2 = end_time2 - start_time2
    print("Launch_benchmark computation time: :{:.2f} seconds".format(elapsed_time2))
    print(" \n\n")
    print("TEST RESULT FOR PARAMETERS : ", complex_alpha_parameters)
    print("============================================================================================================================================")
    print("============================================================================================================================================")
    print("============================================================================================================================================")

    # Keep the configuration with the highest test accuracy seen so far.
    if test_accuracy >best_accuracy :
        best_accuracy=test_accuracy
        best_grid_parameters=best_par.copy()
        best_grid_search_parameters=best_grid_parameters
        best_complex_parameters=complex_alpha_parameters.copy()
        
        
        
        
    


    
    




ST 0 computation time: 2.41 seconds
ST Memory used: 0.92 MB
persistence computation time: :0.01 seconds
 


ST 1 computation time: 2.92 seconds
ST Memory used: 0.30 MB
persistence computation time: :0.00 seconds
 


ST 2 computation time: 1.41 seconds
ST Memory used: 0.02 MB
persistence computation time: :0.00 seconds
 


ST 3 computation time: 1.46 seconds
ST Memory used: 0.01 MB
persistence computation time: :0.01 seconds
 


ST 4 computation time: 2.01 seconds
ST Memory used: 0.00 MB
persistence computation time: :0.01 seconds
 


ST 5 computation time: 1.40 seconds
ST Memory used: 0.00 MB
persistence computation time: :0.01 seconds
 


ST 6 computation time: 1.37 seconds
ST Memory used: 0.00 MB
persistence computation time: :0.01 seconds
 


ST 7 computation time: 1.88 seconds
ST Memory used: 0.19 MB
persistence computation time: :0.01 seconds
 


ST 8 computation time: 1.45 seconds
ST Memory used: 0.00 MB
persistence computation time: :0.02 seconds
 


ST 9 computation time: 3.20 

In [4]:
# Summary of the best configuration found by the benchmark cell above:
# one "label value" line per entry, followed by an empty line.
best_summary = (
    ("Best Complex Alpha Parameters:", best_complex_parameters),
    ("Best Grid Search Parameters:", best_grid_parameters),
    ("Best Test Accuracy:", best_accuracy),
)
for summary_label, summary_value in best_summary:
    print(summary_label, summary_value)
print()

Best Complex Alpha Parameters: {'complex_type': 'alpha', 'precision': 'safe'}
Best Grid Search Parameters: {'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
Best Test Accuracy: 0.38095238095238093



In [1]:
# Benchmark for the 'rips' complex type.  Same procedure as the other
# benchmark cells: build one complex per point cloud, compute its persistence
# diagram, then hand the diagrams to ML_pipeline / launch_benchmark.
#
# NOTE: `quantile` must be the averaged threshold array computed in the
# dataset cell (not the imported helper function), so that cell has to be run
# first; its last entry is used as the maximum edge length.
# The `complex_alpha_*` variable names are kept unchanged even though this
# cell configures a Rips complex.
complex_alpha_params = {
    'complex_type': ['rips'],
    'max_dimension': [2],
    'sparse': [None],
    'max_edge_length': [quantile[-1]]
}

best_accuracy=0
best_complex_parameters = {}
# Initialise the name that the selection branch below and the summary cell
# actually use.  Previously only `best_grid_search_parameters` was initialised,
# so when no run scored above 0 the summary cell raised a NameError (or showed
# a stale value left in the kernel by another cell).
best_grid_parameters = {}
best_grid_search_parameters = best_grid_parameters  # old name kept as an alias
whole_time,whole_memory=[], []
for complex_alpha_values in itertools.product(*complex_alpha_params.values()):

    # Map each parameter name to the value chosen for this combination.
    complex_alpha_parameters = dict(zip(complex_alpha_params.keys(), complex_alpha_values))
    dgms=[]  # persistence diagrams, one per point cloud

    for i,X in enumerate(points):
        # measure_time_memory returns the result of build_complex together
        # with the time and memory figures it measured for that call.
        alpha_result,timing,memory=measure_time_memory(build_complex,X,**complex_alpha_parameters)
        print("ST {} computation time: {:.2f} seconds".format(i, timing))
        print("ST Memory used: {:.2f} MB".format(memory))
        # perf_counter is the clock intended for measuring short durations.
        start_time = time.perf_counter()
        dgm=compute_persistence(alpha_result) #PD
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print("persistence computation time: :{:.2f} seconds".format(elapsed_time))
        print(" \n\n")
        dgms.append(dgm)
        # Only the complex-construction figures are accumulated; the
        # persistence time is reported per cloud by the print above.
        whole_time.append(timing)
        whole_memory.append(memory)
    print("Time used for diagrams computation and persistence: ", np.sum(whole_time),"seconds")
    print("Menory used for diagrams computation and persistence: {:.2f} MB".format(np.sum(whole_memory)))
    model,train_dgms,test_dgms,train_labs,test_labs=ML_pipeline(dgms,labels)
    start_time2 = time.perf_counter()
    test_accuracy,best_par=launch_benchmark(model,train_dgms,test_dgms,train_labs,test_labs)
    end_time2 = time.perf_counter()
    elapsed_time2 = end_time2 - start_time2
    print("Launch_benchmark computation time: :{:.2f} seconds".format(elapsed_time2))
    print(" \n\n")
    print("TEST RESULT FOR PARAMETERS : ", complex_alpha_parameters)
    print("============================================================================================================================================")
    print("============================================================================================================================================")
    print("============================================================================================================================================")

    # Keep the configuration with the highest test accuracy seen so far.
    if test_accuracy >best_accuracy :
        best_accuracy=test_accuracy
        best_grid_parameters=best_par.copy()
        best_grid_search_parameters=best_grid_parameters
        best_complex_parameters=complex_alpha_parameters.copy()
        
        
        
    


    
    




NameError: name 'quantile' is not defined

In [None]:
# Summary of the best configuration found by the benchmark cell above:
# one "label value" line per entry, followed by an empty line.
best_summary = (
    ("Best Complex Alpha Parameters:", best_complex_parameters),
    ("Best Grid Search Parameters:", best_grid_parameters),
    ("Best Test Accuracy:", best_accuracy),
)
for summary_label, summary_value in best_summary:
    print(summary_label, summary_value)
print()

Best Complex Alpha Parameters: {'complex_type': 'rips', 'max_dimension': 2, 'sparse': None, 'max_edge_length': 0.8576947761290932}
Best Grid Search Parameters: {'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
Best Test Accuracy: 0.5714285714285714



In [None]:
# Benchmark for the 'edge' complex type (presumably an edge-collapsed Rips
# complex, given `nb_iterations` — confirm in build_complex).  Same procedure
# as the other benchmark cells: build one complex per point cloud, compute its
# persistence diagram, then hand the diagrams to ML_pipeline /
# launch_benchmark.
#
# NOTE: `quantile` must be the averaged threshold array computed in the
# dataset cell (not the imported helper function), so that cell has to be run
# first; its last entry is used as the maximum edge length.
# The `complex_alpha_*` variable names are kept unchanged even though this
# cell does not configure an alpha complex.
complex_alpha_params = {
    'complex_type': ['edge'],
    'max_dimension': [2],
    'sparse': [None],
    'max_edge_length': [quantile[-1]],
    'nb_iterations': [1]
}

best_accuracy=0
best_complex_parameters = {}
# Initialise the name that the selection branch below and the summary cell
# actually use.  Previously only `best_grid_search_parameters` was initialised,
# so when no run scored above 0 the summary cell raised a NameError (or showed
# a stale value left in the kernel by another cell).
best_grid_parameters = {}
best_grid_search_parameters = best_grid_parameters  # old name kept as an alias
whole_time,whole_memory=[], []
for complex_alpha_values in itertools.product(*complex_alpha_params.values()):

    # Map each parameter name to the value chosen for this combination.
    complex_alpha_parameters = dict(zip(complex_alpha_params.keys(), complex_alpha_values))
    dgms=[]  # persistence diagrams, one per point cloud

    for i,X in enumerate(points):
        # measure_time_memory returns the result of build_complex together
        # with the time and memory figures it measured for that call.
        alpha_result,timing,memory=measure_time_memory(build_complex,X,**complex_alpha_parameters)
        print("ST {} computation time: {:.2f} seconds".format(i, timing))
        print("ST Memory used: {:.2f} MB".format(memory))
        # perf_counter is the clock intended for measuring short durations.
        start_time = time.perf_counter()
        dgm=compute_persistence(alpha_result) #PD
        end_time = time.perf_counter()
        elapsed_time = end_time - start_time
        print("persistence computation time: :{:.2f} seconds".format(elapsed_time))
        print(" \n\n")
        dgms.append(dgm)
        # Only the complex-construction figures are accumulated; the
        # persistence time is reported per cloud by the print above.
        whole_time.append(timing)
        whole_memory.append(memory)
    print("Time used for diagrams computation and persistence: ", np.sum(whole_time),"seconds")
    print("Menory used for diagrams computation and persistence: {:.2f} MB".format(np.sum(whole_memory)))
    model,train_dgms,test_dgms,train_labs,test_labs=ML_pipeline(dgms,labels)
    start_time2 = time.perf_counter()
    test_accuracy,best_par=launch_benchmark(model,train_dgms,test_dgms,train_labs,test_labs)
    end_time2 = time.perf_counter()
    elapsed_time2 = end_time2 - start_time2
    print("Launch_benchmark computation time: :{:.2f} seconds".format(elapsed_time2))
    print(" \n\n")
    print("TEST RESULT FOR PARAMETERS : ", complex_alpha_parameters)
    print("============================================================================================================================================")
    print("============================================================================================================================================")
    print("============================================================================================================================================")

    # Keep the configuration with the highest test accuracy seen so far.
    if test_accuracy >best_accuracy :
        best_accuracy=test_accuracy
        best_grid_parameters=best_par.copy()
        best_grid_search_parameters=best_grid_parameters
        best_complex_parameters=complex_alpha_parameters.copy()
        
        
        
    


    
    




ST 0 computation time: 9.57 seconds
ST Memory used: 792.02 MB
persistence computation time: :0.00 seconds
 


ST 1 computation time: 5.71 seconds
ST Memory used: 1789.23 MB
persistence computation time: :0.00 seconds
 


ST 2 computation time: 4.94 seconds
ST Memory used: 1801.57 MB
persistence computation time: :0.00 seconds
 


ST 3 computation time: 5.18 seconds
ST Memory used: 1753.82 MB
persistence computation time: :0.00 seconds
 


ST 4 computation time: 4.91 seconds
ST Memory used: 1726.91 MB
persistence computation time: :0.00 seconds
 


ST 5 computation time: 4.86 seconds
ST Memory used: 1708.86 MB
persistence computation time: :0.00 seconds
 


ST 6 computation time: 4.97 seconds
ST Memory used: 1714.11 MB
persistence computation time: :0.00 seconds
 


ST 7 computation time: 4.89 seconds
ST Memory used: 1793.40 MB
persistence computation time: :0.00 seconds
 


ST 8 computation time: 4.80 seconds
ST Memory used: 1743.51 MB
persistence computation time: :0.01 seconds
 


ST

In [None]:
# Summary of the best configuration found by the benchmark cell above:
# one "label value" line per entry, followed by an empty line.
best_summary = (
    ("Best Complex Alpha Parameters:", best_complex_parameters),
    ("Best Grid Search Parameters:", best_grid_parameters),
    ("Best Test Accuracy:", best_accuracy),
)
for summary_label, summary_value in best_summary:
    print(summary_label, summary_value)
print()

Best Complex Alpha Parameters: {'complex_type': 'edge', 'max_dimension': 2, 'sparse': None, 'max_edge_length': 0.8492661703175679, 'nb_iterations': 1}
Best Grid Search Parameters: {'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
Best Test Accuracy: 0.6666666666666666

