In [1]:

try:
    from jyquickhelper import add_notebook_menu 
except:
    !pip install jyquickhelper
    from jyquickhelper import add_notebook_menu
    

add_notebook_menu()
#Table du contenu pour  naviger facilement le notebook. 

## Notebook presentation

This notebook is focused on presenting the benchmark results for the Rips complex, Rips edge collapse complex, and alpha complex. The evaluation is performed on the ORBIT15K dataset. The notebook includes two tests, each utilizing distinct machine learning pipelines, which will be described in subsequent steps.

## Imports 

In [2]:
import pandas as pd
import ast
import numpy as np
from memory_profiler import profile
from sklearn.preprocessing   import MinMaxScaler
from sklearn.pipeline        import Pipeline
from sklearn.svm             import SVC
from sklearn.ensemble        import RandomForestClassifier
from sklearn.neighbors       import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from gudhi.representations import PersistenceImage
from gudhi.representations import BottleneckDistance
from gudhi.representations import Landscape
import gudhi as gd

In [3]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns

##  Second Test : Selection of the representation module

### Pipeline 

In [4]:
pipe = Pipeline([("Separator", gd.representations.DiagramSelector(limit=np.inf, point_type="finite")),
                    ("Scaler",    gd.representations.DiagramScaler(scalers=[([0,1], MinMaxScaler())])),
                    ("TDA",       gd.representations.PersistenceImage()),
                    ("Estimator", RandomForestClassifier())])

param =    [
                                
                {"Scaler__use":         [True],
                "TDA":                 [gd.representations.Landscape()], 
                "TDA__resolution":     [100],
                "Estimator":           [RandomForestClassifier()]},
            
                {"Scaler__use":         [True],
                "TDA":                 [gd.representations.PersistenceImage()], 
                "TDA__resolution":     [ [5,5], [6,6] ],
                "TDA__bandwidth":      [0.01, 0.1, 1.0, 10.0],
                "Estimator":           [RandomForestClassifier()]},
                {"Scaler__use":         [False],
                "TDA":                 [gd.representations.BottleneckDistance()], 
                "TDA__epsilon":        [0.1], 
                "Estimator":           [RandomForestClassifier()]}
            ]

model = GridSearchCV(pipe, param, cv=4, error_score='raise')   

### Alpha Complex :

In [5]:
df_alpha_test_1=pd.read_csv("results_alpha.csv")
df_alpha_test_2=pd.read_csv("resultsalpha_6245.csv")
df_alpha_test_3=pd.read_csv("resultsalpha_625.csv")
df_alpha_test_pipeline1=pd.concat([df_alpha_test_1,df_alpha_test_2,df_alpha_test_3])
df_alpha_test_pipeline1=df_alpha_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_alpha_test_pipeline1 = df_alpha_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_alpha_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)

df_alpha_test_pipeline1 = pd.concat([df_complex, df_alpha_test_pipeline1], axis=1)
df_alpha_test_pipeline1['Estimator']=" RandomForestClassifier"
df_alpha_test_pipeline1

Unnamed: 0,complex_type,precision,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,alpha,exact,"{'complex_type': 'alpha', 'precision': 'exact'}",71.804,9.205,0.0,63,1.043,0.0,0.0,1,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
1,alpha,exact,"{'complex_type': 'alpha', 'precision': 'exact'}",29.435,9.576,0.0,20,0.023,0.0,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
2,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",51.927,2.583,0.0,49,6.922,0.285,0.0,7,0.571429,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
3,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",53.611,2.338,0.0,51,0.176,0.0,0.0,0,0.52381,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
4,alpha,exact,"{'complex_type': 'alpha', 'precision': 'exact'}",39.283,11.054,0.0,28,0.766,0.008,0.0,1,0.47619,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
5,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",24.089,3.178,0.0,21,5.309,1.547,0.0,4,0.380952,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
6,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",30.043,2.895,0.0,27,9.238,7.402,0.0,2,0.380952,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
7,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",25.323,2.504,0.0,23,1.141,0.0,0.0,1,0.333333,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
8,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",22.292,2.753,0.0,20,10.285,1.508,0.0,9,0.238095,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier


In [6]:
columns_to_keep = ['complex_type', 'precision', 'elapsed_time', 'elapsed_memory', 'test_accuracy']

# Keep only the specified columns
df_alpha_test_pipeline1_filtered = df_alpha_test_pipeline1[columns_to_keep]
df_alpha_test_pipeline1_filtered

Unnamed: 0,complex_type,precision,elapsed_time,elapsed_memory,test_accuracy
0,alpha,exact,71.804,1.043,0.619048
1,alpha,exact,29.435,0.023,0.619048
2,alpha,fast,51.927,6.922,0.571429
3,alpha,safe,53.611,0.176,0.52381
4,alpha,exact,39.283,0.766,0.47619
5,alpha,fast,24.089,5.309,0.380952
6,alpha,safe,30.043,9.238,0.380952
7,alpha,safe,25.323,1.141,0.333333
8,alpha,fast,22.292,10.285,0.238095


In [7]:
print(" MEAN MEMORY ",np.mean(df_alpha_test_pipeline1["elapsed_memory"]))
print(" MEAN TIME ",np.mean(df_alpha_test_pipeline1["elapsed_time"]))

for e in df_alpha_test_pipeline1['best parameters for the complex']:
    print(e)

 MEAN MEMORY  3.878111111111112
 MEAN TIME  38.64522222222222
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5

### results for alpha: 

* Best persistence representations: Persistence Image
* Best precision: exact
* Mean of elapsed_time: 38.645222
* Mean of elapsed_memory: 3.878 MB
* Best test accuracy: 0.619048
* Best elapsed time for best acc: 29.435 seconds
* Best elapsed memory for best acc: 0.023 MB


### Edge Collapse Rips Complex :

In [8]:
df_edge_test_1=pd.read_csv("results_edge.csv")
df_edge_test_2=pd.read_csv("resultsedge_6245.csv")
df_edge_test_3=pd.read_csv("resultsedge_625.csv")
df_edge_test_4=pd.read_csv("resultsedge_6255558.csv")
df_edge_test_pipeline1=pd.concat([df_edge_test_1,df_edge_test_2,df_edge_test_3,df_edge_test_4])
df_edge_test_pipeline1=df_edge_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_edge_test_pipeline1 = df_edge_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_edge_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)



df_edge_test_pipeline1 = pd.concat([df_complex, df_edge_test_pipeline1], axis=1)
df_edge_test_pipeline1['Estimator']=" RandomForestClassifier"
df_edge_test_pipeline1

Unnamed: 0,complex_type,max_dimension,sparse,max_edge_length,nb_iterations,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,edge,2,,0.516584,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",258.302,244.751,0.0,14,55270.48,55270.48,0.0,0,0.714286,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
1,edge,2,,0.575211,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",338.189,325.621,0.0,13,80732.895,80732.895,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
2,edge,2,,0.649735,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",437.601,425.233,0.0,12,110552.59,110552.59,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
3,edge,2,,0.42586,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",246.145,234.599,0.0,12,43914.68,43914.68,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
4,edge,2,,0.439468,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",151.566,140.578,0.0,11,35265.723,35265.723,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
5,edge,2,,0.746055,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",520.796,509.302,0.0,11,151870.59,151870.59,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
6,edge,2,,0.590269,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",356.636,343.033,0.0,14,81101.656,81101.656,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
7,edge,2,,0.358031,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",118.301,106.297,0.0,12,24753.375,24753.375,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
8,edge,2,,0.284186,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",63.548,52.0,0.0,12,10466.676,10466.676,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
9,edge,2,,0.512279,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",223.39,211.901,0.0,11,55033.195,55033.195,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier


In [9]:
print(" MEAN MEMORY: ",np.mean(df_edge_test_pipeline1["elapsed_memory"]))
print(" MEAN TIME : ",np.mean(df_edge_test_pipeline1["elapsed_time"]))

for e in df_edge_test_pipeline1['best parameters for the complex']:
    print(e)

 MEAN MEMORY:  75094.70731944445
 MEAN TIME :  308.4976666666667
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': La

In [10]:
mask = df_edge_test_pipeline1['test_accuracy'] > 0.6
columns_to_keep = ['complex_type', 'max_edge_length','max_dimension','nb_iterations', 'elapsed_time', 'elapsed_memory', 'test_accuracy']

df_edge_test_pipeline1_filtered = df_edge_test_pipeline1[columns_to_keep]
filtered_df = df_edge_test_pipeline1_filtered[mask]
filtered_df  

Unnamed: 0,complex_type,max_edge_length,max_dimension,nb_iterations,elapsed_time,elapsed_memory,test_accuracy
0,edge,0.516584,2,2,258.302,55270.48,0.714286
1,edge,0.575211,2,3,338.189,80732.895,0.666667
2,edge,0.649735,2,2,437.601,110552.59,0.666667
3,edge,0.42586,2,3,246.145,43914.68,0.666667
4,edge,0.439468,2,3,151.566,35265.723,0.666667
5,edge,0.746055,2,3,520.796,151870.59,0.666667
6,edge,0.590269,2,2,356.636,81101.656,0.619048
7,edge,0.358031,2,2,118.301,24753.375,0.619048
8,edge,0.284186,2,3,63.548,10466.676,0.619048
9,edge,0.512279,2,2,223.39,55033.195,0.619048


In [11]:
 
filtered_df['max_edge_length'].value_counts()


max_edge_length
0.516584    1
0.575211    1
0.649735    1
0.425860    1
0.439468    1
0.746055    1
0.590269    1
0.358031    1
0.284186    1
0.512279    1
0.190465    1
0.443120    1
Name: count, dtype: int64

In [12]:
print(" MEAN MEMORY: ",np.mean(filtered_df["elapsed_memory"]))
print(" MEAN TIME : ",np.mean(filtered_df["elapsed_time"]))

for e in df_edge_test_pipeline1['best parameters for the complex']:
    print(e)

 MEAN MEMORY:  57479.7445
 MEAN TIME :  244.1185833333333
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape

### results for Edge Collapse Rips Complex : 
* Best accuracy: 0.714286
* elapsed time for best acc: 258.302	
* Best persistence representations: Landscape
* Best threshold: between 5th and 6th quantile
* Mean of elapsed_time: 1173.497 S
* Mean of elapsed_memory: 75028.74 MB
* Best number of collapse: 2 and 3 



###  Rips Complex :

In [13]:
df_rips_test_1=pd.read_csv("results_rips.csv")
df_rips_test_2=pd.read_csv("resultsrips_6245.csv")
df_rips_test_3=pd.read_csv("resultsrips_625.csv")
df_rips_test_4=pd.read_csv("resultsrips_62588.csv")
df_rips_test_pipeline1=pd.concat([df_rips_test_1,df_rips_test_2,df_rips_test_3,df_rips_test_4])
df_rips_test_pipeline1=df_rips_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_rips_test_pipeline1 = df_rips_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_rips_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)

df_rips_test_pipeline1 = pd.concat([df_complex, df_rips_test_pipeline1], axis=1)
df_rips_test_pipeline1['Estimator']=" RandomForestClassifier"

df_rips_test_pipeline1

Unnamed: 0,complex_type,max_dimension,sparse,max_edge_length,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,rips,2,,0.836201,"{'complex_type': 'rips', 'max_dimension': 2, '...",15967.85,15956.725,0.001,11,inf,inf,0.0,0,0.761905,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
1,rips,2,,0.732426,"{'complex_type': 'rips', 'max_dimension': 2, '...",13377.622,13365.973,0.0,12,inf,inf,0.0,0,0.714286,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
2,rips,2,,0.501485,"{'complex_type': 'rips', 'max_dimension': 2, '...",5325.229,5314.198,0.0,11,inf,inf,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
3,rips,2,,0.277081,"{'complex_type': 'rips', 'max_dimension': 2, '...",1431.26,1419.355,0.001,12,inf,inf,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
4,rips,2,,0.439468,"{'complex_type': 'rips', 'max_dimension': 2, '...",2859.286,2847.823,0.001,11,inf,inf,0.0,0,0.666667,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
5,rips,2,,0.358031,"{'complex_type': 'rips', 'max_dimension': 2, '...",2419.382,2407.623,0.0,12,inf,inf,0.0,0,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
6,rips,2,,0.50362,"{'complex_type': 'rips', 'max_dimension': 2, '...",5469.453,5457.747,0.0,12,inf,inf,0.0,1,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
7,rips,2,,0.190465,"{'complex_type': 'rips', 'max_dimension': 2, '...",549.468,537.948,0.0,12,inf,inf,0.0,1,0.619048,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
8,rips,2,,0.364614,"{'complex_type': 'rips', 'max_dimension': 2, '...",1638.864,1627.371,0.0,11,inf,inf,0.0,0,0.571429,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier
9,rips,2,,0.573518,"{'complex_type': 'rips', 'max_dimension': 2, '...",7001.134,6990.006,0.001,11,inf,inf,0.0,0,0.571429,"{'Estimator': RandomForestClassifier(), 'Scale...",RandomForestClassifier


In [14]:
mask = df_rips_test_pipeline1['test_accuracy'] > 0.6

# Apply the mask to the DataFrame
filtered_df = df_rips_test_pipeline1[mask]
print(np.mean(filtered_df["elapsed_memory"]))
print(np.mean(filtered_df["elapsed_time"]))
print (filtered_df['max_edge_length'].value_counts())

for e in filtered_df['best parameters for the complex']:
    print(e)


inf
5924.94375
max_edge_length
0.836201    1
0.732426    1
0.501485    1
0.277081    1
0.439468    1
0.358031    1
0.503620    1
0.190465    1
Name: count, dtype: int64
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(), 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(), 'Scale

In [15]:

mask = df_rips_test_pipeline1['test_accuracy'] > 0.6
columns_to_keep = ['complex_type', 'max_edge_length', 'elapsed_time', 'elapsed_memory', 'test_accuracy']
df_edge_test_pipeline1_filtered = df_rips_test_pipeline1[columns_to_keep]

# Apply the mask to the DataFrame
filtered_df = df_edge_test_pipeline1_filtered[mask]
filtered_df  



Unnamed: 0,complex_type,max_edge_length,elapsed_time,elapsed_memory,test_accuracy
0,rips,0.836201,15967.85,inf,0.761905
1,rips,0.732426,13377.622,inf,0.714286
2,rips,0.501485,5325.229,inf,0.666667
3,rips,0.277081,1431.26,inf,0.666667
4,rips,0.439468,2859.286,inf,0.666667
5,rips,0.358031,2419.382,inf,0.619048
6,rips,0.50362,5469.453,inf,0.619048
7,rips,0.190465,549.468,inf,0.619048


###  results for  Rips Complex :
* Best accuracy: 0.761905
* elapsed time for best acc: 15967.850	
* Best persistence representations: Persistence Image
* Best threshold: NA
* Mean of elapsed_time: 5924.94375 S
* Mean of elapsed_memory: inf MB


## Third Test : RandomForestClassifier Hyperparameters Tuning

### Pipeline: 

In [16]:
pipe = Pipeline([("Separator", gd.representations.DiagramSelector(limit=np.inf, point_type="finite")),
                    ("Scaler",    gd.representations.DiagramScaler(scalers=[([0,1], MinMaxScaler())])),
                    ("TDA",       gd.representations.PersistenceImage()),
                    ("Estimator", RandomForestClassifier())])

param =    [
                                
                {"Scaler__use":         [True],
                "TDA":                 [gd.representations.Landscape()], 
                "TDA__resolution":     [100],
                "Estimator":           [RandomForestClassifier()],
                "Estimator": [RandomForestClassifier()],
               "Estimator__n_estimators": [50, 100, 200],
                "Estimator__max_depth": [None, 5, 10],
                "Estimator__min_samples_split": [2, 5, 10],
                "Estimator__min_samples_leaf": [1, 2, 4]},
            
                {"Scaler__use":         [True],
                "TDA":                 [gd.representations.PersistenceImage()], 
                "TDA__resolution":     [ [5,5], [6,6] ],
                "TDA__bandwidth":      [0.01, 0.1, 1.0, 10.0],
                "Estimator":           [RandomForestClassifier()],
                "Estimator__n_estimators": [50, 100, 200],
                "Estimator__max_depth": [None, 5, 10],
                "Estimator__min_samples_split": [2, 5, 10],
                "Estimator__min_samples_leaf": [1, 2, 4]},
            ]

model = GridSearchCV(pipe, param, cv=4, error_score='raise')    

### Alpha Complex :

In [17]:
df_alpha_test_1=pd.read_csv("results2alpha_6245.csv")
df_alpha_test_2=pd.read_csv("results2alpha_625.csv")
df_alpha_test_3=pd.read_csv("results2alpha_62588.csv")
df_alpha_test_4=pd.read_csv("results2alpha_6255558.csv")
df_alpha_test_5=pd.read_csv("results2alpha_621238.csv")
df_alpha_test_6=pd.read_csv("results2alpha_99846.csv")
df_alpha_test_7=pd.read_csv("results2alpha_9986.csv")

df_alpha_test_pipeline1=pd.concat([df_alpha_test_1,df_alpha_test_2,df_alpha_test_3,df_alpha_test_4,df_alpha_test_5,df_alpha_test_6,df_alpha_test_7])
df_alpha_test_pipeline1=df_alpha_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_alpha_test_pipeline1 = df_alpha_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_alpha_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)

df_alpha_test_pipeline1 = pd.concat([df_complex, df_alpha_test_pipeline1], axis=1)
df_alpha_test_pipeline1['Estimator']=" RandomForestClassifier"
df_alpha_test_pipeline1

Unnamed: 0,complex_type,precision,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",1390.002,2.451,0.0,1388,24.684,0.0,0.0,25,0.619048,"{'Estimator': RandomForestClassifier(), 'Estim...",RandomForestClassifier
1,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",1522.215,2.294,0.0,1520,4.359,0.0,0.0,4,0.619048,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
2,alpha,exact,"{'complex_type': 'alpha', 'precision': 'exact'}",1537.387,8.572,0.0,1529,2.895,0.0,0.0,3,0.619048,{'Estimator': RandomForestClassifier(n_estimat...,RandomForestClassifier
3,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",1527.48,2.409,0.0,1525,2.039,0.0,0.0,2,0.571429,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
4,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",1478.673,2.329,0.0,1476,2.527,0.0,0.0,3,0.571429,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
5,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",1470.563,2.644,0.0,1468,15.801,1.508,0.0,14,0.52381,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
6,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",1530.352,2.53,0.0,1528,13.41,1.52,0.0,12,0.52381,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
7,alpha,exact,"{'complex_type': 'alpha', 'precision': 'exact'}",1161.026,8.81,0.0,1152,0.453,0.0,0.0,0,0.52381,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
8,alpha,safe,"{'complex_type': 'alpha', 'precision': 'safe'}",1986.921,2.324,0.0,1985,2.504,0.0,0.0,3,0.52381,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
9,alpha,fast,"{'complex_type': 'alpha', 'precision': 'fast'}",1562.842,2.48,0.0,1560,13.355,1.488,0.0,12,0.52381,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier


In [18]:
print(" Mean memory :",np.mean(df_alpha_test_pipeline1["elapsed_memory"]))
print(" Mean time :",np.mean(df_alpha_test_pipeline1["elapsed_time"]))

for e in df_alpha_test_pipeline1['best parameters for the complex']:
    print(e)
    print("---------------------------------------------------------------------------------------------------------------")
    print("---------------------------------------------------------------------------------------------------------------")


 Mean memory : 7.630904761904761
 Mean time : 1503.3933333333334
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
---------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------
{'Estimator': RandomForestClassifier(max_depth=10, min_samples_leaf=2, n_estimators=50), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 2, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 50, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
---------------------------------------------------------------------------------------------------------------
----------------

In [19]:
mask = df_alpha_test_pipeline1['test_accuracy'] > 0.6

# Apply the mask to the DataFrame
filtered_df = df_alpha_test_pipeline1[mask]
print(np.mean(filtered_df["elapsed_memory"]))
print(np.mean(filtered_df["elapsed_time"]))

for e in filtered_df['best parameters for the complex']:
    print(e)




10.645999999999999
1483.2013333333332
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(max_depth=10, min_samples_leaf=2, n_estimators=50), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 2, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 50, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(n_estimators=50), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 50, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}


In [20]:

mask = df_alpha_test_pipeline1['test_accuracy'] > 0.6
columns_to_keep = ['complex_type', 'precision', 'elapsed_time', 'elapsed_memory', 'test_accuracy']
df_alpha_test_pipeline1_filtered = df_alpha_test_pipeline1[columns_to_keep]

# Apply the mask to the DataFrame
filtered_df = df_alpha_test_pipeline1_filtered[mask]
filtered_df  

Unnamed: 0,complex_type,precision,elapsed_time,elapsed_memory,test_accuracy
0,alpha,safe,1390.002,24.684,0.619048
1,alpha,safe,1522.215,4.359,0.619048
2,alpha,exact,1537.387,2.895,0.619048


### results for Alpha Complex: 

* Best persistence representations: Persistence Image
* Best precision: exact
* Mean of elapsed_time: 38.645222
* Mean of elapsed_memory: 3.878 MB
* Best test accuracy: 0.619048
* Best elapsed time for best acc: 29.435 seconds


### Edge Collapse Rips Complex

In [21]:
df_edge_test_1=pd.read_csv("results2edge_6245.csv")
df_edge_test_2=pd.read_csv("results2edge_625.csv")
df_edge_test_3=pd.read_csv("results2edge_62588.csv")
df_edge_test_4=pd.read_csv("results2edge_6255558.csv")
df_edge_test_5=pd.read_csv("results2edge_76338.csv")
df_edge_test_6=pd.read_csv("results2edge_336238.csv")
df_edge_test_7=pd.read_csv("results2edge_621238.csv")
df_edge_test_8=pd.read_csv("results2edge_7644338.csv")
df_edge_test_9=pd.read_csv("results2edge_9986.csv")
df_edge_test_10=pd.read_csv("results2edge_89846.csv")
df_edge_test_11=pd.read_csv("results2edge_56238.csv")

df_edge_test_pipeline1=pd.concat([df_edge_test_1,df_edge_test_2,df_edge_test_3,df_edge_test_4,df_edge_test_5,df_edge_test_6,df_edge_test_7,df_edge_test_8,df_edge_test_9,df_edge_test_10,df_edge_test_11])
df_edge_test_pipeline1=df_edge_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_edge_test_pipeline1 = df_edge_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_edge_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)



df_edge_test_pipeline1 = pd.concat([df_complex, df_edge_test_pipeline1], axis=1)
df_edge_test_pipeline1['Estimator']=" RandomForestClassifier"
df_edge_test_pipeline1

Unnamed: 0,complex_type,max_dimension,sparse,max_edge_length,nb_iterations,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,edge,2,,0.358031,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",924.289,95.649,0.0,829,24812.172,24812.156,0.0,0,0.761905,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
1,edge,2,,0.661717,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",1236.627,393.697,0.0,843,112081.469,112081.469,0.0,0,0.761905,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
2,edge,2,,0.585445,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",1172.248,296.413,0.0,876,80295.172,80295.172,0.0,0,0.761905,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
3,edge,2,,0.496333,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",1293.219,306.695,0.0,987,62795.77,62795.77,0.0,0,0.761905,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
4,edge,2,,0.668441,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",1596.168,484.891,0.0,1111,113415.434,113415.434,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
5,edge,2,,0.671234,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",1233.926,393.445,0.0,840,116561.141,116561.141,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
6,edge,2,,0.364614,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",926.261,87.496,0.0,839,20598.227,20598.148,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
7,edge,2,,0.353158,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",1182.37,196.478,0.0,986,29931.305,29931.305,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
8,edge,2,,0.726875,2,"{'complex_type': 'edge', 'max_dimension': 2, '...",1414.106,571.659,0.0,842,155012.613,155012.613,0.0,0,0.666667,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
9,edge,2,,0.726875,3,"{'complex_type': 'edge', 'max_dimension': 2, '...",1555.586,567.028,0.0,989,154611.32,154611.32,0.0,0,0.666667,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier


In [22]:
print(np.mean(df_edge_test_pipeline1["elapsed_memory"]))
print(np.mean(df_edge_test_pipeline1["elapsed_time"]))

for e in df_edge_test_pipeline1['best parameters for the complex']:
    print(e)

75028.74947474748
1173.6455454545455
{'Estimator': RandomForestClassifier(max_depth=10, n_estimators=200), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 200, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(max_depth=5, min_samples_split=5), 'Estimator__max_depth': 5, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 5, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(max_depth=10, min_samples_leaf=2), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 2, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(m

In [23]:
mask = df_edge_test_pipeline1['test_accuracy'] > 0.6

# Apply the mask to the DataFrame
filtered_df = df_edge_test_pipeline1[mask]
print("Mean Memory: ",np.mean(filtered_df["elapsed_memory"]))
print("Mean Time: ", np.mean(filtered_df["elapsed_time"]))

for e in filtered_df['best parameters for the complex']:
    print(e)




Mean Memory:  95876.1323548387
Mean Time:  1262.4036774193544
{'Estimator': RandomForestClassifier(max_depth=10, n_estimators=200), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 200, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(max_depth=5, min_samples_split=5), 'Estimator__max_depth': 5, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 5, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(max_depth=10, min_samples_leaf=2), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 2, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator':

In [24]:
mask = df_edge_test_pipeline1['test_accuracy'] > 0.6
columns_to_keep = ['complex_type', 'max_edge_length','nb_iterations', 'elapsed_time', 'elapsed_memory', 'test_accuracy']

# Keep only the specified columns
df_edge_test_pipeline1_filtered = df_edge_test_pipeline1[columns_to_keep]
# Apply the mask to the DataFrame
filtered_df = df_edge_test_pipeline1_filtered[mask]
filtered_df

Unnamed: 0,complex_type,max_edge_length,nb_iterations,elapsed_time,elapsed_memory,test_accuracy
0,edge,0.358031,3,924.289,24812.172,0.761905
1,edge,0.661717,2,1236.627,112081.469,0.761905
2,edge,0.585445,3,1172.248,80295.172,0.761905
3,edge,0.496333,3,1293.219,62795.77,0.761905
4,edge,0.668441,2,1596.168,113415.434,0.666667
5,edge,0.671234,3,1233.926,116561.141,0.666667
6,edge,0.364614,3,926.261,20598.227,0.666667
7,edge,0.353158,3,1182.37,29931.305,0.666667
8,edge,0.726875,2,1414.106,155012.613,0.666667
9,edge,0.726875,3,1555.586,154611.32,0.666667


{'Estimator': RandomForestClassifier(max_depth=5), 'Estimator__max_depth': 5, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}

In [25]:
filtered_df['max_edge_length'].value_counts(), filtered_df['nb_iterations'].value_counts()



(max_edge_length
 0.858712    2
 0.671234    2
 0.726875    2
 0.503620    2
 0.501485    1
 0.580863    1
 0.370012    1
 0.844886    1
 0.579512    1
 0.575211    1
 0.732426    1
 0.731654    1
 0.741360    1
 0.517931    1
 0.277081    1
 0.358031    1
 0.512279    1
 0.661717    1
 0.284186    1
 0.355923    1
 0.836201    1
 0.353158    1
 0.364614    1
 0.668441    1
 0.496333    1
 0.585445    1
 0.746055    1
 Name: count, dtype: int64,
 nb_iterations
 3    18
 2    13
 Name: count, dtype: int64)

### results for Edge Collapse Rips Complex: 
* Best persistence representations: Persistence Image // Landscape
* Best precision: exact
* Mean of elapsed_time: 1156.59575
* Mean of elapsed_memory: 69996.14 MB
* Best test accuracy: 0.762
* Best elapsed time for best acc:924.289 seconds
* Best number of iterations: 3


### Rips Complex :

In [26]:
df_rips_test_1=pd.read_csv("results2rips_6245.csv")
df_rips_test_2=pd.read_csv("results2rips_625.csv")
df_rips_test_3=pd.read_csv("results2rips_62588.csv")
df_rips_test_4=pd.read_csv("results2rips_6255558.csv")
df_rips_test_5=pd.read_csv("results2rips_89846.csv")
df_rips_test_6=pd.read_csv("results2rips_336238.csv")
df_rips_test_7=pd.read_csv("results2rips_621238.csv")
df_rips_test_8=pd.read_csv("results2rips_7644338.csv")
df_rips_test_9=pd.read_csv("results2rips_99846.csv")
df_rips_test_10=pd.read_csv("results2rips_89846.csv")
df_rips_test_10=pd.read_csv("results2rips_336238.csv")


df_rips_test_pipeline1=pd.concat([df_rips_test_1,df_rips_test_2,df_rips_test_3,df_rips_test_4,df_rips_test_5,df_rips_test_6,df_rips_test_7,df_rips_test_8,df_rips_test_9,df_rips_test_10])
df_rips_test_pipeline1=df_rips_test_pipeline1.sort_values(by="test_accuracy",ascending=False)
df_rips_test_pipeline1 = df_rips_test_pipeline1.reset_index(drop=True)
                
df_complex = pd.json_normalize(df_rips_test_pipeline1["complex_parameters"].apply(ast.literal_eval))
df_complex=df_complex.reset_index(drop=True)



df_rips_test_pipeline1 = pd.concat([df_complex, df_rips_test_pipeline1], axis=1)
df_rips_test_pipeline1['Estimator']=" RandomForestClassifier"
df_rips_test_pipeline1

Unnamed: 0,complex_type,max_dimension,sparse,max_edge_length,complex_parameters,elapsed_time,time for diagram computation,time for pipeline creation,time for pipeline fitting and testing,elapsed_memory,memory for diagram computation,memory for pipeline creation,memory for pipeline fitting and testing,test_accuracy,best parameters for the complex,Estimator
0,rips,2,,0.431968,"{'complex_type': 'rips', 'max_dimension': 2, '...",4051.952,3203.224,0.0,849,inf,inf,0.0,0,0.714286,"{'Estimator': RandomForestClassifier(), 'Estim...",RandomForestClassifier
1,rips,2,,0.282771,"{'complex_type': 'rips', 'max_dimension': 2, '...",1793.627,955.274,0.0,838,inf,inf,0.0,0,0.714286,"{'Estimator': RandomForestClassifier(), 'Estim...",RandomForestClassifier
2,rips,2,,0.591743,"{'complex_type': 'rips', 'max_dimension': 2, '...",8796.108,7687.874,0.0,1108,inf,inf,0.0,0,0.666667,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
3,rips,2,,0.364614,"{'complex_type': 'rips', 'max_dimension': 2, '...",2523.064,1634.997,0.0,888,inf,inf,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
4,rips,2,,0.661717,"{'complex_type': 'rips', 'max_dimension': 2, '...",11925.929,11036.55,0.0,889,inf,inf,0.0,268,0.666667,{'Estimator': RandomForestClassifier(n_estimat...,RandomForestClassifier
5,rips,2,,0.836042,"{'complex_type': 'rips', 'max_dimension': 2, '...",16397.743,15570.073,0.001,828,inf,inf,0.0,0,0.666667,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
6,rips,2,,0.275171,"{'complex_type': 'rips', 'max_dimension': 2, '...",2089.808,1242.662,0.0,847,inf,inf,0.0,0,0.619048,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
7,rips,2,,0.362385,"{'complex_type': 'rips', 'max_dimension': 2, '...",2592.378,1758.003,0.0,834,inf,inf,0.0,0,0.619048,{'Estimator': RandomForestClassifier(max_depth...,RandomForestClassifier
8,rips,2,,0.756757,"{'complex_type': 'rips', 'max_dimension': 2, '...",12063.922,11213.172,0.0,851,inf,inf,0.0,1,0.619048,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier
9,rips,2,,0.288394,"{'complex_type': 'rips', 'max_dimension': 2, '...",1321.127,461.825,0.0,859,inf,inf,0.0,0,0.619048,{'Estimator': RandomForestClassifier(min_sampl...,RandomForestClassifier


In [27]:
print("Mean Memory:",np.mean(df_rips_test_pipeline1["elapsed_memory"]))
print("Mean Time :",np.mean(df_rips_test_pipeline1["elapsed_time"]))

for e in df_rips_test_pipeline1['best parameters for the complex']:
    print(e)


Mean Memory: inf
Mean Time : 7117.8712444444445
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(max_depth=10, n_estimators=50), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 50, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[6, 6]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [6, 6]}
{'Estimator': RandomForestClassifier(min_samples_leaf=2), 'Estimator__max_depth': None, 

In [28]:
mask = df_rips_test_pipeline1['test_accuracy'] > 0.6

# Apply the mask to the DataFrame
filtered_df = df_rips_test_pipeline1[mask]
print("Mean Memory: ",np.mean(filtered_df["elapsed_memory"]))
print("Mean Time: ", np.mean(filtered_df["elapsed_time"]))
print(filtered_df["max_edge_length"].value_counts())
for e in filtered_df['best parameters for the complex']:
    print(e)




Mean Memory:  inf
Mean Time:  6916.835533333334
max_edge_length
0.756757    2
0.288394    2
0.431968    1
0.282771    1
0.591743    1
0.364614    1
0.661717    1
0.836042    1
0.275171    1
0.362385    1
0.429650    1
0.503620    1
0.746055    1
Name: count, dtype: int64
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': Landscape(), 'TDA__resolution': 100}
{'Estimator': RandomForestClassifier(), 'Estimator__max_depth': None, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators': 100, 'Scaler__use': True, 'TDA': PersistenceImage(bandwidth=0.1, resolution=[5, 5]), 'TDA__bandwidth': 0.1, 'TDA__resolution': [5, 5]}
{'Estimator': RandomForestClassifier(max_depth=10, n_estimators=50), 'Estimator__max_depth': 10, 'Estimator__min_samples_leaf': 1, 'Estimator__min_samples_split': 2, 'Estimator__n_estimators'

In [29]:
mask = df_rips_test_pipeline1['test_accuracy'] > 0.6
columns_to_keep = ['complex_type', 'max_edge_length', 'elapsed_time', 'elapsed_memory', 'test_accuracy']
df_rips_test_pipeline1_filtered = df_rips_test_pipeline1[columns_to_keep]

# Apply the mask to the DataFrame
filtered_df = df_rips_test_pipeline1_filtered[mask]
filtered_df  


Unnamed: 0,complex_type,max_edge_length,elapsed_time,elapsed_memory,test_accuracy
0,rips,0.431968,4051.952,inf,0.714286
1,rips,0.282771,1793.627,inf,0.714286
2,rips,0.591743,8796.108,inf,0.666667
3,rips,0.364614,2523.064,inf,0.666667
4,rips,0.661717,11925.929,inf,0.666667
5,rips,0.836042,16397.743,inf,0.666667
6,rips,0.275171,2089.808,inf,0.619048
7,rips,0.362385,2592.378,inf,0.619048
8,rips,0.756757,12063.922,inf,0.619048
9,rips,0.288394,1321.127,inf,0.619048


### results for Edge Collapse Rips Complex: 
* Best persistence representations: Persistence Image // Landscape
* Best precision: exact
* Mean of elapsed_time: 1156.59575
* Mean of elapsed_memory: 69996.14 MB
* Best test accuracy: 0.762
* Best elapsed time for best acc:924.289 seconds
* Best number of iterations: 3
