# Classifying textures in outex68 using TDA

In [1]:
from gtda.plotting import plot_heatmap
from skimage import io
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
import gudhi as gd
import gudhi.representations
import vectorization as ex

## Preparing the dataset

We will try to classify outex-68. First, we load the .bmp images as matrices. This takes a few seconds.

In [2]:
# Directory holding the texture images (relative to the working directory).
folder = 'Outex-TC-00024/data/images'
# Keep every directory entry except the hidden ones (names starting with a dot).
images_names = [name for name in os.listdir(folder) if name[0] != '.']

In [3]:
# Read every listed image and stack them into one float array, in the order of `images_names`.
images_matrixes = np.array(
    [io.imread(folder + '/' + name) for name in images_names],
    dtype=float,
)

Then, we pick the names of the training and test images and their labels.

In [4]:
# Split definition files. They are addressed relative to the working directory,
# exactly like `folder`, so the notebook no longer depends on one user's home
# directory layout.
split_folder = 'Outex-TC-00024/data/000'
train_file = os.path.join(split_folder, 'train.txt')
test_file = os.path.join(split_folder, 'test.txt')


def read_split_column(path, column):
    """Return one space-separated column of a split file as a flat Python list.

    Column 0 holds the image file names and column 1 the class labels.
    NOTE(review): `read_csv` is called with its default header handling, so the
    first line of the file is consumed as a header row -- presumably the
    image-count line of the Outex split files; confirm against the data.
    """
    return pd.read_csv(path, sep=" ", usecols=[column]).to_numpy().flatten().tolist()


train_names = read_split_column(train_file, 0)
train_labels = read_split_column(train_file, 1)
test_names = read_split_column(test_file, 0)
test_labels = read_split_column(test_file, 1)

Using the file names, we will load the train and test images. Note that they will keep the same order as the labels. We will also use the images with the greyscale inverted.

In [5]:
# Position of every train/test image inside `images_names` (and therefore inside
# `images_matrixes`), in the same order as the split files.
train_indexes = [images_names.index(name) for name in train_names]
test_indexes = [images_names.index(name) for name in test_names]

We are using only 10 classes from the original dataset. To perform the experiment with all 68 classes, comment out the following cell.

# Restrict the experiment to a random subset of the texture classes.
# The draw uses its own seeded generator and a sorted candidate list, so the
# same classes are selected on every run and the accuracies are reproducible.
N_CLASSES = 10
SUBSET_SEED = 0

candidate_labels = np.array(sorted(set(test_labels)))
labels = np.random.default_rng(SUBSET_SEED).choice(
    candidate_labels, size=N_CLASSES, replace=False
)
selected_labels = set(labels.tolist())  # constant-time membership tests

# The indexes must be filtered before the labels: each selection relies on the
# still unfiltered label list being aligned with the index list.
train_indexes = [idx for idx, label in zip(train_indexes, train_labels) if label in selected_labels]
train_labels = [label for label in train_labels if label in selected_labels]
test_indexes = [idx for idx, label in zip(test_indexes, test_labels) if label in selected_labels]
test_labels = [label for label in test_labels if label in selected_labels]

Finally, we load the images and their inverse.

In [6]:
# Every image is reshaped to a (128*128, 1) column before being handed to gudhi.
images_gudhi = np.array([image.reshape((128 * 128, 1)) for image in images_matrixes])
train_gudhi = images_gudhi[train_indexes]
test_gudhi = images_gudhi[test_indexes]

# Inverted images: every value v becomes 255 - v.
train_gudhi_opp = 255 - train_gudhi
test_gudhi_opp = 255 - test_gudhi

## Calculating the persistence diagrams

We use the filter function for cubical complexes to obtain the persistence diagrams. We obtain 4 diagrams from each image: dimension 0 and 1 of the original image and the inverted one. It takes around 10 minutes to run.

In [7]:
def cub_filtration(image):
    """Build the 128x128 cubical complex whose top-dimensional cells are given by `image`."""
    return gd.CubicalComplex(dimensions=[128, 128], top_dimensional_cells=image)


def calculate_pd(image):
    """Return the persistence computed by gudhi on the cubical complex of `image`."""
    return cub_filtration(image).persistence()


# Diagrams of the original images first, then of the inverted ones.
train_pds = [calculate_pd(image) for image in train_gudhi]
test_pds = [calculate_pd(image) for image in test_gudhi]
train_pds_opp = [calculate_pd(image) for image in train_gudhi_opp]
test_pds_opp = [calculate_pd(image) for image in test_gudhi_opp]

In [9]:
# The representation module does not deal well with infinity, so every
# non-finite value is replaced by 256.
def infty_proj(x):
    """Return `x` unchanged when it is finite and 256 otherwise."""
    return x if np.isfinite(x) else 256

In [10]:
def intervals_of_dim_0(pd):
    """Return the [birth, death] pairs of the dimension-0 entries of a diagram.

    `pd` is iterated as (dimension, (birth, death)) entries; every death value
    goes through `infty_proj`.
    """
    pairs = []
    for entry in pd:
        if entry[0] == 0:
            interval = entry[1]
            pairs.append([interval[0], infty_proj(interval[1])])
    return np.array(pairs)


train_pds_0 = [intervals_of_dim_0(diagram) for diagram in train_pds]
test_pds_0 = [intervals_of_dim_0(diagram) for diagram in test_pds]
train_pds_opp_0 = [intervals_of_dim_0(diagram) for diagram in train_pds_opp]
test_pds_opp_0 = [intervals_of_dim_0(diagram) for diagram in test_pds_opp]

In [11]:
def intervals_of_dim_1(pd):
    """Return the [birth, death] pairs of the dimension-1 entries of a diagram.

    `pd` is iterated as (dimension, (birth, death)) entries; every death value
    goes through `infty_proj`.
    """
    pairs = []
    for entry in pd:
        if entry[0] == 1:
            interval = entry[1]
            pairs.append([interval[0], infty_proj(interval[1])])
    return np.array(pairs)


train_pds_1 = [intervals_of_dim_1(diagram) for diagram in train_pds]
test_pds_1 = [intervals_of_dim_1(diagram) for diagram in test_pds]
train_pds_opp_1 = [intervals_of_dim_1(diagram) for diagram in train_pds_opp]
test_pds_opp_1 = [intervals_of_dim_1(diagram) for diagram in test_pds_opp]

## Classification with the Betti Curve

We perform a classification task using the Betti curve and Random Forest. We concatenate the four Betti curves obtained from each image, and then feed them to the classifier. It takes about one minute.

In [None]:
# Second argument handed to ex.GetBettiCurveFeature for every diagram.
res = 100

# Vectorise every diagram list with ex.GetBettiCurveFeature; the targets on the
# left are filled in the same order as the diagram lists on the right.
(train_Btt_0, train_Btt_1, test_Btt_0, test_Btt_1,
 train_Btt_opp_0, train_Btt_opp_1, test_Btt_opp_0, test_Btt_opp_1) = (
    [ex.GetBettiCurveFeature(diagram, res) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_Btt = [np.concatenate(parts)
             for parts in zip(train_Btt_0, train_Btt_1, train_Btt_opp_0, train_Btt_opp_1)]
test_Btt = [np.concatenate(parts)
            for parts in zip(test_Btt_0, test_Btt_1, test_Btt_opp_0, test_Btt_opp_1)]

Example of what the curves look like after the concatenation.

In [None]:
# Plot the concatenated feature vector of the first training image. Title and
# axis labels make the figure readable on its own, and the trailing semicolon
# keeps the notebook from echoing the repr of the last call.
fig, ax = plt.subplots()
ax.plot(train_Btt[0])
ax.set(
    title="Concatenated Betti curves of the first training image",
    xlabel="Feature index",
    ylabel="Curve value",
);

In [None]:
# Random forest trained on the concatenated Betti-curve vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_Btt, train_labels)

In [None]:
# Sanity check: shape of the feature vector of the first training image.
train_Btt[0].shape

In [None]:
# Sanity check: shape of the training features once stacked into a single array.
np.asarray(train_Btt).shape

In [None]:
# Sanity check: number of training labels.
len(train_labels)

In this example, the accuracy is the following:

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_Btt, train_labels)
test_accuracy = classifier.score(test_Btt, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

## Classification with PersStats

Here we use persistence statistics. It takes less than one minute.

In [12]:
# Vectorise every diagram list with ex.GetPersStats; the targets on the left
# are filled in the same order as the diagram lists on the right.
(train_Sta_0, train_Sta_1, test_Sta_0, test_Sta_1,
 train_Sta_opp_0, train_Sta_opp_1, test_Sta_opp_0, test_Sta_opp_1) = (
    [ex.GetPersStats(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_Sta = [np.concatenate(parts)
             for parts in zip(train_Sta_0, train_Sta_1, train_Sta_opp_0, train_Sta_opp_1)]
test_Sta = [np.concatenate(parts)
            for parts in zip(test_Sta_0, test_Sta_1, test_Sta_opp_0, test_Sta_opp_1)]

In [20]:
# Random forest with 1000 trees trained on the persistence-statistics vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(n_estimators=1000, random_state=0).fit(train_Sta, train_labels)

In [21]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_Sta, train_labels)
test_accuracy = classifier.score(test_Sta, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

Train accuracy = 1.0
Test accuracy  = 0.5691176470588235


## Classification with Persistence Images

Here we use persistence images. It takes less than five minutes.

In [None]:
# Second argument handed to ex.GetPersImageFeature for every diagram.
res = [6, 6]

# Vectorise every diagram list with ex.GetPersImageFeature; the targets on the
# left are filled in the same order as the diagram lists on the right.
(train_PI_0, train_PI_1, test_PI_0, test_PI_1,
 train_PI_opp_0, train_PI_opp_1, test_PI_opp_0, test_PI_opp_1) = (
    [ex.GetPersImageFeature(diagram, res) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_PI = [np.concatenate(parts)
            for parts in zip(train_PI_0, train_PI_1, train_PI_opp_0, train_PI_opp_1)]
test_PI = [np.concatenate(parts)
           for parts in zip(test_PI_0, test_PI_1, test_PI_opp_0, test_PI_opp_1)]

In [None]:
# Random forest trained on the persistence-image vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_PI, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_PI, train_labels)
test_accuracy = classifier.score(test_PI, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

## Classification with Entropy Curve

Here we use the persistent entropy curve. It takes less than five minutes.

In [None]:
# Vectorise every diagram list with ex.GetPersEntropyFeature; the targets on
# the left are filled in the same order as the diagram lists on the right.
(train_E_0, train_E_1, test_E_0, test_E_1,
 train_E_opp_0, train_E_opp_1, test_E_opp_0, test_E_opp_1) = (
    [ex.GetPersEntropyFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_E = [np.concatenate(parts)
           for parts in zip(train_E_0, train_E_1, train_E_opp_0, train_E_opp_1)]
test_E = [np.concatenate(parts)
          for parts in zip(test_E_0, test_E_1, test_E_opp_0, test_E_opp_1)]

In [None]:
# Random forest trained on the entropy-curve vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_E, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_E, train_labels)
test_accuracy = classifier.score(test_E, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

## Classification with Life Span curve

In [None]:
# Vectorise every diagram list with ex.GetPersLifespanFeature; the targets on
# the left are filled in the same order as the diagram lists on the right.
(train_Life_0, train_Life_1, test_Life_0, test_Life_1,
 train_Life_opp_0, train_Life_opp_1, test_Life_opp_0, test_Life_opp_1) = (
    [ex.GetPersLifespanFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_Life = [np.concatenate(parts)
              for parts in zip(train_Life_0, train_Life_1, train_Life_opp_0, train_Life_opp_1)]
test_Life = [np.concatenate(parts)
             for parts in zip(test_Life_0, test_Life_1, test_Life_opp_0, test_Life_opp_1)]

In [None]:
# Random forest trained on the life-span-curve vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_Life, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_Life, train_labels)
test_accuracy = classifier.score(test_Life, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

## Classification with Tropical Coordinates

In [None]:
# Vectorise every diagram list with ex.GetPersTropicalCoordinatesFeature; the
# targets on the left are filled in the same order as the diagram lists on the right.
(train_T_0, train_T_1, test_T_0, test_T_1,
 train_T_opp_0, train_T_opp_1, test_T_opp_0, test_T_opp_1) = (
    [ex.GetPersTropicalCoordinatesFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_T = [np.concatenate(parts)
           for parts in zip(train_T_0, train_T_1, train_T_opp_0, train_T_opp_1)]
test_T = [np.concatenate(parts)
          for parts in zip(test_T_0, test_T_1, test_T_opp_0, test_T_opp_1)]

In [None]:
# Random forest trained on the tropical-coordinates vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_T, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_T, train_labels)
test_accuracy = classifier.score(test_T, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

## Classification with Atol

In [None]:
from sklearn.cluster import KMeans

# A single two-cluster KMeans instance with a fixed seed is passed to every
# ex.GetAtolFeature call.
quantiser = KMeans(n_clusters=2, random_state=202006)

# Vectorise every diagram list with ex.GetAtolFeature; the targets on the left
# are filled in the same order as the diagram lists on the right.
(train_A_0, train_A_1, test_A_0, test_A_1,
 train_A_opp_0, train_A_opp_1, test_A_opp_0, test_A_opp_1) = (
    [ex.GetAtolFeature(diagram, quantiser) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_A = [np.concatenate(parts)
           for parts in zip(train_A_0, train_A_1, train_A_opp_0, train_A_opp_1)]
test_A = [np.concatenate(parts)
          for parts in zip(test_A_0, test_A_1, test_A_opp_0, test_A_opp_1)]

In [None]:
# Random forest trained on the ATOL vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_A, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_A, train_labels)
test_accuracy = classifier.score(test_A, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Persistence Landscapes

In [None]:
# Vectorise every diagram list with ex.GetPersLandscapeFeature (num=20); the
# targets on the left are filled in the same order as the diagram lists on the right.
(train_Land_0, train_Land_1, test_Land_0, test_Land_1,
 train_Land_opp_0, train_Land_opp_1, test_Land_opp_0, test_Land_opp_1) = (
    [ex.GetPersLandscapeFeature(diagram, num=20) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_Land = [np.concatenate(parts)
              for parts in zip(train_Land_0, train_Land_1, train_Land_opp_0, train_Land_opp_1)]
test_Land = [np.concatenate(parts)
             for parts in zip(test_Land_0, test_Land_1, test_Land_opp_0, test_Land_opp_1)]

In [None]:
# Random forest trained on the persistence-landscape vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_Land, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_Land, train_labels)
test_accuracy = classifier.score(test_Land, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Persistence Silhouettes

In [None]:
# Vectorise every diagram list with ex.GetPersSilhouetteFeature; the targets on
# the left are filled in the same order as the diagram lists on the right.
(train_Sil_0, train_Sil_1, test_Sil_0, test_Sil_1,
 train_Sil_opp_0, train_Sil_opp_1, test_Sil_opp_0, test_Sil_opp_1) = (
    [ex.GetPersSilhouetteFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_Sil = [np.concatenate(parts)
             for parts in zip(train_Sil_0, train_Sil_1, train_Sil_opp_0, train_Sil_opp_1)]
test_Sil = [np.concatenate(parts)
            for parts in zip(test_Sil_0, test_Sil_1, test_Sil_opp_0, test_Sil_opp_1)]

In [None]:
# Random forest trained on the persistence-silhouette vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_Sil, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_Sil, train_labels)
test_accuracy = classifier.score(test_Sil, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Carlsson Coordinates

In [None]:
# Vectorise every diagram list with ex.GetCarlssonCoordinatesFeature; the
# targets on the left are filled in the same order as the diagram lists on the right.
(train_CC_0, train_CC_1, test_CC_0, test_CC_1,
 train_CC_opp_0, train_CC_opp_1, test_CC_opp_0, test_CC_opp_1) = (
    [ex.GetCarlssonCoordinatesFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_CC = [np.concatenate(parts)
            for parts in zip(train_CC_0, train_CC_1, train_CC_opp_0, train_CC_opp_1)]
test_CC = [np.concatenate(parts)
           for parts in zip(test_CC_0, test_CC_1, test_CC_opp_0, test_CC_opp_1)]

In [None]:
# Random forest trained on the Carlsson-coordinates vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_CC, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_CC, train_labels)
test_accuracy = classifier.score(test_CC, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Topological Vectors

In [None]:
# Vectorise every diagram list with ex.GetTopologicalVectorFeature; the targets
# on the left are filled in the same order as the diagram lists on the right.
(train_TV_0, train_TV_1, test_TV_0, test_TV_1,
 train_TV_opp_0, train_TV_opp_1, test_TV_opp_0, test_TV_opp_1) = (
    [ex.GetTopologicalVectorFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_TV = [np.concatenate(parts)
            for parts in zip(train_TV_0, train_TV_1, train_TV_opp_0, train_TV_opp_1)]
test_TV = [np.concatenate(parts)
           for parts in zip(test_TV_0, test_TV_1, test_TV_opp_0, test_TV_opp_1)]

In [None]:
# Random forest trained on the topological-vector features.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_TV, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_TV, train_labels)
test_accuracy = classifier.score(test_TV, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Complex Polynomials (type = R)

In [None]:
# Vectorise every diagram list with ex.GetComplexPolynomialFeature (default
# polynomial type); the targets on the left are filled in the same order as the
# diagram lists on the right.
(train_CPR_0, train_CPR_1, test_CPR_0, test_CPR_1,
 train_CPR_opp_0, train_CPR_opp_1, test_CPR_opp_0, test_CPR_opp_1) = (
    [ex.GetComplexPolynomialFeature(diagram) for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_CPR = [np.concatenate(parts)
             for parts in zip(train_CPR_0, train_CPR_1, train_CPR_opp_0, train_CPR_opp_1)]
test_CPR = [np.concatenate(parts)
            for parts in zip(test_CPR_0, test_CPR_1, test_CPR_opp_0, test_CPR_opp_1)]

The scikit-learn classifiers require float32 input, but GetComplexPolynomialFeature can return very large float64 values. One solution is to find the largest order of magnitude appearing in the experiment and divide all values by a fixed power $10^n$, so that they become smaller than $10^{38}$. Then all values can be converted to float32.

In [None]:
def e_index(x):
    """Return the decimal exponent of `x` when written in scientific notation.

    The exponent is read from an explicit ``%e``-style rendering of ``abs(x)``
    rather than from ``str(x)``: ``str`` only switches to scientific notation
    for some magnitudes, and its exponent may have two or three digits
    (``1e+38`` vs ``1e+100``), so fixed-position slicing misreads exactly the
    very large values this helper exists to detect.

    Parameters
    ----------
    x : real or complex number
        Value whose order of magnitude is wanted.

    Returns
    -------
    int
        ``k`` such that ``abs(x)`` is rendered as ``d.dddddde+k``. Zero, NaN and
        infinite inputs return 0, since no finite rescaling applies to them.
    """
    magnitude = float(abs(x))
    if magnitude == 0.0 or not np.isfinite(magnitude):
        return 0
    # format(..., "e") always yields "<mantissa>e<sign><digits>".
    return int(format(magnitude, "e").rsplit("e", 1)[1])
# Largest exponent reported by e_index over all train and test feature values.
largest_per_image = [max(map(e_index, cp)) for cp in list(train_CPR) + list(test_CPR)]
y = max(largest_per_image)
# Power of ten to divide by so that the largest value ends up below 10**38.
n = max(0, y - 38 + 1)
print('The maximum number has the following size:')
print('10 power ', y)
print('We shall divide by 10 power', n)

In [None]:
# Divide every feature by 10**n and store it as float32.
scale = 10**n
train_CPR = [[np.float32(value / scale) for value in row] for row in train_CPR]
test_CPR = [[np.float32(value / scale) for value in row] for row in test_CPR]

In [None]:
# Random forest trained on the rescaled complex-polynomial (type R) vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_CPR, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_CPR, train_labels)
test_accuracy = classifier.score(test_CPR, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Complex Polynomials (type = S)

In [None]:
# Vectorise every diagram list with ex.GetComplexPolynomialFeature
# (pol_type='S'); the targets on the left are filled in the same order as the
# diagram lists on the right.
(train_CPS_0, train_CPS_1, test_CPS_0, test_CPS_1,
 train_CPS_opp_0, train_CPS_opp_1, test_CPS_opp_0, test_CPS_opp_1) = (
    [ex.GetComplexPolynomialFeature(diagram, pol_type='S') for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_CPS = [np.concatenate(parts)
             for parts in zip(train_CPS_0, train_CPS_1, train_CPS_opp_0, train_CPS_opp_1)]
test_CPS = [np.concatenate(parts)
            for parts in zip(test_CPS_0, test_CPS_1, test_CPS_opp_0, test_CPS_opp_1)]

In [None]:
def e_index(x):
    """Return the decimal exponent of `x` when written in scientific notation.

    The exponent is read from an explicit ``%e``-style rendering of ``abs(x)``
    rather than from ``str(x)``: ``str`` only switches to scientific notation
    for some magnitudes, and its exponent may have two or three digits
    (``1e+38`` vs ``1e+100``), so fixed-position slicing misreads exactly the
    very large values this helper exists to detect.

    Parameters
    ----------
    x : real or complex number
        Value whose order of magnitude is wanted.

    Returns
    -------
    int
        ``k`` such that ``abs(x)`` is rendered as ``d.dddddde+k``. Zero, NaN and
        infinite inputs return 0, since no finite rescaling applies to them.
    """
    magnitude = float(abs(x))
    if magnitude == 0.0 or not np.isfinite(magnitude):
        return 0
    # format(..., "e") always yields "<mantissa>e<sign><digits>".
    return int(format(magnitude, "e").rsplit("e", 1)[1])
# Largest exponent reported by e_index over all train and test feature values.
largest_per_image = [max(map(e_index, cp)) for cp in list(train_CPS) + list(test_CPS)]
y = max(largest_per_image)
# Power of ten to divide by so that the largest value ends up below 10**38.
n = max(0, y - 38 + 1)
print('The maximum number has the following size:')
print('10 power ', y)
print('We shall divide by 10 power', n)

In [None]:
# Divide every feature by 10**n and store it as float32.
scale = 10**n
train_CPS = [[np.float32(value / scale) for value in row] for row in train_CPS]
test_CPS = [[np.float32(value / scale) for value in row] for row in test_CPS]

In [None]:
# Random forest trained on the rescaled complex-polynomial (type S) vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_CPS, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_CPS, train_labels)
test_accuracy = classifier.score(test_CPS, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))

 ## Classification with Complex Polynomials (type = T)

In [None]:
# Vectorise every diagram list with ex.GetComplexPolynomialFeature
# (pol_type='T'); the targets on the left are filled in the same order as the
# diagram lists on the right.
(train_CPT_0, train_CPT_1, test_CPT_0, test_CPT_1,
 train_CPT_opp_0, train_CPT_opp_1, test_CPT_opp_0, test_CPT_opp_1) = (
    [ex.GetComplexPolynomialFeature(diagram, pol_type='T') for diagram in diagrams]
    for diagrams in (train_pds_0, train_pds_1, test_pds_0, test_pds_1,
                     train_pds_opp_0, train_pds_opp_1, test_pds_opp_0, test_pds_opp_1)
)

# One vector per image: original dim 0, original dim 1, inverted dim 0, inverted dim 1.
train_CPT = [np.concatenate(parts)
             for parts in zip(train_CPT_0, train_CPT_1, train_CPT_opp_0, train_CPT_opp_1)]
test_CPT = [np.concatenate(parts)
            for parts in zip(test_CPT_0, test_CPT_1, test_CPT_opp_0, test_CPT_opp_1)]

In [None]:
def e_index(x):
    """Return the decimal exponent of `x` when written in scientific notation.

    The exponent is read from an explicit ``%e``-style rendering of ``abs(x)``
    rather than from ``str(x)``: ``str`` only switches to scientific notation
    for some magnitudes, and its exponent may have two or three digits
    (``1e+38`` vs ``1e+100``), so fixed-position slicing misreads exactly the
    very large values this helper exists to detect.

    Parameters
    ----------
    x : real or complex number
        Value whose order of magnitude is wanted.

    Returns
    -------
    int
        ``k`` such that ``abs(x)`` is rendered as ``d.dddddde+k``. Zero, NaN and
        infinite inputs return 0, since no finite rescaling applies to them.
    """
    magnitude = float(abs(x))
    if magnitude == 0.0 or not np.isfinite(magnitude):
        return 0
    # format(..., "e") always yields "<mantissa>e<sign><digits>".
    return int(format(magnitude, "e").rsplit("e", 1)[1])
# Largest exponent reported by e_index over all train and test feature values.
largest_per_image = [max(map(e_index, cp)) for cp in list(train_CPT) + list(test_CPT)]
y = max(largest_per_image)
# Power of ten to divide by so that the largest value ends up below 10**38.
n = max(0, y - 38 + 1)
print('The maximum number has the following size:')
print('10 power ', y)
print('We shall divide by 10 power', n)

In [None]:
# Divide every feature by 10**n and store it as float32.
scale = 10**n
train_CPT = [[np.float32(value / scale) for value in row] for row in train_CPT]
test_CPT = [[np.float32(value / scale) for value in row] for row in test_CPT]

In [None]:
# Random forest trained on the rescaled complex-polynomial (type T) vectors.
# random_state is pinned so that the fitted forest, and hence the accuracies
# printed below, are identical on every run.
classifier = RandomForestClassifier(random_state=0).fit(train_CPT, train_labels)

In [None]:
# Score the fitted classifier on the training and on the test features.
train_accuracy = classifier.score(train_CPT, train_labels)
test_accuracy = classifier.score(test_CPT, test_labels)
print("Train accuracy = " + str(train_accuracy))
print("Test accuracy  = " + str(test_accuracy))