# Classifying Outex-10

In [26]:
import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from vectorization import *

In [27]:
path_feat = "Outex-TC-00024/features/"
path_res = "Outex-TC-00024/results/"
path_data = "Outex-TC-00024/data/000/"

In [3]:
with open(path_feat + 'l_d0.pkl', 'rb') as f:
    features_l_d0 = pickle.load(f)
with open(path_feat + 'l_d1.pkl', 'rb') as f:
    features_l_d1 = pickle.load(f)
with open(path_feat + 'u_d0.pkl', 'rb') as f:
    features_u_d0 = pickle.load(f)
with open(path_feat + 'u_d1.pkl', 'rb') as f:
    features_u_d1 = pickle.load(f)

In [4]:
func_list = [GetPersStats,
             GetPersImageFeature,
             GetPersLandscapeFeature,
             GetPersEntropyFeature,
             GetBettiCurveFeature,
             GetCarlssonCoordinatesFeature,
             GetPersSilhouetteFeature,
             GetTopologicalVectorFeature,
             GetAtolFeature,
             GetComplexPolynomialFeature,
             GetPersLifespanFeature,
             GetPersTropicalCoordinatesFeature
            ]

In [5]:
features = {}
for i in range(2720):
    for func in func_list:
        features[(func.__name__)+'_'+str(i)] = np.hstack([features_l_d0[(func.__name__)+'_'+str(i)],features_l_d1[(func.__name__)+'_'+str(i)],features_u_d0[(func.__name__)+'_'+str(i)],features_u_d1[(func.__name__)+'_'+str(i)]])

In [6]:
from numpy.random import choice
from numpy.random import seed
seed(1)
labels = range(68)
labels = choice(labels, size=(10), replace = False)

In [7]:
train_labels_0 = np.array(pd.read_csv(path_data + "train.txt", sep=" ", usecols=[1]).to_numpy().flatten().tolist())
test_labels_0 = np.array(pd.read_csv(path_data + "test.txt", sep=" ", usecols=[1]).to_numpy().flatten().tolist())

In [8]:
train_indexes = np.array([i for i in range(len(train_labels_0)) if train_labels_0[i] in labels])
test_indexes = np.array([i for i in range(len(test_labels_0)) if test_labels_0[i] in labels])
train_labels = train_labels_0[train_indexes]
test_labels = test_labels_0[test_indexes]
test_indexes = 1360 + test_indexes

### Classical Training-Test

In [9]:
def classification(train_labels, test_labels, train_indexes, test_indexes, func_list, features):
    train_scores = dict()
    test_scores = dict()
    for func in func_list:
        X = []
        Y = []
        for i in train_indexes:
            name = (func.__name__)+'_'+str(i)
            X.append(features[name])
        for i in test_indexes:
            name = (func.__name__)+'_'+str(i)
            Y.append(features[name])
        clf = RandomForestClassifier()
        clf = clf.fit(X, train_labels)      
        train_scores[func.__name__] = clf.score(X, train_labels)
        test_scores[func.__name__] = clf.score(Y, test_labels)
    return train_scores, test_scores

In [10]:
n = 10
scores_vector = np.zeros([n, len(func_list)])
for i in range(n):
    train_scores, test_scores = classification(train_labels, test_labels, train_indexes, test_indexes, func_list, features)
    scores_vector[i, :] = np.array([x[1] for x in list(test_scores.items())])
    
scores_avg = dict()
for j in range(len(func_list)):
    scores_avg[func_list[j].__name__] = (scores_vector[:,j].mean(), scores_vector[:,j].std())
    
with open('scores.pkl', 'wb') as f:
  pickle.dump(scores_avg, f)

In [11]:
scores_avg

{'GetPersStats': (0.63, 0.023345235059857524),
 'GetPersImageFeature': (0.556, 0.018411952639521948),
 'GetPersLandscapeFeature': (0.6060000000000001, 0.015779733838059515),
 'GetPersEntropyFeature': (0.5405, 0.021266170318136726),
 'GetBettiCurveFeature': (0.5160000000000001, 0.018814887722226795),
 'GetCarlssonCoordinatesFeature': (0.5584999999999999, 0.030170349683091166),
 'GetPersSilhouetteFeature': (0.47300000000000003, 0.017776388834631174),
 'GetTopologicalVectorFeature': (0.5355000000000001, 0.016499999999999994),
 'GetAtolFeature': (0.569, 0.019595917942265395),
 'GetComplexPolynomialFeature': (0.5279999999999999, 0.0271293199325011),
 'GetPersLifespanFeature': (0.5834999999999999, 0.008381527307120114),
 'GetPersTropicalCoordinatesFeature': (0.5810000000000001,
  0.028879058156387277)}

### K-Fold Cross Validation with the Training Set

In [30]:
def kfold_classification(labels, indexes, func_list, features, n=5):
    scores = dict()
    for func in func_list:
        X = []
        for i in indexes:
            name = (func.__name__)+'_'+str(i)
            X.append(features[name])
        clf = RandomForestClassifier()
        clf = clf.fit(X, train_labels)  
        score = cross_val_score(clf, X, labels, cv=n)
        scores[func.__name__] = (score.mean(), score.std())
        
    return scores

In [31]:
scores_avg = kfold_classification(labels=train_labels, indexes=train_indexes, func_list=func_list, features=features, n=15)    
with open(path_res + '10-k-fold_train_scores.pkl', 'wb') as f:
  pickle.dump(scores_avg, f)

In [32]:
scores_avg

{'GetPersStats': (0.9505494505494506, 0.043680454756817116),
 'GetPersImageFeature': (0.8201465201465201, 0.1183802086249791),
 'GetPersLandscapeFeature': (0.8366300366300365, 0.09710345666974564),
 'GetPersEntropyFeature': (0.9300366300366302, 0.07524241082458158),
 'GetBettiCurveFeature': (0.8560439560439562, 0.0764559337236408),
 'GetCarlssonCoordinatesFeature': (0.8959706959706961, 0.06361826473366883),
 'GetPersSilhouetteFeature': (0.9703296703296703, 0.045961574040609875),
 'GetTopologicalVectorFeature': (0.7813186813186814, 0.08592535297725105),
 'GetAtolFeature': (0.868864468864469, 0.11366885274974517),
 'GetComplexPolynomialFeature': (0.9703296703296703, 0.045961574040609875),
 'GetPersLifespanFeature': (0.9058608058608059, 0.08677995067094788),
 'GetPersTropicalCoordinatesFeature': (0.9256410256410258,
  0.07061716270215301)}

### K-Fold Cross Validation with the Whole Set

In [16]:
whole_labels = train_labels+test_labels
whole_index = train_indexes+test_indexes

In [20]:
scores_avg = kfold_classification(labels=whole_labels, indexes=whole_index, func_list=func_list, features=features, n=10)    
with open(path_res + '10-k-fold_whole_scores.pkl', 'wb') as f:
  pickle.dump(scores_avg, f)

GetPersStats
GetPersImageFeature
GetPersLandscapeFeature
GetPersEntropyFeature
GetBettiCurveFeature
GetCarlssonCoordinatesFeature
GetPersSilhouetteFeature
GetTopologicalVectorFeature
GetAtolFeature
GetComplexPolynomialFeature
GetPersLifespanFeature
GetPersTropicalCoordinatesFeature


In [21]:
scores_avg

{'GetPersStats': (0.97, 0.039999999999999994),
 'GetPersImageFeature': (0.93, 0.03999999999999999),
 'GetPersLandscapeFeature': (0.9499999999999998, 0.0447213595499958),
 'GetPersEntropyFeature': (0.9200000000000002, 0.07810249675906653),
 'GetBettiCurveFeature': (0.9499999999999998, 0.0447213595499958),
 'GetCarlssonCoordinatesFeature': (0.945, 0.065),
 'GetPersSilhouetteFeature': (0.9549999999999998, 0.04153311931459038),
 'GetTopologicalVectorFeature': (0.8550000000000001, 0.07566372975210778),
 'GetAtolFeature': (0.8700000000000001, 0.08426149773176358),
 'GetComplexPolynomialFeature': (0.9800000000000001, 0.033166247903554),
 'GetPersLifespanFeature': (0.96, 0.07681145747868609),
 'GetPersTropicalCoordinatesFeature': (0.95, 0.05916079783099615)}

### Classical Training-Test 70/30

In [21]:
indexes = np.hstack([train_indexes, test_indexes])
labels = np.hstack([train_labels, test_labels])

In [22]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indexes, labels, test_size=0.3, random_state=1)

In [24]:
n = 10
scores_vector = np.zeros([n, len(func_list)])
for i in range(n):
    train_scores, test_scores = classification(train_labels=y_train, test_labels=y_test, train_indexes=X_train, test_indexes=X_test, func_list=func_list, features=features)
    scores_vector[i, :] = np.array([x[1] for x in list(test_scores.items())])
    
scores_avg = dict()
for j in range(len(func_list)):
    scores_avg[func_list[j].__name__] = (scores_vector[:,j].mean(), scores_vector[:,j].std())
    
with open('10-7030_scores.pkl', 'wb') as f:
  pickle.dump(scores_avg, f)

In [25]:
scores_avg

{'GetPersStats': (0.9175000000000001, 0.012610621625351292),
 'GetPersImageFeature': (0.7358333333333333, 0.015833333333333345),
 'GetPersLandscapeFeature': (0.8191666666666666, 0.009895285072531594),
 'GetPersEntropyFeature': (0.885, 0.011666666666666672),
 'GetBettiCurveFeature': (0.8483333333333334, 0.016158932858054424),
 'GetCarlssonCoordinatesFeature': (0.9066666666666668, 0.01166666666666666),
 'GetPersSilhouetteFeature': (0.8799999999999999, 0.011303883305208805),
 'GetTopologicalVectorFeature': (0.79, 0.011055415967851331),
 'GetAtolFeature': (0.7024999999999999, 0.023288647687470205),
 'GetComplexPolynomialFeature': (0.9724999999999999, 0.0053359368645273545),
 'GetPersLifespanFeature': (0.9175000000000001, 0.009464847243000467),
 'GetPersTropicalCoordinatesFeature': (0.8841666666666667,
  0.009464847243000467)}