## Classifying Outex-68

In [2]:
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from vectorization import *

In [3]:
# Input/output locations, relative to the notebook's working directory.
# Every path keeps its trailing slash because file names are appended to it
# by plain string concatenation.
path_data = "Outex-TC-00024/data/000/"   # train.txt / test.txt
path_res = "Outex-TC-00024/results/"     # pickled score summaries are written here
path_feat = "Outex-TC-00024/features/"   # pickled feature dictionaries are read from here

In [4]:
# Load the four pre-computed feature dictionaries. Each one is later indexed
# with keys of the form '<function name>_<sample index>'.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# feature files produced by this project.
feature_files = ('l_d0.pkl', 'l_d1.pkl', 'u_d0.pkl', 'u_d1.pkl')
loaded_features = []
for file_name in feature_files:
    with open(path_feat + file_name, 'rb') as f:
        loaded_features.append(pickle.load(f))

features_l_d0, features_l_d1, features_u_d0, features_u_d1 = loaded_features

In [5]:
# Vectorization methods to compare (presumably provided by the star import
# from `vectorization` — TODO confirm). In this notebook only each entry's
# `__name__` is used: as the prefix of the feature keys and as the label of
# the reported scores. The order of this list is the column order of the
# score matrices built further down.
func_list = [
    GetPersStats,
    GetPersImageFeature,
    GetPersLandscapeFeature,
    GetPersEntropyFeature,
    GetBettiCurveFeature,
    GetCarlssonCoordinatesFeature,
    GetPersSilhouetteFeature,
    GetTopologicalVectorFeature,
    GetAtolFeature,
    GetComplexPolynomialFeature,
    GetPersLifespanFeature,
    GetPersTropicalCoordinatesFeature,
]

In [6]:
# Build one feature vector per (method, sample) by concatenating the vectors
# stored under the same key in the four loaded dictionaries, in the order
# l_d0, l_d1, u_d0, u_d1.
# NOTE(review): presumably "l"/"u" are two filtrations and "d0"/"d1" homology
# dimensions 0 and 1 — confirm against the feature-extraction notebook.
feature_parts = (features_l_d0, features_l_d1, features_u_d0, features_u_d1)

features = {}
for sample in range(2720):  # 2720 = total number of samples indexed in this notebook
    for method in func_list:
        key = method.__name__ + '_' + str(sample)
        features[key] = np.hstack([part[key] for part in feature_parts])

In [7]:
def _read_labels(file_name):
    """Read the second space-separated column of `path_data + file_name` as a 1-D array."""
    column = pd.read_csv(path_data + file_name, sep=" ", usecols=[1])
    return np.array(column.to_numpy().flatten().tolist())


# Second column of each split file; used as the class labels below.
train_labels = _read_labels("train.txt")
test_labels = _read_labels("test.txt")

# Feature keys 0..1359 are used as the training samples and 1360..2719 as the
# test samples (assumes the feature pickles were indexed in that order — TODO confirm).
train_indexes = [*range(1360)]
test_indexes = [*range(1360, 2720)]

### Classical Training-Test

In [18]:
def classification(train_labels, test_labels, train_indexes, test_indexes, func_list, features, random_state=None):
    """Fit one random forest per vectorization method and score it on both splits.

    Parameters
    ----------
    train_labels, test_labels : array-like
        Class labels, ordered like ``train_indexes`` / ``test_indexes``.
    train_indexes, test_indexes : iterable of int
        Sample indexes. The vector of sample ``i`` for method ``func`` is read
        from ``features[func.__name__ + '_' + str(i)]``.
    func_list : list of callables
        Methods to evaluate. Only ``__name__`` is used; the functions are
        never called here.
    features : dict
        Maps ``'<func name>_<index>'`` to a feature vector.
    random_state : int, RandomState instance or None, default None
        Forwarded to every ``RandomForestClassifier``. ``None`` keeps the
        previous unseeded behaviour; pass an int for reproducible scores.

    Returns
    -------
    train_scores, test_scores : dict
        ``clf.score`` on the training / test samples, keyed by ``func.__name__``.

    Raises
    ------
    KeyError
        If ``features`` lacks an entry for a requested (method, index) pair.
    """
    train_scores = dict()
    test_scores = dict()
    for func in func_list:
        prefix = func.__name__ + '_'
        X_train = [features[prefix + str(i)] for i in train_indexes]
        X_test = [features[prefix + str(i)] for i in test_indexes]

        clf = RandomForestClassifier(random_state=random_state)
        clf.fit(X_train, train_labels)

        # Both dictionaries are keyed by the function *name*. The train scores
        # used to be keyed by the function object itself, which did not match
        # the test scores and made the result awkward to pickle or print.
        train_scores[func.__name__] = clf.score(X_train, train_labels)
        test_scores[func.__name__] = clf.score(X_test, test_labels)

    return train_scores, test_scores

In [10]:
# Repeat the fixed train/test experiment n times; only the random forest's own
# randomness changes between repetitions. Row i of `scores_vector` holds the
# test scores of repetition i, one column per entry of `func_list`.
n = 10
scores_vector = np.zeros([n, len(func_list)])
for i in range(n):
    # Fixed: the index lists are named `train_indexes` / `test_indexes`, and
    # `classification` also requires the `features` dictionary.
    train_scores, test_scores = classification(
        train_labels=train_labels,
        test_labels=test_labels,
        train_indexes=train_indexes,
        test_indexes=test_indexes,
        func_list=func_list,
        features=features,
    )
    # Look the scores up by name so the columns follow `func_list` explicitly
    # rather than relying on dictionary insertion order.
    scores_vector[i, :] = [test_scores[func.__name__] for func in func_list]

In [11]:
# Summarise every column of `scores_vector` (one column per method) as
# (mean, standard deviation) over the repetitions.
scores_avg = {}
for method, column in zip(func_list, scores_vector.T):
    scores_avg[method.__name__] = (column.mean(), column.std())

In [12]:
# Persist the train/test summary next to the other result files.
result_file = path_res + '68-train-test_score.pkl'
with open(result_file, 'wb') as out_file:
    pickle.dump(scores_avg, out_file)

In [13]:
# Display the summary: method name -> (mean, std) of its test score over the repeated runs.
scores_avg

{'GetPersStats': (0.5674264705882354, 0.006037028904442944),
 'GetPersImageFeature': (0.5088970588235294, 0.006260372016539201),
 'GetPersLandscapeFeature': (0.46448529411764705, 0.0043630893232784775),
 'GetPersEntropyFeature': (0.5183823529411764, 0.005845472621868194),
 'GetBettiCurveFeature': (0.5600735294117648, 0.005312977029264724),
 'GetCarlssonCoordinatesFeature': (0.4803676470588235, 0.004900060906337096),
 'GetPersSilhouetteFeature': (0.45786764705882355, 0.004998377764516867),
 'GetTopologicalVectorFeature': (0.3610294117647059, 0.004954376625859713),
 'GetAtolFeature': (0.40764705882352936, 0.007376433061167325),
 'GetComplexPolynomialFeature': (0.1488235294117647, 0.007521595207379072),
 'GetPersLifespanFeature': (0.6314705882352941, 0.005248990675757259),
 'GetPersTropicalCoordinatesFeature': (0.5697058823529411,
  0.007547427894327249)}

### K-Fold Cross Validation with the Training Set

In [22]:
def kfold_classification(labels, indexes, func_list, features, n=5):
    """Cross-validate a default random forest for each vectorization method.

    Parameters
    ----------
    labels : array-like
        Class labels, ordered like ``indexes``.
    indexes : iterable of int
        Sample indexes; the vector of sample ``i`` for method ``func`` is
        ``features[func.__name__ + '_' + str(i)]``.
    func_list : list of callables
        Methods to evaluate; only their ``__name__`` is used.
    features : dict
        Maps ``'<func name>_<index>'`` to a feature vector.
    n : int, default 5
        Passed to ``cross_val_score`` as ``cv``.

    Returns
    -------
    dict
        ``{func.__name__: (mean, std)}`` of the per-fold scores.
    """
    scores = {}
    for func in func_list:
        method = func.__name__
        X = [features[method + '_' + str(i)] for i in indexes]
        fold_scores = cross_val_score(RandomForestClassifier(), X, labels, cv=n)
        scores[method] = (fold_scores.mean(), fold_scores.std())

    return scores

In [23]:
# 10-fold cross-validation restricted to the training samples.
# Fixed: the index list is named `train_indexes` (there is no `train_index`).
scores_avg = kfold_classification(
    labels=train_labels,
    indexes=train_indexes,
    func_list=func_list,
    features=features,
    n=10,
)
with open(path_res + '68-k-fold_train_scores.pkl', 'wb') as f:
    pickle.dump(scores_avg, f)

In [24]:
# Display the summary: method name -> (mean, std) of the cross-validation fold scores on the training set.
scores_avg

{'GetPersStats': (0.8801470588235294, 0.03677205808838231),
 'GetPersImageFeature': (0.7205882352941176, 0.041594516540385144),
 'GetPersLandscapeFeature': (0.7852941176470589, 0.03929526239966877),
 'GetPersEntropyFeature': (0.8, 0.049303101806588846),
 'GetBettiCurveFeature': (0.7933823529411765, 0.027403889931741518),
 'GetCarlssonCoordinatesFeature': (0.8441176470588235, 0.05871313169905038),
 'GetPersSilhouetteFeature': (0.8264705882352942, 0.04664338684456972),
 'GetTopologicalVectorFeature': (0.6441176470588236, 0.05630644035022543),
 'GetAtolFeature': (0.5779411764705882, 0.05828800347825417),
 'GetComplexPolynomialFeature': (0.9602941176470587, 0.01923043651562061),
 'GetPersLifespanFeature': (0.8257352941176471, 0.03120455172635729),
 'GetPersTropicalCoordinatesFeature': (0.8566176470588236,
  0.06118866535696042)}

### K-Fold Cross Validation with the Whole Set

In [9]:
# Labels and indexes of the full data set (training samples first, then test).
# `train_labels` and `test_labels` are NumPy arrays, so `+` would add them
# element-wise instead of joining them; concatenate explicitly so that there is
# exactly one label per entry of `whole_index`.
whole_labels = np.concatenate([train_labels, test_labels])
whole_index = list(train_indexes) + list(test_indexes)

In [10]:
# 10-fold cross-validation over all samples.
# Fixed: `kfold_classification` requires the `features` dictionary, which was
# missing from this call.
scores_avg = kfold_classification(
    labels=whole_labels,
    indexes=whole_index,
    func_list=func_list,
    features=features,
    n=10,
)
with open(path_res + '68-k-fold_whole_scores.pkl', 'wb') as f:
    pickle.dump(scores_avg, f)

NameError: name 'kfold_classification' is not defined

In [28]:
# Display the summary: method name -> (mean, std) of the cross-validation fold scores on the whole set.
scores_avg

{'GetPersStats': (0.8654411764705883, 0.05684161202966967),
 'GetPersImageFeature': (0.6933823529411766, 0.05815799980377997),
 'GetPersLandscapeFeature': (0.7613970588235294, 0.06423731580798293),
 'GetPersEntropyFeature': (0.7658088235294118, 0.07697883280936416),
 'GetBettiCurveFeature': (0.7871323529411764, 0.04058963392730884),
 'GetCarlssonCoordinatesFeature': (0.8136029411764707, 0.07316176470588236),
 'GetPersSilhouetteFeature': (0.8147058823529412, 0.04765817771499647),
 'GetTopologicalVectorFeature': (0.5761029411764705, 0.07117650856829322),
 'GetAtolFeature': (0.586764705882353, 0.024175415033490577),
 'GetComplexPolynomialFeature': (0.9294117647058823, 0.044568771943211),
 'GetPersLifespanFeature': (0.8198529411764707, 0.04719648981141535),
 'GetPersTropicalCoordinatesFeature': (0.8496323529411764,
  0.05148240410592682)}

### Classical Training-Test 70/30

In [11]:
# Join the two original splits (training part first) into flat arrays of
# labels and sample indexes so they can be re-split at random.
labels = np.concatenate((train_labels, test_labels))
indexes = np.concatenate((train_indexes, test_indexes))

In [12]:
# Random 70/30 split with a fixed seed so the partition is reproducible.
# What is split are the sample *indexes*: X_train / X_test hold indexes into
# `features`, not feature vectors; y_train / y_test are the matching labels.
X_train, X_test, y_train, y_test = train_test_split(
    indexes,
    labels,
    test_size=0.3,
    random_state=1,
)

In [19]:
# Repeat the 70/30 experiment n times on the same split; only the random
# forest's own randomness changes between repetitions.
n = 10
scores_vector = np.zeros([n, len(func_list)])
for i in range(n):
    train_scores, test_scores = classification(
        train_labels=y_train,
        test_labels=y_test,
        train_indexes=X_train,
        test_indexes=X_test,
        func_list=func_list,
        features=features,
    )
    # Look the scores up by name so the columns follow `func_list` explicitly
    # rather than relying on dictionary insertion order.
    scores_vector[i, :] = [test_scores[func.__name__] for func in func_list]

# (mean, std) of the test score per method over the n repetitions.
scores_avg = {
    func.__name__: (scores_vector[:, j].mean(), scores_vector[:, j].std())
    for j, func in enumerate(func_list)
}

# Store the summary in the results directory like the other score files
# (it used to be written to the current working directory).
with open(path_res + '68-7030-scores.pkl', 'wb') as f:
    pickle.dump(scores_avg, f)

In [20]:
# Display the summary: method name -> (mean, std) of its test score over the repeated 70/30 runs.
scores_avg

{'GetPersStats': (0.878921568627451, 0.003944479642017408),
 'GetPersImageFeature': (0.7087009803921568, 0.009986805260725162),
 'GetPersLandscapeFeature': (0.7792892156862744, 0.006518179483809878),
 'GetPersEntropyFeature': (0.7642156862745099, 0.0056212475201861635),
 'GetBettiCurveFeature': (0.7848039215686275, 0.008651648330746435),
 'GetCarlssonCoordinatesFeature': (0.8435049019607843, 0.003150725522593724),
 'GetPersSilhouetteFeature': (0.8140931372549021, 0.00675799788184421),
 'GetTopologicalVectorFeature': (0.6155637254901961, 0.0028504174876502367),
 'GetAtolFeature': (0.5897058823529412, 0.003828553762699327),
 'GetComplexPolynomialFeature': (0.9542892156862746, 0.0037990196078431697),
 'GetPersLifespanFeature': (0.8280637254901961, 0.005426860251344272),
 'GetPersTropicalCoordinatesFeature': (0.8497549019607844,
  0.004144983952276398)}