In [None]:
#general packages
import os
import timeit
import pandas as pd
from glob import glob
import numpy as np
from scipy import signal
import scipy
import math

#possible package for feature extraction
import entropy as ent
import librosa.feature as lrf 

#packages for models
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

#packages for feature selection
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.decomposition import PCA

In [None]:
#functions for feature extraction
# 25th Percentile
def q25(x):
    """Return the 25th percentile of ``x`` via its ``quantile`` method."""
    lower_quartile = x.quantile(q=0.25)
    return lower_quartile

# 75th Percentile
def q75(x):
    """Return the 75th percentile of ``x`` via its ``quantile`` method."""
    upper_quartile = x.quantile(q=0.75)
    return upper_quartile
#
def Featuremfcc(X):
    """Sum of the values returned by ``librosa.feature.mfcc`` (sr=40, n_mfcc=1).

    Returns 0 without calling librosa when the input contains no non-zero
    value or contains any non-finite value.
    """
    samples = np.array(X)
    # nothing but zeros (or nothing at all): no signal to analyse
    if not samples.any():
        return 0
    # NaN / inf anywhere in the window
    if not np.isfinite(samples).all():
        return 0
    coefficients = lrf.mfcc(y=samples, sr=40, n_mfcc=1)
    return np.sum(coefficients)
#spectral entropy
def Featureent(X):
    """Spectral entropy of ``X`` from ``entropy.spectral_entropy``.

    The call uses sf=40, the 'fft' method and no normalisation. Returns 0
    without calling the library when the input holds any non-finite value.
    """
    samples = np.array(X)
    all_finite = np.isfinite(samples).all()
    if not all_finite:
        return 0
    return ent.spectral_entropy(samples, sf=40, method='fft', normalize=False)
#zero crossing rate
def Featurezcr(X):
    """Mean of ``librosa.feature.zero_crossing_rate`` over ``X``.

    librosa is called with frame_length=40 and hop_length=100. Returns 0
    without calling librosa for input that has a non-finite value or that
    contains no non-zero value.
    """
    samples = np.array(X)
    if not np.isfinite(samples).all():
        return 0
    if not samples.any():
        return 0
    rates = lrf.zero_crossing_rate(samples, frame_length=40, hop_length=100)
    return np.mean(rates)

                                
#spectral roll off                                
def Featurespecrolloff(X):
    """Mean of ``librosa.feature.spectral_rolloff`` over ``X``.

    librosa is called with sr=40, hop_length=100 and roll_percent=0.9.
    Returns 0 without calling librosa for input that has a non-finite value
    or that contains no non-zero value.
    """
    samples = np.array(X)
    if not np.isfinite(samples).all():
        return 0
    if not samples.any():
        return 0
    rolloff = lrf.spectral_rolloff(y=samples, sr=40, hop_length=100, roll_percent=0.9)
    return np.mean(rolloff)
                                  
                                  
#spectral bandwidth                                  
def Featurespecbandwidth(X):
    """Mean of ``librosa.feature.spectral_bandwidth`` over ``X``.

    librosa is called with sr=40 and hop_length=100. Returns 0 without
    calling librosa when the input has a non-finite value or contains no
    non-zero value.
    """
    X=np.array(X)
    # Same non-finite guard as the other librosa-based features in this
    # notebook; without it NaN/inf samples were passed straight to librosa.
    if not np.isfinite(X).all():
        return 0
    if not np.any(X):
        return 0
    return np.mean(lrf.spectral_bandwidth(y=X, sr=40, hop_length=100))
#spectral flatness                                  
def Featurespecflatness(X):
    """Mean of ``librosa.feature.spectral_flatness`` over ``X`` (hop_length=100).

    Returns 0 without calling librosa for input that has a non-finite value
    or that contains no non-zero value.
    """
    samples = np.array(X)
    if not np.isfinite(samples).all():
        return 0
    if not samples.any():
        return 0
    flatness = lrf.spectral_flatness(y=samples, hop_length=100)
    return np.mean(flatness)
                                
                                
#spectral centroid                                
def FeatureSpectralCentroid(X, f_s=40): #works
    """Weighted mean bin index of ``X`` along axis 0, rescaled to ``[0, f_s/2]``.

    Parameters
    ----------
    X : array-like
        1-D input (one vector of weights) or 2-D input where every column is
        treated independently. Plain sequences are accepted.
    f_s : number, default 40
        Sampling rate used for the index-to-Hz conversion.

    Returns
    -------
    numpy.ndarray
        0-d array for 1-D input, otherwise a 1-D array with one value per
        column. Columns whose weights sum to zero yield 0.
    """
    # Convert before inspecting `.ndim` so lists and other sequences work too.
    X = np.asarray(X)
    isSpectrum = X.ndim == 1

    # X = X**2 removed for consistency with book
    norm = X.sum(axis=0, keepdims=True)
    norm[norm == 0] = 1  # avoid dividing by zero for all-zero columns

    vsc = np.dot(np.arange(0, X.shape[0]), X) / norm

    # convert from index to Hz; with a single bin the centroid index is 0 and
    # the (n - 1) divisor would turn it into 0/0, so clamp the divisor to 1
    last_bin = max(X.shape[0] - 1, 1)
    vsc = vsc / last_bin * f_s / 2

    # if input is a spectrum, output scaler else if spectrogram, output 1d array
    vsc = np.squeeze(vsc) if isSpectrum else np.squeeze(vsc, axis=0)

    return vsc
#spectral spread
def FeatureSpectralSpread(X, f_s=40):#works
    """Weighted standard deviation of the bin index around the spectral centroid.

    1-D input is treated as a single column. The result is rescaled from bin
    index to ``[0, f_s/2]`` and returned as a 0-d array for 1-D input or as
    one value per column otherwise.
    """
    one_dimensional = X.ndim == 1
    if one_dimensional:
        X = np.expand_dims(X, axis=1)

    n_bins = X.shape[0]
    n_frames = X.shape[1]

    # the centroid comes back in Hz; undo the scaling to get it as a bin index
    centroid_idx = FeatureSpectralCentroid(X, f_s) * 2 / f_s * (n_bins - 1)

    # X = X**2 removed for consistency with book
    totals = X.sum(axis=0)
    totals[totals == 0] = 1  # all-zero columns divide by 1 instead of 0

    bin_idx = np.arange(0, n_bins)
    spread = np.zeros(n_frames)
    for col in range(0, n_frames):
        squared_distance = (bin_idx - centroid_idx[col])**2
        spread[col] = np.dot(squared_distance, X[:, col]) / totals[col]
    spread = np.sqrt(spread)

    # convert from index to Hz
    spread = spread / (n_bins - 1) * f_s / 2

    if one_dimensional:
        return np.squeeze(spread)
    return spread
#spectral flux
def FeatureSpectralFlux(X, f_s=40):#works
    """Root of the summed squared column-to-column differences, divided by the row count.

    The first column is duplicated before differencing, so the first
    difference is always zero; a 1-D input (a single column) therefore
    yields 0. ``f_s`` is accepted but not used in the computation.
    """
    one_dimensional = X.ndim == 1
    if one_dimensional:
        X = np.expand_dims(X, axis=1)

    # prepend a copy of the first column (sets the first diff to zero)
    padded = np.c_[X[:, 0], X]
    frame_delta = np.diff(padded, 1, axis=1)

    # flux
    energy = (frame_delta**2).sum(axis=0)
    flux = np.sqrt(energy) / padded.shape[0]

    if one_dimensional:
        return np.squeeze(flux)
    return flux

In [None]:
#generate a folder for all data and results
# Output folders used by the rest of the notebook; only missing ones are
# created, and each creation is reported.
new_folder_list=['data2/label','data2/data', 'data2/feature','data2/pca','data2/results']
for new_folder in new_folder_list:
    if os.path.exists(new_folder):
        continue
    os.makedirs(new_folder)
    print(new_folder + ' has been created')

In [None]:
#function to generate basic features
def data_separation():
    """Derive per-sample columns for every participant and write one CSV each.

    Reads 'data/all_accelerometer_data_pids_13.csv', drops rows whose time
    index is below 1, converts the index from epoch milliseconds and, for
    each distinct 'pid', adds first differences, squares, absolute values,
    vector magnitude and product of x/y/z, plus the same set computed on a
    Chebyshev type II band-stop filtered copy ('f' prefixed columns).
    Rows with missing values are dropped and the result is written to
    'data2/data/<pid>.csv'.

    Side effects: sets the global ``listpreproces`` to the column index of
    the generated frame and prints the created file names and elapsed seconds.
    """
    tic= timeit.default_timer()
    global listpreproces
    data_df=pd.read_csv('data/all_accelerometer_data_pids_13.csv', parse_dates=True, index_col='time')
    data_df.drop(data_df[data_df.index < 1].index, inplace=True)
    data_df.index=pd.to_datetime(data_df.index, unit='ms')
    # unique() keeps first-appearance order, so files are produced in a
    # reproducible order (a set has no defined order)
    pids = data_df['pid'].unique()
    # The filter does not depend on the participant: design it once.
    # cheby2(order=15, stop-band attenuation=40 dB, normalised band edges).
    b, a = signal.cheby2(15, 40, [0.1,0.9], 'bandstop')
    for pid in pids:
        # explicit copy: new columns are added below and must not be written
        # through a view of data_df (SettingWithCopyWarning)
        df = data_df[data_df['pid'] == pid].copy()
        x=df['x'].copy()
        y=df['y'].copy()
        z=df['z'].copy()
        #first difference
        df['delta x'] = x-x.shift(1)
        df['delta y'] = y-y.shift(1)
        df['delta z'] = z-z.shift(1)
        #squared
        df['x^2']=x**2
        df['y^2']=y**2
        df['z^2']=z**2
        #abs
        df['abs x'] = np.absolute(x)
        df['abs y'] = np.absolute(y)
        df['abs z'] = np.absolute(z)
        #energy feature
        df['energy']=np.sqrt(x**2+y**2+z**2)
        #prod
        df['prod']= x*y*z
        #add chebychev
        filteredx = signal.lfilter(b,a,x).copy()
        filteredy = signal.lfilter(b,a,y).copy()
        filteredz = signal.lfilter(b,a,z).copy()
        df['fx'] = filteredx
        df['fy'] = filteredy
        df['fz'] = filteredz
        #first differences (first sample has no predecessor: use 0)
        df['delta fx'] = np.concatenate(([0],np.diff(filteredx)))
        df['delta fy'] = np.concatenate(([0],np.diff(filteredy)))
        df['delta fz'] = np.concatenate(([0],np.diff(filteredz)))
        #squared
        df['fx^2']=filteredx**2
        df['fy^2']=filteredy**2
        df['fz^2']=filteredz**2
        #abs
        df['abs fx'] = np.absolute(filteredx)
        df['abs fy'] = np.absolute(filteredy)
        df['abs fz'] = np.absolute(filteredz)
        #energy feature
        df['fenergy']=np.sqrt(filteredx**2+filteredy**2+filteredz**2)
        #prod of the filtered signals; stored under its own name so it no
        #longer overwrites the raw 'prod' column written above
        df['fprod']= filteredx*filteredy*filteredz

        listpreproces=df.columns
        filename='data2/data/'+pid+'.csv'
        # the shift-based deltas leave NaN in the first row
        df.dropna(inplace=True)
        df.to_csv(filename)

        print(filename +" has been created")
    toc= timeit.default_timer()
    print(toc-tic)


data_separation()

In [None]:
#tranform the target into a classification
def add_label():
    """Turn every cleaned TAC file into a per-second binary label file.

    For each 'data/clean_tac/<pid>_clean_TAC.csv' a 'label' column is added
    (1 where 'TAC_Reading' >= 0.08, else 0), the index is converted from
    epoch seconds, the frame is upsampled to one row per second by forward
    filling, and the result is written to 'data2/label/<pid>.csv'.

    Returns
    -------
    list of str
        The participant ids, in sorted file-name order.
    """
    pids=[]
    tic= timeit.default_timer()
    suffix = '_clean_TAC.csv'
    # sorted: glob returns files in arbitrary order
    for csv_file in sorted(glob('data/clean_tac/*.csv')):
        df=pd.read_csv(csv_file, parse_dates=True, index_col='timestamp')
        # basename works with both '/' and the platform's own separator,
        # instead of slicing off a hard-coded Windows-style prefix
        pid=os.path.basename(csv_file)[:-len(suffix)]
        pids.append(pid)
        df['label']= 0
        df.loc[df['TAC_Reading']>=.08,'label']= 1
        df.index=pd.to_datetime(df.index, unit='s')
        # ffill() is the supported spelling; the pad() alias was removed in pandas 2.0
        df=df.resample('1S').ffill()
        filename='data2/label/'+ pid+'.csv'
        df.to_csv(filename)
        print (filename +' has been created')
    toc= timeit.default_timer()
    print(toc-tic)
    return pids

pids=add_label()

In [None]:
#a function to show how a model performs
def classification_report_df(y_true, y_pred, labels,name=""):
    """Parse sklearn's text classification report into a DataFrame and save it.

    Parameters
    ----------
    y_true, y_pred, labels
        Passed to ``classification_report`` (4 digits, zero_division=0).
    name : str
        Appended to the output file name
        'data2/results/classification_report <name>.csv'.

    Returns
    -------
    pandas.DataFrame
        One row per report line that has exactly five fields (class rows and
        the macro/weighted averages), with precision, recall and F1 in
        percent, the support, and the minutes elapsed since the notebook
        global ``tic`` (NaN when ``tic`` has not been set).
    """
    report = classification_report(y_true, y_pred, labels=labels, digits=4, zero_division=0)
    # join the two-word row labels so every kept row splits into five fields
    report=report.replace("\nweighted avg", "\n weighted_avg")
    report=report.replace("\n weighted avg", "\n weighted_avg")
    report=report.replace("macro avg", "macro_avg")
    toc= timeit.default_timer()
    # `tic` is set by the calling cell; do not fail on a fresh kernel
    started = globals().get('tic')
    elapsed_min = (toc-started)/60 if started is not None else float('nan')

    column_names = ['Class', 'Precision', 'Recall', 'F1_score', 'Support', 'time in min']
    report_data = []
    for line in report.split('\n')[2:]:
        row_data = line.split(None)
        # skips blank lines and rows with a different field count (e.g. accuracy)
        if len(row_data)!=5:
            continue
        report_data.append({
            'Class': row_data[0],
            'Precision': 100*float(row_data[1]),
            'Recall': 100*float(row_data[2]),
            'F1_score': 100*float(row_data[3]),
            'Support': float(row_data[4]),
            'time in min': elapsed_min,
        })
    # explicit columns keep the schema even when no row was parsed
    df_clf_report = pd.DataFrame(report_data, columns=column_names)
    df_clf_report.to_csv('data2/results/classification_report '+name+'.csv', index = False)
    return df_clf_report

In [None]:
#extract final features
def extract_features(df, time_interval):
    """Resample ``df`` and aggregate every preprocessed column with all feature functions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index containing the columns named in the
        global ``listpreproces``.
    time_interval : str
        Resampling rule passed to ``DataFrame.resample``.

    Returns
    -------
    pandas.DataFrame
        Result of ``df.resample(time_interval).agg(...)`` where each column
        in ``listpreproces`` is aggregated with every function listed below.

    Prints the elapsed seconds, including the aggregation itself.
    """
    tic= timeit.default_timer()
    listsortfeatures=[np.nansum, np.nanmin, np.nanmax, np.nanmean, np.nanmedian,
                      np.nanstd, np.nanvar,scipy.stats.skew,scipy.stats.kurtosis, q75, q25,
                      FeatureSpectralCentroid,FeatureSpectralSpread,FeatureSpectralFlux,
                     Featureent, Featurezcr, Featurespecrolloff,
                      Featurespecflatness]#missing Featuremfcc,Featurespecbandwidth
    # listpreproces is a notebook global set by the preprocessing steps
    dictagg={i:listsortfeatures for i in listpreproces}
    features = df.resample(time_interval).agg(dictagg)
    # stop the clock after the aggregation so the printed time covers the work
    toc= timeit.default_timer()
    print(toc-tic)
    return features


In [None]:
def extract_features_label(pids,interval):
    """Build and save the windowed feature table with labels for each participant.

    For every pid the preprocessed data ('data2/data/<pid>.csv') and label
    file ('data2/label/<pid>.csv') are read, features are extracted per
    ``interval`` with ``extract_features``, inner-joined with the 'label'
    column on the time index and written to 'data2/feature/<pid>.csv'.

    Side effects: sets the global ``listpreproces`` to the data columns
    after the first one, and ``dim`` to the column count of the last data
    file read (left untouched when ``pids`` is empty).
    """
    global listpreproces
    global dim

    data_df = None
    for pid in pids:
        # read from the folders the preprocessing steps of this notebook
        # write to (data2/...), not from data/...
        in_filename_data='data2/data/'+ pid+'.csv'
        in_filename_label='data2/label/'+ pid+'.csv'
        out_filename='data2/feature/'+ pid+'.csv'
        # All columns are read. The former `usecols=list(...).append('time')`
        # evaluated to None (list.append returns None), i.e. also "all
        # columns", but needed `listpreproces` to exist beforehand.
        data_df=pd.read_csv(in_filename_data, parse_dates=True, index_col='time')
        label_df=pd.read_csv(in_filename_label, parse_dates=True, index_col='timestamp', usecols=['timestamp', 'label'])
        # skip the first column by position; presumably 'pid' - verify against the data files
        listpreproces=list(data_df.columns)[1:]
        feature_df=extract_features(data_df, interval)
        feature_label_df = pd.concat([feature_df, label_df['label']], axis=1, join="inner")
        feature_label_df.to_csv(out_filename, index_label='time')
        print(out_filename + ' has been created')
    if data_df is not None:
        dim=len(data_df.columns)
#extract_features_label(pids, '1S')        

In [None]:
# load in all data in a data frame
def load_features_label(pids):
    """Load every per-participant feature file into one table.

    Parameters
    ----------
    pids : pandas.DataFrame or iterable of str
        Either the frame read from the pid list file (ids in column 0) or a
        plain sequence of ids such as the list returned by ``add_label``.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        All columns except the last, and the last column. Columns are
        numbered 0..n-1; missing values are replaced by 0.

    Side effects: sets the global ``dim`` to the number of stacked columns,
    writes the combined table to 'data2/totaldata' and prints progress and
    the elapsed seconds.

    Raises
    ------
    ValueError
        If ``pids`` contains no id.
    """
    global dim
    tic= timeit.default_timer()
    pid_values = pids[0] if isinstance(pids, pd.DataFrame) else pids
    blocks = []
    for pid in pid_values:
        filename='data2/feature/'+ pid+'.csv'
        data_df=pd.read_csv(filename)
        data_df=data_df.loc[:, data_df.columns != 'time']
        data_df=data_df.fillna(0)
        blocks.append(data_df.to_numpy())
        print(pid)
        print(data_df.shape)
    if not blocks:
        raise ValueError('load_features_label needs at least one participant id')
    # Stack once at the end instead of re-stacking the growing table for
    # every file; this also removes the need for a None placeholder row.
    df=pd.DataFrame(np.vstack(blocks))
    dim=df.shape[1]
    print(df.shape)
    toc= timeit.default_timer()
    print(toc-tic)
    df.to_csv('data2/totaldata')
    return df.iloc[:,:len(df.columns)-1],df.iloc[:,len(df.columns)-1]

In [None]:
#when running for the first time
pids=pd.read_csv('data/pids.txt',header=None)
df, y=load_features_label(pids)
#after it already ran once
#dftot=pd.read_csv('data/totaldata')
#df=dftot.iloc[:,1:len(dftot.columns)-1]
#y=dftot.iloc[:,len(dftot.columns)-1]


df=df[df.columns[np.sum(df,axis=0)!=0]]
y=y.astype('int')
df.shape

In [None]:
# 75 % / 25 % split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y,
    train_size=0.75,
    random_state=0,
)

In [None]:
#example of how to train the random forest for the full feature space
# `tic` is read by classification_report_df for the 'time in min' column
tic= timeit.default_timer()
clfRF = RandomForestClassifier(
    n_estimators=300,
    random_state=0,
    bootstrap=False,
)
clfRF.fit(X_train, y_train)
y_hat=clfRF.predict(X_test)
df_clf_report=classification_report_df(
    y_test,
    y_hat,
    labels=[0, 1],
    name='300 trees with full feature space',
)
df_clf_report

In [None]:
#calculating PCA
tic= timeit.default_timer()
# Split first and fit the projection on the training rows only; fitting PCA
# on the full table let the held-out rows influence the components.
# Same train_size and random_state as the feature split, so the rows match.
features_train, features_test, y_train, y_test = train_test_split(df, y, train_size=0.75, random_state=0)
pca = PCA()
principalComponents_xtrain = pca.fit_transform(features_train)
principalComponents_xtest = pca.transform(features_test)
# projection of every row with the train-fitted components (name kept for later use)
data_pca = pca.transform(df)
print(pca.explained_variance_ratio_.round(3)[:20])
toc = timeit.default_timer()
print(toc-tic)

In [None]:
def predictELM(X_train,X_test,y_train,hidden_size, alpha=0,method='relu', bootstrap=1, random_state=None):
    """Train an extreme learning machine and predict the test labels.

    A random hidden layer (normally distributed weights and biases) is
    applied to min-max scaled inputs; the output weights are obtained with a
    pseudo-inverse against the one-hot encoded training labels. With
    ``bootstrap`` > 1 the procedure is repeated with fresh random weights and
    the predicted class indices are averaged.

    Parameters
    ----------
    X_train, X_test : array-like, 2-D
        Training and test features.
    y_train : array-like, 1-D
        Training labels.
    hidden_size : int
        Number of hidden nodes.
    alpha : number, default 0
        Weight of the negative part in the 'relu' activation.
    method : {'relu', 'sine', 'sigmoid'}
        Hidden activation. Any other value prints 'no valid method' and
        uses the un-activated linear output.
    bootstrap : int, default 1
        Number of random networks to average.
    random_state : int or None, default None
        Seed for the random weights; None uses NumPy's global random state.

    Returns
    -------
    numpy.ndarray of bool
        True where the averaged predicted class index exceeds 0.5.
    """
    onehotencoder = OneHotEncoder(categories='auto')
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    y_train = onehotencoder.fit_transform(np.array(y_train).reshape(-1, 1)).toarray()
    # Scale the test set with the ranges learned from the training set;
    # re-fitting on the test data would map it with different min/max values.
    X_test = scaler.transform(X_test)
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    y_hat_tot=0
    input_size = X_train.shape[1]
    def relu(x,alpha):
        # No `out=` argument here: writing the result back into `x` made the
        # second term overwrite the first and the activation was all zeros.
        # NOTE(review): the negative part enters with a positive sign, as in
        # the original expression; a conventional leaky ReLU would use
        # alpha*np.minimum(x, 0). Identical for alpha=0 - confirm intent.
        return np.maximum(x, 0)+alpha*np.maximum(-x, 0)
    def sine(x):
        return np.sin(x)
    def sigmoid(x):
        return 1/(1+np.exp(-x))
    def hidden_nodes(X,method):
        # input_weights / biases are (re)drawn in the loop below
        G = np.dot(X, input_weights)
        G = G + biases
        if method=='relu':
            H = relu(G, alpha)
        elif method=='sine':
            H = sine(G)
        elif method =='sigmoid':
            H =sigmoid(G)
        else:
            print('no valid method')
            return G
        return H
    def predict(X, method):
        out = hidden_nodes(X,method)
        out = np.dot(out, output_weights)
        return out
    for i in range(bootstrap):
        input_weights = rng.normal(size=[input_size,hidden_size])
        biases = rng.normal(size=[hidden_size])
        # np.linalg.pinv: SVD-based pseudo-inverse (scipy.linalg.pinv2 no
        # longer exists in current SciPy releases)
        output_weights = np.dot(np.linalg.pinv(hidden_nodes(X_train, method)), y_train)
        predicted = predict(X_test,method)
        y_hat_i=np.argmax(predicted, axis=1)
        y_hat_tot+=y_hat_i
    y_hat_p=y_hat_tot/bootstrap
    y_hat=y_hat_p>0.5
    return y_hat

In [None]:
#example of ELM
# `tic` is read by classification_report_df for the 'time in min' column
tic= timeit.default_timer()
hidden_size=20
y_hat=predictELM(X_train, X_test, y_train, hidden_size, 0, 'relu')
report_name='elm '+str(hidden_size)+ ' hidden neurons and ReLu as activation'
df_clf_report=classification_report_df(y_test, y_hat, labels=[0, 1], name=report_name)
df_clf_report

In [None]:
# using sequential floating feature selection 
tic= timeit.default_timer()
model=RandomForestClassifier(n_estimators=10,random_state=0)
# Forward floating selection of 75 features scored on accuracy.
# NOTE(review): cv=0 is passed, so the score printed under 'CV Score:' is
# presumably not a cross-validated one - confirm against the mlxtend docs.
sfs_settings = dict(
    k_features=75,
    forward=True,
    floating=True,
    scoring='accuracy',
    cv=0,
    verbose=5,
    n_jobs=-1,
)
sfs1 = SFS(model, **sfs_settings)

sfs1 = sfs1.fit(X_train, y_train)
features_sffs=sfs1.k_feature_idx_
print('\nSequential Forward Floating Selection (k=75):')
print(features_sffs)
print('CV Score:')
print(sfs1.k_score_)


In [None]:
# Rank the features by the absolute Pearson correlation with the label;
# cor_feature is ordered from weakest to strongest.
cor_list=[]
for i in X_train.columns.tolist():
    cor = np.corrcoef(X_train[i].astype(float), y_train)[0, 1]
    cor_list.append(cor)
# A constant column has an undefined (NaN) correlation. argsort places NaN
# last, which would rank such columns as the strongest features, so count
# them as zero correlation instead.
cor_strength = np.nan_to_num(np.abs(cor_list))
cor_feature = X_train.iloc[:,np.argsort(cor_strength)].columns.tolist()

In [None]:
#grid search
# classification_report_df reads the global `tic`; restart it so the recorded
# times are relative to the start of this grid search.
tic= timeit.default_timer()
trees=[10,20,30,50,100,300]
number_of_hidden_nodes=[5,10,15,20,25,50,100,200,500]
number_of_features=[10,20,30,40,50,60,70,75,80,90,100]


def _report_f1(y_pred, name):
    """Save the report for ``y_pred`` under ``name`` and return row 2 of its F1 column."""
    report = classification_report_df(y_test, y_pred, labels=[0, 1], name=name)
    return report['F1_score'][2]


def _rf_f1(n_trees, train_features, test_features, name):
    """Fit a seeded random forest with ``n_trees`` trees and score it on the test split."""
    clf = RandomForestClassifier(n_estimators=n_trees,random_state=0)
    clf.fit(train_features, y_train)
    return _report_f1(clf.predict(test_features), name)


def _elm_f1(n_hidden, train_features, test_features, alpha, method, name):
    """Train an ELM with ``n_hidden`` nodes and score it on the test split."""
    y_pred = predictELM(train_features, test_features, y_train, n_hidden, alpha, method)
    return _report_f1(y_pred, name)


def _elm_feature_grid(alpha, method, corr_tail, pca_tail, column_tag):
    """Score the ELM for every (feature count, hidden size) pair.

    Returns (correlation frame, PCA frame), both indexed by hidden size with
    one column per feature count.
    """
    corr_frame = dfresults.copy()
    pca_frame = dfresults.copy()
    for j in number_of_features:
        corr_scores=[]
        pca_scores=[]
        for i in number_of_hidden_nodes:
            prefix = 'elm '+str(i)+ ' hidden neurons and '+str(j)+' features selected on '
            corr_scores.append(_elm_f1(i, X_train[cor_feature[-j:]], X_test[cor_feature[-j:]],
                                       alpha, method, prefix+corr_tail))
            pca_scores.append(_elm_f1(i, principalComponents_xtrain[:,:j], principalComponents_xtest[:,:j],
                                      alpha, method, prefix+pca_tail))
        corr_frame[str(j)+' feat '+column_tag+' corr ELM']=corr_scores
        pca_frame[str(j)+' feat '+column_tag+' PCA ELM']=pca_scores
    return corr_frame, pca_frame


#random forest
dfresultsRF=pd.DataFrame(trees)
dfresultsRFPCA=dfresultsRF.set_index(0)
dfresultsRFCORR=dfresultsRF.set_index(0)
for j in number_of_features:
    lst=[]
    lst2=[]
    for i in trees:
        lst.append(_rf_f1(i, X_train[cor_feature[-j:]], X_test[cor_feature[-j:]],
                          'RF '+str(i)+ ' trees and '+str(j)+' features selected on correlation '))
        lst2.append(_rf_f1(i, principalComponents_xtrain[:,:j], principalComponents_xtest[:,:j],
                           'RF '+str(i)+ ' trees and '+str(j)+' features selected on pca'))
    # correlation scores belong in the CORR frame and PCA scores in the PCA
    # frame (they used to be stored the other way round)
    dfresultsRFCORR[str(j)+' feat corr RF']=lst
    dfresultsRFPCA[str(j)+' feat trees PCA RF']=lst2

#ELM on correlation-ranked features and on principal components
dfresults=pd.DataFrame(number_of_hidden_nodes).set_index(0)
#ReLu
k=0
dfresultsRELUCORR, dfresultsRELUPCA = _elm_feature_grid(
    k, 'relu', 'correlation relu leaky '+str(k), 'pca relu leaky'+str(k), str(k)+' leaky relu')
#sine
k='sine'
dfresultsSINECORR, dfresultsSINEPCA = _elm_feature_grid(0, k, 'correlation '+k, 'pca '+k, k)
#sigmoid
k='sigmoid'
dfresultsSIGMOIDCORR, dfresultsSIGMOIDPCA = _elm_feature_grid(0, k, 'correlation '+k, 'pca '+k, k)

#ELM and random forest on the features chosen by SFFS
j=75
sffs_columns = X_train.columns[list(features_sffs)]


def _elm_sffs_scores(alpha, method, name_tail):
    """Score the ELM on the SFFS feature subset for every hidden size."""
    return [_elm_f1(i, X_train[sffs_columns], X_test[sffs_columns], alpha, method,
                    'elm '+str(i)+ ' hidden neurons and '+str(j)+' features selected on sffs '+name_tail)
            for i in number_of_hidden_nodes]


dfresultsRELUsffs=dfresults.copy()
dfresultsSINEsffs=dfresults.copy()
dfresultsSIGMOIDsffs=dfresults.copy()

# alpha must be numeric: reset k, which still held 'sigmoid' at this point
k=0
dfresultsRELUsffs[str(j)+' feat '+str(k)+ ' leaky relu sffs ELM']=_elm_sffs_scores(k, 'relu', 'relu leaky '+str(k))

k='sine'
dfresultsSINEsffs[str(j)+' feat '+str(k)+' sffs ELM']=_elm_sffs_scores(0, k, k)

k='sigmoid'
dfresultsSIGMOIDsffs[str(j)+' feat '+str(k)+' sffs ELM']=_elm_sffs_scores(0, k, k)

# indexed by tree count like the other RF result frames, so the scores line
# up when they are combined for plotting
dfresultsRFsffs=pd.DataFrame(trees).set_index(0)
# the column key (including 'sigmoid') is what the plotting cell looks up
dfresultsRFsffs[str(j)+' feat sigmoid sffs RF']=[
    _rf_f1(i, X_train[sffs_columns], X_test[sffs_columns],
           'RF '+str(i)+ ' trees and '+str(j)+' features selected on sffs ')
    for i in trees]


In [None]:
# F1 versus hidden-layer size for the best-scoring PCA column of each activation.
DFgraphPCA=dfresults.copy()
# dfresultsRELUPCABESTVAL was never defined; select the best ReLU column the
# same way as for the other two activations.
DFgraphPCA['RELU 10 features']=dfresultsRELUPCA[dfresultsRELUPCA.max().idxmax()]
DFgraphPCA['Sine 75 features']=dfresultsSINEPCA[dfresultsSINEPCA.max().idxmax()]
DFgraphPCA['Sigmoid 75 features']=dfresultsSIGMOIDPCA[dfresultsSIGMOIDPCA.max().idxmax()]
DFgraphPCA.plot(ylabel='F1 score',xlabel='number of hidden nodes',logx=True,legend=True, title='ELM with PCA')

In [None]:
# F1 versus hidden-layer size for the best-scoring correlation column of each activation.
DFgraphCORR=dfresults.copy()
# dfresultsRELUCORRBESTVAL was never defined; select the best ReLU column the
# same way as for the other two activations.
DFgraphCORR['RELU 10 features']=dfresultsRELUCORR[dfresultsRELUCORR.max().idxmax()]
DFgraphCORR['Sine 80 features']=dfresultsSINECORR[dfresultsSINECORR.max().idxmax()]
DFgraphCORR['Sigmoid 90 features']=dfresultsSIGMOIDCORR[dfresultsSIGMOIDCORR.max().idxmax()]
DFgraphCORR.plot(ylabel='F1 score',xlabel='number of hidden nodes',logx=True,legend=True, title='ELM with correlation')

In [None]:
# F1 versus hidden-layer size on the SFFS feature subset, one line per activation.
# dfresults and the *sffs frames are already indexed by the number of hidden
# nodes and have no column 0, so the former set_index(0) calls raised KeyError.
DFgraphsffs=dfresults.copy()
# each *sffs frame holds a single score column; take it by position
DFgraphsffs['RELU']=dfresultsRELUsffs.iloc[:, 0]
DFgraphsffs['Sine']=dfresultsSINEsffs.iloc[:, 0]
DFgraphsffs['Sigmoid']=dfresultsSIGMOIDsffs.iloc[:, 0]
DFgraphsffs.plot(ylabel='F1 score',xlabel='number of hidden nodes',logx=True,legend=True, title='ELM with sffs')

In [None]:
# F1 versus number of trees for the three feature-selection strategies.
DFgraphRF=pd.DataFrame(trees).set_index(0)
# Assign by position: the SFFS scores are stored in the order of `trees`,
# and an index-aligned assignment yields NaN whenever the source frame is
# not indexed by tree count.
DFgraphRF['sffs']=dfresultsRFsffs['75 feat sigmoid sffs RF'].to_numpy()
DFgraphRF['PCA']=dfresultsRFPCA[dfresultsRFPCA.max().idxmax()]
DFgraphRF['CORR']=dfresultsRFCORR[dfresultsRFCORR.max().idxmax()]
DFgraphRF.plot(ylabel='F1 score',xlabel='Trees',logx=True,legend=True, title='Random forest')