In [1]:
# DataFrame handling
import pandas as pd

# Split data with stratified cv
from sklearn.model_selection import StratifiedKFold

# keras Models
from keras.models import Sequential
from keras.layers import Dense

# Encoding of classifications
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

print('Imports complete.')

Using TensorFlow backend.


Imports complete.


In [2]:
# Notebook-wide settings shared by the cells below
random_state = 1                        # seed passed to the cross-validation splitter
path = '../../tor_dataset/Scenario-B/'  # prefix prepended to every data file name
dep_var = 'class'                       # name of the column holding the label
num_classes = 0                         # class count placeholder; 0 until a file is inspected

In [3]:
def get_Xy(filename='', verbose=False):
    """
        Load a csv file into a dataframe and separate the feature columns from the
        classification column (named by the module-level `dep_var`).

        Side effect: the module-level `num_classes` is set to the number of distinct
        labels the encoder found in the loaded file.

        args:
            filename => str, path to csv file to be loaded
            verbose  => bool, when True print the head of the raw frame, the
                        label -> integer mapping, and the head of X and y

        returns:
            (X, y) => X is a DataFrame holding every column except `dep_var`,
                      y is the array of integer codes produced by LabelEncoder
    """
    # Without this declaration the assignment further down would only bind a
    # function-local name and the module-level num_classes would never change.
    global num_classes

    df = pd.read_csv(filename)

    if verbose:
        print('Before encoding and splitting:')
        print(df.head())

    # Actual data: everything except the label column
    X = df.loc[:, df.columns != dep_var]

    # Classifications, encoded as consecutive integers starting at 0
    encoder = LabelEncoder()
    y = encoder.fit_transform(df[dep_var])

    # Record the number of classes seen; taken from the fitted encoder so it
    # always agrees with the integer codes stored in y
    num_classes = len(encoder.classes_)

    if verbose:
        print('Classification encoding:')
        for i, label in enumerate(encoder.classes_):
            print('\t{} => {}'.format(i, label))

        print('After encoding and splitting:')
        print('X = ')
        print(X.head())
        print('\ny = ')
        print(y[:5])

    # X holds the data while y holds the classifications
    return X, y

In [4]:
# Deep Neural Network model training and evaluation
def build_fit_eval(opt, act, files=None, n_splits=10, epochs=10, batch_size=10):
    """
        Train and evaluate a two-layer dense network on each data file using
        stratified k-fold cross-validation.

        args:
            opt        => optimizer passed straight to `compile`
            act        => activation passed to the final Dense layer
            files      => iterable of csv file names (each is appended to the
                          module-level `path`); defaults to the four
                          TimeBasedFeatures Layer2 files
            n_splits   => int, number of cross-validation folds
            epochs     => int, training epochs per fold
            batch_size => int, mini-batch size used while fitting

        returns:
            list of floats => mean test-fold accuracy for each file, in the
                              same order as `files`
    """
    # All of the data files
    if files is None:
        files = ['TimeBasedFeatures-15s-Layer2.csv',
                 'TimeBasedFeatures-30s-Layer2.csv',
                 'TimeBasedFeatures-60s-Layer2.csv',
                 'TimeBasedFeatures-120s-Layer2.csv']

    # Mean accuracy collected for each file
    list_dnn = []

    for file in files:
        # Load in the data
        X, y = get_Xy(path + file)

        # Size the network from the data instead of hard-coding it.
        # y holds integer codes starting at 0, so max + 1 is the one-hot width;
        # computing it once from the full label array keeps the train and test
        # encodings the same width even if a fold happens to miss the top class.
        n_features = X.shape[1]
        n_classes = int(y.max()) + 1

        # Test accuracy of every fold for this file
        fold_accs = []

        # Stratified Cross-Validation
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        for train_idxs, test_idxs in skf.split(X, y):
            # Define the training and testing sets
            X_train, X_test = X.iloc[train_idxs], X.iloc[test_idxs]
            y_train, y_test = y[train_idxs], y[test_idxs]

            # One-hot versions of the labels for the Deep Neural Network
            y_train_dnn = to_categorical(y_train, num_classes=n_classes)
            y_test_dnn = to_categorical(y_test, num_classes=n_classes)

            # Deep Neural Network
            # NOTE(review): `act` is applied to the output layer while the hidden
            # layer has no activation; categorical_crossentropy is normally paired
            # with a softmax output - confirm this is the intended experiment.
            dnn = Sequential([
                Dense(64, input_shape=(n_features,)),
                Dense(n_classes, activation=act)
            ])
            dnn.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

            # Train the model. No validation_data is passed: the per-epoch
            # validation metrics were never read, and the held-out fold is
            # scored by evaluate() below.
            dnn.fit(x=X_train, y=y_train_dnn, epochs=epochs, batch_size=batch_size, verbose=0)

            # This returns [loss, accuracy]
            results_dnn = dnn.evaluate(X_test, y_test_dnn, verbose=0)
            fold_accs.append(results_dnn[1])

        # Push the mean accuracy over all of the folds to the list
        list_dnn.append(sum(fold_accs) / len(fold_accs))

    return list_dnn

In [5]:
# Optimizer identifiers to sweep over
optimizers = ['SGD', 'RMSprop', 'Adam', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam']

# Activation identifiers to sweep over
activations = [
    'relu', 'sigmoid', 'softmax',
    'softplus', 'softsign', 'tanh',
    'selu', 'elu', 'exponential',
]

In [6]:
# Sweep every optimizer/activation pair and print one row of accuracies per pair
print('Optimizer\tActivation\tAccuracies (%)\n')
for opt in optimizers:
    # The optimizer name is written once, in front of its first row
    print(opt, end='')
    for act in activations:
        # Write the row label before training so it appears while the models fit
        print('\t\t{}\t'.format(act), end='')
        accs = build_fit_eval(opt, act)
        # One tab-prefixed percentage per returned accuracy, then end the row
        row = ''.join('\t{:.2f}'.format(100 * acc) for acc in accs)
        print(row)

Optimizer	Activation	Accuracies (%)

SGD		relu		33.99	34.83	26.99	18.12
		sigmoid		5.09	8.48	4.59	4.31
		softmax		1.37	1.77	2.35	3.28
		softplus		22.71	31.86	36.44	23.42
		softsign		3.10	2.16	5.43	5.94
		tanh		1.37	1.77	2.35	3.28
		selu		36.52	21.64	33.78	30.93
		elu		33.60	31.72	32.58	23.34
		exponential		1.37	1.77	2.35	3.28
RMSprop		relu		5.80	20.60	28.00	11.11
		sigmoid		4.85	3.21	5.23	5.94
		softmax		48.63	50.37	50.34	44.43
		softplus		8.42	11.18	35.87	32.14
		softsign		1.37	1.77	2.35	3.28
		tanh		1.37	1.77	2.35	3.28
		selu		21.22	20.74	28.71	40.06
		elu		25.45	27.61	41.24	38.54
		exponential		1.37	1.77	2.35	3.28
Adam		relu		24.11	42.43	39.23	33.55
		sigmoid		3.21	7.70	3.94	5.75
		softmax		61.28	49.57	55.13	47.54
		softplus		31.16	30.87	42.18	33.16
		softsign		1.37	1.77	2.35	3.28
		tanh		1.37	1.88	2.99	3.28
		selu		37.02	35.95	35.43	35.40
		elu		47.71	39.26	37.50	33.16
		exponential		1.37	1.77	2.35	3.28
Adadelta		relu		7.53	27.89	43.53	35.73
		sigmoid		3.63	2.94	4.17	6.37
		softmax



	1.37	1.77	2.35	3.28
Adamax		relu		26.52	27.12	43.50	34.67
		sigmoid		3.75	5.44	4.92	9.49
		softmax		58.96	51.04	47.78	40.54
		softplus		25.92	35.45	34.07	42.50
		softsign		1.37	1.77	2.35	3.28
		tanh		1.85	2.27	2.35	4.12
		selu		33.63	35.38	33.60	39.81
		elu		33.63	32.00	41.69	30.51
		exponential		1.37	1.77	2.35	3.28
Nadam		relu		32.11	42.30	28.65	32.73
		sigmoid		3.75	4.38	6.52	5.55
		softmax		49.26	44.91	41.21	29.80
		softplus		34.40	37.25	33.25	40.75
		softsign		1.37	1.77	2.35	3.28
		tanh		1.37	1.77	2.35	4.12
		selu		36.13	46.47	36.87	36.68
		elu		42.53	47.43	31.91	39.20
		exponential		1.37	1.77	2.35	3.28
