In [1]:
# DataFrame handling
import pandas as pd

# Split data with stratified cv
from sklearn.model_selection import StratifiedKFold

# keras Models
from keras.models import Sequential
from keras.layers import Dense

# Encoding of classifications
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

print('Imports complete.')

Imports complete.


Using TensorFlow backend.


In [2]:
# Notebook-wide settings shared by the cells below
random_state = 1                         # seed for the cross-validation shuffle
path = '../../tor_dataset/Scenario-B/'   # directory prefix prepended to each CSV file name
dep_var = 'class'                        # name of the column holding the class labels
num_classes = 0                          # class-count placeholder, initialised to 0

In [3]:
def get_Xy(filename='', verbose=False):
    """
        Load a CSV file into a dataframe and separate the feature columns
        from the encoded classification column.

        args:
            filename => str, path to csv file to be loaded
            verbose  => bool, when True print the head of the raw frame, the
                        label encoding table and the head of the split data

        returns:
            tuple(X, y) => X is a DataFrame holding every column except
                           `dep_var`; y is the result of
                           LabelEncoder.fit_transform on the `dep_var` column
    """
    df = pd.read_csv(filename)

    if verbose:
        print('Before encoding and splitting:')
        print(df.head())

    # Actual data: every column whose name is not the dependent variable
    X = df.loc[:, df.columns != dep_var]

    # NOTE: the class count is deliberately not stored here. A plain
    # `num_classes = ...` assignment inside this function only creates a
    # local name and never reaches the module-level `num_classes`.

    # Classifications, encoded so that code i corresponds to encoder.classes_[i]
    encoder = LabelEncoder()
    y = encoder.fit_transform(df[dep_var])

    if verbose:
        print('Classification encoding:')
        for i, label in enumerate(encoder.classes_):
            print('\t{} => {}'.format(i, label))

        print('After encoding and splitting:')
        print('X = ')
        print(X.head())
        print('\ny = ')
        print(y[:5])

    # X holds the data while y holds the classifications
    return X, y

In [4]:
# Deep Neural Network model training and evaluation
def build_fit_eval(opt='adam', act='softmax', batch_size=10, n_splits=10, epochs=10):
    """
        Build, train and score a small dense network on each of the
        time-based feature files using stratified k-fold cross-validation.

        args:
            opt        => optimizer handed to Sequential.compile
            act        => activation used on the output Dense layer
            batch_size => int, batch size handed to Sequential.fit
            n_splits   => int, number of stratified folds (default 10)
            epochs     => int, training epochs per fold (default 10)

        returns:
            list(float) => mean test-fold accuracy for each data file, in the
                           same order as the file list below
    """
    # All of the data files
    files=['TimeBasedFeatures-15s-Layer2.csv',
          'TimeBasedFeatures-30s-Layer2.csv',
          'TimeBasedFeatures-60s-Layer2.csv',
          'TimeBasedFeatures-120s-Layer2.csv']

    # Mean accuracy collected for each file
    list_dnn = []

    for file in files:
        # Load in the data
        X, y = get_Xy(path + file)

        # Size the network from the data instead of hard-coding 23 inputs and
        # 8 outputs. y comes from LabelEncoder, so the codes run 0..k-1 and
        # the class count is max + 1. Passing it explicitly to to_categorical
        # keeps the one-hot width identical across folds, whereas the
        # module-level `num_classes` stays 0 and made Keras infer the width
        # per fold.
        n_features = X.shape[1]
        n_classes = int(y.max()) + 1

        # Accuracy of every fold for this file
        fold_accs = []

        # Stratified cross-validation, shuffled with the notebook-wide seed
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        for train_idxs, test_idxs in skf.split(X, y):
            # Define the training and testing sets
            X_train, X_test = X.iloc[train_idxs], X.iloc[test_idxs]
            y_train, y_test = y[train_idxs], y[test_idxs]

            # One-hot versions of y_train and y_test for the categorical loss
            y_train_dnn = to_categorical(y_train, num_classes=n_classes)
            y_test_dnn = to_categorical(y_test, num_classes=n_classes)

            # Deep Neural Network: a fresh model per fold so no weights carry over.
            # NOTE(review): the hidden layer is given no activation and `act`
            # is applied to the output layer - confirm this is intended.
            dnn = Sequential([
                Dense(64, input_shape=(n_features,)),
                Dense(n_classes, activation=act)
            ])
            dnn.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

            # Train the model; the test fold is also passed as validation data
            dnn.fit(x=X_train, y=y_train_dnn, epochs=epochs, batch_size=batch_size, verbose=0, validation_data=(X_test, y_test_dnn))

            # This returns [loss, accuracy]
            results_dnn = dnn.evaluate(X_test, y_test_dnn, verbose=0)
            fold_accs.append(results_dnn[1])

        # Push the mean accuracy over all folds to the list
        list_dnn.append(sum(fold_accs) / len(fold_accs))

    return list_dnn

In [5]:
# Optimizer names to try
optimizers = [
    'SGD', 'RMSprop', 'Adam', 'Adadelta',
    'Adagrad', 'Adamax', 'Nadam',
]

# Activation names to try
activations = [
    'relu', 'sigmoid', 'softmax',
    'softplus', 'softsign', 'tanh',
    'selu', 'elu', 'exponential',
]

In [6]:
# Disabled experiment: the loop below is wrapped in a triple-quoted string, so
# none of it runs; this cell only evaluates to (and displays) that string.
# The wrapped code would call build_fit_eval once per optimizer/activation pair
# and print the returned accuracies as percentages in a tab-separated table.
"""print('Optimizer\tActivation\tAccuracies (%)\n')
for opt in optimizers:
    print(opt, end='')
    for act in activations:
        print('\t\t{}\t'.format(act), end='')
        accs = build_fit_eval(opt, act)
        for acc in accs:
            print('\t{:.2f}'.format(100*acc), end='')
        print('\n', end='')"""

"print('Optimizer\tActivation\tAccuracies (%)\n')\nfor opt in optimizers:\n    print(opt, end='')\n    for act in activations:\n        print('\t\t{}\t'.format(act), end='')\n        accs = build_fit_eval(opt, act)\n        for acc in accs:\n            print('\t{:.2f}'.format(100*acc), end='')\n        print('\n', end='')"

In [7]:
# Batch sizes to sweep: 10 through 35 in steps of 5
batch_sizes = list(range(10, 40, 5))

In [8]:
# Tabulate per-file accuracies and their mean for every batch size
print('Batch Size\tAccuracies (%)\t\t\tMean Accuracy (%)')
for batch_size in batch_sizes:
    # Emit the row label before training so progress stays visible
    print('{}\t'.format(batch_size), end='')
    accs = build_fit_eval(batch_size=batch_size)

    # One tab-prefixed cell per file, followed by the mean over all files
    columns = ['\t{:.2f}'.format(100*acc) for acc in accs]
    columns.append('\t{:.2f}'.format( 100 * sum(accs) / len(accs) ))
    print(''.join(columns))

Batch Size	Accuracies (%)			Mean Accuracy (%)
10		53.07	50.68	44.14	39.74	46.91
15		57.47	53.79	41.33	41.32	48.48
20		48.81	50.43	43.10	46.12	47.12
25		56.55	52.62	43.70	46.35	49.81
30		54.67	40.89	50.09	41.79	46.86
35		58.51	49.48	45.00	42.85	48.96
