# Required Modules Loading and Configs

In [1]:
import sys
import os
import warnings
from random import seed

# Silence every warning for this session unless the interpreter was started
# with explicit -W options (sys.warnoptions is non-empty in that case).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    # Also exported through the environment; presumably so that worker
    # processes started later begin with the same setting — TODO confirm.
    os.environ["PYTHONWARNINGS"] = "ignore"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.exceptions import ConvergenceWarning
from scipy.stats import pearsonr
from itertools import combinations_with_replacement
from inc import *

%matplotlib inline

# Default size (width, height) for every figure drawn in this notebook.
plt.rcParams.update({'figure.figsize': (12, 4)})

# Dataset Load

In [2]:
# Read the raw table; the first CSV column becomes the index and is parsed as dates.
data = pd.read_csv('dataset.csv', index_col=0)
data.index = pd.to_datetime(data.index, format='%Y-%m-%d')

# Swap the parsed index for an explicit daily range covering the same span.
# The assignment is positional, so it only succeeds when the file already
# holds exactly one row per calendar day.
index = pd.date_range(start=data.index.min(), end=data.index.max(), freq='D')
data = data.set_index(index)

# Strip commas from the price columns before casting them to float.
price_columns = ['Close', 'Open', 'High', 'Low']
data[price_columns] = data[price_columns].replace(',', '', regex=True).astype(float)

In [3]:
# Summarise the loaded frame: index range, column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1768 entries, 2013-03-01 to 2018-01-01
Freq: D
Data columns (total 18 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Close                               1768 non-null   float64
 1   Open                                1768 non-null   float64
 2   High                                1768 non-null   float64
 3   Low                                 1768 non-null   float64
 4   Bitcoin Total Transaction Fees USD  1768 non-null   float64
 5   Bitcoin USD Exchange Trade Volume   1768 non-null   float64
 6   Bitcoin Hash Rate                   1768 non-null   float64
 7   Bitcoin Cost Per Transaction        1768 non-null   float64
 8   Bitcoin Number of Transactions      1768 non-null   float64
 9   Crude Oil                           1768 non-null   float64
 10  S&P500 Future                       1768 non-null   float64
 11  Gold             

# Feature Engineering

In [4]:
# Binary label: 1 when Close did not fall versus the previous row, else 0.
# The first row has no predecessor (NaN difference) and therefore gets 0.
close_change = data['Close'].diff()
data['Direction'] = close_change.ge(0).astype(int)

# Independent copies of the two date intervals used in the experiments.
data1 = data.loc['2013-08-19':'2016-07-19'].copy()
data2 = data.loc['2013-04-01':'2017-04-01'].copy()

In [5]:
# Columns that receive lagged copies: prices, the label and the
# Bitcoin-specific series (same order as before).
_lag_price_columns = ('Close', 'Open', 'High', 'Low')
_lag_bitcoin_columns = (
    'Bitcoin Total Transaction Fees USD',
    'Bitcoin USD Exchange Trade Volume',
    'Bitcoin Hash Rate',
    'Bitcoin Cost Per Transaction',
    'Bitcoin Number of Transactions',
)
lag_features = _lag_price_columns + ('Direction',) + _lag_bitcoin_columns

# calc_lags is a project helper (not defined in this notebook).
data1_with_lags, data2_with_lags = (
    calc_lags(frame, lag_features) for frame in (data1, data2)
)

In [6]:
# Columns that receive a moving-average feature: prices, Bitcoin-specific
# series and the external market series (same order as before).
_wma_price_columns = ('Close', 'Open', 'High', 'Low')
_wma_bitcoin_columns = (
    'Bitcoin Total Transaction Fees USD',
    'Bitcoin USD Exchange Trade Volume',
    'Bitcoin Hash Rate',
    'Bitcoin Cost Per Transaction',
    'Bitcoin Number of Transactions',
)
_wma_market_columns = (
    'Crude Oil',
    'S&P500 Future',
    'Gold',
    'Silver',
    'Coffee',
    'Heating Oil',
    'Natural Gas',
    'NASDAQ Future',
    'DAX Index',
)
wma_features = _wma_price_columns + _wma_bitcoin_columns + _wma_market_columns

# calc_wma is a project helper; the third argument (30) is presumably the
# window length — TODO confirm against its definition.
data1_with_wma, data2_with_wma = (
    calc_wma(frame, wma_features, 30)
    for frame in (data1_with_lags, data2_with_lags)
)

In [7]:
# Raw source columns are removed once their derived features exist;
# only the raw 'Open' and the 'Direction' label are kept.
to_drop = list(set(lag_features + wma_features) - {'Open', 'Direction'})

# Drop incomplete rows first, then the raw columns.
data1 = data1_with_wma.dropna().drop(columns=to_drop)
data2 = data2_with_wma.dropna().drop(columns=to_drop)

# Data Partitioning

In [8]:
target = 'Direction'

# Feature matrix = every column except the target; label vector = the target column.
X_int1, y_int1 = data1.drop(columns=[target]).to_numpy(), data1[target].to_numpy()
X_int2, y_int2 = data2.drop(columns=[target]).to_numpy(), data2[target].to_numpy()

In [9]:
# Report the dimensions of both intervals' feature matrices and label vectors.
for name, array in (
    ('X_int1', X_int1),
    ('y_int1', y_int1),
    ('X_int2', X_int2),
    ('y_int2', y_int2),
):
    print(f'{name}: {array.shape}')

X_int1: (1036, 89)
y_int1: (1036,)
X_int2: (1432, 89)
y_int2: (1432,)


# Experiments Interval 1

In [10]:
# train_test_split is not imported explicitly in this notebook; presumably it
# is provided by `from inc import *` — TODO confirm which implementation is used.
X_train, X_test, y_train, y_test = train_test_split(X_int1, y_int1)

for name, part in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'{name}: {part.shape}')

X_train: (829, 89)
y_train: (829,)
X_test: (207, 89)
y_test: (207,)


In [11]:
# Seed both the standard-library and the NumPy global generators.
SEED = 100
np.random.seed(SEED)
seed(SEED)


def CCSA_filter(X_train, y_train, max_iter=100, max_saturation=0):
    """Select feature columns with CCSA using a relevance/redundancy score.

    A dependence score ``mutual information + |Pearson correlation|`` is
    pre-computed for every unordered pair of columns of ``X_train`` and for
    every column against ``y_train``. A candidate solution selects the
    columns whose component is ``>= 0``; its fitness is the summed
    feature/target score minus the summed pairwise feature/feature score,
    negated before being handed to ``CCSA``.

    Parameters
    ----------
    X_train : 2-D array indexed as ``X_train[:, i]`` (one feature per column).
    y_train : 1-D array of targets, aligned with the rows of ``X_train``.
    max_iter, max_saturation : forwarded unchanged to ``CCSA``.

    Returns
    -------
    (selected, history)
        ``selected`` is the list of column indices whose component in the
        solution returned by ``CCSA`` is ``>= 0``; ``history`` is the second
        value returned by ``CCSA`` (presumably the fitness history — callers
        in this notebook bind it to ``f_hist``; TODO confirm).

    Notes
    -----
    ``mutual_info_regression``, ``Parallel``, ``delayed``, ``n_jobs`` and
    ``CCSA`` are expected to be in scope already (not defined in this cell).
    """
    def f(i, j):
        # Dependence between two 1-D vectors: MI estimate plus |Pearson r|.
        mi = mutual_info_regression(i.reshape(-1, 1), j)[0]
        corr, _ = pearsonr(i, j)

        return mi + abs(corr)

    ncols = X_train.shape[1]
    f_values = {}

    # Feature/feature scores: computed once per unordered pair, stored under
    # both orderings so lookups never need to sort the key.
    pairs = [(i, j) for i in range(ncols - 1) for j in range(i + 1, ncols)]
    pair_scores = Parallel(n_jobs=n_jobs)(
        delayed(f)(X_train[:, i], X_train[:, j]) for i, j in pairs
    )
    for (i, j), score in zip(pairs, pair_scores):
        f_values[(i, j)] = score
        f_values[(j, i)] = score

    # Feature/target scores.
    target_scores = Parallel(n_jobs=n_jobs)(
        delayed(f)(X_train[:, i], y_train) for i in range(ncols)
    )
    for i, score in enumerate(target_scores):
        f_values[("target", i)] = score

    def ccsa_filter_fitness(solution):
        selected = [i for i in range(len(solution)) if solution[i] >= 0]

        # Explicit accumulation keeps the floating-point summation order fixed.
        f_features_target = 0
        for idx in selected:
            f_features_target += f_values[("target", idx)]

        f_features = 0
        for a in range(len(selected) - 1):
            for b in range(a + 1, len(selected)):
                f_features += f_values[(selected[a], selected[b])]

        return -1 * (f_features_target - f_features)

    # The literal 30 is passed through as in the original call; its meaning is
    # defined by CCSA (presumably the population size — TODO confirm).
    sol, history = CCSA(X_train.shape[1], 30, ccsa_filter_fitness, max_iter, max_saturation)
    return [i for i in range(len(sol)) if sol[i] >= 0], history

In [12]:
# Run the filter on the interval-1 training split, then show the chosen column names.
filter_selected, f_hist = CCSA_filter(X_train, y_train, max_iter=3000, max_saturation=50)
data1.columns.drop(target)[filter_selected]

Index(['Close_L1', 'High_L1', 'Low_L1',
       'Bitcoin Total Transaction Fees USD_L1',
       'Bitcoin USD Exchange Trade Volume_L1', 'Close_L2', 'Low_L2',
       'Bitcoin USD Exchange Trade Volume_L2', 'Bitcoin Hash Rate_L2',
       'Bitcoin Cost Per Transaction_L2', 'Bitcoin Number of Transactions_L2',
       'Open_L3', 'High_L3', 'Low_L3', 'Direction_L3',
       'Bitcoin USD Exchange Trade Volume_L3', 'Bitcoin Hash Rate_L3',
       'Bitcoin Number of Transactions_L3', 'Open_L4', 'High_L4', 'Low_L4',
       'Bitcoin Total Transaction Fees USD_L4', 'Bitcoin Hash Rate_L4',
       'Bitcoin Number of Transactions_L4', 'Open_L5', 'High_L5', 'Low_L5',
       'Bitcoin Total Transaction Fees USD_L5',
       'Bitcoin Cost Per Transaction_L5', 'High_L6', 'Low_L6', 'Direction_L6',
       'Bitcoin Number of Transactions_L6', 'Close_L7', 'Open_L7', 'Low_L7',
       'Bitcoin USD Exchange Trade Volume_L7', 'Bitcoin Hash Rate_L7',
       'Bitcoin Number of Transactions_L7', 'Close_WMA30',
       'B

### SVM

In [13]:
results = []

pipe = make_pipeline(
    StandardScaler(),
    SVC(kernel='poly', max_iter=5e5)
)

# Same protocol on the full feature set and on the CCSA-selected columns.
feature_sets = (
    ('All Features', X_train, X_test),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
)

for label, X_tr, X_te in feature_sets:
    params, _ = find_best_params(
        pipe,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'svc__degree': np.arange(1, 5),
            # First candidate is 1 / (number of columns being fitted).
            'svc__gamma': [1 / X_tr.shape[1]] + list(np.arange(.1, 1.1, .1)),
            'svc__C': [.5, 1, 5, 10]
        }
    )
    pipe = pipe.set_params(**params)

    tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 1)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 1', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'svm_int1.bin')

### RandomForestClassifier

In [14]:
results = []

model = RandomForestClassifier()

# (label, train matrix, test matrix, fit once before run_tests?)
# The extra fit is issued for the full feature set only, as before.
runs = (
    ('All Features', X_train, X_test, True),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected], False),
)

for label, X_tr, X_te, prefit in runs:
    params, _ = find_best_params(
        model,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'max_depth': np.arange(1, 20),
            'n_estimators': list(range(20, 101, 10))
        }
    )
    model.set_params(**params)

    if prefit:
        model.fit(X_tr, y_train)

    tests = run_tests(model, X_tr, y_train, X_te, y_test, 50)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 1', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'rnd_forest_int1.bin')

### ExtraTreesClassifier

In [15]:
results = []

model = ExtraTreesClassifier()

# (label, train matrix, test matrix, fit once before run_tests?)
# The extra fit is issued for the full feature set only, as before.
runs = (
    ('All Features', X_train, X_test, True),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected], False),
)

for label, X_tr, X_te, prefit in runs:
    params, _ = find_best_params(
        model,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'max_depth': np.arange(1, 20),
            'n_estimators': list(range(20, 101, 10))
        }
    )
    model.set_params(**params)

    if prefit:
        model.fit(X_tr, y_train)

    tests = run_tests(model, X_tr, y_train, X_te, y_test, 50)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 1', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'extrees_int1.bin')

### MLP

In [16]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(1, 3)
n_neurons = np.arange(5, 40, 5)
epochs = [20, 100, 200, 300, 400, 500]
combinations = [
    sizes
    for layers in n_layers
    for sizes in combinations_with_replacement(n_neurons, int(layers))
]

pipe = make_pipeline(
    StandardScaler(),
    MLPClassifier(solver='adam', activation='tanh', learning_rate='adaptive')
)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning)

    # Same protocol on the full feature set and on the CCSA-selected columns.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'mlpclassifier__hidden_layer_sizes': combinations,
                'mlpclassifier__max_iter': epochs
            }
        )
        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        for position, value in enumerate(('Interval 1', params, label)):
            tests.insert(position, value)
        results.append(tests)

    dump_results(results, 'mlp_int1.bin')

### Ensemble 1

In [17]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(1, 3)
n_neurons = np.arange(5, 40, 5)
epochs = [20, 100, 200, 300, 400, 500]
combinations = [
    sizes
    for layers in n_layers
    for sizes in combinations_with_replacement(n_neurons, int(layers))
]

pipe = make_pipeline(
    StandardScaler(),
    Ensemble1()
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Same protocol on the full feature set and on the CCSA-selected columns.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'ensemble1__mlp_hidden_layers_sizes': combinations,
                'ensemble1__epochs': epochs
            }
        )
        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        for position, value in enumerate(('Interval 1', params, label)):
            tests.insert(position, value)
        results.append(tests)

    dump_results(results, 'ensemble1_int1.bin')

### Ensemble 2

In [18]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(1, 3)
n_neurons = np.arange(5, 40, 5)
epochs = [20, 100, 200, 300, 400, 500]
combinations = [
    sizes
    for layers in n_layers
    for sizes in combinations_with_replacement(n_neurons, int(layers))
]

pipe = make_pipeline(
    StandardScaler(),
    Ensemble2()
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Same protocol on the full feature set and on the CCSA-selected columns.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'ensemble2__mlp_hidden_layers_sizes': combinations,
                'ensemble2__epochs': epochs
            }
        )
        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        for position, value in enumerate(('Interval 1', params, label)):
            tests.insert(position, value)
        results.append(tests)

    dump_results(results, 'ensemble2_int1.bin')

# Experiments Interval 2

In [19]:
# Rebind the train/test names to the interval-2 split; every cell below uses them.
X_train, X_test, y_train, y_test = train_test_split(X_int2, y_int2)

for name, part in (
    ('X_train', X_train),
    ('y_train', y_train),
    ('X_test', X_test),
    ('y_test', y_test),
):
    print(f'{name}: {part.shape}')

X_train: (1146, 89)
y_train: (1146,)
X_test: (286, 89)
y_test: (286,)


In [20]:
# Run the filter on the interval-2 training split, then show the chosen column names.
filter_selected, f_hist = CCSA_filter(X_train, y_train, max_iter=3000, max_saturation=50)
data2.columns.drop(target)[filter_selected]

Index(['Open', 'Close_L1', 'Low_L1', 'Bitcoin USD Exchange Trade Volume_L1',
       'High_L2', 'Direction_L2', 'Bitcoin USD Exchange Trade Volume_L2',
       'Bitcoin Hash Rate_L2', 'Close_L3', 'Open_L3', 'High_L3',
       'Bitcoin Total Transaction Fees USD_L3',
       'Bitcoin Cost Per Transaction_L3', 'High_L4',
       'Bitcoin Total Transaction Fees USD_L4',
       'Bitcoin Cost Per Transaction_L4', 'Bitcoin Number of Transactions_L4',
       'Open_L5', 'High_L5', 'Direction_L5', 'Bitcoin Hash Rate_L5',
       'Close_L6', 'Direction_L6', 'Bitcoin USD Exchange Trade Volume_L6',
       'Bitcoin Hash Rate_L6', 'Bitcoin Cost Per Transaction_L6', 'Close_L7',
       'High_L7', 'Low_L7', 'Direction_L7',
       'Bitcoin Total Transaction Fees USD_L7',
       'Bitcoin USD Exchange Trade Volume_L7',
       'Bitcoin Number of Transactions_L7', 'Open_WMA30',
       'Bitcoin Total Transaction Fees USD_WMA30',
       'Bitcoin USD Exchange Trade Volume_WMA30',
       'Bitcoin Cost Per Transaction

### SVM

In [21]:
results = []

pipe = make_pipeline(
    StandardScaler(),
    SVC(kernel='poly', max_iter=5e5)
)

# Same protocol on the full feature set and on the CCSA-selected columns.
feature_sets = (
    ('All Features', X_train, X_test),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
)

for label, X_tr, X_te in feature_sets:
    # NOTE(review): the test split is handed to the hyper-parameter search; if
    # find_best_params selects on it, the scores reported by run_tests on the
    # same split are optimistically biased — confirm against its definition.
    params, _ = find_best_params(
        pipe,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'svc__degree': np.arange(1, 5),
            # First candidate is 1 / n_features of the matrix actually being
            # fitted, so the filtered run uses the number of selected columns
            # (matching the interval-1 SVM cell) rather than the full count.
            'svc__gamma': [1 / X_tr.shape[1]] + list(np.arange(.1, 1.1, .1)),
            'svc__C': [.5, 1, 5, 10]
        }
    )
    pipe = pipe.set_params(**params)

    tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 1)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 2', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'svm_int2.bin')

### RandomForestClassifier

In [22]:
results = []

model = RandomForestClassifier()

# (label, train matrix, test matrix, fit once before run_tests?)
# The extra fit is issued for the full feature set only, as before.
runs = (
    ('All Features', X_train, X_test, True),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected], False),
)

for label, X_tr, X_te, prefit in runs:
    params, _ = find_best_params(
        model,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'max_depth': np.arange(1, 20),
            'n_estimators': list(range(20, 101, 10))
        }
    )
    model.set_params(**params)

    if prefit:
        model.fit(X_tr, y_train)

    tests = run_tests(model, X_tr, y_train, X_te, y_test, 50)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 2', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'rnd_forest_int2.bin')

### ExtraTreesClassifier

In [23]:
results = []

model = ExtraTreesClassifier()

# (label, train matrix, test matrix, fit once before run_tests?)
# The extra fit is issued for the full feature set only, as before.
runs = (
    ('All Features', X_train, X_test, True),
    ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected], False),
)

for label, X_tr, X_te, prefit in runs:
    params, _ = find_best_params(
        model,
        X_tr,
        y_train,
        X_te,
        y_test,
        {
            'max_depth': np.arange(1, 20),
            'n_estimators': list(range(20, 101, 10))
        }
    )
    model.set_params(**params)

    if prefit:
        model.fit(X_tr, y_train)

    tests = run_tests(model, X_tr, y_train, X_te, y_test, 50)
    # Prefix the row with interval, tuned parameters and feature-set label.
    for position, value in enumerate(('Interval 2', params, label)):
        tests.insert(position, value)
    results.append(tests)

dump_results(results, 'extrees_int2.bin')

### MLP

In [24]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(2) + 1
n_neurons = np.arange(0, 35, 5) + 5
epochs = [20, 100, 200, 300, 400, 500]
combinations = []

for layers in n_layers:
    combinations.extend(combinations_with_replacement(n_neurons, int(layers)))

pipe = make_pipeline(
    StandardScaler(),
    MLPClassifier(solver='adam', activation='tanh', learning_rate='adaptive')
)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=ConvergenceWarning)

    # Each feature set is tuned AND evaluated on the same columns. The
    # 'CCSA Filter' search therefore runs on the selected columns, so the
    # parameters applied to the filtered evaluation were tuned for that input.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'mlpclassifier__hidden_layer_sizes': combinations,
                'mlpclassifier__max_iter': epochs
            }
        )

        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        tests.insert(0, 'Interval 2')
        tests.insert(1, params)
        tests.insert(2, label)
        results.append(tests)

    dump_results(results, 'mlp_int2.bin')

### Ensemble 1

In [25]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(1, 3)
n_neurons = np.arange(5, 40, 5)
epochs = [20, 100, 200, 300, 400, 500]
combinations = [
    sizes
    for layers in n_layers
    for sizes in combinations_with_replacement(n_neurons, int(layers))
]

pipe = make_pipeline(
    StandardScaler(),
    Ensemble1()
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Same protocol on the full feature set and on the CCSA-selected columns.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'ensemble1__mlp_hidden_layers_sizes': combinations,
                'ensemble1__epochs': epochs
            }
        )
        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        for position, value in enumerate(('Interval 2', params, label)):
            tests.insert(position, value)
        results.append(tests)

    dump_results(results, 'ensemble1_int2.bin')

### Ensemble 2

In [26]:
results = []

# Candidate architectures: 1 or 2 hidden layers, 5..35 neurons per layer (step 5).
n_layers = np.arange(1, 3)
n_neurons = np.arange(5, 40, 5)
epochs = [20, 100, 200, 300, 400, 500]
combinations = [
    sizes
    for layers in n_layers
    for sizes in combinations_with_replacement(n_neurons, int(layers))
]

pipe = make_pipeline(
    StandardScaler(),
    Ensemble2()
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Same protocol on the full feature set and on the CCSA-selected columns.
    for label, X_tr, X_te in (
        ('All Features', X_train, X_test),
        ('CCSA Filter', X_train[:, filter_selected], X_test[:, filter_selected]),
    ):
        params, _ = find_best_params(
            pipe,
            X_tr,
            y_train,
            X_te,
            y_test,
            {
                'ensemble2__mlp_hidden_layers_sizes': combinations,
                'ensemble2__epochs': epochs
            }
        )
        pipe = pipe.set_params(**params)

        tests = run_tests(pipe, X_tr, y_train, X_te, y_test, 50)
        # Prefix the row with interval, tuned parameters and feature-set label.
        for position, value in enumerate(('Interval 2', params, label)):
            tests.insert(position, value)
        results.append(tests)

    dump_results(results, 'ensemble2_int2.bin')

# Results Analysis

In [27]:
# Column layout of one stored result row: the three values prefixed by the
# experiment cells, followed by what run_tests returned.
result_columns = [
    'Interval',
    'Params',
    'Feature Selection',
    'Model',
    'auc_mu',
    'auc_std',
    'acc_mu',
    'acc_std',
]

loaded_results = load_results()
results_table = pd.DataFrame(loaded_results, columns=result_columns)

In [28]:
# Persist the table as loaded, to a path relative to the working directory.
# NOTE(review): this runs before the 'Model' column is passed through
# extract_model_name further down — confirm the spreadsheet is meant to hold
# the un-normalised values.
results_table.to_excel('experiments/results_table.xlsx')

In [29]:
def extract_model_name(x):
    """Return a printable model name for one entry of the 'Model' column.

    * ``str``  -> returned unchanged.
    * pipeline -> class name of its final step. A pipeline is recognised by
      its ``steps`` attribute (a sequence of ``(name, estimator)`` pairs), so
      no explicit ``Pipeline`` import is required.
    * anything else -> the object's own class name.

    The result is always a string, which keeps the column sortable and
    groupable regardless of what kind of object was stored.
    """
    if isinstance(x, str):
        return x

    steps = getattr(x, 'steps', None)
    if steps:
        # Last step is the estimator; report its class name, not the object.
        final_estimator = steps[-1][1]
        return type(final_estimator).__name__

    return type(x).__name__
    
# Normalise every entry of the 'Model' column through extract_model_name.
results_table['Model'] = results_table['Model'].map(extract_model_name)

In [30]:
# Display the results grouped by interval, then model, then feature set.
results_table.sort_values(by=['Interval', 'Model', 'Feature Selection'])

Unnamed: 0,Interval,Params,Feature Selection,Model,auc_mu,auc_std,acc_mu,acc_std
14,Interval 1,"{'ensemble1__mlp_hidden_layers_sizes': (10, 10...",All Features,Ensemble1,0.51,0.04,50.7,6.05
15,Interval 1,"{'ensemble1__mlp_hidden_layers_sizes': (20, 30...",CCSA Filter,Ensemble1,0.5,0.03,48.24,6.2
4,Interval 1,"{'ensemble2__mlp_hidden_layers_sizes': (10, 15...",All Features,Ensemble2,0.51,0.03,50.63,6.2
5,Interval 1,"{'ensemble2__mlp_hidden_layers_sizes': (20, 35...",CCSA Filter,Ensemble2,0.5,0.03,48.8,5.51
10,Interval 1,"{'max_depth': 14, 'n_estimators': 60}",All Features,ExtraTreesClassifier,0.5,0.02,54.22,3.16
11,Interval 1,"{'max_depth': 5, 'n_estimators': 90}",CCSA Filter,ExtraTreesClassifier,0.51,0.01,56.76,1.82
22,Interval 1,"{'mlpclassifier__hidden_layer_sizes': (5, 5), ...",All Features,MLPClassifier,0.49,0.03,51.91,5.88
23,Interval 1,"{'mlpclassifier__hidden_layer_sizes': (35, 35)...",CCSA Filter,MLPClassifier,0.5,0.03,51.28,6.74
16,Interval 1,"{'max_depth': 2, 'n_estimators': 100}",All Features,RandomForestClassifier,0.51,0.01,57.12,1.86
17,Interval 1,"{'max_depth': 2, 'n_estimators': 90}",CCSA Filter,RandomForestClassifier,0.52,0.01,57.47,1.87
