# Cluster-based Input Weight Initialization for Echo State Networks

This notebook aims to be the supplemental material for the corresponding journal article.

We aim to pre-train the input weight matrix of ESNs using the K-Means algorithm since passing features to the non-linear reservoir of ESNs is closely related to compute the dot product between two vectors.

We use various datasets from https://github.com/FilippoMB/Time-series-classification-and-clustering-with-Reservoir-Computing

In [1]:
import os, sys
cwd = os.getcwd()
module_path = os.path.dirname(cwd)  # target working directory

sys.path = [item for item in sys.path if item != module_path]  # remove module_path from sys.path
sys.path.append(module_path)  # add module_path to sys.path

import time
import glob
import os
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, ParameterGrid, cross_val_score
from sklearn.utils import shuffle
from sklearn.utils.fixes import loguniform
from scipy.stats import uniform
from sklearn.cluster import MiniBatchKMeans
from joblib import dump, load
from pyrcn.echo_state_network import SeqToLabelESNClassifier, SeqToSeqESNClassifier
from pyrcn.base import PredefinedWeightsInputToNode, NodeToNode
from pyrcn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error
from pyrcn.model_selection import SequentialSearchCV
import matplotlib
import seaborn as sns
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
%matplotlib inline
#Options
plt.rc('image', cmap='RdBu')
plt.rc('font', family='serif', serif='Times')
plt.rc('text', usetex=True)
plt.rc('xtick', labelsize=8)
plt.rc('ytick', labelsize=8)
plt.rc('axes', labelsize=8)

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png', 'pdf')
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import ticker

In [2]:
train = True

## Arabian spoken digit dataset 

(https://archive.ics.uci.edu/ml/datasets/Spoken+Arabic+Digit)

- Contains 6600 training utterances 
- Contains 2200 test utterances

Dataset from 8800(10 digits x 10 repetitions x 88 speakers) time series of 13 Frequency Cepstral
Coefficients (MFCCs) had taken from 44 males and 44 females Arabic native speakers
between the ages 18 and 40 to represent ten spoken Arabic digit.

Each line on the data base represents 13 MFCCs coefficients in the increasing order separated by
spaces. This corresponds to one analysis frame. The 13 Mel Frequency Cepstral Coefficients
(MFCCs) are computed with the following
conditions;
Sampling rate: 11025 Hz, 16 bits
Window applied: hamming
Filter pre-emphasized: $1-0.97^{-1}$

In [None]:
arab = np.load(r"E:\multivariate_time_series_dataset\numpy\ARAB.npz")
X_train = np.empty(shape=(6600, ), dtype=object)
y_train = np.empty(shape=(6600, ), dtype=object)
X_test = np.empty(shape=(2200, ), dtype=object)
y_test = np.empty(shape=(2200, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(arab['X'], arab['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(arab['Xte'], arab['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(arab['X'], arab['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(arab['Xte'], arab['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)
try:
    sequential_search = load("../sequential_search_arab.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_arab.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_arab_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_arab_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  Australian Sign Language signs (High Quality) Data Set 

(https://archive.ics.uci.edu/ml/datasets/Australian+Sign+Language+signs+(High+Quality))

- Contains 1140 training utterances 
- Contains 1425 test utterances

Data was captured using a setup that consisted of:

- Two Fifth Dimension Technologies (5DT) gloves, one right and one left
- Two Ascension Flock-of-Birds magnetic position trackers, one attached to each hand
- A four-port serial card to cope with four data sources
- A PC (128MB RAM, Intel Pentium II 266MHz) was used

In terms of the quality of the data, the Flock system was far superior to the Nintendo system also available from the same donor. Firstly, this was a two-hand system. Secondly, each position tracker provided 6 degrees of freedom - i.e. roll, pitch and yaw as well as x, y and z. The gloves also provided a full five fingers of data. But the big improvements were in resolution - both accuracy and temporal. Position and orientation were defined to 14-bit accuracy, giving position information with a typical positional error less than one centimetre and angle error less than one half of a degree. Finger bend was measured with 8 bits per finger, of which probably 6 bits were usable once the glove was calibrated. The refresh rate of the complete system was close to 100 frames per second; and all signals had significantly less noise than the Nintendo data.

Samples from a single signer (a native Auslan signer) were collected over a period of nine weeks. In total, 27 samples per sign, and a total of 2565 signs were collected. The average length of each sign was approximately 57 frames.

The data was collected from a volunteer native Auslan signer 

In [3]:
aus = np.load(r"E:\multivariate_time_series_dataset\numpy\AUS.npz")
X_train = np.empty(shape=(1140, ), dtype=object)
y_train = np.empty(shape=(1140, ), dtype=object)
X_test = np.empty(shape=(1425, ), dtype=object)
y_test = np.empty(shape=(1425, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(aus['X'], aus['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(aus['Xte'], aus['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(aus['X'], aus['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(aus['Xte'], aus['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_aus.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_aus.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [6]:
kmeans = MiniBatchKMeans(n_clusters=400, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 1600 - 400), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 1600,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_aus_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_aus_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 242.435190
Init 2/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 2/200: 259.818325
Init 3/200 with method: k-means++
Inertia for init 3/200: 297.517062
Init 4/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 4/200: 305.223192
Init 5/200 with method: k-means++
Inertia for init 5/200: 291.178815
Init 6/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 6/200: 322.542867
Init 7/200 with method: k-means++
Inertia for init 7/200: 294.845144
Init 8/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 8/200: 271.255444
Init 9/200 with method: k-means++
Inertia for init 9/200: 222.097487
Init 10/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 10/200: 269.383926
Init 11/200 with method: k-means++
Inertia for init 11/200: 243.352927
Init 12/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 12/200: 283.035243
Init 13/200 with method: k-means++
Inertia for init 13/200: 251.477757
Init 14/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 14/200: 284.439194
Init 15/200 with method: k-means++
Inertia for init 15/200: 245.329030
Init 16/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 16/200: 233.249598
Init 17/200 with method: k-means++
Inertia for init 17/200: 308.616616
Init 18/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 18/200: 259.056086
Init 19/200 with method: k-means++
Inertia for init 19/200: 271.643104
Init 20/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 20/200: 303.072878
Init 21/200 with method: k-means++
Inertia for init 21/200: 203.512445
Init 22/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 22/200: 348.986011
Init 23/200 with method: k-means++
Inertia for init 23/200: 267.684442
Init 24/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 24/200: 286.007953
Init 25/200 with method: k-means++
Inertia for init 25/200: 309.595053
Init 26/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 26/200: 254.283451
Init 27/200 with method: k-means++
Inertia for init 27/200: 285.970754
Init 28/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 28/200: 296.847772
Init 29/200 with method: k-means++
Inertia for init 29/200: 285.544130
Init 30/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 30/200: 300.447985
Init 31/200 with method: k-means++
Inertia for init 31/200: 244.804671
Init 32/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 32/200: 319.701353
Init 33/200 with method: k-means++
Inertia for init 33/200: 284.495129
Init 34/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 34/200: 262.967032
Init 35/200 with method: k-means++
Inertia for init 35/200: 246.267423
Init 36/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 36/200: 276.041533
Init 37/200 with method: k-means++
Inertia for init 37/200: 276.258259
Init 38/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 38/200: 198.543375
Init 39/200 with method: k-means++
Inertia for init 39/200: 219.080801
Init 40/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 40/200: 287.958102
Init 41/200 with method: k-means++
Inertia for init 41/200: 280.872348
Init 42/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 42/200: 260.158719
Init 43/200 with method: k-means++
Inertia for init 43/200: 271.602072
Init 44/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 44/200: 298.690006
Init 45/200 with method: k-means++
Inertia for init 45/200: 208.228128
Init 46/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 46/200: 229.085996
Init 47/200 with method: k-means++
Inertia for init 47/200: 185.721661
Init 48/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 48/200: 312.028273
Init 49/200 with method: k-means++
Inertia for init 49/200: 327.835613
Init 50/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 50/200: 234.301526
Init 51/200 with method: k-means++
Inertia for init 51/200: 239.748208
Init 52/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 52/200: 311.131649
Init 53/200 with method: k-means++
Inertia for init 53/200: 233.083764
Init 54/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 54/200: 272.802239
Init 55/200 with method: k-means++
Inertia for init 55/200: 236.417003
Init 56/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 56/200: 296.634252
Init 57/200 with method: k-means++
Inertia for init 57/200: 316.189658
Init 58/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 58/200: 288.988525
Init 59/200 with method: k-means++
Inertia for init 59/200: 220.241735
Init 60/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 60/200: 247.608809
Init 61/200 with method: k-means++
Inertia for init 61/200: 251.544889
Init 62/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 62/200: 357.237760
Init 63/200 with method: k-means++
Inertia for init 63/200: 214.767420
Init 64/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 64/200: 225.991815
Init 65/200 with method: k-means++
Inertia for init 65/200: 258.841846
Init 66/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 66/200: 354.063527
Init 67/200 with method: k-means++
Inertia for init 67/200: 194.529620
Init 68/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 68/200: 263.044858
Init 69/200 with method: k-means++
Inertia for init 69/200: 291.526451
Init 70/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 70/200: 344.874367
Init 71/200 with method: k-means++
Inertia for init 71/200: 286.914959
Init 72/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 72/200: 242.448022
Init 73/200 with method: k-means++
Inertia for init 73/200: 262.379769
Init 74/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 74/200: 231.347799
Init 75/200 with method: k-means++
Inertia for init 75/200: 253.125453
Init 76/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 76/200: 304.457353
Init 77/200 with method: k-means++
Inertia for init 77/200: 270.891070
Init 78/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 78/200: 296.968627
Init 79/200 with method: k-means++
Inertia for init 79/200: 285.572559
Init 80/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 80/200: 284.376900
Init 81/200 with method: k-means++
Inertia for init 81/200: 263.048130
Init 82/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 82/200: 242.293987
Init 83/200 with method: k-means++
Inertia for init 83/200: 303.491159
Init 84/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 84/200: 274.382361
Init 85/200 with method: k-means++
Inertia for init 85/200: 317.240692
Init 86/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 86/200: 381.052990
Init 87/200 with method: k-means++
Inertia for init 87/200: 229.930479
Init 88/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 88/200: 244.196433
Init 89/200 with method: k-means++
Inertia for init 89/200: 261.317997
Init 90/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 90/200: 287.468619
Init 91/200 with method: k-means++
Inertia for init 91/200: 258.626164
Init 92/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 92/200: 199.136406
Init 93/200 with method: k-means++
Inertia for init 93/200: 318.071227
Init 94/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 94/200: 284.063920
Init 95/200 with method: k-means++
Inertia for init 95/200: 256.801065
Init 96/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 96/200: 276.671765
Init 97/200 with method: k-means++
Inertia for init 97/200: 258.728443
Init 98/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 98/200: 243.222844
Init 99/200 with method: k-means++
Inertia for init 99/200: 218.384880
Init 100/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 100/200: 288.707674
Init 101/200 with method: k-means++
Inertia for init 101/200: 275.390226
Init 102/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 102/200: 240.530143
Init 103/200 with method: k-means++
Inertia for init 103/200: 241.951923
Init 104/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 104/200: 305.322672
Init 105/200 with method: k-means++
Inertia for init 105/200: 230.367209
Init 106/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 106/200: 252.251493
Init 107/200 with method: k-means++
Inertia for init 107/200: 293.712355
Init 108/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 108/200: 282.064019
Init 109/200 with method: k-means++
Inertia for init 109/200: 288.886049
Init 110/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 110/200: 229.836455
Init 111/200 with method: k-means++
Inertia for init 111/200: 252.568469
Init 112/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 112/200: 317.263847
Init 113/200 with method: k-means++
Inertia for init 113/200: 329.097906
Init 114/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 114/200: 255.541307
Init 115/200 with method: k-means++
Inertia for init 115/200: 265.953566
Init 116/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 116/200: 240.988180
Init 117/200 with method: k-means++
Inertia for init 117/200: 282.376651
Init 118/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 118/200: 226.999240
Init 119/200 with method: k-means++
Inertia for init 119/200: 363.545655
Init 120/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 120/200: 301.235127
Init 121/200 with method: k-means++
Inertia for init 121/200: 291.558832
Init 122/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 122/200: 222.067696
Init 123/200 with method: k-means++
Inertia for init 123/200: 278.698977
Init 124/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 124/200: 371.495375
Init 125/200 with method: k-means++
Inertia for init 125/200: 347.824856
Init 126/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 126/200: 269.707314
Init 127/200 with method: k-means++
Inertia for init 127/200: 290.590207
Init 128/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 128/200: 336.101139
Init 129/200 with method: k-means++
Inertia for init 129/200: 260.032335
Init 130/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 130/200: 330.560865
Init 131/200 with method: k-means++
Inertia for init 131/200: 289.712728
Init 132/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 132/200: 265.830196
Init 133/200 with method: k-means++
Inertia for init 133/200: 185.796626
Init 134/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 134/200: 242.023548
Init 135/200 with method: k-means++
Inertia for init 135/200: 260.548330
Init 136/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 136/200: 278.179016
Init 137/200 with method: k-means++
Inertia for init 137/200: 267.736314
Init 138/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 138/200: 251.236707
Init 139/200 with method: k-means++
Inertia for init 139/200: 227.573493
Init 140/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 140/200: 210.471002
Init 141/200 with method: k-means++
Inertia for init 141/200: 272.082367
Init 142/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 142/200: 249.199042
Init 143/200 with method: k-means++
Inertia for init 143/200: 318.622442
Init 144/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 144/200: 207.535738
Init 145/200 with method: k-means++
Inertia for init 145/200: 319.260241
Init 146/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 146/200: 215.887315
Init 147/200 with method: k-means++
Inertia for init 147/200: 291.591381
Init 148/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 148/200: 268.081193
Init 149/200 with method: k-means++
Inertia for init 149/200: 297.108702
Init 150/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 150/200: 290.039841
Init 151/200 with method: k-means++
Inertia for init 151/200: 272.452453
Init 152/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 152/200: 310.764787
Init 153/200 with method: k-means++
Inertia for init 153/200: 234.949548
Init 154/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 154/200: 226.586275
Init 155/200 with method: k-means++
Inertia for init 155/200: 220.291840
Init 156/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 156/200: 256.784734
Init 157/200 with method: k-means++
Inertia for init 157/200: 258.003652
Init 158/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 158/200: 277.068567
Init 159/200 with method: k-means++
Inertia for init 159/200: 290.710015
Init 160/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 160/200: 268.332420
Init 161/200 with method: k-means++
Inertia for init 161/200: 287.752451
Init 162/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 162/200: 223.948284
Init 163/200 with method: k-means++
Inertia for init 163/200: 255.389086
Init 164/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 164/200: 317.773993
Init 165/200 with method: k-means++
Inertia for init 165/200: 247.730735
Init 166/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 166/200: 271.923514
Init 167/200 with method: k-means++
Inertia for init 167/200: 224.952291
Init 168/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 168/200: 242.583850
Init 169/200 with method: k-means++
Inertia for init 169/200: 287.776712
Init 170/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 170/200: 276.845276
Init 171/200 with method: k-means++
Inertia for init 171/200: 266.262751
Init 172/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 172/200: 243.434404
Init 173/200 with method: k-means++
Inertia for init 173/200: 222.162791
Init 174/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 174/200: 279.118196
Init 175/200 with method: k-means++
Inertia for init 175/200: 219.465147
Init 176/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 176/200: 280.042390
Init 177/200 with method: k-means++
Inertia for init 177/200: 213.459323
Init 178/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 178/200: 288.064725
Init 179/200 with method: k-means++
Inertia for init 179/200: 305.232823
Init 180/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 180/200: 229.588568
Init 181/200 with method: k-means++
Inertia for init 181/200: 243.088251
Init 182/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 182/200: 257.910525
Init 183/200 with method: k-means++
Inertia for init 183/200: 228.850547
Init 184/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 184/200: 305.144111
Init 185/200 with method: k-means++
Inertia for init 185/200: 282.832982
Init 186/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 186/200: 288.739962
Init 187/200 with method: k-means++
Inertia for init 187/200: 224.735438
Init 188/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 188/200: 313.343031
Init 189/200 with method: k-means++
Inertia for init 189/200: 321.533733
Init 190/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 190/200: 299.830225
Init 191/200 with method: k-means++
Inertia for init 191/200: 264.749394
Init 192/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 192/200: 269.835542
Init 193/200 with method: k-means++
Inertia for init 193/200: 307.383708
Init 194/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 194/200: 294.771502
Init 195/200 with method: k-means++
Inertia for init 195/200: 218.568961
Init 196/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 196/200: 250.708148
Init 197/200 with method: k-means++
Inertia for init 197/200: 313.616865
Init 198/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 198/200: 193.919832
Init 199/200 with method: k-means++
Inertia for init 199/200: 231.150219
Init 200/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 200/200: 246.543688
Minibatch iteration 1/147000: mean batch inertia: 4.966012, ewa inertia: 4.966012 
Minibatch iteration 2/147000: mean batch inertia: 4.808802, ewa inertia: 4.965798 
Minibatch iteration 3/147000: mean batch inertia: 3.855740, ewa inertia: 4.964287 
Minibatch iteration 4/147000: mean batch inertia: 4.566538, ewa inertia: 4.963746 
Minibatch iteration 5/147000: mean batch inertia: 5.017678, ewa inertia: 4.963819 
Minibatch iteration 6/147000: mean batch inertia: 5.396283, ewa inertia: 4.964408 
Minibatch iteration 7/147000: mean batch inertia: 4.477910, ewa inertia: 4.963745 
Minibatch iteration 8/147000: mean batch inertia: 4.133962, ewa inertia: 4.962616 
Minibatch iteration 9/147000: mean batch inertia: 3.750977, ewa inertia: 4.960967 
Minibatch iteration 10/147000: mean batch inertia: 4.776039, ewa inertia: 4.960715 
Minibatch iteration 11/147000: mean batch inertia: 3.741040, ewa inertia: 4.959055 
Minibatch iteration 12/147000: mean batch inerti

Minibatch iteration 114/147000: mean batch inertia: 3.724601, ewa inertia: 4.798645 
Minibatch iteration 115/147000: mean batch inertia: 2.949718, ewa inertia: 4.796128 
Minibatch iteration 116/147000: mean batch inertia: 2.752479, ewa inertia: 4.793347 
Minibatch iteration 117/147000: mean batch inertia: 2.679385, ewa inertia: 4.790470 
Minibatch iteration 118/147000: mean batch inertia: 3.184683, ewa inertia: 4.788284 
Minibatch iteration 119/147000: mean batch inertia: 2.683682, ewa inertia: 4.785420 
Minibatch iteration 120/147000: mean batch inertia: 2.733384, ewa inertia: 4.782627 
Minibatch iteration 121/147000: mean batch inertia: 3.117959, ewa inertia: 4.780361 
Minibatch iteration 122/147000: mean batch inertia: 2.903547, ewa inertia: 4.777807 
Minibatch iteration 123/147000: mean batch inertia: 3.290429, ewa inertia: 4.775783 
Minibatch iteration 124/147000: mean batch inertia: 2.773123, ewa inertia: 4.773057 
Minibatch iteration 125/147000: mean batch inertia: 3.450346, ewa

Minibatch iteration 232/147000: mean batch inertia: 2.965906, ewa inertia: 4.576092 
Minibatch iteration 233/147000: mean batch inertia: 2.725180, ewa inertia: 4.573573 
Minibatch iteration 234/147000: mean batch inertia: 3.279400, ewa inertia: 4.571811 
Minibatch iteration 235/147000: mean batch inertia: 3.047618, ewa inertia: 4.569737 
Minibatch iteration 236/147000: mean batch inertia: 3.450529, ewa inertia: 4.568214 
Minibatch iteration 237/147000: mean batch inertia: 2.513269, ewa inertia: 4.565417 
Minibatch iteration 238/147000: mean batch inertia: 2.948232, ewa inertia: 4.563216 
Minibatch iteration 239/147000: mean batch inertia: 2.955661, ewa inertia: 4.561028 
Minibatch iteration 240/147000: mean batch inertia: 3.360890, ewa inertia: 4.559394 
Minibatch iteration 241/147000: mean batch inertia: 3.162044, ewa inertia: 4.557493 
Minibatch iteration 242/147000: mean batch inertia: 3.080641, ewa inertia: 4.555483 
Minibatch iteration 243/147000: mean batch inertia: 4.159176, ewa

Minibatch iteration 345/147000: mean batch inertia: 2.135010, ewa inertia: 4.377718 
Minibatch iteration 346/147000: mean batch inertia: 2.820180, ewa inertia: 4.375598 
Minibatch iteration 347/147000: mean batch inertia: 2.865719, ewa inertia: 4.373543 
Minibatch iteration 348/147000: mean batch inertia: 3.021020, ewa inertia: 4.371703 
Minibatch iteration 349/147000: mean batch inertia: 2.858684, ewa inertia: 4.369643 
Minibatch iteration 350/147000: mean batch inertia: 2.679171, ewa inertia: 4.367343 
Minibatch iteration 351/147000: mean batch inertia: 2.543813, ewa inertia: 4.364861 
Minibatch iteration 352/147000: mean batch inertia: 2.691841, ewa inertia: 4.362584 
Minibatch iteration 353/147000: mean batch inertia: 3.555832, ewa inertia: 4.361486 
Minibatch iteration 354/147000: mean batch inertia: 2.360721, ewa inertia: 4.358763 
Minibatch iteration 355/147000: mean batch inertia: 3.531466, ewa inertia: 4.357637 
Minibatch iteration 356/147000: mean batch inertia: 4.204857, ewa

Minibatch iteration 465/147000: mean batch inertia: 2.805382, ewa inertia: 4.190328 
Minibatch iteration 466/147000: mean batch inertia: 3.035404, ewa inertia: 4.188756 
Minibatch iteration 467/147000: mean batch inertia: 3.315712, ewa inertia: 4.187568 
Minibatch iteration 468/147000: mean batch inertia: 2.184200, ewa inertia: 4.184841 
Minibatch iteration 469/147000: mean batch inertia: 3.805733, ewa inertia: 4.184326 
Minibatch iteration 470/147000: mean batch inertia: 4.003948, ewa inertia: 4.184080 
Minibatch iteration 471/147000: mean batch inertia: 3.839474, ewa inertia: 4.183611 
Minibatch iteration 472/147000: mean batch inertia: 3.329689, ewa inertia: 4.182449 
Minibatch iteration 473/147000: mean batch inertia: 3.304819, ewa inertia: 4.181254 
Minibatch iteration 474/147000: mean batch inertia: 3.324033, ewa inertia: 4.180088 
Minibatch iteration 475/147000: mean batch inertia: 3.085444, ewa inertia: 4.178598 
Minibatch iteration 476/147000: mean batch inertia: 2.498904, ewa

Minibatch iteration 579/147000: mean batch inertia: 3.011577, ewa inertia: 4.039847 
Minibatch iteration 580/147000: mean batch inertia: 2.817775, ewa inertia: 4.038184 
Minibatch iteration 581/147000: mean batch inertia: 3.495816, ewa inertia: 4.037446 
Minibatch iteration 582/147000: mean batch inertia: 3.529313, ewa inertia: 4.036754 
Minibatch iteration 583/147000: mean batch inertia: 3.145811, ewa inertia: 4.035541 
Minibatch iteration 584/147000: mean batch inertia: 2.553511, ewa inertia: 4.033524 
Minibatch iteration 585/147000: mean batch inertia: 2.579427, ewa inertia: 4.031545 
Minibatch iteration 586/147000: mean batch inertia: 2.578751, ewa inertia: 4.029568 
Minibatch iteration 587/147000: mean batch inertia: 3.200521, ewa inertia: 4.028440 
Minibatch iteration 588/147000: mean batch inertia: 3.201612, ewa inertia: 4.027314 
Minibatch iteration 589/147000: mean batch inertia: 2.291675, ewa inertia: 4.024952 
Minibatch iteration 590/147000: mean batch inertia: 4.233389, ewa

Minibatch iteration 694/147000: mean batch inertia: 3.121114, ewa inertia: 3.909621 
Minibatch iteration 695/147000: mean batch inertia: 3.178685, ewa inertia: 3.908626 
Minibatch iteration 696/147000: mean batch inertia: 2.622027, ewa inertia: 3.906875 
Minibatch iteration 697/147000: mean batch inertia: 3.178114, ewa inertia: 3.905884 
Minibatch iteration 698/147000: mean batch inertia: 3.103809, ewa inertia: 3.904792 
Minibatch iteration 699/147000: mean batch inertia: 2.922987, ewa inertia: 3.903456 
Minibatch iteration 700/147000: mean batch inertia: 2.097396, ewa inertia: 3.900998 
Minibatch iteration 701/147000: mean batch inertia: 2.831046, ewa inertia: 3.899541 
Minibatch iteration 702/147000: mean batch inertia: 3.440719, ewa inertia: 3.898917 
Minibatch iteration 703/147000: mean batch inertia: 3.712034, ewa inertia: 3.898663 
Minibatch iteration 704/147000: mean batch inertia: 3.742759, ewa inertia: 3.898450 
Minibatch iteration 705/147000: mean batch inertia: 2.704377, ewa

Minibatch iteration 810/147000: mean batch inertia: 2.612597, ewa inertia: 3.796403 
Minibatch iteration 811/147000: mean batch inertia: 2.912572, ewa inertia: 3.795201 
Minibatch iteration 812/147000: mean batch inertia: 3.179181, ewa inertia: 3.794362 
Minibatch iteration 813/147000: mean batch inertia: 2.673887, ewa inertia: 3.792837 
Minibatch iteration 814/147000: mean batch inertia: 3.456379, ewa inertia: 3.792379 
Minibatch iteration 815/147000: mean batch inertia: 2.849534, ewa inertia: 3.791096 
Minibatch iteration 816/147000: mean batch inertia: 2.959437, ewa inertia: 3.789964 
Minibatch iteration 817/147000: mean batch inertia: 2.891327, ewa inertia: 3.788741 
Minibatch iteration 818/147000: mean batch inertia: 3.161594, ewa inertia: 3.787888 
Minibatch iteration 819/147000: mean batch inertia: 3.072437, ewa inertia: 3.786914 
Minibatch iteration 820/147000: mean batch inertia: 2.539470, ewa inertia: 3.785216 
Minibatch iteration 821/147000: mean batch inertia: 3.062208, ewa

Minibatch iteration 929/147000: mean batch inertia: 3.550121, ewa inertia: 3.688672 
Minibatch iteration 930/147000: mean batch inertia: 3.456846, ewa inertia: 3.688357 
Minibatch iteration 931/147000: mean batch inertia: 2.870170, ewa inertia: 3.687243 
Minibatch iteration 932/147000: mean batch inertia: 2.974104, ewa inertia: 3.686273 
Minibatch iteration 933/147000: mean batch inertia: 3.365955, ewa inertia: 3.685837 
Minibatch iteration 934/147000: mean batch inertia: 3.012134, ewa inertia: 3.684920 
Minibatch iteration 935/147000: mean batch inertia: 4.150436, ewa inertia: 3.685553 
Minibatch iteration 936/147000: mean batch inertia: 3.304391, ewa inertia: 3.685035 
Minibatch iteration 937/147000: mean batch inertia: 2.950744, ewa inertia: 3.684035 
Minibatch iteration 938/147000: mean batch inertia: 3.582294, ewa inertia: 3.683897 
Minibatch iteration 939/147000: mean batch inertia: 2.866930, ewa inertia: 3.682785 
Minibatch iteration 940/147000: mean batch inertia: 3.920184, ewa

Minibatch iteration 1041/147000: mean batch inertia: 2.986493, ewa inertia: 3.595666 
Minibatch iteration 1042/147000: mean batch inertia: 2.816747, ewa inertia: 3.594606 
Minibatch iteration 1043/147000: mean batch inertia: 2.947742, ewa inertia: 3.593726 
Minibatch iteration 1044/147000: mean batch inertia: 2.741027, ewa inertia: 3.592565 
Minibatch iteration 1045/147000: mean batch inertia: 3.030503, ewa inertia: 3.591800 
Minibatch iteration 1046/147000: mean batch inertia: 2.232407, ewa inertia: 3.589950 
Minibatch iteration 1047/147000: mean batch inertia: 3.123604, ewa inertia: 3.589315 
Minibatch iteration 1048/147000: mean batch inertia: 2.385647, ewa inertia: 3.587677 
Minibatch iteration 1049/147000: mean batch inertia: 3.686397, ewa inertia: 3.587811 
Minibatch iteration 1050/147000: mean batch inertia: 3.975274, ewa inertia: 3.588339 
Minibatch iteration 1051/147000: mean batch inertia: 3.042658, ewa inertia: 3.587596 
Minibatch iteration 1052/147000: mean batch inertia: 3

Minibatch iteration 1155/147000: mean batch inertia: 2.907831, ewa inertia: 3.516636 
Minibatch iteration 1156/147000: mean batch inertia: 2.688185, ewa inertia: 3.515508 
Minibatch iteration 1157/147000: mean batch inertia: 2.961922, ewa inertia: 3.514755 
Minibatch iteration 1158/147000: mean batch inertia: 2.758317, ewa inertia: 3.513726 
Minibatch iteration 1159/147000: mean batch inertia: 3.425702, ewa inertia: 3.513606 
Minibatch iteration 1160/147000: mean batch inertia: 2.893821, ewa inertia: 3.512762 
Minibatch iteration 1161/147000: mean batch inertia: 3.129652, ewa inertia: 3.512241 
Minibatch iteration 1162/147000: mean batch inertia: 3.477130, ewa inertia: 3.512193 
Minibatch iteration 1163/147000: mean batch inertia: 2.560128, ewa inertia: 3.510897 
Minibatch iteration 1164/147000: mean batch inertia: 3.891176, ewa inertia: 3.511415 
Minibatch iteration 1165/147000: mean batch inertia: 3.797748, ewa inertia: 3.511804 
Minibatch iteration 1166/147000: mean batch inertia: 3

Minibatch iteration 1273/147000: mean batch inertia: 3.487537, ewa inertia: 3.452022 
Minibatch iteration 1274/147000: mean batch inertia: 2.911112, ewa inertia: 3.451286 
Minibatch iteration 1275/147000: mean batch inertia: 3.309539, ewa inertia: 3.451093 
Minibatch iteration 1276/147000: mean batch inertia: 3.691046, ewa inertia: 3.451420 
Minibatch iteration 1277/147000: mean batch inertia: 2.798962, ewa inertia: 3.450532 
Minibatch iteration 1278/147000: mean batch inertia: 3.023503, ewa inertia: 3.449951 
Minibatch iteration 1279/147000: mean batch inertia: 2.694805, ewa inertia: 3.448923 
Minibatch iteration 1280/147000: mean batch inertia: 2.938293, ewa inertia: 3.448228 
Minibatch iteration 1281/147000: mean batch inertia: 3.053618, ewa inertia: 3.447691 
Minibatch iteration 1282/147000: mean batch inertia: 2.824164, ewa inertia: 3.446842 
Minibatch iteration 1283/147000: mean batch inertia: 3.384290, ewa inertia: 3.446757 
Minibatch iteration 1284/147000: mean batch inertia: 3

Minibatch iteration 1392/147000: mean batch inertia: 3.509849, ewa inertia: 3.391582 
Minibatch iteration 1393/147000: mean batch inertia: 3.287373, ewa inertia: 3.391440 
Minibatch iteration 1394/147000: mean batch inertia: 2.943233, ewa inertia: 3.390830 
Minibatch iteration 1395/147000: mean batch inertia: 2.161452, ewa inertia: 3.389157 
Minibatch iteration 1396/147000: mean batch inertia: 3.572425, ewa inertia: 3.389406 
Minibatch iteration 1397/147000: mean batch inertia: 2.650628, ewa inertia: 3.388400 
Minibatch iteration 1398/147000: mean batch inertia: 2.630300, ewa inertia: 3.387369 
Minibatch iteration 1399/147000: mean batch inertia: 3.617510, ewa inertia: 3.387682 
Minibatch iteration 1400/147000: mean batch inertia: 3.157446, ewa inertia: 3.387369 
Minibatch iteration 1401/147000: mean batch inertia: 2.290762, ewa inertia: 3.385876 
Minibatch iteration 1402/147000: mean batch inertia: 2.887022, ewa inertia: 3.385197 
Minibatch iteration 1403/147000: mean batch inertia: 3

Minibatch iteration 1507/147000: mean batch inertia: 2.410677, ewa inertia: 3.342309 
Minibatch iteration 1508/147000: mean batch inertia: 3.110204, ewa inertia: 3.341993 
Minibatch iteration 1509/147000: mean batch inertia: 3.068363, ewa inertia: 3.341620 
Minibatch iteration 1510/147000: mean batch inertia: 2.929435, ewa inertia: 3.341059 
Minibatch iteration 1511/147000: mean batch inertia: 3.157042, ewa inertia: 3.340809 
Minibatch iteration 1512/147000: mean batch inertia: 3.097719, ewa inertia: 3.340478 
Minibatch iteration 1513/147000: mean batch inertia: 2.423212, ewa inertia: 3.339230 
Minibatch iteration 1514/147000: mean batch inertia: 2.992068, ewa inertia: 3.338757 
Minibatch iteration 1515/147000: mean batch inertia: 2.445550, ewa inertia: 3.337542 
Minibatch iteration 1516/147000: mean batch inertia: 2.621709, ewa inertia: 3.336567 
Minibatch iteration 1517/147000: mean batch inertia: 2.848281, ewa inertia: 3.335903 
Minibatch iteration 1518/147000: mean batch inertia: 3

Minibatch iteration 1621/147000: mean batch inertia: 2.940443, ewa inertia: 3.301398 
Minibatch iteration 1622/147000: mean batch inertia: 2.856674, ewa inertia: 3.300792 
Minibatch iteration 1623/147000: mean batch inertia: 3.062067, ewa inertia: 3.300467 
Minibatch iteration 1624/147000: mean batch inertia: 2.810954, ewa inertia: 3.299801 
Minibatch iteration 1625/147000: mean batch inertia: 2.729609, ewa inertia: 3.299025 
Minibatch iteration 1626/147000: mean batch inertia: 3.373986, ewa inertia: 3.299127 
Minibatch iteration 1627/147000: mean batch inertia: 3.051320, ewa inertia: 3.298790 
Minibatch iteration 1628/147000: mean batch inertia: 3.563167, ewa inertia: 3.299150 
Minibatch iteration 1629/147000: mean batch inertia: 3.135129, ewa inertia: 3.298926 
Minibatch iteration 1630/147000: mean batch inertia: 2.973367, ewa inertia: 3.298483 
Minibatch iteration 1631/147000: mean batch inertia: 2.667939, ewa inertia: 3.297625 
Minibatch iteration 1632/147000: mean batch inertia: 2

Minibatch iteration 1744/147000: mean batch inertia: 3.271401, ewa inertia: 3.264724 
Minibatch iteration 1745/147000: mean batch inertia: 2.324643, ewa inertia: 3.263445 
Minibatch iteration 1746/147000: mean batch inertia: 3.361558, ewa inertia: 3.263579 
Minibatch iteration 1747/147000: mean batch inertia: 2.943965, ewa inertia: 3.263144 
Minibatch iteration 1748/147000: mean batch inertia: 3.613407, ewa inertia: 3.263620 
Minibatch iteration 1749/147000: mean batch inertia: 3.216156, ewa inertia: 3.263556 
Minibatch iteration 1750/147000: mean batch inertia: 4.146724, ewa inertia: 3.264758 
Minibatch iteration 1751/147000: mean batch inertia: 2.747394, ewa inertia: 3.264053 
Minibatch iteration 1752/147000: mean batch inertia: 2.656483, ewa inertia: 3.263227 
Minibatch iteration 1753/147000: mean batch inertia: 2.995842, ewa inertia: 3.262863 
Minibatch iteration 1754/147000: mean batch inertia: 2.932086, ewa inertia: 3.262412 
Minibatch iteration 1755/147000: mean batch inertia: 3

Minibatch iteration 1853/147000: mean batch inertia: 3.269541, ewa inertia: 3.230126 
Minibatch iteration 1854/147000: mean batch inertia: 2.573238, ewa inertia: 3.229232 
Minibatch iteration 1855/147000: mean batch inertia: 3.094318, ewa inertia: 3.229048 
Minibatch iteration 1856/147000: mean batch inertia: 3.359834, ewa inertia: 3.229226 
Minibatch iteration 1857/147000: mean batch inertia: 3.176945, ewa inertia: 3.229155 
Minibatch iteration 1858/147000: mean batch inertia: 3.108201, ewa inertia: 3.228991 
Minibatch iteration 1859/147000: mean batch inertia: 3.188877, ewa inertia: 3.228936 
Minibatch iteration 1860/147000: mean batch inertia: 3.369903, ewa inertia: 3.229128 
Minibatch iteration 1861/147000: mean batch inertia: 3.182267, ewa inertia: 3.229064 
Minibatch iteration 1862/147000: mean batch inertia: 2.255867, ewa inertia: 3.227740 
Minibatch iteration 1863/147000: mean batch inertia: 2.875799, ewa inertia: 3.227261 
Minibatch iteration 1864/147000: mean batch inertia: 2

Minibatch iteration 1955/147000: mean batch inertia: 4.085971, ewa inertia: 3.204879 
Minibatch iteration 1956/147000: mean batch inertia: 2.821620, ewa inertia: 3.204358 
Minibatch iteration 1957/147000: mean batch inertia: 3.680027, ewa inertia: 3.205005 
Minibatch iteration 1958/147000: mean batch inertia: 2.815720, ewa inertia: 3.204475 
Minibatch iteration 1959/147000: mean batch inertia: 2.645791, ewa inertia: 3.203715 
Minibatch iteration 1960/147000: mean batch inertia: 2.061124, ewa inertia: 3.202160 
Minibatch iteration 1961/147000: mean batch inertia: 3.416421, ewa inertia: 3.202451 
Minibatch iteration 1962/147000: mean batch inertia: 2.841326, ewa inertia: 3.201960 
Minibatch iteration 1963/147000: mean batch inertia: 3.182433, ewa inertia: 3.201933 
Minibatch iteration 1964/147000: mean batch inertia: 2.376677, ewa inertia: 3.200810 
Minibatch iteration 1965/147000: mean batch inertia: 2.964250, ewa inertia: 3.200488 
Minibatch iteration 1966/147000: mean batch inertia: 3

Minibatch iteration 2055/147000: mean batch inertia: 3.867632, ewa inertia: 3.176508 
Minibatch iteration 2056/147000: mean batch inertia: 3.363145, ewa inertia: 3.176762 
Minibatch iteration 2057/147000: mean batch inertia: 3.060719, ewa inertia: 3.176604 
Minibatch iteration 2058/147000: mean batch inertia: 3.320344, ewa inertia: 3.176800 
Minibatch iteration 2059/147000: mean batch inertia: 3.730817, ewa inertia: 3.177554 
Minibatch iteration 2060/147000: mean batch inertia: 3.215929, ewa inertia: 3.177606 
Minibatch iteration 2061/147000: mean batch inertia: 2.704395, ewa inertia: 3.176962 
Minibatch iteration 2062/147000: mean batch inertia: 3.372686, ewa inertia: 3.177228 
Minibatch iteration 2063/147000: mean batch inertia: 3.118159, ewa inertia: 3.177148 
Minibatch iteration 2064/147000: mean batch inertia: 3.608147, ewa inertia: 3.177734 
Minibatch iteration 2065/147000: mean batch inertia: 3.993494, ewa inertia: 3.178845 
Minibatch iteration 2066/147000: mean batch inertia: 2

Minibatch iteration 2159/147000: mean batch inertia: 2.634706, ewa inertia: 3.153668 
Minibatch iteration 2160/147000: mean batch inertia: 3.030688, ewa inertia: 3.153501 
Minibatch iteration 2161/147000: mean batch inertia: 3.143320, ewa inertia: 3.153487 
Minibatch iteration 2162/147000: mean batch inertia: 3.085328, ewa inertia: 3.153394 
Minibatch iteration 2163/147000: mean batch inertia: 3.006101, ewa inertia: 3.153194 
Minibatch iteration 2164/147000: mean batch inertia: 3.169435, ewa inertia: 3.153216 
Minibatch iteration 2165/147000: mean batch inertia: 2.693348, ewa inertia: 3.152590 
Minibatch iteration 2166/147000: mean batch inertia: 2.437325, ewa inertia: 3.151616 
Minibatch iteration 2167/147000: mean batch inertia: 2.853130, ewa inertia: 3.151210 
Minibatch iteration 2168/147000: mean batch inertia: 2.539404, ewa inertia: 3.150377 
Minibatch iteration 2169/147000: mean batch inertia: 3.459741, ewa inertia: 3.150798 
Minibatch iteration 2170/147000: mean batch inertia: 3

Minibatch iteration 2259/147000: mean batch inertia: 3.480058, ewa inertia: 3.135494 
Minibatch iteration 2260/147000: mean batch inertia: 3.148023, ewa inertia: 3.135511 
Minibatch iteration 2261/147000: mean batch inertia: 2.196504, ewa inertia: 3.134234 
Minibatch iteration 2262/147000: mean batch inertia: 3.071345, ewa inertia: 3.134148 
Minibatch iteration 2263/147000: mean batch inertia: 3.634564, ewa inertia: 3.134829 
Minibatch iteration 2264/147000: mean batch inertia: 3.186992, ewa inertia: 3.134900 
Minibatch iteration 2265/147000: mean batch inertia: 2.696022, ewa inertia: 3.134303 
Minibatch iteration 2266/147000: mean batch inertia: 2.844206, ewa inertia: 3.133908 
Minibatch iteration 2267/147000: mean batch inertia: 3.583103, ewa inertia: 3.134519 
Minibatch iteration 2268/147000: mean batch inertia: 3.485726, ewa inertia: 3.134997 
Minibatch iteration 2269/147000: mean batch inertia: 3.654132, ewa inertia: 3.135704 
Minibatch iteration 2270/147000: mean batch inertia: 2

Minibatch iteration 2377/147000: mean batch inertia: 3.679228, ewa inertia: 3.114171 
Minibatch iteration 2378/147000: mean batch inertia: 3.085477, ewa inertia: 3.114132 
Minibatch iteration 2379/147000: mean batch inertia: 2.404858, ewa inertia: 3.113166 
Minibatch iteration 2380/147000: mean batch inertia: 3.289453, ewa inertia: 3.113406 
Minibatch iteration 2381/147000: mean batch inertia: 2.942099, ewa inertia: 3.113173 
Minibatch iteration 2382/147000: mean batch inertia: 2.862656, ewa inertia: 3.112832 
Minibatch iteration 2383/147000: mean batch inertia: 2.913097, ewa inertia: 3.112560 
Minibatch iteration 2384/147000: mean batch inertia: 2.975628, ewa inertia: 3.112374 
Minibatch iteration 2385/147000: mean batch inertia: 3.237925, ewa inertia: 3.112545 
Minibatch iteration 2386/147000: mean batch inertia: 3.007866, ewa inertia: 3.112402 
Minibatch iteration 2387/147000: mean batch inertia: 2.144138, ewa inertia: 3.111085 
Minibatch iteration 2388/147000: mean batch inertia: 3

Minibatch iteration 2492/147000: mean batch inertia: 3.440333, ewa inertia: 3.105165 
Minibatch iteration 2493/147000: mean batch inertia: 2.957123, ewa inertia: 3.104964 
Minibatch iteration 2494/147000: mean batch inertia: 3.170152, ewa inertia: 3.105053 
Minibatch iteration 2495/147000: mean batch inertia: 2.988596, ewa inertia: 3.104894 
Minibatch iteration 2496/147000: mean batch inertia: 2.546939, ewa inertia: 3.104135 
Minibatch iteration 2497/147000: mean batch inertia: 3.714872, ewa inertia: 3.104966 
Minibatch iteration 2498/147000: mean batch inertia: 3.939009, ewa inertia: 3.106101 
Minibatch iteration 2499/147000: mean batch inertia: 2.754140, ewa inertia: 3.105622 
Minibatch iteration 2500/147000: mean batch inertia: 2.413103, ewa inertia: 3.104680 
Minibatch iteration 2501/147000: mean batch inertia: 3.585444, ewa inertia: 3.105334 
Converged (lack of improvement in inertia) at iteration 2501/147000
Computing label assignment and total inertia


In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 400:
        kmeans = MiniBatchKMeans(n_clusters=400, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 400), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  Character Trajectories Data Set 
(http://archive.ics.uci.edu/ml/datasets/Character+Trajectories)

Data Set Information:

The characters here were used for a PhD study on primitive extraction using HMM based models. The data consists of 2858 character samples, contained in the cell array 'mixout'. The struct variable 'consts' contains a field consts.charlabels which provides ennummerated labels for the characters. consts.key provides the key for each label. The data was captured using a WACOM tablet. 3 Dimensions were kept - x, y, and pen tip force. The data has been numerically differentiated and Gaussian smoothed, with a sigma value of 2. Data was captured at 200Hz. The data was normalised with consts.datanorm. Only characters with a single 'PEN-DOWN' segment were considered. Character segmentation was performed using a pen tip force cut-off point. The characters have also been shifted so that their velocity profiles best match the mean of the set.

Attribute Information:

Each character sample is a 3-dimensional pen tip velocity trajectory. This is contained in matrix format, with 3 rows and T columns where T is the length of the character sample.

In [3]:
char = np.load(r"E:\multivariate_time_series_dataset\numpy\CHAR.npz")
X_train = np.empty(shape=(300, ), dtype=object)
y_train = np.empty(shape=(300, ), dtype=object)
X_test = np.empty(shape=(2558, ), dtype=object)
y_test = np.empty(shape=(2558, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(char['X'], char['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(char['Xte'], char['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(char['X'], char['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(char['Xte'], char['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 3,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_char.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_char.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [4]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 800 - 50), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 800,
                          'k_in': 3,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_char_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_char_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 36.412375
Init 2/200 with method: k-means++
Inertia for init 2/200: 38.957241
Init 3/200 with method: k-means++
Inertia for init 3/200: 40.881089
Init 4/200 with method: k-means++
Inertia for init 4/200: 37.818107
Init 5/200 with method: k-means++
Inertia for init 5/200: 42.685401
Init 6/200 with method: k-means++
Inertia for init 6/200: 38.231006
Init 7/200 with method: k-means++
Inertia for init 7/200: 36.605706
Init 8/200 with method: k-means++
Inertia for init 8/200: 35.314030
Init 9/200 with method: k-means++
Inertia for init 9/200: 38.731324
Init 10/200 with method: k-means++
Inertia for init 10/200: 41.749969
Init 11/200 with method: k-means++
Inertia for init 11/200: 43.197657
Init 12/200 with method: k-means++
Inertia for init 12/200: 37.739229
Init 13/200 with method: k-means++
Inertia for init 13/200: 38.977729
Init 14/200 with method: k-means++
Inertia for init 14/200: 41.048150
Init 15/200 with method: k-means++
Ine

Inertia for init 119/200: 35.880397
Init 120/200 with method: k-means++
Inertia for init 120/200: 41.359890
Init 121/200 with method: k-means++
Inertia for init 121/200: 39.465034
Init 122/200 with method: k-means++
Inertia for init 122/200: 39.614490
Init 123/200 with method: k-means++
Inertia for init 123/200: 37.822621
Init 124/200 with method: k-means++
Inertia for init 124/200: 42.751979
Init 125/200 with method: k-means++
Inertia for init 125/200: 38.022697
Init 126/200 with method: k-means++
Inertia for init 126/200: 39.394675
Init 127/200 with method: k-means++
Inertia for init 127/200: 34.847364
Init 128/200 with method: k-means++
Inertia for init 128/200: 37.243961
Init 129/200 with method: k-means++
Inertia for init 129/200: 38.866651
Init 130/200 with method: k-means++
Inertia for init 130/200: 37.399464
Init 131/200 with method: k-means++
Inertia for init 131/200: 41.934446
Init 132/200 with method: k-means++
Inertia for init 132/200: 39.100990
Init 133/200 with method: k-

Minibatch iteration 78/343000: mean batch inertia: 0.136151, ewa inertia: 0.148553 
Minibatch iteration 79/343000: mean batch inertia: 0.121956, ewa inertia: 0.148538 
Minibatch iteration 80/343000: mean batch inertia: 0.114395, ewa inertia: 0.148518 
Minibatch iteration 81/343000: mean batch inertia: 0.164372, ewa inertia: 0.148527 
Minibatch iteration 82/343000: mean batch inertia: 0.138832, ewa inertia: 0.148522 
Minibatch iteration 83/343000: mean batch inertia: 0.149159, ewa inertia: 0.148522 
Minibatch iteration 84/343000: mean batch inertia: 0.159374, ewa inertia: 0.148528 
Minibatch iteration 85/343000: mean batch inertia: 0.144346, ewa inertia: 0.148526 
Minibatch iteration 86/343000: mean batch inertia: 0.146564, ewa inertia: 0.148525 
Minibatch iteration 87/343000: mean batch inertia: 0.119916, ewa inertia: 0.148508 
Minibatch iteration 88/343000: mean batch inertia: 0.124619, ewa inertia: 0.148494 
Minibatch iteration 89/343000: mean batch inertia: 0.137743, ewa inertia: 0.

Minibatch iteration 207/343000: mean batch inertia: 0.139090, ewa inertia: 0.147922 
Minibatch iteration 208/343000: mean batch inertia: 0.171204, ewa inertia: 0.147936 
Minibatch iteration 209/343000: mean batch inertia: 0.141699, ewa inertia: 0.147932 
Minibatch iteration 210/343000: mean batch inertia: 0.136652, ewa inertia: 0.147926 
Minibatch iteration 211/343000: mean batch inertia: 0.141357, ewa inertia: 0.147922 
Minibatch iteration 212/343000: mean batch inertia: 0.136361, ewa inertia: 0.147915 
Minibatch iteration 213/343000: mean batch inertia: 0.167982, ewa inertia: 0.147927 
Minibatch iteration 214/343000: mean batch inertia: 0.163460, ewa inertia: 0.147936 
Minibatch iteration 215/343000: mean batch inertia: 0.166437, ewa inertia: 0.147947 
Minibatch iteration 216/343000: mean batch inertia: 0.173761, ewa inertia: 0.147962 
Minibatch iteration 217/343000: mean batch inertia: 0.164364, ewa inertia: 0.147971 
Minibatch iteration 218/343000: mean batch inertia: 0.135385, ewa

Minibatch iteration 315/343000: mean batch inertia: 0.184245, ewa inertia: 0.147539 
Minibatch iteration 316/343000: mean batch inertia: 0.139740, ewa inertia: 0.147534 
Minibatch iteration 317/343000: mean batch inertia: 0.124794, ewa inertia: 0.147521 
Minibatch iteration 318/343000: mean batch inertia: 0.147959, ewa inertia: 0.147521 
Minibatch iteration 319/343000: mean batch inertia: 0.137489, ewa inertia: 0.147516 
Minibatch iteration 320/343000: mean batch inertia: 0.106469, ewa inertia: 0.147492 
Minibatch iteration 321/343000: mean batch inertia: 0.142369, ewa inertia: 0.147489 
Minibatch iteration 322/343000: mean batch inertia: 0.130707, ewa inertia: 0.147479 
Minibatch iteration 323/343000: mean batch inertia: 0.131856, ewa inertia: 0.147470 
Minibatch iteration 324/343000: mean batch inertia: 0.152855, ewa inertia: 0.147473 
Minibatch iteration 325/343000: mean batch inertia: 0.149142, ewa inertia: 0.147474 
Minibatch iteration 326/343000: mean batch inertia: 0.166873, ewa

Minibatch iteration 443/343000: mean batch inertia: 0.139449, ewa inertia: 0.146986 
Minibatch iteration 444/343000: mean batch inertia: 0.152375, ewa inertia: 0.146989 
Minibatch iteration 445/343000: mean batch inertia: 0.110750, ewa inertia: 0.146968 
Minibatch iteration 446/343000: mean batch inertia: 0.140868, ewa inertia: 0.146964 
Minibatch iteration 447/343000: mean batch inertia: 0.134822, ewa inertia: 0.146957 
Minibatch iteration 448/343000: mean batch inertia: 0.146056, ewa inertia: 0.146957 
Minibatch iteration 449/343000: mean batch inertia: 0.159440, ewa inertia: 0.146964 
Minibatch iteration 450/343000: mean batch inertia: 0.133370, ewa inertia: 0.146956 
Minibatch iteration 451/343000: mean batch inertia: 0.135481, ewa inertia: 0.146949 
Minibatch iteration 452/343000: mean batch inertia: 0.111545, ewa inertia: 0.146929 
Minibatch iteration 453/343000: mean batch inertia: 0.123101, ewa inertia: 0.146915 
Minibatch iteration 454/343000: mean batch inertia: 0.130128, ewa

Minibatch iteration 564/343000: mean batch inertia: 0.130057, ewa inertia: 0.146247 
Minibatch iteration 565/343000: mean batch inertia: 0.142739, ewa inertia: 0.146245 
Minibatch iteration 566/343000: mean batch inertia: 0.142825, ewa inertia: 0.146243 
Minibatch iteration 567/343000: mean batch inertia: 0.141709, ewa inertia: 0.146240 
Minibatch iteration 568/343000: mean batch inertia: 0.141824, ewa inertia: 0.146237 
Minibatch iteration 569/343000: mean batch inertia: 0.133647, ewa inertia: 0.146230 
Minibatch iteration 570/343000: mean batch inertia: 0.126059, ewa inertia: 0.146218 
Minibatch iteration 571/343000: mean batch inertia: 0.125889, ewa inertia: 0.146206 
Minibatch iteration 572/343000: mean batch inertia: 0.132766, ewa inertia: 0.146199 
Minibatch iteration 573/343000: mean batch inertia: 0.124205, ewa inertia: 0.146186 
Minibatch iteration 574/343000: mean batch inertia: 0.155012, ewa inertia: 0.146191 
Minibatch iteration 575/343000: mean batch inertia: 0.152972, ewa

Minibatch iteration 689/343000: mean batch inertia: 0.126904, ewa inertia: 0.145599 
Minibatch iteration 690/343000: mean batch inertia: 0.174557, ewa inertia: 0.145616 
Minibatch iteration 691/343000: mean batch inertia: 0.125555, ewa inertia: 0.145604 
Minibatch iteration 692/343000: mean batch inertia: 0.136565, ewa inertia: 0.145599 
Minibatch iteration 693/343000: mean batch inertia: 0.145173, ewa inertia: 0.145598 
Minibatch iteration 694/343000: mean batch inertia: 0.134643, ewa inertia: 0.145592 
Minibatch iteration 695/343000: mean batch inertia: 0.142880, ewa inertia: 0.145590 
Minibatch iteration 696/343000: mean batch inertia: 0.110959, ewa inertia: 0.145570 
Minibatch iteration 697/343000: mean batch inertia: 0.124963, ewa inertia: 0.145558 
Minibatch iteration 698/343000: mean batch inertia: 0.136731, ewa inertia: 0.145553 
Minibatch iteration 699/343000: mean batch inertia: 0.157384, ewa inertia: 0.145560 
Minibatch iteration 700/343000: mean batch inertia: 0.145901, ewa

Minibatch iteration 804/343000: mean batch inertia: 0.127531, ewa inertia: 0.145071 
Minibatch iteration 805/343000: mean batch inertia: 0.150763, ewa inertia: 0.145074 
Minibatch iteration 806/343000: mean batch inertia: 0.163289, ewa inertia: 0.145085 
Minibatch iteration 807/343000: mean batch inertia: 0.133284, ewa inertia: 0.145078 
Minibatch iteration 808/343000: mean batch inertia: 0.123564, ewa inertia: 0.145065 
Minibatch iteration 809/343000: mean batch inertia: 0.145074, ewa inertia: 0.145065 
Minibatch iteration 810/343000: mean batch inertia: 0.140125, ewa inertia: 0.145062 
Minibatch iteration 811/343000: mean batch inertia: 0.160697, ewa inertia: 0.145072 
Minibatch iteration 812/343000: mean batch inertia: 0.112962, ewa inertia: 0.145053 
Minibatch iteration 813/343000: mean batch inertia: 0.131789, ewa inertia: 0.145045 
Minibatch iteration 814/343000: mean batch inertia: 0.133056, ewa inertia: 0.145038 
Minibatch iteration 815/343000: mean batch inertia: 0.149604, ewa

Minibatch iteration 946/343000: mean batch inertia: 0.185913, ewa inertia: 0.144506 
Minibatch iteration 947/343000: mean batch inertia: 0.138422, ewa inertia: 0.144503 
Minibatch iteration 948/343000: mean batch inertia: 0.154199, ewa inertia: 0.144508 
Minibatch iteration 949/343000: mean batch inertia: 0.116597, ewa inertia: 0.144492 
Minibatch iteration 950/343000: mean batch inertia: 0.128219, ewa inertia: 0.144482 
Minibatch iteration 951/343000: mean batch inertia: 0.156059, ewa inertia: 0.144489 
Minibatch iteration 952/343000: mean batch inertia: 0.144585, ewa inertia: 0.144489 
Minibatch iteration 953/343000: mean batch inertia: 0.117012, ewa inertia: 0.144473 
Minibatch iteration 954/343000: mean batch inertia: 0.152472, ewa inertia: 0.144478 
Minibatch iteration 955/343000: mean batch inertia: 0.137105, ewa inertia: 0.144474 
Minibatch iteration 956/343000: mean batch inertia: 0.135776, ewa inertia: 0.144469 
Minibatch iteration 957/343000: mean batch inertia: 0.142786, ewa

Minibatch iteration 1087/343000: mean batch inertia: 0.131849, ewa inertia: 0.143771 
Minibatch iteration 1088/343000: mean batch inertia: 0.119962, ewa inertia: 0.143757 
Minibatch iteration 1089/343000: mean batch inertia: 0.121721, ewa inertia: 0.143744 
Minibatch iteration 1090/343000: mean batch inertia: 0.127913, ewa inertia: 0.143735 
Minibatch iteration 1091/343000: mean batch inertia: 0.148093, ewa inertia: 0.143738 
Minibatch iteration 1092/343000: mean batch inertia: 0.140281, ewa inertia: 0.143736 
Minibatch iteration 1093/343000: mean batch inertia: 0.115814, ewa inertia: 0.143719 
Minibatch iteration 1094/343000: mean batch inertia: 0.130644, ewa inertia: 0.143712 
Minibatch iteration 1095/343000: mean batch inertia: 0.161120, ewa inertia: 0.143722 
Minibatch iteration 1096/343000: mean batch inertia: 0.134749, ewa inertia: 0.143717 
Minibatch iteration 1097/343000: mean batch inertia: 0.139893, ewa inertia: 0.143714 
Minibatch iteration 1098/343000: mean batch inertia: 0

Minibatch iteration 1215/343000: mean batch inertia: 0.131314, ewa inertia: 0.143432 
Minibatch iteration 1216/343000: mean batch inertia: 0.138319, ewa inertia: 0.143429 
Minibatch iteration 1217/343000: mean batch inertia: 0.116752, ewa inertia: 0.143413 
Minibatch iteration 1218/343000: mean batch inertia: 0.164181, ewa inertia: 0.143425 
Minibatch iteration 1219/343000: mean batch inertia: 0.169882, ewa inertia: 0.143441 
Minibatch iteration 1220/343000: mean batch inertia: 0.133902, ewa inertia: 0.143435 
Minibatch iteration 1221/343000: mean batch inertia: 0.139541, ewa inertia: 0.143433 
Minibatch iteration 1222/343000: mean batch inertia: 0.127176, ewa inertia: 0.143423 
Minibatch iteration 1223/343000: mean batch inertia: 0.148055, ewa inertia: 0.143426 
Minibatch iteration 1224/343000: mean batch inertia: 0.126614, ewa inertia: 0.143416 
Minibatch iteration 1225/343000: mean batch inertia: 0.136363, ewa inertia: 0.143412 
Minibatch iteration 1226/343000: mean batch inertia: 0

Minibatch iteration 1339/343000: mean batch inertia: 0.139069, ewa inertia: 0.143025 
Minibatch iteration 1340/343000: mean batch inertia: 0.127649, ewa inertia: 0.143016 
Minibatch iteration 1341/343000: mean batch inertia: 0.130288, ewa inertia: 0.143009 
Minibatch iteration 1342/343000: mean batch inertia: 0.129759, ewa inertia: 0.143001 
Minibatch iteration 1343/343000: mean batch inertia: 0.161087, ewa inertia: 0.143012 
Minibatch iteration 1344/343000: mean batch inertia: 0.134177, ewa inertia: 0.143007 
Minibatch iteration 1345/343000: mean batch inertia: 0.118293, ewa inertia: 0.142992 
Minibatch iteration 1346/343000: mean batch inertia: 0.124982, ewa inertia: 0.142982 
Minibatch iteration 1347/343000: mean batch inertia: 0.124144, ewa inertia: 0.142971 
Minibatch iteration 1348/343000: mean batch inertia: 0.110200, ewa inertia: 0.142952 
Minibatch iteration 1349/343000: mean batch inertia: 0.135293, ewa inertia: 0.142947 
Minibatch iteration 1350/343000: mean batch inertia: 0

Minibatch iteration 1478/343000: mean batch inertia: 0.130580, ewa inertia: 0.142444 
Minibatch iteration 1479/343000: mean batch inertia: 0.130413, ewa inertia: 0.142437 
Minibatch iteration 1480/343000: mean batch inertia: 0.137135, ewa inertia: 0.142434 
Minibatch iteration 1481/343000: mean batch inertia: 0.130981, ewa inertia: 0.142427 
Minibatch iteration 1482/343000: mean batch inertia: 0.140815, ewa inertia: 0.142426 
Minibatch iteration 1483/343000: mean batch inertia: 0.173008, ewa inertia: 0.142444 
Minibatch iteration 1484/343000: mean batch inertia: 0.120988, ewa inertia: 0.142431 
Minibatch iteration 1485/343000: mean batch inertia: 0.151396, ewa inertia: 0.142436 
Minibatch iteration 1486/343000: mean batch inertia: 0.145560, ewa inertia: 0.142438 
Minibatch iteration 1487/343000: mean batch inertia: 0.112174, ewa inertia: 0.142421 
Minibatch iteration 1488/343000: mean batch inertia: 0.130079, ewa inertia: 0.142413 
Minibatch iteration 1489/343000: mean batch inertia: 0

Minibatch iteration 1604/343000: mean batch inertia: 0.148333, ewa inertia: 0.142046 
Minibatch iteration 1605/343000: mean batch inertia: 0.143509, ewa inertia: 0.142047 
Minibatch iteration 1606/343000: mean batch inertia: 0.115336, ewa inertia: 0.142032 
Minibatch iteration 1607/343000: mean batch inertia: 0.152627, ewa inertia: 0.142038 
Minibatch iteration 1608/343000: mean batch inertia: 0.125345, ewa inertia: 0.142028 
Minibatch iteration 1609/343000: mean batch inertia: 0.133172, ewa inertia: 0.142023 
Minibatch iteration 1610/343000: mean batch inertia: 0.128232, ewa inertia: 0.142015 
Minibatch iteration 1611/343000: mean batch inertia: 0.132067, ewa inertia: 0.142009 
Minibatch iteration 1612/343000: mean batch inertia: 0.130437, ewa inertia: 0.142002 
Minibatch iteration 1613/343000: mean batch inertia: 0.115158, ewa inertia: 0.141987 
Minibatch iteration 1614/343000: mean batch inertia: 0.169518, ewa inertia: 0.142003 
Minibatch iteration 1615/343000: mean batch inertia: 0

Minibatch iteration 1730/343000: mean batch inertia: 0.128652, ewa inertia: 0.141670 
Minibatch iteration 1731/343000: mean batch inertia: 0.155893, ewa inertia: 0.141679 
Minibatch iteration 1732/343000: mean batch inertia: 0.131488, ewa inertia: 0.141673 
Minibatch iteration 1733/343000: mean batch inertia: 0.132901, ewa inertia: 0.141668 
Minibatch iteration 1734/343000: mean batch inertia: 0.131916, ewa inertia: 0.141662 
Minibatch iteration 1735/343000: mean batch inertia: 0.136090, ewa inertia: 0.141659 
Minibatch iteration 1736/343000: mean batch inertia: 0.135906, ewa inertia: 0.141655 
Minibatch iteration 1737/343000: mean batch inertia: 0.135082, ewa inertia: 0.141652 
Minibatch iteration 1738/343000: mean batch inertia: 0.133312, ewa inertia: 0.141647 
Minibatch iteration 1739/343000: mean batch inertia: 0.146854, ewa inertia: 0.141650 
Minibatch iteration 1740/343000: mean batch inertia: 0.204641, ewa inertia: 0.141686 
Minibatch iteration 1741/343000: mean batch inertia: 0

Minibatch iteration 1858/343000: mean batch inertia: 0.131871, ewa inertia: 0.141398 
Minibatch iteration 1859/343000: mean batch inertia: 0.114437, ewa inertia: 0.141383 
Minibatch iteration 1860/343000: mean batch inertia: 0.114497, ewa inertia: 0.141367 
Minibatch iteration 1861/343000: mean batch inertia: 0.140247, ewa inertia: 0.141366 
Minibatch iteration 1862/343000: mean batch inertia: 0.158029, ewa inertia: 0.141376 
Minibatch iteration 1863/343000: mean batch inertia: 0.135464, ewa inertia: 0.141373 
Minibatch iteration 1864/343000: mean batch inertia: 0.133718, ewa inertia: 0.141368 
Minibatch iteration 1865/343000: mean batch inertia: 0.127982, ewa inertia: 0.141360 
Minibatch iteration 1866/343000: mean batch inertia: 0.133192, ewa inertia: 0.141355 
Minibatch iteration 1867/343000: mean batch inertia: 0.114005, ewa inertia: 0.141340 
Minibatch iteration 1868/343000: mean batch inertia: 0.155425, ewa inertia: 0.141348 
Minibatch iteration 1869/343000: mean batch inertia: 0

Minibatch iteration 1983/343000: mean batch inertia: 0.125205, ewa inertia: 0.141046 
Minibatch iteration 1984/343000: mean batch inertia: 0.143515, ewa inertia: 0.141048 
Minibatch iteration 1985/343000: mean batch inertia: 0.135001, ewa inertia: 0.141044 
Minibatch iteration 1986/343000: mean batch inertia: 0.116793, ewa inertia: 0.141030 
Minibatch iteration 1987/343000: mean batch inertia: 0.145230, ewa inertia: 0.141032 
Minibatch iteration 1988/343000: mean batch inertia: 0.153191, ewa inertia: 0.141039 
Minibatch iteration 1989/343000: mean batch inertia: 0.135000, ewa inertia: 0.141036 
Minibatch iteration 1990/343000: mean batch inertia: 0.138714, ewa inertia: 0.141035 
Minibatch iteration 1991/343000: mean batch inertia: 0.138861, ewa inertia: 0.141033 
Minibatch iteration 1992/343000: mean batch inertia: 0.123965, ewa inertia: 0.141023 
Minibatch iteration 1993/343000: mean batch inertia: 0.120094, ewa inertia: 0.141011 
Minibatch iteration 1994/343000: mean batch inertia: 0

Minibatch iteration 2119/343000: mean batch inertia: 0.158089, ewa inertia: 0.140565 
Minibatch iteration 2120/343000: mean batch inertia: 0.156459, ewa inertia: 0.140574 
Minibatch iteration 2121/343000: mean batch inertia: 0.145508, ewa inertia: 0.140577 
Minibatch iteration 2122/343000: mean batch inertia: 0.121983, ewa inertia: 0.140566 
Minibatch iteration 2123/343000: mean batch inertia: 0.133167, ewa inertia: 0.140562 
Minibatch iteration 2124/343000: mean batch inertia: 0.125354, ewa inertia: 0.140553 
Minibatch iteration 2125/343000: mean batch inertia: 0.145917, ewa inertia: 0.140556 
Minibatch iteration 2126/343000: mean batch inertia: 0.134966, ewa inertia: 0.140553 
Minibatch iteration 2127/343000: mean batch inertia: 0.137562, ewa inertia: 0.140551 
Minibatch iteration 2128/343000: mean batch inertia: 0.123123, ewa inertia: 0.140541 
Minibatch iteration 2129/343000: mean batch inertia: 0.112761, ewa inertia: 0.140525 
Minibatch iteration 2130/343000: mean batch inertia: 0

Minibatch iteration 2238/343000: mean batch inertia: 0.194741, ewa inertia: 0.140455 
Minibatch iteration 2239/343000: mean batch inertia: 0.139356, ewa inertia: 0.140454 
Minibatch iteration 2240/343000: mean batch inertia: 0.121314, ewa inertia: 0.140443 
Minibatch iteration 2241/343000: mean batch inertia: 0.130972, ewa inertia: 0.140438 
Minibatch iteration 2242/343000: mean batch inertia: 0.160983, ewa inertia: 0.140450 
Minibatch iteration 2243/343000: mean batch inertia: 0.139262, ewa inertia: 0.140449 
Minibatch iteration 2244/343000: mean batch inertia: 0.148194, ewa inertia: 0.140453 
Minibatch iteration 2245/343000: mean batch inertia: 0.177173, ewa inertia: 0.140475 
Minibatch iteration 2246/343000: mean batch inertia: 0.162525, ewa inertia: 0.140488 
Minibatch iteration 2247/343000: mean batch inertia: 0.139647, ewa inertia: 0.140487 
Minibatch iteration 2248/343000: mean batch inertia: 0.126597, ewa inertia: 0.140479 
Minibatch iteration 2249/343000: mean batch inertia: 0

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 13.2min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed: 23.2min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 29.6min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.4min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  7.1min finished


Fitting 5 folds for each of 11 candidates, totalling 55 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  55 out of  55 | elapsed:  1.6min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  7.4min finished


In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 50:
        kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 50), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##   ChlorineConcentration: Chlorine concentration data set 
(https://rdrr.io/github/moviedo5/fda.tsc/man/ChlorineConcentration.html)

Data Set Information:

The data set consists of 166 nodes (pipe junctions) and measurement of the Chlorine concentration level at all these nodes during 15 days (one measurement for every 5 minutes, a total of 4310 time ticks). 

This dataset was defined in a PhD thesis by Lei Li (Carnegie Mellon University). It was produced by EPANET that models the hydraulic and water quality behavior of water distribution piping systems. EPANET can track, in a given water network, the water level and pressure in each tank, the water flow in the pipes and the concentration of a chemical species (Chlorine in this case) throughout the network within a simulated duration. 

Attribute Information:



- class: Corresponding class level of “ChlorineConcentration” curves with 3 classes with 1000, 1000 and 2307 observations per class respectively.

- sample:Factor variable. In TSC database, the first 467 values (sample=train) are used for training sample and the rest of 3840 (sample=test) for testing.

In [None]:
chlo = np.load(r"E:\multivariate_time_series_dataset\numpy\CHLO.npz")
X_train = np.empty(shape=(467, ), dtype=object)
y_train = np.empty(shape=(467, ), dtype=object)
X_test = np.empty(shape=(3840, ), dtype=object)
y_test = np.empty(shape=(3840, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(chlo['X'], chlo['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(chlo['Xte'], chlo['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(chlo['X'], chlo['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(chlo['Xte'], chlo['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 1,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_chlo.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_chlo.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 3,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_chlo_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_chlo_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

In [None]:
plt.plot(np.concatenate(X_train[:2]))

##  CMU Graphics Lab Motion Capture Database
(http://mocap.cs.cmu.edu/)

Data Set Information:

The mocap lab in the basement of Wean contains 12 Vicon infrared MX-40 cameras, each of which is capable of recording 120 Hz with images of 4 megapixel resolution. The cameras are placed around a rectangular area, of approximately 3m x 8m, in the center of the room. Only motions that take place in this rectangle can be captured. If motion of human hands is being captured, more detail is required and the cameras are moved closer to capture a smaller space with higher resolution.

To capture something, small grey markers are placed on it. Humans wear a black jumpsuit and have 41 markers taped on. The Vicon cameras see the markers in infra-red. The images that the various cameras pick up are triangulated to get 3D data.

Attribute Information:

 ViconIQ requires user interaction to start off the skeleton fitting. To process a capture, a segment of motion is loaded onscreen as a point cloud of markers. The user goes through and specifies the correspondence between these markers and the markers in the .vst, e.g. "this white dot is the clavicle marker". From this data ViconIQ can fit a skeleton and determine the skeleton's limb lengths. From here on out the labeling process is automatic. ViconIQ can load up each motion clip and automatically perform a "Kinematic Fit" of the skeleton to the markers. During this time the software uses its knowledge of the skeleton to correct captured marker aberrations. The user can also fix things up by editing the joint rotation/translation graphs directly. 

In [None]:
cmu = np.load(r"E:\multivariate_time_series_dataset\numpy\CMU.npz")
X_train = np.empty(shape=(29, ), dtype=object)
y_train = np.empty(shape=(29, ), dtype=object)
X_test = np.empty(shape=(29, ), dtype=object)
y_test = np.empty(shape=(29, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(cmu['X'], cmu['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(cmu['Xte'], cmu['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(cmu['X'], cmu['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(cmu['Xte'], cmu['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_cmu.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_cmu.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_cmu_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_cmu_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  ECG dataset

Data Set Information:

TODO

Attribute Information:

TODO

In [None]:
ecg = np.load(r"E:\multivariate_time_series_dataset\numpy\ECG.npz")
X_train = np.empty(shape=(100, ), dtype=object)
y_train = np.empty(shape=(100, ), dtype=object)
X_test = np.empty(shape=(100, ), dtype=object)
y_test = np.empty(shape=(100, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(ecg['X'], ecg['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(ecg['Xte'], ecg['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(ecg['X'], ecg['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(ecg['Xte'], ecg['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_ecg.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_ecg.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_ecg_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_ecg_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##   Japanese Vowels Data Set 

Data Set Information:

The data was collected for examining our newly developed classifier for multidimensional curves (multidimensional time series). Nine male speakers uttered two Japanese vowels /ae/ successively. For each utterance, with the analysis parameters described below, we applied 12-degree linear prediction analysis to it to obtain a discrete-time series with 12 LPC cepstrum coefficients. This means that one utterance by a speaker forms a time series whose length is in the range 7-29 and each point of a time series is of 12 features (12 coefficients).

The number of the time series is 640 in total. We used one set of 270 time series for training and the other set of 370 time series for testing.

- Number of Instances (Utterances):
    - Training: 270 (30 utterances by 9 speakers. See file 'size_ae.train'.)
    - Testing: 370 (24-88 utterances by the same 9 speakers in different opportunities. See file 'size_ae.test'.)

- Length of Time Series:
     - 7 - 29 depending on utterances

- Analysis parameters:
    - Sampling rate : 10kHz
    - Frame length : 25.6 ms
    - Shift length : 6.4ms
    - Degree of LPC coefficients : 12 

Attribute Information:

12 Real Attributes

In [5]:
jpvow = np.load(r"E:\multivariate_time_series_dataset\numpy\JPVOW.npz")
X_train = np.empty(shape=(270, ), dtype=object)
y_train = np.empty(shape=(270, ), dtype=object)
X_test = np.empty(shape=(370, ), dtype=object)
y_test = np.empty(shape=(370, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(jpvow['X'], jpvow['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(jpvow['Xte'], jpvow['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(jpvow['X'], jpvow['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(jpvow['Xte'], jpvow['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_jpvow.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_jpvow.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [6]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 800 - 50), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 800,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_jpvow_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_jpvow_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 860.398663
Init 2/200 with method: k-means++
Inertia for init 2/200: 894.565307
Init 3/200 with method: k-means++
Inertia for init 3/200: 858.110492
Init 4/200 with method: k-means++
Inertia for init 4/200: 881.408851
Init 5/200 with method: k-means++
Inertia for init 5/200: 901.060799
Init 6/200 with method: k-means++
Inertia for init 6/200: 884.335380
Init 7/200 with method: k-means++
Inertia for init 7/200: 843.834349
Init 8/200 with method: k-means++
Inertia for init 8/200: 865.322659
Init 9/200 with method: k-means++
Inertia for init 9/200: 870.379731
Init 10/200 with method: k-means++
Inertia for init 10/200: 851.653824
Init 11/200 with method: k-means++
Inertia for init 11/200: 873.335226
Init 12/200 with method: k-means++
Inertia for init 12/200: 953.702927
Init 13/200 with method: k-means++
Inertia for init 13/200: 835.524590
Init 14/200 with method: k-means++
Inertia for init 14/200: 877.564724
Init 15/200 with method:

Inertia for init 119/200: 820.345957
Init 120/200 with method: k-means++
Inertia for init 120/200: 851.579517
Init 121/200 with method: k-means++
Inertia for init 121/200: 888.331787
Init 122/200 with method: k-means++
Inertia for init 122/200: 867.889879
Init 123/200 with method: k-means++
Inertia for init 123/200: 872.307836
Init 124/200 with method: k-means++
Inertia for init 124/200: 914.978777
Init 125/200 with method: k-means++
Inertia for init 125/200: 841.886001
Init 126/200 with method: k-means++
Inertia for init 126/200: 850.907370
Init 127/200 with method: k-means++
Inertia for init 127/200: 901.474166
Init 128/200 with method: k-means++
Inertia for init 128/200: 916.032081
Init 129/200 with method: k-means++
Inertia for init 129/200: 850.171102
Init 130/200 with method: k-means++
Inertia for init 130/200: 903.317707
Init 131/200 with method: k-means++
Inertia for init 131/200: 867.555077
Init 132/200 with method: k-means++
Inertia for init 132/200: 886.826780
Init 133/200 w

Minibatch iteration 75/10000: mean batch inertia: 3.366456, ewa inertia: 3.227180 
Minibatch iteration 76/10000: mean batch inertia: 3.307039, ewa inertia: 3.228783 
Minibatch iteration 77/10000: mean batch inertia: 3.061087, ewa inertia: 3.225417 
Minibatch iteration 78/10000: mean batch inertia: 2.808983, ewa inertia: 3.217056 
Minibatch iteration 79/10000: mean batch inertia: 2.994955, ewa inertia: 3.212597 
Minibatch iteration 80/10000: mean batch inertia: 3.478489, ewa inertia: 3.217935 
Minibatch iteration 81/10000: mean batch inertia: 2.932988, ewa inertia: 3.212215 
Minibatch iteration 82/10000: mean batch inertia: 3.123548, ewa inertia: 3.210434 
Minibatch iteration 83/10000: mean batch inertia: 2.971507, ewa inertia: 3.205638 
Minibatch iteration 84/10000: mean batch inertia: 3.158367, ewa inertia: 3.204689 
Minibatch iteration 85/10000: mean batch inertia: 3.099183, ewa inertia: 3.202571 
Minibatch iteration 86/10000: mean batch inertia: 3.070382, ewa inertia: 3.199917 
Mini

Minibatch iteration 195/10000: mean batch inertia: 3.319107, ewa inertia: 3.157721 
Minibatch iteration 196/10000: mean batch inertia: 3.050995, ewa inertia: 3.155579 
Minibatch iteration 197/10000: mean batch inertia: 3.130693, ewa inertia: 3.155079 
Minibatch iteration 198/10000: mean batch inertia: 3.020479, ewa inertia: 3.152377 
Minibatch iteration 199/10000: mean batch inertia: 3.192302, ewa inertia: 3.153178 
Minibatch iteration 200/10000: mean batch inertia: 3.178430, ewa inertia: 3.153685 
Minibatch iteration 201/10000: mean batch inertia: 3.447538, ewa inertia: 3.159585 
Minibatch iteration 202/10000: mean batch inertia: 3.365375, ewa inertia: 3.163716 
Minibatch iteration 203/10000: mean batch inertia: 2.889752, ewa inertia: 3.158216 
Minibatch iteration 204/10000: mean batch inertia: 3.095601, ewa inertia: 3.156959 
Minibatch iteration 205/10000: mean batch inertia: 3.014193, ewa inertia: 3.154093 
Minibatch iteration 206/10000: mean batch inertia: 3.180212, ewa inertia: 3.

Minibatch iteration 321/10000: mean batch inertia: 2.949595, ewa inertia: 3.140305 
Minibatch iteration 322/10000: mean batch inertia: 3.074342, ewa inertia: 3.138981 
Minibatch iteration 323/10000: mean batch inertia: 3.136428, ewa inertia: 3.138930 
Minibatch iteration 324/10000: mean batch inertia: 3.120585, ewa inertia: 3.138562 
Minibatch iteration 325/10000: mean batch inertia: 3.205251, ewa inertia: 3.139900 
Minibatch iteration 326/10000: mean batch inertia: 3.322499, ewa inertia: 3.143566 
Minibatch iteration 327/10000: mean batch inertia: 2.987179, ewa inertia: 3.140427 
Minibatch iteration 328/10000: mean batch inertia: 3.174245, ewa inertia: 3.141106 
Minibatch iteration 329/10000: mean batch inertia: 3.138021, ewa inertia: 3.141044 
Minibatch iteration 330/10000: mean batch inertia: 3.349743, ewa inertia: 3.145234 
Minibatch iteration 331/10000: mean batch inertia: 3.615871, ewa inertia: 3.154682 
Minibatch iteration 332/10000: mean batch inertia: 2.931083, ewa inertia: 3.

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   36.9s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:  6.6min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed: 11.9min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 15.1min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   38.3s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  4.1min finished


Fitting 5 folds for each of 11 candidates, totalling 55 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   37.0s
[Parallel(n_jobs=-1)]: Done  55 out of  55 | elapsed:   54.9s finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   39.8s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  4.1min finished


In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 50:
        kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 50), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##   Kicks vs. Punch Dataset
(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7813879/)

Data Set Information:

TODO

Attribute Information:

TODO

In [7]:
kick = np.load(r"E:\multivariate_time_series_dataset\numpy\KICK.npz")
X_train = np.empty(shape=(16, ), dtype=object)
y_train = np.empty(shape=(16, ), dtype=object)
X_test = np.empty(shape=(10, ), dtype=object)
y_test = np.empty(shape=(10, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(kick['X'], kick['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(kick['Xte'], kick['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(kick['X'], kick['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(kick['Xte'], kick['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_kick.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_kick.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [9]:
kmeans = MiniBatchKMeans(n_clusters=100, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 800 - 100), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 800,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_kick_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_kick_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 3190.035686
Init 2/200 with method: k-means++
Inertia for init 2/200: 2897.591869
Init 3/200 with method: k-means++
Inertia for init 3/200: 3140.507830
Init 4/200 with method: k-means++
Inertia for init 4/200: 3131.750272
Init 5/200 with method: k-means++
Inertia for init 5/200: 3147.290125
Init 6/200 with method: k-means++
Inertia for init 6/200: 2954.979400
Init 7/200 with method: k-means++
Inertia for init 7/200: 3162.499527
Init 8/200 with method: k-means++
Inertia for init 8/200: 3195.411027
Init 9/200 with method: k-means++
Inertia for init 9/200: 3142.987939
Init 10/200 with method: k-means++
Inertia for init 10/200: 2832.706655
Init 11/200 with method: k-means++
Inertia for init 11/200: 3134.010328
Init 12/200 with method: k-means++
Inertia for init 12/200: 3244.864073
Init 13/200 with method: k-means++
Inertia for init 13/200: 3264.206035
Init 14/200 with method: k-means++
Inertia for init 14/200: 3140.414020
Init 15/20

Inertia for init 120/200: 3072.840758
Init 121/200 with method: k-means++
Inertia for init 121/200: 3408.095228
Init 122/200 with method: k-means++
Inertia for init 122/200: 3221.030872
Init 123/200 with method: k-means++
Inertia for init 123/200: 3248.501583
Init 124/200 with method: k-means++
Inertia for init 124/200: 2943.776979
Init 125/200 with method: k-means++
Inertia for init 125/200: 3029.565691
Init 126/200 with method: k-means++
Inertia for init 126/200: 2668.838247
Init 127/200 with method: k-means++
Inertia for init 127/200: 3078.536644
Init 128/200 with method: k-means++
Inertia for init 128/200: 3058.100847
Init 129/200 with method: k-means++
Inertia for init 129/200: 3245.700994
Init 130/200 with method: k-means++
Inertia for init 130/200: 2910.889365
Init 131/200 with method: k-means++
Inertia for init 131/200: 2890.923917
Init 132/200 with method: k-means++
Inertia for init 132/200: 3007.067895
Init 133/200 with method: k-means++
Inertia for init 133/200: 3234.454716


Minibatch iteration 90/11100: mean batch inertia: 13.242025, ewa inertia: 13.621701 
Minibatch iteration 91/11100: mean batch inertia: 12.082249, ewa inertia: 13.593953 
Minibatch iteration 92/11100: mean batch inertia: 12.619724, ewa inertia: 13.576393 
Minibatch iteration 93/11100: mean batch inertia: 11.897750, ewa inertia: 13.546136 
Minibatch iteration 94/11100: mean batch inertia: 11.737276, ewa inertia: 13.513533 
Minibatch iteration 95/11100: mean batch inertia: 13.198302, ewa inertia: 13.507851 
Minibatch iteration 96/11100: mean batch inertia: 13.220015, ewa inertia: 13.502663 
Minibatch iteration 97/11100: mean batch inertia: 12.407492, ewa inertia: 13.482923 
Minibatch iteration 98/11100: mean batch inertia: 12.771550, ewa inertia: 13.470101 
Minibatch iteration 99/11100: mean batch inertia: 12.183481, ewa inertia: 13.446910 
Minibatch iteration 100/11100: mean batch inertia: 13.691200, ewa inertia: 13.451313 
Minibatch iteration 101/11100: mean batch inertia: 11.589367, ew

Minibatch iteration 247/11100: mean batch inertia: 12.119299, ewa inertia: 12.277944 
Minibatch iteration 248/11100: mean batch inertia: 13.723644, ewa inertia: 12.304002 
Minibatch iteration 249/11100: mean batch inertia: 11.770500, ewa inertia: 12.294386 
Minibatch iteration 250/11100: mean batch inertia: 12.040336, ewa inertia: 12.289807 
Minibatch iteration 251/11100: mean batch inertia: 11.794632, ewa inertia: 12.280882 
Minibatch iteration 252/11100: mean batch inertia: 11.599440, ewa inertia: 12.268599 
Minibatch iteration 253/11100: mean batch inertia: 12.028069, ewa inertia: 12.264264 
Minibatch iteration 254/11100: mean batch inertia: 11.722001, ewa inertia: 12.254489 
Minibatch iteration 255/11100: mean batch inertia: 11.837435, ewa inertia: 12.246972 
Minibatch iteration 256/11100: mean batch inertia: 11.816186, ewa inertia: 12.239208 
Minibatch iteration 257/11100: mean batch inertia: 12.918125, ewa inertia: 12.251445 
Minibatch iteration 258/11100: mean batch inertia: 11.

Minibatch iteration 402/11100: mean batch inertia: 14.297543, ewa inertia: 12.035487 
Minibatch iteration 403/11100: mean batch inertia: 12.675439, ewa inertia: 12.047021 
Minibatch iteration 404/11100: mean batch inertia: 11.321561, ewa inertia: 12.033945 
Minibatch iteration 405/11100: mean batch inertia: 12.915566, ewa inertia: 12.049836 
Minibatch iteration 406/11100: mean batch inertia: 12.213516, ewa inertia: 12.052786 
Minibatch iteration 407/11100: mean batch inertia: 12.449913, ewa inertia: 12.059944 
Minibatch iteration 408/11100: mean batch inertia: 12.539002, ewa inertia: 12.068579 
Minibatch iteration 409/11100: mean batch inertia: 12.114381, ewa inertia: 12.069405 
Minibatch iteration 410/11100: mean batch inertia: 12.297153, ewa inertia: 12.073510 
Minibatch iteration 411/11100: mean batch inertia: 11.816078, ewa inertia: 12.068870 
Minibatch iteration 412/11100: mean batch inertia: 11.643104, ewa inertia: 12.061195 
Minibatch iteration 413/11100: mean batch inertia: 12.

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   13.0s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   56.7s
[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  2.0min finished


Fitting 2 folds for each of 50 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   30.6s finished


Fitting 2 folds for each of 11 candidates, totalling 22 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  22 out of  22 | elapsed:    6.6s finished


Fitting 2 folds for each of 50 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   11.4s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   30.2s finished


In [None]:
from sklearn.decomposition import PCA


pca_dataset = PCA(whiten=True)
pca_dataset.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
X_decomposed = pca_dataset.transform(np.concatenate(np.concatenate((X_train, X_test))))

fig, ax11 = plt.subplots()
ax11.bar(range(1, np.concatenate(np.concatenate((X_train, X_test))).shape[1]+1), 100*pca_dataset.explained_variance_ratio_)
ax11.set_xlabel("Number of Components")
ax11.set_ylabel("Explained Variance in \%")

ax12 = ax11.twinx()
ax12.plot(range(1, np.concatenate(np.concatenate((X_train, X_test))).shape[1]+1), 100*np.cumsum(pca_dataset.explained_variance_ratio_), '-ro', alpha=0.5)
ax12.axhline(y=95, ls='-', c='g')
ax12.set_ylabel("Accumulated Variance in \%")

width = 3.487
height =width / 1.618
fig.set_size_inches(width, height)
fig.subplots_adjust(wspace=1.5)
# plt.savefig('ParetoChart_cleaned.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
plt.figure()
if X_decomposed.shape[1] > 2:
    ax = plt.axes(projection='3d')
    plt.scatter(X_decomposed[:, 0], X_decomposed[:, 1], X_decomposed[:, 2], cmap=plt.cm.RdBu, rasterized=True, c=kmeans.labels_)
    ax.set_zlabel("Component 3")
else:
    ax = plt.axes()
    plt.scatter(X_decomposed[:, 0], X_decomposed[:, 1], cmap=plt.cm.RdBu, rasterized=True, c=kmeans.labels_)
ax.set_xlabel("Component 1")
ax.set_ylabel("Component 2")
plt.tight_layout()

width = 3.487
height =width / 1.618
fig.set_size_inches(width, height)
fig.subplots_adjust(wspace=1.5)
# plt.savefig('PCA_cleaned.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 100:
        kmeans = MiniBatchKMeans(n_clusters=100, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 100), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##    Libras Movement Data Set
(https://archive.ics.uci.edu/ml/datasets/Libras+Movement)

Data Set Information:

The dataset (movement_libras) contains 15 classes of 24 instances each, where each class references to a hand movement type in LIBRAS.

In the video pre-processing, a time normalization is carried out selecting 45 frames from each video, in according
to an uniform distribution. In each frame, the centroid pixels of the segmented objects (the hand) are found, which
compose the discrete version of the curve F with 45 points. All curves are normalized in the unitary space.

In order to prepare these movements to be analysed by algorithms, we have carried out a mapping operation, that is, each
curve F is mapped in a representation with 90 features, with representing the coordinates of movement.

Some sub-datasets are offered in order to support comparisons of results. 

Attribute Information:

90 numeric (double) and 1 for the class (integer)

In [14]:
lib = np.load(r"E:\multivariate_time_series_dataset\numpy\LIB.npz")
X_train = np.empty(shape=(180, ), dtype=object)
y_train = np.empty(shape=(180, ), dtype=object)
X_test = np.empty(shape=(180, ), dtype=object)
y_test = np.empty(shape=(180, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(lib['X'], lib['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(lib['Xte'], lib['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(lib['X'], lib['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(lib['Xte'], lib['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_lib.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_lib.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN (TODO: RENAME and SPARSE)

In [18]:
kmeans = MiniBatchKMeans(n_clusters=400, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 1600 - 400), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 1600,
                          'k_in': 2,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_lib_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_lib_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 0.681000
Init 2/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 2/200: 0.444726
Init 3/200 with method: k-means++
Inertia for init 3/200: 0.681551
Init 4/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 4/200: 0.499404
Init 5/200 with method: k-means++
Inertia for init 5/200: 0.708743
Init 6/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 6/200: 0.462701
Init 7/200 with method: k-means++
Inertia for init 7/200: 0.674047
Init 8/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 8/200: 0.638982
Init 9/200 with method: k-means++
Inertia for init 9/200: 0.613428
Init 10/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 10/200: 0.558949
Init 11/200 with method: k-means++
Inertia for init 11/200: 0.544839
Init 12/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 12/200: 0.490703
Init 13/200 with method: k-means++
Inertia for init 13/200: 0.668385
Init 14/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 14/200: 0.608963
Init 15/200 with method: k-means++
Inertia for init 15/200: 0.567672
Init 16/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 16/200: 0.483430
Init 17/200 with method: k-means++
Inertia for init 17/200: 0.587640
Init 18/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 18/200: 0.626838
Init 19/200 with method: k-means++
Inertia for init 19/200: 0.553976
Init 20/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 20/200: 0.468226
Init 21/200 with method: k-means++
Inertia for init 21/200: 0.645505
Init 22/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 22/200: 0.447311
Init 23/200 with method: k-means++
Inertia for init 23/200: 0.617592
Init 24/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 24/200: 0.658698
Init 25/200 with method: k-means++
Inertia for init 25/200: 0.480082
Init 26/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 26/200: 0.577957
Init 27/200 with method: k-means++
Inertia for init 27/200: 0.488891
Init 28/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 28/200: 0.565126
Init 29/200 with method: k-means++
Inertia for init 29/200: 0.630727
Init 30/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 30/200: 0.633580
Init 31/200 with method: k-means++
Inertia for init 31/200: 0.629642
Init 32/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 32/200: 0.488840
Init 33/200 with method: k-means++
Inertia for init 33/200: 0.554234
Init 34/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 34/200: 0.693665
Init 35/200 with method: k-means++
Inertia for init 35/200: 0.729660
Init 36/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 36/200: 0.594826
Init 37/200 with method: k-means++
Inertia for init 37/200: 0.605258
Init 38/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 38/200: 0.576362
Init 39/200 with method: k-means++
Inertia for init 39/200: 0.729565
Init 40/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 40/200: 0.671186
Init 41/200 with method: k-means++
Inertia for init 41/200: 0.845534
Init 42/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 42/200: 0.537730
Init 43/200 with method: k-means++
Inertia for init 43/200: 0.684704
Init 44/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 44/200: 0.660771
Init 45/200 with method: k-means++
Inertia for init 45/200: 0.631476
Init 46/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 46/200: 0.533574
Init 47/200 with method: k-means++
Inertia for init 47/200: 0.617778
Init 48/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 48/200: 0.535320
Init 49/200 with method: k-means++
Inertia for init 49/200: 0.598103
Init 50/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 50/200: 0.450130
Init 51/200 with method: k-means++
Inertia for init 51/200: 0.472731
Init 52/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 52/200: 0.629147
Init 53/200 with method: k-means++
Inertia for init 53/200: 0.663381
Init 54/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 54/200: 0.607672
Init 55/200 with method: k-means++
Inertia for init 55/200: 0.838114
Init 56/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 56/200: 0.595188
Init 57/200 with method: k-means++
Inertia for init 57/200: 0.521147
Init 58/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 58/200: 0.666994
Init 59/200 with method: k-means++
Inertia for init 59/200: 0.782700
Init 60/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 60/200: 0.628261
Init 61/200 with method: k-means++
Inertia for init 61/200: 0.490357
Init 62/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 62/200: 0.729813
Init 63/200 with method: k-means++
Inertia for init 63/200: 0.627140
Init 64/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 64/200: 0.684377
Init 65/200 with method: k-means++
Inertia for init 65/200: 0.566233
Init 66/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 66/200: 0.761178
Init 67/200 with method: k-means++
Inertia for init 67/200: 0.584116
Init 68/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 68/200: 0.467475
Init 69/200 with method: k-means++
Inertia for init 69/200: 0.600817
Init 70/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 70/200: 0.645028
Init 71/200 with method: k-means++
Inertia for init 71/200: 0.699784
Init 72/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 72/200: 0.499151
Init 73/200 with method: k-means++
Inertia for init 73/200: 0.656621
Init 74/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 74/200: 0.626614
Init 75/200 with method: k-means++
Inertia for init 75/200: 0.727625
Init 76/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 76/200: 0.496331
Init 77/200 with method: k-means++
Inertia for init 77/200: 0.683296
Init 78/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 78/200: 0.686165
Init 79/200 with method: k-means++
Inertia for init 79/200: 0.538452
Init 80/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 80/200: 0.528837
Init 81/200 with method: k-means++
Inertia for init 81/200: 0.569990
Init 82/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 82/200: 0.800017
Init 83/200 with method: k-means++
Inertia for init 83/200: 0.694880
Init 84/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 84/200: 0.619931
Init 85/200 with method: k-means++
Inertia for init 85/200: 0.633289
Init 86/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 86/200: 0.627501
Init 87/200 with method: k-means++
Inertia for init 87/200: 0.624497
Init 88/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 88/200: 0.726570
Init 89/200 with method: k-means++
Inertia for init 89/200: 0.622171
Init 90/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 90/200: 0.474464
Init 91/200 with method: k-means++
Inertia for init 91/200: 0.731209
Init 92/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 92/200: 0.570173
Init 93/200 with method: k-means++
Inertia for init 93/200: 0.500080
Init 94/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 94/200: 0.726415
Init 95/200 with method: k-means++
Inertia for init 95/200: 0.525807
Init 96/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 96/200: 0.679123
Init 97/200 with method: k-means++
Inertia for init 97/200: 0.634138
Init 98/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 98/200: 0.709243
Init 99/200 with method: k-means++
Inertia for init 99/200: 0.590523
Init 100/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 100/200: 0.728881
Init 101/200 with method: k-means++
Inertia for init 101/200: 0.795328
Init 102/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 102/200: 0.559777
Init 103/200 with method: k-means++
Inertia for init 103/200: 0.677846
Init 104/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 104/200: 0.564875
Init 105/200 with method: k-means++
Inertia for init 105/200: 0.507514
Init 106/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 106/200: 0.638866
Init 107/200 with method: k-means++
Inertia for init 107/200: 0.581312
Init 108/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 108/200: 0.579132
Init 109/200 with method: k-means++
Inertia for init 109/200: 0.489853
Init 110/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 110/200: 0.569319
Init 111/200 with method: k-means++
Inertia for init 111/200: 0.561779
Init 112/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 112/200: 0.590991
Init 113/200 with method: k-means++
Inertia for init 113/200: 0.655290
Init 114/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 114/200: 0.563136
Init 115/200 with method: k-means++
Inertia for init 115/200: 0.655391
Init 116/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 116/200: 0.560461
Init 117/200 with method: k-means++
Inertia for init 117/200: 0.629053
Init 118/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 118/200: 0.556510
Init 119/200 with method: k-means++
Inertia for init 119/200: 0.673165
Init 120/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 120/200: 0.686324
Init 121/200 with method: k-means++
Inertia for init 121/200: 0.656603
Init 122/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 122/200: 0.652855
Init 123/200 with method: k-means++
Inertia for init 123/200: 0.882053
Init 124/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 124/200: 0.493575
Init 125/200 with method: k-means++
Inertia for init 125/200: 0.492811
Init 126/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 126/200: 0.667494
Init 127/200 with method: k-means++
Inertia for init 127/200: 0.427474
Init 128/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 128/200: 0.585988
Init 129/200 with method: k-means++
Inertia for init 129/200: 0.749873
Init 130/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 130/200: 0.579647
Init 131/200 with method: k-means++
Inertia for init 131/200: 0.569626
Init 132/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 132/200: 0.679163
Init 133/200 with method: k-means++
Inertia for init 133/200: 0.687634
Init 134/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 134/200: 0.632930
Init 135/200 with method: k-means++
Inertia for init 135/200: 0.577739
Init 136/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 136/200: 0.571282
Init 137/200 with method: k-means++
Inertia for init 137/200: 0.688302
Init 138/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 138/200: 0.549420
Init 139/200 with method: k-means++
Inertia for init 139/200: 0.571014
Init 140/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 140/200: 0.550047
Init 141/200 with method: k-means++
Inertia for init 141/200: 0.557854
Init 142/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 142/200: 0.604205
Init 143/200 with method: k-means++
Inertia for init 143/200: 0.647828
Init 144/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 144/200: 0.570168
Init 145/200 with method: k-means++
Inertia for init 145/200: 0.519700
Init 146/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 146/200: 0.670349
Init 147/200 with method: k-means++
Inertia for init 147/200: 0.599345
Init 148/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 148/200: 0.603522
Init 149/200 with method: k-means++
Inertia for init 149/200: 0.509155
Init 150/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 150/200: 0.587571
Init 151/200 with method: k-means++
Inertia for init 151/200: 0.576741
Init 152/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 152/200: 0.500661
Init 153/200 with method: k-means++
Inertia for init 153/200: 0.478765
Init 154/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 154/200: 0.606934
Init 155/200 with method: k-means++
Inertia for init 155/200: 0.711407
Init 156/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 156/200: 0.380512
Init 157/200 with method: k-means++
Inertia for init 157/200: 0.573739
Init 158/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 158/200: 0.520994
Init 159/200 with method: k-means++
Inertia for init 159/200: 0.507864
Init 160/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 160/200: 0.673248
Init 161/200 with method: k-means++
Inertia for init 161/200: 0.518271
Init 162/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 162/200: 0.538196
Init 163/200 with method: k-means++
Inertia for init 163/200: 0.549474
Init 164/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 164/200: 0.522275
Init 165/200 with method: k-means++
Inertia for init 165/200: 0.607302
Init 166/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 166/200: 0.616279
Init 167/200 with method: k-means++
Inertia for init 167/200: 0.579484
Init 168/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 168/200: 0.705967
Init 169/200 with method: k-means++
Inertia for init 169/200: 0.469897
Init 170/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 170/200: 0.479876
Init 171/200 with method: k-means++
Inertia for init 171/200: 0.640085
Init 172/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 172/200: 0.508324
Init 173/200 with method: k-means++
Inertia for init 173/200: 0.637070
Init 174/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 174/200: 0.696978
Init 175/200 with method: k-means++
Inertia for init 175/200: 0.612019
Init 176/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 176/200: 0.640094
Init 177/200 with method: k-means++
Inertia for init 177/200: 0.698814
Init 178/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 178/200: 0.588101
Init 179/200 with method: k-means++
Inertia for init 179/200: 0.648498
Init 180/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 180/200: 0.609654
Init 181/200 with method: k-means++
Inertia for init 181/200: 0.574548
Init 182/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 182/200: 0.528273
Init 183/200 with method: k-means++
Inertia for init 183/200: 0.656557
Init 184/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 184/200: 0.621554
Init 185/200 with method: k-means++
Inertia for init 185/200: 0.762951
Init 186/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 186/200: 0.638990
Init 187/200 with method: k-means++
Inertia for init 187/200: 0.420635
Init 188/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 188/200: 0.665866
Init 189/200 with method: k-means++
Inertia for init 189/200: 0.654803
Init 190/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 190/200: 0.464118
Init 191/200 with method: k-means++
Inertia for init 191/200: 0.671934
Init 192/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 192/200: 0.776571
Init 193/200 with method: k-means++
Inertia for init 193/200: 0.852457
Init 194/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 194/200: 0.573268
Init 195/200 with method: k-means++
Inertia for init 195/200: 0.769976
Init 196/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 196/200: 0.571200
Init 197/200 with method: k-means++
Inertia for init 197/200: 0.658356
Init 198/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 198/200: 0.604996
Init 199/200 with method: k-means++
Inertia for init 199/200: 0.501847
Init 200/200 with method: k-means++


  init_size=init_size)
  init_size=init_size)


Inertia for init 200/200: 0.650615
Minibatch iteration 1/16200: mean batch inertia: 0.014505, ewa inertia: 0.014505 
Minibatch iteration 2/16200: mean batch inertia: 0.009239, ewa inertia: 0.014440 
Minibatch iteration 3/16200: mean batch inertia: 0.006560, ewa inertia: 0.014343 
Minibatch iteration 4/16200: mean batch inertia: 0.009494, ewa inertia: 0.014283 
Minibatch iteration 5/16200: mean batch inertia: 0.008500, ewa inertia: 0.014212 
Minibatch iteration 6/16200: mean batch inertia: 0.006484, ewa inertia: 0.014117 
Minibatch iteration 7/16200: mean batch inertia: 0.010369, ewa inertia: 0.014070 
Minibatch iteration 8/16200: mean batch inertia: 0.008568, ewa inertia: 0.014002 
Minibatch iteration 9/16200: mean batch inertia: 0.007333, ewa inertia: 0.013920 
Minibatch iteration 10/16200: mean batch inertia: 0.007672, ewa inertia: 0.013843 
Minibatch iteration 11/16200: mean batch inertia: 0.006982, ewa inertia: 0.013758 
Minibatch iteration 12/16200: mean batch inertia: 0.007209, e

Minibatch iteration 148/16200: mean batch inertia: 0.009414, ewa inertia: 0.008377 
Minibatch iteration 149/16200: mean batch inertia: 0.005782, ewa inertia: 0.008345 
Minibatch iteration 150/16200: mean batch inertia: 0.007324, ewa inertia: 0.008332 
Minibatch iteration 151/16200: mean batch inertia: 0.007571, ewa inertia: 0.008323 
Minibatch iteration 152/16200: mean batch inertia: 0.006030, ewa inertia: 0.008295 
Minibatch iteration 153/16200: mean batch inertia: 0.004777, ewa inertia: 0.008251 
Minibatch iteration 154/16200: mean batch inertia: 0.005834, ewa inertia: 0.008221 
Minibatch iteration 155/16200: mean batch inertia: 0.005579, ewa inertia: 0.008189 
Minibatch iteration 156/16200: mean batch inertia: 0.007632, ewa inertia: 0.008182 
Minibatch iteration 157/16200: mean batch inertia: 0.006762, ewa inertia: 0.008164 
Minibatch iteration 158/16200: mean batch inertia: 0.006506, ewa inertia: 0.008144 
Minibatch iteration 159/16200: mean batch inertia: 0.007399, ewa inertia: 0.

Minibatch iteration 294/16200: mean batch inertia: 0.006137, ewa inertia: 0.006828 
Minibatch iteration 295/16200: mean batch inertia: 0.006638, ewa inertia: 0.006825 
Minibatch iteration 296/16200: mean batch inertia: 0.005413, ewa inertia: 0.006808 
Minibatch iteration 297/16200: mean batch inertia: 0.006026, ewa inertia: 0.006798 
Minibatch iteration 298/16200: mean batch inertia: 0.007039, ewa inertia: 0.006801 
Minibatch iteration 299/16200: mean batch inertia: 0.006680, ewa inertia: 0.006800 
Minibatch iteration 300/16200: mean batch inertia: 0.007065, ewa inertia: 0.006803 
Minibatch iteration 301/16200: mean batch inertia: 0.005768, ewa inertia: 0.006790 
Minibatch iteration 302/16200: mean batch inertia: 0.006323, ewa inertia: 0.006784 
Minibatch iteration 303/16200: mean batch inertia: 0.005168, ewa inertia: 0.006764 
Minibatch iteration 304/16200: mean batch inertia: 0.006388, ewa inertia: 0.006760 
Minibatch iteration 305/16200: mean batch inertia: 0.007298, ewa inertia: 0.

Minibatch iteration 439/16200: mean batch inertia: 0.006541, ewa inertia: 0.006467 
Minibatch iteration 440/16200: mean batch inertia: 0.007204, ewa inertia: 0.006476 
Minibatch iteration 441/16200: mean batch inertia: 0.004523, ewa inertia: 0.006452 
Minibatch iteration 442/16200: mean batch inertia: 0.008070, ewa inertia: 0.006472 
Minibatch iteration 443/16200: mean batch inertia: 0.007990, ewa inertia: 0.006491 
Minibatch iteration 444/16200: mean batch inertia: 0.006896, ewa inertia: 0.006496 
Minibatch iteration 445/16200: mean batch inertia: 0.005536, ewa inertia: 0.006484 
Minibatch iteration 446/16200: mean batch inertia: 0.005338, ewa inertia: 0.006470 
Minibatch iteration 447/16200: mean batch inertia: 0.005732, ewa inertia: 0.006461 
Minibatch iteration 448/16200: mean batch inertia: 0.006410, ewa inertia: 0.006460 
Minibatch iteration 449/16200: mean batch inertia: 0.005756, ewa inertia: 0.006452 
Minibatch iteration 450/16200: mean batch inertia: 0.007822, ewa inertia: 0.

Minibatch iteration 537/16200: mean batch inertia: 0.005917, ewa inertia: 0.006418 
Minibatch iteration 538/16200: mean batch inertia: 0.008075, ewa inertia: 0.006438 
Minibatch iteration 539/16200: mean batch inertia: 0.005993, ewa inertia: 0.006433 
Minibatch iteration 540/16200: mean batch inertia: 0.006613, ewa inertia: 0.006435 
Minibatch iteration 541/16200: mean batch inertia: 0.006209, ewa inertia: 0.006432 
Minibatch iteration 542/16200: mean batch inertia: 0.005965, ewa inertia: 0.006427 
Minibatch iteration 543/16200: mean batch inertia: 0.006245, ewa inertia: 0.006424 
Minibatch iteration 544/16200: mean batch inertia: 0.006934, ewa inertia: 0.006431 
Minibatch iteration 545/16200: mean batch inertia: 0.006532, ewa inertia: 0.006432 
Minibatch iteration 546/16200: mean batch inertia: 0.005558, ewa inertia: 0.006421 
Minibatch iteration 547/16200: mean batch inertia: 0.007673, ewa inertia: 0.006437 
Minibatch iteration 548/16200: mean batch inertia: 0.005947, ewa inertia: 0.

Minibatch iteration 674/16200: mean batch inertia: 0.006209, ewa inertia: 0.006218 
Minibatch iteration 675/16200: mean batch inertia: 0.005689, ewa inertia: 0.006211 
Minibatch iteration 676/16200: mean batch inertia: 0.005187, ewa inertia: 0.006199 
Minibatch iteration 677/16200: mean batch inertia: 0.006027, ewa inertia: 0.006196 
Minibatch iteration 678/16200: mean batch inertia: 0.006399, ewa inertia: 0.006199 
Minibatch iteration 679/16200: mean batch inertia: 0.006000, ewa inertia: 0.006197 
Minibatch iteration 680/16200: mean batch inertia: 0.006483, ewa inertia: 0.006200 
Minibatch iteration 681/16200: mean batch inertia: 0.005914, ewa inertia: 0.006197 
Minibatch iteration 682/16200: mean batch inertia: 0.005872, ewa inertia: 0.006193 
Minibatch iteration 683/16200: mean batch inertia: 0.006425, ewa inertia: 0.006195 
Minibatch iteration 684/16200: mean batch inertia: 0.007070, ewa inertia: 0.006206 
Minibatch iteration 685/16200: mean batch inertia: 0.005559, ewa inertia: 0.

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  9.1min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 21.0min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed: 37.8min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 47.8min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  9.7min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed: 12.9min finished


Fitting 5 folds for each of 11 candidates, totalling 55 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done  55 out of  55 | elapsed:  2.8min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  8.9min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed: 11.9min finished


In [None]:
fig = plt.figure()
ax = plt.axes()
plt.scatter(np.concatenate(X_train)[:, 0], np.concatenate(X_train)[:, 1], cmap=plt.cm.RdBu, rasterized=True, c=kmeans.labels_)
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
plt.tight_layout()

width = 3.487
height =width / 1.618
fig.set_size_inches(width, height)
fig.subplots_adjust(wspace=1.5)
plt.savefig('lib_k_400.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 400:
        kmeans = MiniBatchKMeans(n_clusters=400, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 400), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##    NetFlow dataset


Data Set Information:

TODO

Attribute Information:

TODO

TODO: Shuffle, find a better random state

In [19]:
net = np.load(r"E:\multivariate_time_series_dataset\numpy\NET.npz")
X_train = np.empty(shape=(803, ), dtype=object)
y_train = np.empty(shape=(803, ), dtype=object)
X_test = np.empty(shape=(534, ), dtype=object)
y_test = np.empty(shape=(534, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(net['X'], net['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(net['Xte'], net['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(net['X'], net['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(net['Xte'], net['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 4,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_net.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_net.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [20]:
kmeans = MiniBatchKMeans(n_clusters=100, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 800 - 100), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 800,
                          'k_in': 4,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_net_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_net_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 0.494128
Init 2/200 with method: k-means++
Inertia for init 2/200: 0.263712
Init 3/200 with method: k-means++
Inertia for init 3/200: 0.063756
Init 4/200 with method: k-means++
Inertia for init 4/200: 0.341841
Init 5/200 with method: k-means++
Inertia for init 5/200: 0.429972
Init 6/200 with method: k-means++
Inertia for init 6/200: 1.064744
Init 7/200 with method: k-means++
Inertia for init 7/200: 0.224021
Init 8/200 with method: k-means++
Inertia for init 8/200: 0.502849
Init 9/200 with method: k-means++
Inertia for init 9/200: 0.167693
Init 10/200 with method: k-means++
Inertia for init 10/200: 0.181575
Init 11/200 with method: k-means++
Inertia for init 11/200: 0.124060
Init 12/200 with method: k-means++
Inertia for init 12/200: 0.243499
Init 13/200 with method: k-means++
Inertia for init 13/200: 0.488753
Init 14/200 with method: k-means++
Inertia for init 14/200: 0.370937
Init 15/200 with method: k-means++
Inertia for init 

Inertia for init 119/200: 0.318665
Init 120/200 with method: k-means++
Inertia for init 120/200: 0.511351
Init 121/200 with method: k-means++
Inertia for init 121/200: 0.160176
Init 122/200 with method: k-means++
Inertia for init 122/200: 0.303003
Init 123/200 with method: k-means++
Inertia for init 123/200: 0.068385
Init 124/200 with method: k-means++
Inertia for init 124/200: 0.168852
Init 125/200 with method: k-means++
Inertia for init 125/200: 0.172022
Init 126/200 with method: k-means++
Inertia for init 126/200: 0.176695
Init 127/200 with method: k-means++
Inertia for init 127/200: 0.486969
Init 128/200 with method: k-means++
Inertia for init 128/200: 0.607098
Init 129/200 with method: k-means++
Inertia for init 129/200: 0.083457
Init 130/200 with method: k-means++
Inertia for init 130/200: 0.336721
Init 131/200 with method: k-means++
Inertia for init 131/200: 0.657970
Init 132/200 with method: k-means++
Inertia for init 132/200: 0.130800
Init 133/200 with method: k-means++
Inerti

KeyboardInterrupt: 

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 100:
        kmeans = MiniBatchKMeans(n_clusters=100, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 100), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

## PEMS-SF Data Set


Data Set Information:

15 months worth of daily data from the California Department of Transportation PEMS website, (http://pems.dot.ca.gov/), the data describes the occupancy rate, between 0 and 1, of different car lanes of San Francisco bay area freeways. The measurements cover the period from Jan. 1st 2008 to Mar. 30th 2009 and are sampled every 10 minutes. We consider each day in this database as a single time series of dimension 963 (the number of sensors which functioned consistently throughout the studied period) and length 6 x 24=144. We remove public holidays from the dataset, as well as two days with anomalies (March 8th 2009 and March 9th 2008) where all sensors were muted between 2:00 and 3:00 AM. This results in a database of 440 time series.

The task we propose on this dataset is to classify each observed day as the correct day of the week, from Monday to Sunday, e.g. label it with an integer in {1,2,3,4,5,6,7}.

I will keep separate copies of this database on my website in a Matlab format. If you use Matlab, it might be more convenient to consider these .mat files directly.

There are two files for each fold, the data file and the labels file. We have split the 440 time series between train and test folds, but you are of course free to merge them to consider a different cross validation setting.
- The PEMS_train textfile has 263 lines. Each line describes a time-series provided as a matrix. The matrix syntax is that of Matlab, e.g. [ a b ; c d] is the matrix with row vectors [a b] and [c d] in that order. Each matrix describes the different occupancies rates (963 lines, one for each station/detector) sampled every 10 minutes during the day (144 columns).
- The PEMS_trainlabel text describes, for each day of measurements described above, the day of the week on which the data was sampled, namely an integer between 1 (Mon.) and 7 (Sun.).

- PEMS_test and PEMS_testlabels are formatted in the same way, except that there are 173 test instances.

- The permutation that I used to shuffle the dataset is given in the randperm file. If you need to rearrange the data so that it follows the calendar order, you should merge train and test samples and reorder them using the inverse permutation of randperm.

Attribute Information:

Each attribute describes the measurement of the occupancy rate (between 0 and 1) of a captor location as recorded by a measuring station, at a given timestamp in time during the day. The ID of each station is given in the stations_list text file. For more information on the location (GPS, Highway, Direction) of each station please refer to the PEMS website. There are 963 (stations) x 144 (timestamps) = 138.672 attributes for each record.

In [None]:
pems = np.load(r"E:\multivariate_time_series_dataset\numpy\PEMS.npz")
X_train = np.empty(shape=(267, ), dtype=object)
y_train = np.empty(shape=(267, ), dtype=object)
X_test = np.empty(shape=(173, ), dtype=object)
y_test = np.empty(shape=(173, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(pems['X'], pems['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(pems['Xte'], pems['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(pems['X'], pems['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(pems['Xte'], pems['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_pems.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_pems.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_pems_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_pems_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

## DistalPhalanxTW Data Set


Data Set Information:

This series of 11 classification problems were created as part of Luke Davis's PhD titled "Predictive Modelling of Bone Ageing". They are all derived from the same images, extracted from Cao et al. "Digital hand atlas and web-based bone age assessment: system design and implementation". They are designed to test the efficacy of hand and bone outline detection and whether these outlines could be helpful in bone age prediction. Algorithms to automatically extract the hand outlines and then the outlines of three bones of the middle finger (proximal, middle and distal phalanges) were applied to over 1300 images, and three human evaluators labelled the output of the image outlining as correct or incorrect. This generated three classification problems: DistalPhalanxOutlineCorrect; MiddlePhalanxOutlineCorrect; and ProximalPhalanxOutlineCorrect. The next stage of the project was to use the outlines to predict information about the subjects age. The three problems {\em DistalPhalanxOutlineAgeGroup, MiddlePhalanxOutlineAgeGroup and ProximalPhalanxOutlineAgeGroup} involve using the outline of one of the phalanges to predict whether the subject is one of three age groups: 0-6 years old, 7-12 years old and 13-19 years old. Note that these problems are aligned by subject, and hence can be treated as a multi dimensional TSC problem. Problem Phalanges contains the concatenation of all three problems. Bone age estimation is usually performed by an expert with an algorithm called Tanner-Whitehouse. This involves scoring each bone into one of seven categories based on the stage of development. The final three bone image classification problems, DistalPhalanxTW, MiddlePhalanxTW and ProximalPhalanxTW}, involve predicting the Tanner-Whitehouse score (as labelled by a human expert) from the outline.

Attribute Information:

TODO

In [None]:
phal = np.load(r"E:\multivariate_time_series_dataset\numpy\PHAL.npz")
X_train = np.empty(shape=(400, ), dtype=object)
y_train = np.empty(shape=(400, ), dtype=object)
X_test = np.empty(shape=(139, ), dtype=object)
y_test = np.empty(shape=(139, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(phal['X'], phal['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(phal['Xte'], phal['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(phal['X'], phal['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(phal['Xte'], phal['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 1,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_phal.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_phal.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = MinMaxScaler(feature_range=(-1, 1)).fit_transform(kmeans.cluster_centers_)
# w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 1,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_phal_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_phal_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

In [None]:
plt.plot(np.concatenate(X_train[:1]))

##  Robot Execution Failures Data Set 
(http://archive.ics.uci.edu/ml/datasets/Robot+Execution+Failures)

Data Set Information:

The donation includes 5 datasets, each of them defining a different learning problem:

- LP1: failures in approach to grasp position
- LP2: failures in transfer of a part
- LP3: position of part after a transfer failure
- LP4: failures in approach to ungrasp position
- LP5: failures in motion with part

In order to improve classification accuracy, a set of five feature transformation strategies (based on statistical summary features, discrete Fourier transform, etc.) was defined and evaluated. This enabled an average improvement of 20% in accuracy. The most accessible reference is Seabra Lopes and Camarinha-Matos, 1998.

Attribute Information:

All features are numeric although they are integer valued only. Each feature represents a force or a torque measured after failure detection; each failure instance is characterized in terms of 15 force/torque samples collected at regular time intervals starting immediately after failure detection; The total observation window for each failure instance was of 315 ms.

Each example is described as follows:

class
Fx1 Fy1 Fz1 Tx1 Ty1 Tz1
Fx2 Fy2 Fz2 Tx2 Ty2 Tz2
......
Fx15 Fy15 Fz15 Tx15 Ty15 Tz15

where Fx1 ... Fx15 is the evolution of force Fx in the observation window, the same for Fy, Fz and the torques; there is a total of 90 features.

In [None]:
robot = np.load(r"E:\multivariate_time_series_dataset\numpy\ROBOT.npz")
X_train = np.empty(shape=(100, ), dtype=object)
y_train = np.empty(shape=(100, ), dtype=object)
X_test = np.empty(shape=(64, ), dtype=object)
y_test = np.empty(shape=(64, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(robot['X'], robot['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(robot['Xte'], robot['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(robot['X'], robot['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(robot['Xte'], robot['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 5,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_robot.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_robot.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 5,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_robot_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_robot_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  SwedishLeaf Data Set 
(http://www.timeseriesclassification.com/description.php?Dataset=SwedishLeaf)

Data Set Information:

Swedish leaf is a set of leaf outlines donated by Oskar S�derkvist, and used in his MSc thesis Computer vision classifcation of leaves from swedish trees, 2001. The tree classes are 1. Ulmus carpinifolia 2. Acer 3. Salix aurita 4. Quercus 5. Alnus incana 6. Betula pubescens 7. Salix alba 'Sericea' 8. Populus tremula 9. Ulmus glabra 10. Sorbus aucuparia 11. Salix sinerea 12. Populus 13.Tilia 14. Sorbus intermedia 15. Fagus silvatica 

Attribute Information:

TODO

In [None]:
swe = np.load(r"E:\multivariate_time_series_dataset\numpy\SWE.npz")
X_train = np.empty(shape=(500, ), dtype=object)
y_train = np.empty(shape=(500, ), dtype=object)
X_test = np.empty(shape=(625, ), dtype=object)
y_test = np.empty(shape=(625, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(swe['X'], swe['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(swe['Xte'], swe['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(swe['X'], swe['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(swe['Xte'], swe['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 1,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_swe.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_swe.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = MinMaxScaler(feature_range=(-1, 1)).fit_transform(kmeans.cluster_centers_)

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 1,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_swe_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_swe_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

In [None]:
plt.plot(np.concatenate(X_train[:5]))

##  uWave Data Set 
(https://www.yecl.org/publications/liu09percom.pdf)

Data Set Information:

TODO

Attribute Information:

TODO

In [None]:
uwav = np.load(r"E:\multivariate_time_series_dataset\numpy\UWAV.npz")
X_train = np.empty(shape=(200, ), dtype=object)
y_train = np.empty(shape=(200, ), dtype=object)
X_test = np.empty(shape=(428, ), dtype=object)
y_test = np.empty(shape=(428, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(uwav['X'], uwav['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(uwav['Xte'], uwav['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(uwav['X'], uwav['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(uwav['Xte'], uwav['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 3,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_uwav.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_uwav.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 3,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_uwav_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_uwav_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  Wafer Data Set 
(http://www.timeseriesclassification.com/description.php?Dataset=Wafer)

Data Set Information:

This dataset was formatted by R. Olszewski as part of his thesis Generalized feature extraction for structural pattern recognition in time-series data at Carnegie Mellon University, 2001. Wafer data relates to semi-conductor microelectronics fabrication. A collection of inline process control measurements recorded from various sensors during the processing of silicon wafers for semiconductor fabrication constitute the wafer database; each data set in the wafer database contains the measurements recorded by one sensor during the processing of one wafer by one tool. The two classes are normal and abnormal. There is a large class imbalance between normal and abnormal (10.7% of the train are abnormal, 12.1% of the test). 

Attribute Information:

TODO

In [21]:
waf = np.load(r"E:\multivariate_time_series_dataset\numpy\WAF.npz")
X_train = np.empty(shape=(298, ), dtype=object)
y_train = np.empty(shape=(298, ), dtype=object)
X_test = np.empty(shape=(896, ), dtype=object)
y_test = np.empty(shape=(896, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(waf['X'], waf['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(waf['Xte'], waf['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(waf['X'], waf['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(waf['Xte'], waf['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 5,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_waf.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_waf.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN

In [23]:
kmeans = MiniBatchKMeans(n_clusters=200, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
w_in = np.pad(w_in, ((0, 800 - 200), (0, 0)), mode='constant', constant_values=0)

initially_fixed_params = {'hidden_layer_size': 800,
                          'k_in': 5,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_waf_km_sparse.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_waf_km_sparse.joblib")

Init 1/200 with method: k-means++
Inertia for init 1/200: 5.034372
Init 2/200 with method: k-means++
Inertia for init 2/200: 1.303531
Init 3/200 with method: k-means++
Inertia for init 3/200: 14.386570
Init 4/200 with method: k-means++
Inertia for init 4/200: 20.127613
Init 5/200 with method: k-means++
Inertia for init 5/200: 8.088663
Init 6/200 with method: k-means++
Inertia for init 6/200: 3.523553
Init 7/200 with method: k-means++
Inertia for init 7/200: 2.121652
Init 8/200 with method: k-means++
Inertia for init 8/200: 23.843817
Init 9/200 with method: k-means++
Inertia for init 9/200: 3.124773
Init 10/200 with method: k-means++
Inertia for init 10/200: 8.841286
Init 11/200 with method: k-means++
Inertia for init 11/200: 5.047710
Init 12/200 with method: k-means++
Inertia for init 12/200: 2.859102
Init 13/200 with method: k-means++
Inertia for init 13/200: 2.883923
Init 14/200 with method: k-means++
Inertia for init 14/200: 2.486586
Init 15/200 with method: k-means++
Inertia for in

Inertia for init 119/200: 5.097120
Init 120/200 with method: k-means++
Inertia for init 120/200: 3.078909
Init 121/200 with method: k-means++
Inertia for init 121/200: 16.004988
Init 122/200 with method: k-means++
Inertia for init 122/200: 2.364164
Init 123/200 with method: k-means++
Inertia for init 123/200: 3.560895
Init 124/200 with method: k-means++
Inertia for init 124/200: 15.636125
Init 125/200 with method: k-means++
Inertia for init 125/200: 14.887952
Init 126/200 with method: k-means++
Inertia for init 126/200: 2.327432
Init 127/200 with method: k-means++
Inertia for init 127/200: 3.779043
Init 128/200 with method: k-means++
Inertia for init 128/200: 4.034629
Init 129/200 with method: k-means++
Inertia for init 129/200: 3.039261
Init 130/200 with method: k-means++
Inertia for init 130/200: 6.098618
Init 131/200 with method: k-means++
Inertia for init 131/200: 8.344823
Init 132/200 with method: k-means++
Inertia for init 132/200: 1.160935
Init 133/200 with method: k-means++
Ine

Fitting 5 folds for each of 200 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  6.4min
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed: 15.3min
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed: 26.1min
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed: 32.0min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  7.1min finished


Fitting 5 folds for each of 11 candidates, totalling 55 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  55 out of  55 | elapsed:  1.5min finished


Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 250 out of 250 | elapsed:  7.0min finished


In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    if params['hidden_layer_size'] > 200:
        kmeans = MiniBatchKMeans(n_clusters=200, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
        w_in = np.pad(w_in, ((0, params['hidden_layer_size'] - 200), (0, 0)), mode='constant', constant_values=0)
    else:
        kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
        t1 = time.time()
        kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
        w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
        t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

##  BasicMotions
(http://www.timeseriesclassification.com/description.php?Dataset=BasicMotions)

Data Set Information:

The data was generated as part of a student project where four students performed four activities whilst wearing a smart watch. The watch collects 3D accelerometer and a 3D gyroscope It consists of four classes, which are walking, resting, running and badminton. Participants were required to record motion a total of five times, and the data is sampled once every tenth of a second, for a ten second period.
Attribute Information:

TODO

In [None]:
walk = np.load(r"E:\multivariate_time_series_dataset\numpy\WALK.npz")
X_train = np.empty(shape=(28, ), dtype=object)
y_train = np.empty(shape=(28, ), dtype=object)
X_test = np.empty(shape=(16, ), dtype=object)
y_test = np.empty(shape=(16, ), dtype=object)

if train:
    for k, (X, y) in enumerate(zip(walk['X'], walk['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.tile(y, (X_train[k].shape[0], 1))
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(walk['Xte'], walk['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.tile(y, (X_test[k].shape[0], 1))
else:
    for k, (X, y) in enumerate(zip(walk['X'], walk['Y'])):
        X_train[k] = X[X.sum(axis=1)!=0, :]  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_train[k] = np.argwhere(y).ravel()
    scaler = StandardScaler().fit(np.concatenate(X_train))
    for k, X in enumerate(X_train):
        X_train[k] = scaler.transform(X=X)  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X

    X_train, y_train = shuffle(X_train, y_train, random_state=0)

    for k, (X, y) in enumerate(zip(walk['Xte'], walk['Yte'])):
        X_test[k] = scaler.transform(X=X[X.sum(axis=1)!=0, :])  # Sequences are zeropadded -> should we remove zeros? if not, X_train[k] = X
        y_test[k] = np.argwhere(y).ravel()

Fit random ESN

In [None]:
initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_esn = SeqToSeqESNClassifier(**initially_fixed_params)

try:
    sequential_search = load("../sequential_search_walk.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_walk.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
base_esn = SeqToLabelESNClassifier(**sequential_search.best_estimator_.get_params())
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    gs_cv = GridSearchCV(clone(base_esn).set_params(**params), 
                         param_grid={'random_state': range(10)}, 
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    t1 = time.time()
    esn = clone(base_esn).set_params(**params).fit(X=X_train, y=y_train, n_jobs=8)
    t2 = time.time()
    score = accuracy_score(y_test, esn.predict(X_test))
    print("ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

Fit KM-ESN (TODO CHECK)

In [None]:
kmeans = MiniBatchKMeans(n_clusters=50, n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=2, random_state=0)
kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])

initially_fixed_params = {'hidden_layer_size': 50,
                          'k_in': 10,
                          'input_scaling': 0.4,
                          'input_activation': 'identity',
                          'bias_scaling': 0.0,
                          'spectral_radius': 0.0,
                          'leakage': 0.1,
                          'k_rec': 10,
                          'reservoir_activation': 'tanh',
                          'bi_directional': False,
                          'wash_out': 0,
                          'continuation': False,
                          'alpha': 1e-3,
                          'random_state': 42}

step1_esn_params = {'input_scaling': uniform(loc=1e-2, scale=1),
                    'spectral_radius': uniform(loc=0, scale=2)}

step2_esn_params = {'leakage': loguniform(1e-5, 1e0)}
step3_esn_params = {'bias_scaling': np.linspace(0.0, 1.0, 11)}
step4_esn_params = {'alpha': loguniform(1e-5, 1e1)}

kwargs_step1 = {'cv': 2, 'n_iter': 200, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step2 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step3 = {'cv': 2, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}
kwargs_step4 = {'cv': 2, 'n_iter': 50, 'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'scoring': make_scorer(mean_squared_error, greater_is_better=False, needs_proba=True)}

# The searches are defined similarly to the steps of a sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_esn_params, kwargs_step1),
            ('step2', RandomizedSearchCV, step2_esn_params, kwargs_step2),
            ('step3', GridSearchCV, step3_esn_params, kwargs_step3),
            ('step4', RandomizedSearchCV, step4_esn_params, kwargs_step4)]

base_km_esn = SeqToSeqESNClassifier(input_to_node=PredefinedWeightsInputToNode(predefined_input_weights=w_in.T),
                                    **initially_fixed_params)

try:
    sequential_search = load("../sequential_search_walk_km.joblib")
except FileNotFoundError:
    sequential_search = SequentialSearchCV(base_km_esn, searches=searches).fit(X_train, y_train)
    dump(sequential_search, "../sequential_search_walk_km.joblib")

In [None]:
print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)
"""
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600]}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=20, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=0)
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    base_km_esn = clone(sequential_search.best_estimator_)
    base_km_esn.input_to_node.predefined_input_weights=w_in.T
    base_km_esn.set_params(**params)
    gs_cv = GridSearchCV(base_km_esn,
                         param_grid={'random_state': range(10)},
                         scoring=make_scorer(accuracy_score), n_jobs=-1).fit(X=X_train, y=y_train)
    print(gs_cv.cv_results_)
    print("---------------------------------------")
    acc_score = accuracy_score(y_test, gs_cv.best_estimator_.predict(X_test))
    print(acc_score)
    print("---------------------------------------")
"""
constant_params = sequential_search.best_estimator_.get_params()
constant_params.pop('hidden_layer_size')
constant_params.pop('random_state')
constant_params.pop('predefined_input_weights')
base_esn = SeqToLabelESNClassifier(**constant_params)
param_grid = {'hidden_layer_size': [50, 100, 200, 400, 800, 1600],
              'random_state': range(1, 11)}

for params in ParameterGrid(param_grid):
    kmeans = MiniBatchKMeans(n_clusters=params['hidden_layer_size'], n_init=200, reassignment_ratio=0, max_no_improvement=50, init='k-means++', verbose=0, random_state=params['random_state'])
    t1 = time.time()
    kmeans.fit(X=np.concatenate(np.concatenate((X_train, X_test))))
    w_in = np.divide(kmeans.cluster_centers_, np.linalg.norm(kmeans.cluster_centers_, axis=1)[:, None])
    t2 = time.time()
    km_esn = clone(base_esn)
    km_esn.input_to_node = PredefinedWeightsInputToNode(predefined_input_weights=w_in.T)
    km_esn.set_params(**constant_params, **params)
    km_esn.fit(X=X_train, y=y_train, n_jobs=8)
    score = accuracy_score(y_test, km_esn.predict(X_test))
    print("KM-ESN with params {0} achieved score of {1} and was trained in {2} seconds.".format(params, score, t2-t1))

In [None]:
sequential_search = load("../sequential_search_chlo_km.joblib")
sequential_search.all_best_params_, sequential_search.all_best_score_
sequential_search.best_estimator_.input_to_node.input_weights

In [None]:
# idx = np.random.randint(0, 800, 50)
fig = plt.figure()
fig.set_size_inches(2, 1.25)
ax = sns.histplot(data=w_in, stat="count", legend=False)
# ax = sns.heatmap(data=sequential_search.best_estimator_.input_to_node.input_weights, cmap="RdBu", vmin=-1.0, vmax=1.0)
# ax.invert_yaxis()
# ax.yaxis.set_major_locator(ticker.MaxNLocator(5))
# ax.xaxis.set_major_locator(ticker.MaxNLocator(5))
# plt.xlabel("Neuron Index")
# plt.ylabel("Input Dimension")
plt.xlabel("Weight")
plt.ylabel("Count")
# plt.imshow(sequential_search.best_estimator_.input_to_node.input_weights.todense()[:, idx])
# plt.colorbar()
plt.savefig('KM_ESN_Input_Weight_Hist_CHLO.pdf', bbox_inches='tight', pad_inches=0)

In [None]:
df = pd.DataFrame(sequential_search.all_cv_results_["step1"])

fig = plt.figure()
ax = sns.scatterplot(x="param_spectral_radius", y="param_input_scaling", hue="mean_test_score", palette='RdBu', data=df)
plt.xlabel("Spectral Radius")
plt.ylabel("Input Scaling")

norm = plt.Normalize(0.97, 1.0)
sm = plt.cm.ScalarMappable(cmap="RdBu", norm=norm)
sm.set_array([])
plt.xlim((0, 2.05))
plt.ylim((0, 1.05))

# Remove the legend and add a colorbar
ax.get_legend().remove()
ax.figure.colorbar(sm)
fig.set_size_inches(4, 2.5)
tick_locator = ticker.MaxNLocator(5)
ax.yaxis.set_major_locator(tick_locator)
ax.xaxis.set_major_locator(tick_locator)

In [None]:
df = pd.DataFrame(sequential_search.all_cv_results_["step2"])
fig = plt.figure()
fig.set_size_inches(2, 1.25)
ax = sns.lineplot(data=df, x="param_leakage", y="mean_test_score")
ax.set_xscale('log')
plt.xlabel("Leakage")
plt.ylabel("Score")
plt.xlim((1e-5, 1e0))
tick_locator = ticker.MaxNLocator(10)
ax.xaxis.set_major_locator(tick_locator)
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.4f'))
plt.grid()

In [None]:
df = pd.DataFrame(sequential_search.all_cv_results_["step3"])
fig = plt.figure()
fig.set_size_inches(2, 1.25)
ax = sns.lineplot(data=df, x="param_bias_scaling", y="mean_test_score")
plt.xlabel("Bias Scaling")
plt.ylabel("Score")
plt.xlim((0, 1))
tick_locator = ticker.MaxNLocator(5)
ax.xaxis.set_major_locator(tick_locator)
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.5f'))
plt.grid()

In [None]:
df = pd.DataFrame(sequential_search.all_cv_results_["step4"])
fig = plt.figure()
fig.set_size_inches(2, 1.25)
ax = sns.lineplot(data=df, x="param_alpha", y="mean_test_score")
ax.set_xscale('log')
plt.xlabel("Alpha")
plt.ylabel("Score")
plt.xlim((1e-5, 1e0))
tick_locator = ticker.MaxNLocator(20)
ax.xaxis.set_major_locator(tick_locator)
ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.5f'))
plt.grid()

In [None]:
sequential_search.best_estimator_

In [None]:
plt.plot(np.concatenate(X_train))
plt.xlim((0, 1000))