In [1]:
import numpy as np
import pandas as pd

from taskutils import read_input

# Locations of the raw inputs, relative to the notebook's working directory.
train_data_path = "data/X_train.csv"
label_data_path = "data/y_train.csv"
test_data_path = "data/X_test.csv"

# Load each table, then keep every column except the first one.
# NOTE(review): the dropped first column is presumably a sample id - confirm
# against what read_input returns.
train_frame = read_input(train_data_path)
test_frame = read_input(test_data_path)
X_train = train_frame.values[:, 1:]
X_test = test_frame.values[:, 1:]

# Labels are read from the 'y' column of the label file.
label_frame = pd.read_csv(label_data_path)
y = label_frame['y'].values

In [8]:
import ecgfeatures as ecg

def handleNans(sample):
    """Return the entries of ``sample`` that are not NaN, in original order.

    ``sample`` is indexed with a boolean mask, so it is expected to be a
    NumPy array; a new array is returned and the input is left untouched.
    """
    is_valid = np.logical_not(np.isnan(sample))
    return sample[is_valid]

def extractFeatures(data):
    """Compute one feature entry per sample in ``data``.

    Every sample is first stripped of NaN entries with ``handleNans`` and the
    result is passed to ``ecg.feature_extraction``.

    Returns:
        A list holding the extraction result for each sample, in input order.
    """
    return [ecg.feature_extraction(handleNans(signal)) for signal in data]

# Extract features for every training sample and store them as a
# comma-separated file next to the notebook.
X = extractFeatures(X_train)
np.savetxt(fname="X_train_features.csv", X=X, delimiter=',')


In [9]:
# Extract features for every test sample and store them as a
# comma-separated file next to the notebook.
X2 = extractFeatures(X_test)
np.savetxt(fname="X_test_features.csv", X=X2, delimiter=',')


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedShuffleSplit
from taskutils import perform_cv, write_results

# Width used for every hidden layer: the number of extracted features per
# sample. X is a plain list of feature vectors, so it has no ``.shape`` and
# ``len(X)`` is the number of training samples, not the number of features;
# going through an array view gives the column count.
# Assumes every sample produced a feature vector of the same length.
n_features = np.asarray(X).shape[1]

# Grid searched for the 'classifier' step of the pipeline (MLPClassifier).
# Only activation, alpha, hidden_layer_sizes and learning_rate_init have more
# than one value (3 * 6 * 2 * 4 = 144 candidates); every other entry pins a
# single value.
parameters_mlp = {
    'classifier__activation': ['relu', 'logistic', 'tanh'],
    'classifier__alpha': 10. ** np.arange(-6, 0),  # 1e-6 ... 1e-1
    'classifier__batch_size': ['auto'],
    'classifier__beta_1': [0.9],
    'classifier__beta_2': [0.999],
    'classifier__early_stopping': [False],
    'classifier__epsilon': [1e-08],
    # Two and three hidden layers, each n_features units wide.
    'classifier__hidden_layer_sizes': [(n_features,) * 2, (n_features,) * 3],
    'classifier__learning_rate': ['constant'],
    'classifier__learning_rate_init': 10. ** np.arange(-4, 0),  # 1e-4 ... 1e-1
    'classifier__max_iter': [200],
    'classifier__momentum': [0.9],
    'classifier__n_iter_no_change': [10],
    'classifier__nesterovs_momentum': [True],
    'classifier__power_t': [0.5],
    'classifier__random_state': [1],
    'classifier__shuffle': [True],
    'classifier__solver': ['adam'],
    'classifier__tol': [0.0001],
    'classifier__validation_fraction': [0.1],
    'classifier__warm_start': [False]
}

# Two-step pipeline: min-max scaling followed by the MLP classifier. The step
# names 'scaler' and 'classifier' are the prefixes used by the parameter grid.
scaler = MinMaxScaler()
classifier = MLPClassifier()
pipe_line = Pipeline([
    ('scaler', scaler),
    ('classifier', classifier)
])

n_splits = 10
# Seed the splitter so the random stratified splits, and therefore the scores
# and the selected model, are the same on every run. The value 1 matches the
# random_state pinned for the classifier in the parameter grid.
cross_validation_model = StratifiedShuffleSplit(n_splits=n_splits, random_state=1)

# NOTE(review): the two trailing positional 10s are passed to perform_cv as-is;
# the logged output suggests one of them is the number of parallel jobs -
# confirm against taskutils.perform_cv.
model = perform_cv(pipe_line, X, y, parameters_mlp, 'f1_micro', cross_validation_model, 10, 10)

Fitting 10 folds for each of 144 candidates, totalling 1440 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.


In [None]:
# Predict a label for every test sample with the model returned by perform_cv.
y_pred = model.predict(X2)

# One sequential id (0 .. len(X2) - 1) per test sample.
sample_ids = range(len(X2))

# NOTE(review): this passes the bare `classifier` object created before the
# search, not the fitted `model` - confirm what write_results uses its first
# argument for.
write_results(classifier, sample_ids, y_pred)
