In [1]:
import numpy as np
from sklearn.pipeline import Pipeline
from python.utils import load_data, get_features_from 
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GroupKFold
from python.features_extraction import ExtractFeatures
from sklearn.model_selection import cross_validate
from python.utils import write_submission
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from tensorflow import keras
from scikeras.wrappers import KerasClassifier

2021-12-05 22:49:18.662534: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-05 22:49:18.662575: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [37]:
# Feature-calculator settings handed to ExtractFeatures in the preprocessing
# pipeline.
# NOTE(review): the keys look like tsfresh feature-calculator names, where
# None means "no extra arguments" and a list holds one keyword dict per
# requested variant — confirm against python.features_extraction.
# Key order is intentionally left untouched: it may determine the order of the
# extracted feature columns.
fc_parameters = {
    "abs_energy": None,
    "maximum": None,
    "absolute_sum_of_changes": None,
    "fft_coefficient": [
        {"coeff": 10, "attr": "abs"},
    ],
    "fourier_entropy": [
        {"bins": 10},
    ],
    "linear_trend": [
        {"attr": "slope"},
    ],
    "mean": None,
    "number_peaks": [
        {"n": 4},
    ],
    "variance": None,
}

In [38]:
def create_model(meta):
    """Build the (uncompiled) feed-forward network used as the "dnn" estimator.

    The network is a linear Dense layer as wide as the input, followed by
    three identical stages of Dropout(0.2) -> BatchNormalization -> Dense,
    with 300 and 100 ReLU units and finally a softmax over the classes.

    Parameters
    ----------
    meta : mapping
        Must provide "n_features_in_" (width of the first layer), "X_shape_"
        (full input shape; everything after the first axis is used as the
        per-sample input shape) and "n_classes_" (size of the output layer).
        This is the metadata dict the KerasClassifier wrapper passes in.

    Returns
    -------
    keras.models.Sequential
        The assembled model. It is not compiled here; the loss is given to
        the wrapper instead.
    """
    layers = keras.layers

    input_width = meta["n_features_in_"]
    sample_shape = meta["X_shape_"][1:]
    class_count = meta["n_classes_"]

    model = keras.models.Sequential()
    # Input stage: no activation, i.e. a plain linear projection.
    model.add(layers.Dense(input_width, input_shape=sample_shape))

    # Each following stage regularises, normalises, then projects.
    stages = ((300, "relu"), (100, "relu"), (class_count, "softmax"))
    for units, activation in stages:
        model.add(layers.Dropout(rate=0.2))
        model.add(layers.BatchNormalization())
        model.add(layers.Dense(units, activation=activation))

    return model

In [15]:
# Directory handed to load_data. It is a relative path, so it is resolved
# against the kernel's current working directory.
DATA_PATH = 'data'
# load_data is a project helper (python.utils). Its four return values are
# unpacked as training inputs, training labels, test inputs and `subjects`;
# `subjects` is used further down as the grouping key for cross-validation.
# NOTE(review): the recorded run printed X_train (3500, 15872), y_train (3500,)
# and X_test (3500, 15872) — presumably emitted by load_data itself.
X_train, y_train, X_test, subjects = load_data(DATA_PATH)

X_train size: (3500, 15872).
y_train size: (3500,).
X_test size: (3500, 15872).


In [40]:
# Shared preprocessing chain, applied in this order:
#   si  - replace the sentinel value -999999.99 with the column mean
#         (presumably the dataset's marker for missing readings — confirm)
#   pp  - project-specific feature extraction configured by fc_parameters
#   si2 - replace NaN values (presumably produced by the extraction step)
#         with the column mean
#   ss  - StandardScaler with default settings
pre_process = Pipeline(
    steps=[
        ("si", SimpleImputer(strategy="mean", missing_values=-999999.99)),
        ("pp", ExtractFeatures(fc_parameters)),
        ("si2", SimpleImputer(strategy="mean", missing_values=np.nan)),
        ("ss", StandardScaler()),
    ]
)

In [41]:
# End-to-end model: the shared preprocessing chain followed by a soft-voting
# ensemble (class probabilities of all members are averaged).
#
# Every stochastic member is seeded with 42 so that repeated runs of the
# notebook give the same cross-validation score and the same submission.
# The Keras wrapper was previously the only unseeded member.
voting_clf = Pipeline([
    ("pp", pre_process),
    ("vot", VotingClassifier(
        estimators=[
            ("rnf_cls", RandomForestClassifier(
                n_estimators=500,
                max_features=4,
                max_depth=15,
                max_samples=0.4,
                bootstrap=True,
                random_state=42,
            )),
            # probability=True makes SVC expose predict_proba, which soft
            # voting relies on.
            ("svc", OneVsRestClassifier(
                estimator=SVC(
                    C=0.25,
                    kernel='poly',
                    degree=3,
                    coef0=2,
                    gamma='scale',
                    probability=True,
                    random_state=42,
                ),
            )),
            ("knn", KNeighborsClassifier(n_neighbors=10, weights='distance', p=1)),
            # NOTE: `base_estimator` was renamed to `estimator` in
            # scikit-learn 1.2 and removed in 1.4; rename this keyword when
            # the environment is upgraded.
            ("ada", AdaBoostClassifier(
                base_estimator=DecisionTreeClassifier(max_depth=10, random_state=42),
                n_estimators=200,
                random_state=42,
            )),
            # The model returned by create_model is uncompiled; the wrapper
            # compiles it with the loss given here. random_state seeds
            # TensorFlow so weight initialisation, dropout and shuffling are
            # repeatable, matching the other estimators.
            ("dnn", KerasClassifier(
                create_model,
                loss="sparse_categorical_crossentropy",
                epochs=100,
                verbose=0,
                random_state=42,
            )),
        ],
        voting='soft',
    )),
])

In [None]:
# Grouped cross-validation with as many folds as there are distinct values in
# `subjects`, so each fold holds out a single group.
# Labels are shifted down by one before fitting; the prediction cell adds the
# one back. NOTE(review): presumably the raw labels start at 1 — confirm.
cv_res = cross_validate(
    voting_clf,
    X_train,
    y_train - 1,
    groups=subjects,
    cv=GroupKFold(n_splits=len(np.unique(subjects))),
    scoring="accuracy",
    return_train_score=True,
)
# Mean held-out accuracy across folds (displayed as the cell result).
cv_res["test_score"].mean()

In [None]:
# Refit the complete pipeline on all training data. Labels are shifted down
# by one, exactly as in the cross-validation call.
voting_clf.fit(X_train, y_train-1)

In [None]:
# Predict on the test inputs and add one to undo the -1 label shift applied
# when the model was fitted.
y_pred = voting_clf.predict(X_test) + 1
# write_submission is a project helper (python.utils).
# NOTE(review): 'submissions' is presumably the output directory — confirm.
write_submission(y_pred, 'submissions')