In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GroupKFold, cross_val_score, cross_validate
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVC

from python.features_extraction import AdvancedSlidingWindowFeaturesExtractor
from python.utils import load_data, write_submission

# Data

In [2]:
# Directory handed to the project's load_data helper.
DATA_PATH = "data"

# load_data yields the training matrix, the training labels, the test matrix
# and a `subjects` array; `subjects` is later passed as the cross-validation
# groups. The sizes shown in the cell output are presumably printed by the
# helper itself.
(X_train, y_train, X_test, subjects) = load_data(DATA_PATH)

X_train size: (3500, 15872).
y_train size: (3500,).
X_test size: (3500, 15872).


# Models

In [11]:
# k-nearest-neighbours candidate. KNeighborsClassifier is imported in the
# notebook's top import cell, so this cell carries no import of its own.
knn_clf = Pipeline([
    # Entries equal to the sentinel -999999.99 are treated as missing and
    # replaced by the mean of their column.
    ("si", SimpleImputer(missing_values=-999999.99, strategy="mean")),
    # Standardisation is applied to the imputed input, before feature extraction.
    ("std_scaler", StandardScaler()),
    # NOTE(review): the positional arguments are presumably a window size of
    # 128, an overlap/offset of 0 and six feature on/off flags — confirm
    # against python.features_extraction.
    ("swfe", AdvancedSlidingWindowFeaturesExtractor(128, 0, True, True, True, True, True, True)),
    # Classifier left at its default hyper-parameters.
    ("knn", KNeighborsClassifier()),
])

In [20]:
# Random-forest candidate. Every name used here (Pipeline, SimpleImputer,
# StandardScaler, RandomForestClassifier and the feature extractor) comes from
# the notebook's top import cell, so this cell needs no import of its own.
rf_clf = Pipeline([
    # Entries equal to the sentinel -999999.99 are treated as missing and
    # replaced by the mean of their column.
    ("si", SimpleImputer(missing_values=-999999.99, strategy="mean")),
    # Standardisation is applied to the imputed input, before feature extraction.
    ("std_scaler", StandardScaler()),
    # NOTE(review): the positional arguments are presumably a window size of
    # 128, an overlap/offset of 0 and six feature on/off flags — confirm
    # against python.features_extraction.
    ("swfe", AdvancedSlidingWindowFeaturesExtractor(128, 0, True, True, True, True, True, True)),
    # 500 bootstrapped, unlimited-depth trees, each split drawing from 2
    # candidate features; the seed is fixed so the fit is repeatable.
    ("rnf_cls", RandomForestClassifier(random_state=42, n_estimators=500, max_features=2, bootstrap=True, max_depth=None)),
])

In [3]:
# Support-vector candidate: impute, standardise, extract sliding-window
# features, then classify with a one-vs-rest wrapper around an SVC.
svc_clf = Pipeline(steps=[
    # Entries equal to the sentinel -999999.99 count as missing and are
    # replaced by the mean of their column.
    ("si", SimpleImputer(strategy="mean", missing_values=-999999.99)),
    ("std_scaler", StandardScaler()),
    # NOTE(review): arguments are presumably window size, overlap/offset and
    # six feature flags — confirm against python.features_extraction.
    ("swfe", AdvancedSlidingWindowFeaturesExtractor(128, 0, True, True, True, True, True, True)),
    # The step is keyed "svr" although it wraps a classifier (SVC); the key is
    # kept as-is because it determines the pipeline's parameter names.
    ("svr", OneVsRestClassifier(estimator=SVC(C=0.1, random_state=42))),
])

In [4]:
# Subject-wise cross-validation of the SVC pipeline. cross_validate and
# GroupKFold are imported in the notebook's top import cell.
#
# The number of folds equals the number of distinct subjects, so samples that
# share a subject never appear on both the train and the test side of a fold.
n_subjects = len(np.unique(subjects))
cv_res = cross_validate(
    svc_clf,
    X_train,
    y_train,
    groups=subjects,
    cv=GroupKFold(n_splits=n_subjects),
    return_train_score=True,  # train scores are kept in cv_res for inspection
)

# Displayed cell result: mean held-out score across the folds.
cv_res["test_score"].mean()

0.7160823747714675

# Submit prediction

In [20]:
# Refit the chosen pipeline on the whole training set and predict the test set.
# final_model is an alias, not a copy: fitting it also fits svc_clf.
final_model = svc_clf
y_pred = final_model.fit(X_train, y_train).predict(X_test)

# Hand the predictions to the project helper, targeting the "submissions" folder.
write_submission(y_pred, "submissions")

Submission toy_submission.csv saved in submissions/toy_submission.csv.
