# Simple classification example

In [1]:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from nemo.epochs import get_epochs, get_epochs_dfs
from nemo.feature_extraction import (
    create_datasets_from_epochs_df,
    load_dataset,
    create_experiment_id,
)
from nemo.classification import get_cv

## Load data

In [2]:
# Dataset configuration. These settings are used to build the experiment id
# that names the cached dataset, and to (re)create the dataset when needed.
task = "4_class"
n_windows = 3
features = ["MV"]
include_events = "empe"
ch_selection = "hbo"

# Build the id *before* the try block so that `exp_id` is always defined when
# the fallback branch below formats its message.
exp_id = create_experiment_id(
    task=task,
    n_windows=n_windows,
    features=features,
    include_events=include_events,
    ch_selection=ch_selection,
)

try:
    # Fast path: reuse a previously saved dataset.
    X, y, _ = load_dataset(exp_id)
    print(f"Loaded dataset {exp_id}.")
except Exception as load_error:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit are not swallowed and turned into a dataset rebuild.
    # The reason is printed so a genuine failure is not mistaken for a cache miss.
    print(f"Could not load dataset {exp_id} ({load_error!r}); creating it instead.")
    # NOTE(review): `ch_selection` is part of `exp_id` but is not forwarded
    # here -- confirm that create_datasets_from_epochs_df applies the intended
    # channel selection by default.
    X, y, _ = create_datasets_from_epochs_df(
        get_epochs_dfs(get_epochs(include_events=include_events))[0],
        task=task,
        n_windows=n_windows,
        features=features,
        include_events=include_events,
        save=True,
    )
    print(f"Created and saved dataset {exp_id}.")

Loaded dataset empe-4_class-MV-3-hbo.


## Subject-specific classification

In [3]:
# Subject-specific evaluation: cross-validate one classifier per subject and
# report the average of the per-subject mean scores.

# Number of distinct labels across all subjects.
all_labels = np.concatenate([*y.values()])
n_classes = len(np.unique(all_labels))

# Shrinkage LDA with equal prior probability for every class.
uniform_priors = np.ones(n_classes) / n_classes
clf = LinearDiscriminantAnalysis(
    solver="lsqr",
    shrinkage="auto",
    priors=uniform_priors,
)

# `scorer` is kept as the plain metric function; it is wrapped once for use
# with cross_val_score.
scorer = accuracy_score
cv_scoring = make_scorer(scorer)

subject_scores = []
for subject in X:
    subject_features = X[subject]
    subject_labels = y[subject]
    # The CV splitter comes from nemo's get_cv, built from this subject's labels.
    fold_scores = cross_val_score(
        clf,
        subject_features,
        subject_labels,
        cv=get_cv(subject_labels),
        scoring=cv_scoring,
    )
    subject_scores.append(fold_scores.mean())

print(
    f"""
Subject-specific classification accuracy: {np.mean(subject_scores):.3f}
"""
)


Subject-specific classification accuracy: 0.391



## Combined model classification

In [4]:
# Combined (cross-subject) evaluation: train one model on the pooled data of
# all non-held-out subjects and score it on the pooled data of the held-out
# subjects.

# Positions (in the iteration order of X) of the subjects held out for testing.
test_subject_ix = np.array([0, 1, 2, 3, 4])

subject_ids = list(X.keys())
test_subject_mask = np.isin(np.arange(len(subject_ids)), test_subject_ix)

# Select subjects by key with plain lists. Going through
# np.array(..., dtype=object) only yields a 1-D array of per-subject arrays
# when the subjects have differing shapes; with equal shapes numpy builds an
# N-D object array and the concatenated result comes out with dtype=object.
# Indexing X and y by the same key also guarantees they stay aligned.
train_subjects = [
    subject_id
    for subject_id, held_out in zip(subject_ids, test_subject_mask)
    if not held_out
]
test_subjects = [
    subject_id
    for subject_id, held_out in zip(subject_ids, test_subject_mask)
    if held_out
]

X_train = np.concatenate([X[subject_id] for subject_id in train_subjects])
y_train = np.concatenate([y[subject_id] for subject_id in train_subjects])
X_test = np.concatenate([X[subject_id] for subject_id in test_subjects])
y_test = np.concatenate([y[subject_id] for subject_id in test_subjects])

# Note: this refits `clf` in place on the pooled training data.
y_pred = clf.fit(X_train, y_train).predict(X_test)
score = scorer(y_test, y_pred)

print(
    f"""
Test subjects: {", ".join(subject_ids[ix] for ix in test_subject_ix)}

X_train: {X_train.shape}
X_test: {X_test.shape}

Combined model classification accuracy: {score:.3f}
"""
)


Test subjects: sub-101, sub-105, sub-107, sub-108, sub-109

X_train: (1003, 72)
X_test: (200, 72)

Combined model classification accuracy: 0.300

