# Load features

In [1]:
import pickle

with open("data/processed/features.pkl", "rb") as f:
    data = pickle.load(f)

data.keys(), data["regional"].keys(), data["regional"]["ALPHA"].keys(), data["regional"]["ALPHA"]["X"].shape, data["regional"]["ALPHA"]["y"]

(dict_keys(['vector', 'regional', 'graph']),
 dict_keys(['ALPHA', 'BETA', 'THETA', 'DELTA']),
 dict_keys(['X', 'y', 'meta']),
 (90, 30),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2]))

# Classification

## Random forest on the ALPHA band with regional feature extraction

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

loo = LeaveOneOut()

X = data["regional"]["ALPHA"]["X"]
y = data["regional"]["ALPHA"]["y"]

y_true = []
y_pred = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = RandomForestClassifier(n_estimators=100, random_state=33)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.5555555555555556,
 array([[10,  9,  3],
        [ 6, 25,  9],
        [ 5,  8, 15]]))

## With ANOVA feature selection

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
import numpy as np

loo = LeaveOneOut()

X = data["regional"]["ALPHA"]["X"]
y = data["regional"]["ALPHA"]["y"]


y_true = []
y_pred = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = StandardScaler()
    model = RandomForestClassifier(n_estimators=100, random_state=33)

    scaler.fit(X_train)
    selector = SelectKBest(f_classif, k=10)
    selector.fit(X_train, y_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.6111111111111112,
 array([[10,  8,  4],
        [ 2, 32,  6],
        [ 5, 10, 13]]))

## Keeping only 28 SCI (instead of the 40)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
import numpy as np

loo = LeaveOneOut()

X = data["regional"]["ALPHA"]["X"]
y = data["regional"]["ALPHA"]["y"]

# Keep only 28 SCI 
n_sci_short = 28
X_mci, X_sci, X_ad = X[y==0], X[y==1], X[y==2]
X_sci_idx = np.random.choice(np.arange(len(X_sci)), size=n_sci_short, replace=False)

X_sci_short = X_sci[X_sci_idx]
X = np.concat([X_mci, X_sci_short, X_ad])
y = np.concat([np.array([0] * len(X_mci)), np.array([1] * len(X_sci_short)), np.array([2] * len(X_ad))])

y_true = []
y_pred = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = StandardScaler()
    model = RandomForestClassifier(n_estimators=100, random_state=33)
    
    scaler.fit(X_train)
    selector = SelectKBest(f_classif, k=10)
    selector.fit(X_train, y_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.5641025641025641,
 array([[11,  7,  4],
        [ 6, 18,  4],
        [ 6,  7, 15]]))

## Mutual information instead of ANOVA

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif
import numpy as np

loo = LeaveOneOut()

X = data["regional"]["ALPHA"]["X"]
y = data["regional"]["ALPHA"]["y"]


y_true = []
y_pred = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    scaler = StandardScaler()
    model = RandomForestClassifier(n_estimators=100, random_state=33)

    scaler.fit(X_train)
    selector = SelectKBest(mutual_info_classif, k=10)
    selector.fit(X_train, y_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    X_train = selector.transform(X_train)
    X_test = selector.transform(X_test)

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(0.5333333333333333,
 array([[ 9, 10,  3],
        [ 8, 26,  6],
        [ 6,  9, 13]]))

## Tree-based feature extraction

In [None]:
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
import numpy as np

loo = LeaveOneOut()

X = data["regional"]["ALPHA"]["X"]
y = data["regional"]["ALPHA"]["y"]
print(X.shape)

preprocess_model = RandomForestClassifier(n_estimators=100, random_state=33)
preprocess_model = preprocess_model.fit(X, y)
selector = SelectFromModel(preprocess_model, prefit=True)
X = selector.transform(X)
print(X.shape)

y_true = []
y_pred = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # scaler = StandardScaler()
    # scaler.fit(X_train)
    # selector = SelectKBest(mutual_info_classif, k=10)
    # selector.fit(X_train, y_train)

    # X_train = scaler.transform(X_train)
    # X_test = scaler.transform(X_test)

    # X_train = selector.transform(X_train)
    # X_test = selector.transform(X_test)

    model = RandomForestClassifier(n_estimators=100, random_state=33)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    y_true.append(y_test[0])
    y_pred.append(pred[0])

y_true = np.array(y_true)
y_pred = np.array(y_pred)

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred) 
acc, cm

(90, 30)
(90, 11)


(0.6,
 array([[ 9,  9,  4],
        [ 5, 29,  6],
        [ 5,  7, 16]]))