In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
# Let pandas render up to 1000 rows so the ranking tables are shown in full.
pd.options.display.max_rows = 1000

# Experiment folder (relative to the notebook) that receives the exported tables.
exp_path = Path("../experiments/issue_32/")

In [2]:
!ls /home/paulo/Documents/GIT/BCI_MsC/experiments/issue_32/results/

1  2  3  4  5  6  7  8	9  results.csv


In [3]:
# Read the OpenBMI results through exp_path so the notebook does not depend on
# one machine's absolute directory layout.
df = pd.read_csv(exp_path / "results_openbmi" / "results.csv")
uids = df.uid.unique()

# Keep every row whose algorithm is neither "pca" nor "whitening".
# .copy() makes full_df an independent frame, so columns can be assigned to it
# later without pandas warning about writing into a selection of df.
full_df = df[~df.algorithm.isin(["pca", "whitening"])].copy()

In [4]:
# Distinct run indices and uids present in the filtered results.
full_df["run"].unique(), full_df["uid"].unique()

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([ 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  2, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 29,  3]))

In [5]:
# Distinct algorithm and classifier labels as stored in the results file.
full_df["algorithm"].unique(), full_df["classifier"].unique()

(array(['None', 'ORICA (0)', 'ORICA (1)', 'Ext. Infomax', 'Infomax',
        'SOBI', 'JADE', 'Picard', 'FastICA', 'picard_o'], dtype=object),
 array(['mlp', 'random_forest', 'extra_trees', 'gaussian_nb', 'lda',
        'svm_sigmoid', 'svm_poly', 'svm_linear', 'svm_rbf', 'logistic_l2',
        'logistic_l1', 'logistic'], dtype=object))

In [6]:
def get_default(get_fn):
    """Wrap a two-argument getter so that a missing key maps to itself.

    Parameters
    ----------
    get_fn : callable
        Called as ``get_fn(key, default)``, e.g. a bound ``dict.get``.

    Returns
    -------
    callable
        One-argument function returning ``get_fn(key, key)``.
    """
    return lambda key: get_fn(key, key)

# Raw classifier identifier -> label used in tables and figures.
classifier_rename_dict = dict(
    mlp="MLP",
    random_forest="Random Forest",
    extra_trees="Extra Trees",
    gaussian_nb="Naïve Bayes",
    lda="LDA",
    svm_sigmoid="SVM (Sig.)",
    svm_poly="SVM (Poly)",
    svm_linear="SVM (Lin.)",
    svm_rbf="SVM (RBF)",
    logistic_l1="Log. Reg. (L1)",
    logistic_l2="Log. Reg. (L2)",
    logistic="Log. Reg.",
)
# Raw algorithm identifier -> label; names without an entry are kept unchanged.
algorithm_rename_dict = dict(
    picard_o="Picard-O",
)
# assign() returns a new frame rather than writing columns into full_df in
# place. full_df was produced by boolean-filtering another frame, so in-place
# column assignment can raise SettingWithCopyWarning.
full_df = full_df.assign(
    algorithm=full_df["algorithm"].map(get_default(algorithm_rename_dict.get)),
    classifier=full_df["classifier"].map(get_default(classifier_rename_dict.get)),
)

In [7]:
# Distinct algorithm and classifier labels after applying the display names.
full_df["algorithm"].unique(), full_df["classifier"].unique()

(array(['None', 'ORICA (0)', 'ORICA (1)', 'Ext. Infomax', 'Infomax',
        'SOBI', 'JADE', 'Picard', 'FastICA', 'Picard-O'], dtype=object),
 array(['MLP', 'Random Forest', 'Extra Trees', 'Naïve Bayes', 'LDA',
        'SVM (Sig.)', 'SVM (Poly)', 'SVM (Lin.)', 'SVM (RBF)',
        'Log. Reg. (L2)', 'Log. Reg. (L1)', 'Log. Reg.'], dtype=object))

In [10]:
full_df.to_csv("formatted.csv", index=None)
!rm -rf ../experiments/issue_32/plots_openbmi
!python ../ica_benchmark/visualization/multiple_runs.py -path ./formatted.csv -save_folder ../experiments/issue_32/plots_openbmi





  fig = plt.figure(figsize=figsize, dpi=150)




















In [11]:
# Shape before de-duplication.
print(full_df.shape)
# Two rows count as duplicates when they agree on every column except "run".
subset = [column for column in full_df.columns if column != "run"]
full_df = full_df.drop_duplicates(subset=subset)
# Shape after de-duplication.
print(full_df.shape)
full_df.columns

(27600, 11)
(25758, 11)


Index(['run', 'algorithm', 'classifier', 'uid', 'Acc.', 'Bal. Acc.', 'Kappa',
       'clf_fit_time', 'preprocess_fit_time', 'selected_features',
       'duplicated'],
      dtype='object')

In [12]:
# Average the metrics over the rows of each (uid, classifier, algorithm) group.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() no longer
# silently skips non-numeric columns; on older pandas the result is unchanged.
df = full_df.groupby(["uid", "classifier", "algorithm"], as_index=False).mean(numeric_only=True)

# Best combination

In [13]:
# Mean metrics per (uid, classifier, algorithm). numeric_only=True avoids a
# TypeError on pandas >= 2.0 if a non-numeric column is present.
sorted_all = full_df.groupby(["uid", "classifier", "algorithm"], as_index=False).mean(numeric_only=True)
# Rank the combinations inside each uid by Kappa only (1 = highest Kappa; ties
# share their average rank), instead of ranking every column of the frame.
sorted_all["order"] = sorted_all.groupby("uid")["Kappa"].rank(ascending=False)
# Keep, for each uid, every row that holds that uid's best (smallest) rank.
# A vectorised mask replaces the per-uid query loop and copes with empty input.
is_best = sorted_all["order"] == sorted_all.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_all.loc[is_best, ["uid", "classifier", "algorithm", "Kappa"]]
    .round(3)
    .sort_values(by="uid", kind="stable")  # stable: tied rows keep their order
)
ranked_df

Unnamed: 0,uid,classifier,algorithm,Kappa
40,1,Log. Reg. (L2),Ext. Infomax,0.385
154,2,Log. Reg. (L1),,0.22
164,2,Log. Reg. (L2),,0.22
224,2,SVM (RBF),,0.22
271,3,Log. Reg. (L1),FastICA,0.718
464,10,SVM (RBF),,0.15
566,11,SVM (Lin.),ORICA (1),0.07
576,11,SVM (Poly),ORICA (1),0.07
699,12,SVM (Poly),SOBI,0.18
824,13,SVM (RBF),,0.17


In [14]:
# Save the current ranked_df as a LaTeX table in the experiment folder,
# without the frame's index column.
ranked_df.to_latex(buf=exp_path / "best_combination.tex", index=False)

  ranked_df.to_latex(exp_path / "best_combination.tex", index=False)


# Best ICA algorithms

In [15]:
# Mean and standard deviation of Kappa per (uid, algorithm). Selecting "Kappa"
# before aggregating stops pandas from also trying to aggregate the string
# "classifier" column (the cause of the warning this cell emitted) and avoids
# computing statistics that were discarded anyway.
sorted_alg = (
    df.groupby(["uid", "algorithm"])["Kappa"]
    .agg(["mean", "std"])
    .sort_values(by=["uid", "mean"])
    .reset_index()
)
# Rank algorithms inside each uid by mean Kappa (1 = best; ties are averaged).
sorted_alg["order"] = sorted_alg.groupby("uid")["mean"].rank(ascending=False)
sorted_alg

  sorted_alg = df.groupby(["uid", "algorithm"], as_index=False).agg([np.mean, np.std]).Kappa.sort_values(by=["uid", "mean"]).reset_index()


Unnamed: 0,uid,algorithm,mean,std,order
0,1,ORICA (0),-0.080833,0.031389,10.0
1,1,JADE,0.01375,0.022547,9.0
2,1,ORICA (1),0.0225,0.025816,8.0
3,1,,0.177667,0.061614,7.0
4,1,Picard,0.2525,0.011572,5.5
5,1,Picard-O,0.2525,0.011572,5.5
6,1,SOBI,0.256417,0.041903,4.0
7,1,Infomax,0.25775,0.011733,3.0
8,1,FastICA,0.2765,0.015067,2.0
9,1,Ext. Infomax,0.36475,0.017131,1.0


In [16]:
# For each uid keep every algorithm that holds that uid's best (smallest) rank.
# A vectorised mask replaces the per-uid query loop and copes with empty input.
is_best = sorted_alg["order"] == sorted_alg.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_alg.loc[is_best, ["uid", "algorithm", "mean", "std"]]
    .round(3)
    .sort_values(by="uid", kind="stable")  # stable: tied rows keep their order
)
ranked_df

Unnamed: 0,uid,algorithm,mean,std
9,1,Ext. Infomax,0.365,0.017
19,2,,0.169,0.05
29,3,,0.64,0.035
39,10,,0.114,0.032
49,11,SOBI,0.014,0.023
59,12,,0.059,0.033
69,13,,0.106,0.041
79,14,ORICA (1),0.123,0.045
89,15,JADE,0.001,0.006
99,16,ORICA (0),0.192,0.043


In [17]:
# Save the current ranked_df as a LaTeX table in the experiment folder,
# without the frame's index column.
ranked_df.to_latex(buf=exp_path / "best_alg.tex", index=False)

  ranked_df.to_latex(exp_path / "best_alg.tex", index=False)


In [18]:
# Median per-uid rank of each algorithm, lowest (best) median first.
per_algorithm = sorted_alg.groupby("algorithm")
order_df = per_algorithm[["order"]].median().sort_values(by="order")
order_df

Unnamed: 0_level_0,order
algorithm,Unnamed: 1_level_1
SOBI,4.0
Picard,4.5
Picard-O,4.5
Ext. Infomax,5.0
FastICA,5.0
Infomax,5.0
,6.0
ORICA (1),7.0
ORICA (0),8.0
JADE,9.0


In [19]:
# order_df keeps the algorithm names in its index and has "order" as its only
# column, so the index must be written; index=False produced a table of ranks
# with no algorithm labels.
order_df.to_latex(exp_path / "best_alg_median_rank.tex", index=True)

  order_df.to_latex(exp_path / "best_alg_median_rank.tex", index=False)


# Best classifier

In [20]:
# Mean and standard deviation of Kappa per (uid, classifier). Selecting "Kappa"
# before aggregating stops pandas from also trying to aggregate the string
# "algorithm" column (the cause of the warning this cell emitted).
sorted_clf = (
    df.groupby(["uid", "classifier"])["Kappa"]
    .agg(["mean", "std"])
    .sort_values(by=["uid", "mean"])
    .reset_index()
)
# Rank classifiers inside each uid by mean Kappa (1 = best; ties are averaged).
sorted_clf["order"] = sorted_clf.groupby("uid")["mean"].rank(ascending=False)
# Keep, for each uid, every classifier that holds that uid's best rank.
is_best = sorted_clf["order"] == sorted_clf.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_clf.loc[is_best, ["uid", "classifier", "mean", "std"]]
    .round(3)
    .sort_values(by="uid", kind="stable")  # stable: tied rows keep their order
)
ranked_df

  sorted_clf = df.groupby(["uid", "classifier"], as_index=False).agg([np.mean, np.std]).Kappa.sort_values(by=["uid", "mean"]).reset_index()


Unnamed: 0,uid,classifier,mean,std
11,1,SVM (Lin.),0.193,0.137
23,2,SVM (Lin.),0.095,0.098
35,3,Extra Trees,0.572,0.146
47,10,Random Forest,0.087,0.042
59,11,SVM (Poly),0.003,0.031
71,12,Random Forest,0.042,0.053
83,13,Naïve Bayes,0.025,0.056
95,14,Naïve Bayes,0.021,0.046
107,15,SVM (RBF),-0.015,0.017
119,16,LDA,0.02,0.088


In [21]:
# Save the current ranked_df as a LaTeX table in the experiment folder,
# without the frame's index column.
ranked_df.to_latex(buf=exp_path / "best_clf.tex", index=False)

  ranked_df.to_latex(exp_path / "best_clf.tex", index=False)


In [22]:
# Median per-uid rank of each classifier, lowest (best) median first.
per_classifier = sorted_clf.groupby("classifier")
order_df = per_classifier[["order"]].median().sort_values(by="order")
order_df

Unnamed: 0_level_0,order
classifier,Unnamed: 1_level_1
Log. Reg. (L1),3.0
Log. Reg.,5.0
Random Forest,5.0
SVM (Lin.),5.0
Log. Reg. (L2),6.0
MLP,7.0
Extra Trees,8.0
LDA,8.0
SVM (RBF),8.0
SVM (Sig.),8.0


In [23]:
# order_df keeps the classifier names in its index and has "order" as its only
# column, so the index must be written; index=False produced a table of ranks
# with no classifier labels.
order_df.to_latex(exp_path / "best_clf_median_rank.tex", index=True)

  order_df.to_latex(exp_path / "best_clf_median_rank.tex", index=False)


In [26]:
import ipywidgets as widgets

# Folder the plotting script was asked to save its figures into.
save_folder = Path("../experiments/issue_32/plots_openbmi/")

classifiers = full_df.classifier.unique()
# One image widget per classifier, built from that classifier's PNG.
# Path.read_bytes() opens, reads and closes the file in a single call.
children = [
    widgets.Image(
        value=(save_folder / f"best_algorithm_per_subject_for_{clf}.png").read_bytes(),
        format='png',
    )
    for clf in classifiers
]

# Show the images as tabs, each titled with its classifier.
tab = widgets.Tab(children=children)
for i, clf in enumerate(classifiers):
    tab.set_title(i, clf)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\xc0\x00\x00\x02\xd0\x08\x06\x00\x0…

In [29]:
import ipywidgets as widgets

# Folder the plotting script was asked to save its figures into.
save_folder = Path("../experiments/issue_32/plots_openbmi/")

algorithms = full_df.algorithm.unique()
# One image widget per algorithm, built from that algorithm's PNG.
# NOTE(review): the "classifer" spelling in the file name is kept as-is —
# presumably it matches what the plotting script writes; verify before fixing.
children = [
    widgets.Image(
        value=(save_folder / f"best_classifer_per_subject_for_{alg}.png").read_bytes(),
        format='png',
    )
    for alg in algorithms
]

tab = widgets.Tab(children=children)
# Title each tab with its own algorithm. The previous loop bound `clf` but
# passed the stale `alg`, so every tab was labelled with the last algorithm.
for i, alg in enumerate(algorithms):
    tab.set_title(i, alg)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\xc0\x00\x00\x02\xd0\x08\x06\x00\x0…

In [34]:
import ipywidgets as widgets

# Folder the plotting script was asked to save its figures into.
save_folder = Path("../experiments/issue_32/plots_openbmi/")

classifiers = full_df.classifier.unique()
# One image widget per classifier, built from that classifier's PNG.
# Path.read_bytes() opens, reads and closes the file in a single call.
children = [
    widgets.Image(
        value=(save_folder / f"average_per_algorithm_for_{clf}.png").read_bytes(),
        format='png',
    )
    for clf in classifiers
]

# Show the images as tabs, each titled with its classifier.
tab = widgets.Tab(children=children)
for i, clf in enumerate(classifiers):
    tab.set_title(i, clf)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [33]:
import ipywidgets as widgets

# Folder the plotting script was asked to save its figures into.
save_folder = Path("../experiments/issue_32/plots_openbmi/")

algorithms = full_df.algorithm.unique()
# One image widget per algorithm, built from that algorithm's PNG.
children = [
    widgets.Image(
        value=(save_folder / f"average_per_classifier_for_{alg}.png").read_bytes(),
        format='png',
    )
    for alg in algorithms
]

tab = widgets.Tab(children=children)
# Title each tab with its own algorithm. The previous loop bound `clf` but
# passed the stale `alg`, so every tab was labelled with the last algorithm.
for i, alg in enumerate(algorithms):
    tab.set_title(i, alg)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [30]:
# Relies on `widgets` and `save_folder` defined by the earlier tab cells.
classifiers = full_df.classifier.unique()
# One image widget per classifier, built from that classifier's detailed PNG.
# Path.read_bytes() opens, reads and closes the file in a single call.
children = [
    widgets.Image(
        value=(save_folder / f"detailed_{clf}.png").read_bytes(),
        format='png',
    )
    for clf in classifiers
]

# Show the images as tabs, each titled with its classifier.
tab = widgets.Tab(children=children)
for i, clf in enumerate(classifiers):
    tab.set_title(i, clf)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [32]:
# Relies on `widgets` and `save_folder` defined by the earlier tab cells.
algorithms = full_df.algorithm.unique()
# One image widget per algorithm, built from that algorithm's detailed PNG.
children = [
    widgets.Image(
        value=(save_folder / f"detailed_{alg}.png").read_bytes(),
        format='png',
    )
    for alg in algorithms
]

# Show the images as tabs, each titled with its algorithm. The loop variable is
# named `alg` (it was `clf`) because it iterates over algorithms.
tab = widgets.Tab(children=children)
for i, alg in enumerate(algorithms):
    tab.set_title(i, alg)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…