In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
# Raise the pandas display limit so frames of up to 1000 rows are shown in full.
pd.set_option("display.max_rows", 1000)

# Experiment folder, relative to the notebook's working directory.
exp_path = Path("../experiments/issue_32/")

In [2]:
!ls /home/paulo/Documents/GIT/BCI_MsC/experiments/issue_32/results/

1  2  3  4  5  6  7  8	9  results.csv


In [3]:
# Load the raw benchmark results from this experiment's results folder
# (resolved through exp_path instead of a machine-specific absolute path).
df = pd.read_csv(exp_path / "results" / "results.csv")
uids = df.uid.unique()

# Exclude the "pca" and "whitening" rows. The explicit copy makes full_df an
# independent frame, so overwriting its columns later does not write into a
# slice of df (and does not raise SettingWithCopyWarning).
full_df = df[~df.algorithm.isin(["pca", "whitening"])].copy()

In [4]:
# Distinct run indices and subject ids present after filtering.
(full_df["run"].unique(), full_df["uid"].unique())

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([1, 2, 3, 4, 5, 6, 7, 8, 9]))

In [5]:
# Distinct algorithm and classifier labels as stored in the results file.
(full_df["algorithm"].unique(), full_df["classifier"].unique())

(array(['None', 'ORICA (0)', 'ORICA (1)', 'Ext. Infomax', 'Infomax',
        'SOBI', 'JADE', 'Picard', 'FastICA', 'picard_o'], dtype=object),
 array(['mlp', 'random_forest', 'extra_trees', 'gaussian_nb', 'lda',
        'svm_sigmoid', 'svm_poly', 'svm_linear', 'svm_rbf', 'logistic_l2',
        'logistic_l1', 'logistic'], dtype=object))

In [6]:
def get_default(get_fn):
    """Wrap a ``dict.get``-style callable so a missing key maps to itself.

    Parameters
    ----------
    get_fn : callable
        Called as ``get_fn(key, default)``, e.g. the bound ``get`` method
        of a dictionary.

    Returns
    -------
    callable
        A one-argument function that returns ``get_fn(key, key)``, i.e. the
        key itself is used as the fallback value.
    """
    def lookup_or_passthrough(key):
        # The key doubles as the default, so unknown keys pass through.
        return get_fn(key, key)

    return lookup_or_passthrough

# Display names for the classifier identifiers, keyed by the raw identifier.
classifier_rename_dict = {
    "mlp": "MLP",
    "random_forest": "Random Forest",
    "extra_trees": "Extra Trees",
    "gaussian_nb": "Naïve Bayes",
    "lda": "LDA",
    "svm_sigmoid": "SVM (Sig.)",
    "svm_poly": "SVM (Poly)",
    "svm_linear": "SVM (Lin.)",
    "svm_rbf": "SVM (RBF)",
    "logistic_l1": "Log. Reg. (L1)",
    "logistic_l2": "Log. Reg. (L2)",
    "logistic": "Log. Reg.",
}
# Display names for algorithm identifiers; only "picard_o" is remapped here.
algorithm_rename_dict = {
    "picard_o": "Picard-O",
}
# Replace raw identifiers with their display names; labels without an entry
# in the rename dictionaries are kept unchanged (get_default falls back to
# the key). assign() builds a new frame instead of writing columns into
# full_df in place, which avoids SettingWithCopyWarning when full_df is a
# filtered slice of another frame.
full_df = full_df.assign(
    algorithm=full_df.algorithm.apply(get_default(algorithm_rename_dict.get)),
    classifier=full_df.classifier.apply(get_default(classifier_rename_dict.get)),
)

In [7]:
# Distinct algorithm and classifier labels, to check the renaming above.
(full_df["algorithm"].unique(), full_df["classifier"].unique())

(array(['None', 'ORICA (0)', 'ORICA (1)', 'Ext. Infomax', 'Infomax',
        'SOBI', 'JADE', 'Picard', 'FastICA', 'Picard-O'], dtype=object),
 array(['MLP', 'Random Forest', 'Extra Trees', 'Naïve Bayes', 'LDA',
        'SVM (Sig.)', 'SVM (Poly)', 'SVM (Lin.)', 'SVM (RBF)',
        'Log. Reg. (L2)', 'Log. Reg. (L1)', 'Log. Reg.'], dtype=object))

In [8]:
# Write the renamed results to formatted.csv, the input of the plotting script below.
full_df.to_csv("formatted.csv", index=None)
# NOTE(review): this removes "plots_openbmi", but the script below saves into
# "plots" — confirm which folder is meant to be cleared before regenerating.
!rm -rf ../experiments/issue_32/plots_openbmi
# Run the project's multiple_runs.py on the CSV, saving into the issue_32 plots folder.
!python ../ica_benchmark/visualization/multiple_runs.py -path ./formatted.csv -save_folder ../experiments/issue_32/plots



  fig = plt.figure(figsize=figsize, dpi=150)












In [9]:
# Drop rows that are identical in every column except "run". The shape is
# printed before and after to show how many rows were removed.
print(full_df.shape)
# Ordered list of labels (drop_duplicates documents `subset` as a label or a
# sequence of labels, which an unordered set is not).
subset = [column for column in full_df.columns if column != "run"]
full_df = full_df.drop_duplicates(subset=subset)
print(full_df.shape)
full_df.columns

(10800, 11)
(10080, 11)


Index(['run', 'algorithm', 'classifier', 'uid', 'Acc.', 'Bal. Acc.', 'Kappa',
       'clf_fit_time', 'preprocess_fit_time', 'selected_features',
       'duplicated'],
      dtype='object')

In [10]:
# Average every remaining column over the rows of each
# (uid, classifier, algorithm) combination; the keys stay regular columns.
df = (
    full_df
    .groupby(by=["uid", "classifier", "algorithm"], as_index=False)
    .mean()
)

# Best combination

In [11]:
# Mean of every metric per (uid, classifier, algorithm). as_index=False
# already returns the keys as columns with a fresh integer index, so no extra
# reset_index() (which would only add a spurious "index" column) is needed.
sorted_all = full_df.groupby(["uid", "classifier", "algorithm"], as_index=False).mean()

# Rank the combinations within each subject by Kappa, highest first. Only the
# Kappa column is ranked; ties share the same (averaged) rank.
sorted_all["order"] = sorted_all.groupby("uid")["Kappa"].rank(ascending=False)

# Keep, for each subject, the row(s) holding that subject's lowest rank value,
# i.e. the best combination(s); tied combinations are all kept.
best_order = sorted_all.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_all.loc[sorted_all["order"] == best_order, ["uid", "classifier", "algorithm", "Kappa"]]
    .round(3)
    .sort_values(by="uid")
)
ranked_df

Unnamed: 0,uid,classifier,algorithm,Kappa
39,1,Log. Reg. (L1),SOBI,0.574
192,2,Random Forest,Infomax,0.247
266,3,Log. Reg.,ORICA (1),0.676
276,3,Log. Reg. (L1),ORICA (1),0.676
346,3,SVM (RBF),ORICA (1),0.676
448,4,SVM (Lin.),Picard-O,0.284
546,5,Naïve Bayes,ORICA (1),0.102
639,6,Log. Reg. (L1),SOBI,0.278
755,7,Log. Reg. (L1),ORICA (0),0.611
947,8,SVM (RBF),Picard,0.626


In [12]:
# Export the current ranked_df as a LaTeX table, without the index column.
ranked_df.to_latex(buf=exp_path / "best_combination.tex", index=False)

  ranked_df.to_latex(exp_path / "best_combination.tex", index=False)


# Best ICA algorithms

In [13]:
# Mean and standard deviation of Kappa per (uid, algorithm), computed over the
# rows of df in each group. Kappa is selected *before* aggregating so the
# string "classifier" column is never passed to mean/std (aggregating it only
# produced a warning and is an error on newer pandas versions).
sorted_alg = (
    df.groupby(["uid", "algorithm"])["Kappa"]
    .agg(["mean", "std"])
    .sort_values(by=["uid", "mean"])  # "uid" is an index level at this point
    .reset_index()
)
# Rank the algorithms within each subject by mean Kappa (1 = highest mean).
sorted_alg["order"] = sorted_alg.groupby("uid")["mean"].rank(ascending=False)
sorted_alg

  sorted_alg = df.groupby(["uid", "algorithm"], as_index=False).agg([np.mean, np.std]).Kappa.sort_values(by=["uid", "mean"]).reset_index()


Unnamed: 0,uid,algorithm,mean,std,order
0,1,,0.31412,0.071406,10.0
1,1,Ext. Infomax,0.419792,0.023459,9.0
2,1,ORICA (0),0.428356,0.049015,8.0
3,1,Infomax,0.433063,0.020441,7.0
4,1,Picard-O,0.445602,0.030219,6.0
5,1,ORICA (1),0.449961,0.032109,5.0
6,1,Picard,0.45162,0.034548,4.0
7,1,FastICA,0.468287,0.022055,3.0
8,1,JADE,0.468981,0.03356,2.0
9,1,SOBI,0.52064,0.030655,1.0


In [14]:
# For every subject keep the algorithm(s) holding that subject's lowest
# "order" value, i.e. the best-ranked one(s). A grouped transform replaces the
# per-subject query/concat loop and also copes with an empty frame.
best_order = sorted_alg.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_alg.loc[sorted_alg["order"] == best_order, ["uid", "algorithm", "mean", "std"]]
    .round(3)
    .sort_values(by="uid")
)
ranked_df

Unnamed: 0,uid,algorithm,mean,std
9,1,SOBI,0.521,0.031
19,2,Infomax,0.185,0.033
29,3,ORICA (1),0.643,0.035
39,4,Picard,0.259,0.024
49,5,Infomax,0.071,0.013
59,6,SOBI,0.231,0.054
69,7,ORICA (0),0.527,0.078
79,8,Infomax,0.584,0.024
89,9,ORICA (1),0.613,0.038


In [15]:
# Export the current ranked_df as a LaTeX table, without the index column.
ranked_df.to_latex(buf=exp_path / "best_alg.tex", index=False)

  ranked_df.to_latex(exp_path / "best_alg.tex", index=False)


In [16]:
# Median of the per-subject rank ("order") for each algorithm, sorted from the
# lowest (best) median rank to the highest.
median_order_by_algorithm = sorted_alg.groupby("algorithm")[["order"]].median()
order_df = median_order_by_algorithm.sort_values(by="order")
order_df

Unnamed: 0_level_0,order
algorithm,Unnamed: 1_level_1
Picard,3.0
FastICA,4.0
SOBI,4.0
Infomax,5.0
ORICA (1),5.0
Picard-O,5.0
Ext. Infomax,6.0
ORICA (0),7.0
JADE,8.0
,9.0


In [17]:
# order_df keeps the algorithm names in its index, so exporting it with
# index=False wrote a table containing only the "order" numbers. Move the
# names into a regular column first so every row of the table is labelled.
order_df.reset_index().to_latex(exp_path / "best_alg_median_rank.tex", index=False)

  order_df.to_latex(exp_path / "best_alg_median_rank.tex", index=False)


# Best classifier

In [18]:
# Mean and standard deviation of Kappa per (uid, classifier), computed over
# the rows of df in each group. Kappa is selected *before* aggregating so the
# string "algorithm" column is never passed to mean/std (aggregating it only
# produced a warning and is an error on newer pandas versions).
sorted_clf = (
    df.groupby(["uid", "classifier"])["Kappa"]
    .agg(["mean", "std"])
    .sort_values(by=["uid", "mean"])  # "uid" is an index level at this point
    .reset_index()
)
# Rank the classifiers within each subject by mean Kappa (1 = highest mean).
sorted_clf["order"] = sorted_clf.groupby("uid")["mean"].rank(ascending=False)

# Keep, for each subject, the classifier(s) holding the lowest rank value;
# tied classifiers are all kept.
best_order = sorted_clf.groupby("uid")["order"].transform("min")
ranked_df = (
    sorted_clf.loc[sorted_clf["order"] == best_order, ["uid", "classifier", "mean", "std"]]
    .round(3)
    .sort_values(by="uid")
)
ranked_df

  sorted_clf = df.groupby(["uid", "classifier"], as_index=False).agg([np.mean, np.std]).Kappa.sort_values(by=["uid", "mean"]).reset_index()


Unnamed: 0,uid,classifier,mean,std
11,1,Log. Reg. (L2),0.474,0.048
23,2,Random Forest,0.183,0.082
34,3,Log. Reg.,0.561,0.087
35,3,Log. Reg. (L2),0.561,0.085
47,4,Log. Reg.,0.222,0.069
59,5,Log. Reg.,0.057,0.036
71,6,SVM (Sig.),0.142,0.054
83,7,Log. Reg. (L1),0.433,0.166
95,8,SVM (Lin.),0.535,0.095
107,9,Log. Reg. (L2),0.518,0.101


In [19]:
# Export the current ranked_df as a LaTeX table, without the index column.
ranked_df.to_latex(buf=exp_path / "best_clf.tex", index=False)

  ranked_df.to_latex(exp_path / "best_clf.tex", index=False)


In [20]:
# Median of the per-subject rank ("order") for each classifier, sorted from
# the lowest (best) median rank to the highest.
median_order_by_classifier = sorted_clf.groupby("classifier")[["order"]].median()
order_df = median_order_by_classifier.sort_values(by="order")
order_df

Unnamed: 0_level_0,order
classifier,Unnamed: 1_level_1
Log. Reg.,2.0
Log. Reg. (L1),3.0
Log. Reg. (L2),3.0
SVM (Lin.),5.0
LDA,6.0
MLP,6.0
SVM (RBF),6.0
SVM (Sig.),7.0
Random Forest,9.0
Extra Trees,10.0


In [21]:
# order_df keeps the classifier names in its index, so exporting it with
# index=False wrote a table containing only the "order" numbers. Move the
# names into a regular column first so every row of the table is labelled.
order_df.reset_index().to_latex(exp_path / "best_clf_median_rank.tex", index=False)

  order_df.to_latex(exp_path / "best_clf_median_rank.tex", index=False)


In [26]:
import ipywidgets as widgets

# Folder holding the PNG figures that the tab widgets below display.
save_folder = Path("../experiments/issue_32/plots/")

classifiers = full_df.classifier.unique()
tab_contents = classifiers

# Load each classifier's "best algorithm per subject" figure into an image widget.
children = [
    widgets.Image(
        value=(save_folder / f"best_algorithm_per_subject_for_{name}.png").read_bytes(),
        format='png',
    )
    for name in classifiers
]

# One tab per classifier, titled with the classifier name.
tab = widgets.Tab()
tab.children = children
for position, title in enumerate(classifiers):
    tab.set_title(position, title)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\xc0\x00\x00\x02\xd0\x08\x06\x00\x0…

In [29]:
# Show the "best classifier per subject" figure of every algorithm, one tab each.
algorithms = full_df.algorithm.unique()
children = list()
for alg in algorithms:
    # Read the PNG bytes from the plots folder into an image widget.
    with open(save_folder / f"best_classifier_per_subject_for_{alg}.png", "rb") as f:
        widget = widgets.Image(
            value=f.read(),
            format='png',
        )
    children.append(widget)

tab = widgets.Tab()
tab.children = children
# Title each tab with its own algorithm. The loop variable must be the one
# passed to set_title; previously the stale `alg` left over from the loading
# loop was used, so every tab was titled with the last algorithm.
for i, alg in enumerate(algorithms):
    tab.set_title(i, alg)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\xc0\x00\x00\x02\xd0\x08\x06\x00\x0…

In [30]:

classifiers = full_df.classifier.unique()
tab_contents = classifiers

# Load each classifier's "average per algorithm" figure into an image widget.
children = [
    widgets.Image(
        value=(save_folder / f"average_per_algorithm_for_{name}.png").read_bytes(),
        format='png',
    )
    for name in classifiers
]

# One tab per classifier, titled with the classifier name.
tab = widgets.Tab()
tab.children = children
for position, title in enumerate(classifiers):
    tab.set_title(position, title)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [31]:
# Show the "average per classifier" figure of every algorithm, one tab each.
algorithms = full_df.algorithm.unique()
children = list()
for alg in algorithms:
    # Read the PNG bytes from the plots folder into an image widget.
    with open(save_folder / f"average_per_classifier_for_{alg}.png", "rb") as f:
        widget = widgets.Image(
            value=f.read(),
            format='png',
        )
    children.append(widget)

tab = widgets.Tab()
tab.children = children
# Title each tab with its own algorithm. The loop variable must be the one
# passed to set_title; previously the stale `alg` left over from the loading
# loop was used, so every tab was titled with the last algorithm.
for i, alg in enumerate(algorithms):
    tab.set_title(i, alg)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [32]:
classifiers = full_df.classifier.unique()
tab_contents = classifiers

# Load each classifier's "detailed" figure into an image widget.
children = [
    widgets.Image(
        value=(save_folder / f"detailed_{name}.png").read_bytes(),
        format='png',
    )
    for name in classifiers
]

# One tab per classifier, titled with the classifier name.
tab = widgets.Tab()
tab.children = children
for position, title in enumerate(classifiers):
    tab.set_title(position, title)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…

In [33]:
algorithms = full_df.algorithm.unique()
tab_contents = algorithms

# Load each algorithm's "detailed" figure into an image widget.
children = [
    widgets.Image(
        value=(save_folder / f"detailed_{name}.png").read_bytes(),
        format='png',
    )
    for name in algorithms
]

# One tab per algorithm, titled with the algorithm name.
tab = widgets.Tab()
tab.children = children
for position, title in enumerate(algorithms):
    tab.set_title(position, title)
tab

Tab(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\t`\x00\x00\x02\xd0\x08\x06\x00\x00\x00…