In [1]:
import numpy as np
import mne

annotation_desc_dict = {
    276: "Idling EEG (eyes open)",
    277: "Idling EEG (eyes closed)",
    768: "Start of a trial",
    769: "Cue onset left (class 1)",
    770: "Cue onset right (class 2)",
    771: "Cue onset foot (class 3)",
    772: "Cue onset tongue (class 4)",
    783: "Cue unknown",
    1023: "Rejected trial",
    1072: "Eye movements",
    32766: "Start of a new run",
}

annotation_encode_dict = {
    276: 0,
    277: 1,
    768: 2,
    769: 3,
    770: 4,
    771: 5,
    772: 6,
    783: 7,
    1023: 8,
    1072: 9,
    32766: 10,
}

def get_annotations(data):
    sr = data.info["sfreq"]
    n_samples = data._raw_extras[0]["n_records"]

    onsets = np.trunc(data.annotations.onset * sr).astype(np.uint32, casting="unsafe")
    durations = np.trunc(data.annotations.duration * sr).astype(np.uint32, casting="unsafe")
    
    desc = data.annotations.description.astype(np.uint32)
    labels_codes = np.vectorize(annotation_encode_dict.get)(desc)
    
    n_codes = len(annotation_encode_dict)
    labels = np.zeros((n_samples, n_codes))
    
    for code, onset, duration in zip(labels_codes, onsets, durations):
        labels[onset:onset+duration, code] = 1
    
    return labels



In [2]:
from mne.io import read_raw_gdf
from pathlib import Path
import numpy as np

root = Path("C:/Users/paull/Documents/GIT/BCI_MsC/notebooks/BCI_Comp_IV_2a/BCICIV_2a_gdf")

dataset_folder = root
mat_files = list(dataset_folder.iterdir())

PRELOAD = False

def load_gdf_file(filepath):
    gdf_data = read_raw_gdf(filepath, preload=PRELOAD)

    chs = gdf_data.ch_names

    gdf_data = read_raw_gdf(
        filepath,
        preload=True,
        eog=["EOG-left", "EOG-central", "EOG-right"],
        exclude=[x for x in chs if "EOG" in x]
    )
    ch_names = gdf_data.ch_names
    info = parse_info(
        gdf_data._raw_extras[0]["subject_info"]
    )
    
    labels = get_annotations(gdf_data)
    
    return gdf_data, labels, ch_names, info

def parse_info(info_dict):
    cols = ['id', 'smoking', 'alcohol_abuse', 'drug_abuse', 'medication', 'weight', 'height', 'sex', 'handedness', 'age']
    parsed_info = {k:v for k, v in info_dict.items() if k in cols}
    return parsed_info
     
def load_subject_data(root, subject, mode=None):
    if mode is None:
        mode = "train"
    
    if mode == "train":
        filepath = root / f"{subject}T.gdf"
        gdf_data, labels, ch_names, info = load_gdf_file(filepath)
    elif mode == "test":
        filepath = root / f"{subject}E.gdf"
        gdf_data, labels, ch_names, info = load_gdf_file(filepath)
    elif mode == "both":
        filepath_t = root / f"{subject}T.gdf"
        filepath_e = root / f"{subject}E.gdf"
        gdf_data_t, labels_t, ch_names_t, info_t = load_gdf_file(filepath_t)
        gdf_data_e, labels_e, ch_names, info = load_gdf_file(filepath_e)
        
        
        assert np.all(ch_names_t == ch_names)
        assert np.all(info_t == info)
        
        gdf_data = gdf_data_t.copy()
        gdf_data._data = np.concatenate(
            [
                gdf_data_t._data,
                gdf_data_e._data,
            ],
            axis=1
        )
        
        labels = np.concatenate(
            [
                labels_t,
                labels_e
            ],
            axis=0
        )
    
    return gdf_data, labels, ch_names, info

def load_subjects_data(root, datasets=None, mode="train"):
    if datasets is None:
#         data_dict = {
#             "all": {
#                 filepath.name[:3]: None for filepath in root.glob("*T.gdf")
#             }
#         }
        data_dict = {
            filepath.name[:3]: {
                filepath.name[:3]: None
            } for filepath in root.glob("*T.gdf")
        }
    else:
        data_dict = {
            dataset: {
                subject_id: {} for subject_id in datasets[dataset]
            } for dataset in datasets
        }
    
    chs_ = None
    for dataset in data_dict:
        for subject_id in data_dict[dataset]:
            gdf, labels, chs, info = load_subject_data(root, subject_id, mode=mode)
            if chs_ is None:
                chs_ = chs
            else:
                assert chs_ == chs
            data_dict[dataset][subject_id] = {
                "gdf": gdf,
                "chs": chs,
                "info": info,
                "labels": labels
            }
    
    
    return data_dict



In [3]:
dataset_dict = {
    "train": ["A02", "A07", "A09", "A01"],
    "validation": ["A03", "A06"],
    "test": ["A04", "A05"],
}
all_subjects = [f"A0{i}" for i in range(10)]

dataset_dict = {
    "train": ["A02", "A07"],
    "validation": ["A03"],
    "test": ["A04"],
}

In [4]:
import mne

def get_kwargs(m, is_extended=False):
    if is_extended:
        return dict(method=m, fit_params=dict(extended=True))
    return dict(method=m)

ica_kwargs_dict = {
    "fastica": get_kwargs("fastica"),
    "infomax": get_kwargs("infomax"),
    "picard": get_kwargs("picard"),
    "ext_infomax": get_kwargs("infomax", is_extended=True),
    "ext_picard": get_kwargs("picard", is_extended=True)
}



In [5]:
from scoring import mutual_information, coherence, correntropy, apply_pairwise, apply_pairwise_parallel

In [7]:
import time
from ica import ICA_METHODS


def join_gdfs(data_dict, datasets_names=None):
    new_dict = {}
    if datasets_names is None:
        datasets_names = data_dict.keys()
    
    for dataset_name in datasets_names:
        all_gdfs = []
        all_labels = []
        for subject_id in data_dict[dataset_name]:
            all_gdfs.append(data_dict[dataset_name][subject_id]["gdf"])
            all_labels.append(data_dict[dataset_name][subject_id]["labels"])

        labels = np.concatenate(all_labels, axis=0)
        gdf_base = all_gdfs[0].copy()
        for gdf in all_gdfs[1:]:
            gdf_base._data = np.concatenate(
                [
                    gdf_base._data,
                    gdf._data
                ],
                axis=1
            )
        new_dict[dataset_name] = {
            "all": {
                "gdf": gdf_base,
                "labels": labels,
                "info": None,
                "chs": gdf_base.ch_names
            }
        }
        
    return new_dict
        
N_RUNS = 3

results = {}

fn_dict = {
    "MI": mutual_information,
    "correntropy": correntropy,
    "coherence": coherence
}

n_components_list = [4, 8, 12, 16, 20]
n_components_list = [5, 10, 15, 20]



try:
    datasets
except:
    datasets = load_subjects_data(root, datasets=dataset_dict, mode="both")

score_calculated_before = {}

for ica_method in ICA_METHODS:
    for n_components in n_components_list:
        for run_n in range(N_RUNS):
            
            joined_dataset = join_gdfs(datasets, ["train"])

            gdf_data = joined_dataset["train"]["all"]["gdf"]
            ica_transform = mne.preprocessing.ICA(n_components=n_components, **ica_kwargs_dict[ica_method])
            ica_transform = ica_transform.fit(gdf_data)
            
            del joined_dataset

            for dataset_name in ("test", "validation", "train"):

                for subject_id in datasets[dataset_name]:
                    

                    gdf_data = datasets[dataset_name][subject_id]["gdf"]   
                    
                    data_after = ica_transform.get_sources(gdf_data).get_data().T

                    for fn_name in fn_dict:

                        print((fn_name, ica_method, dataset_name, subject_id, run_n, n_components))
                        
                        if (n_components > 5) or len(data_after) > 2e6:
                            apply_fn = apply_pairwise_parallel
                        else:
                            apply_fn = apply_pairwise
                        
                        if not (subject_id, fn_name) in score_calculated_before:
                            data_before = gdf_data.get_data().T
                            score_before = apply_pairwise_parallel(data_before, fn_dict[fn_name])
                            score_calculated_before[(subject_id, fn_name)] = score_before

                        start = time.time()
                        score_after = apply_fn(data_after, fn_dict[fn_name])
                        duration = time.time() - start
                        
                        results[(fn_name, ica_method, dataset_name, subject_id, run_n, n_components)] = {
                            "score_before": score_calculated_before[(subject_id, fn_name)],
                            "score_after": score_after,
                            "time": duration
                        }

Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 2.9s.
('MI', 'fastica', 'test', 'A04', 0, 5)
('correntropy', 'fastica', 'test', 'A04', 0, 5)
('coherence', 'fastica', 'test', 'A04', 0, 5)
('MI', 'fastica', 'validation', 'A03', 0, 5)
('correntropy', 'fastica', 'validation', 'A03', 0, 5)
('coherence', 'fastica', 'validation', 'A03', 0, 5)
('MI', 'fastica', 'train', 'A02', 0, 5)
('correntropy', 'fastica', 'train', 'A02', 0, 5)
('coherence', 'fastica', 'train', 'A02', 0, 5)
('MI', 'fastica', 'train', 'A07', 0, 5)
('correntropy', 'fastica', 'train', 'A07', 0, 5)
('coherence', 'fastica', 'train', 'A07', 0, 5)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 2.5s.
('MI', 'fastica', 'test', 'A04', 1, 5)
('correntropy', 'fastica', 'test', 'A04', 1, 5)
('coherence', 'fastica', 'test', 'A04', 1, 5)
('MI', 'fastica', 'validation', 'A0

('correntropy', 'fastica', 'train', 'A02', 2, 20)
('coherence', 'fastica', 'train', 'A02', 2, 20)
('MI', 'fastica', 'train', 'A07', 2, 20)
('correntropy', 'fastica', 'train', 'A07', 2, 20)
('coherence', 'fastica', 'train', 'A07', 2, 20)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
 


  y = 1.0 / (1.0 + np.exp(-u))


Fitting ICA took 57.6s.
('MI', 'infomax', 'test', 'A04', 0, 5)
('correntropy', 'infomax', 'test', 'A04', 0, 5)
('coherence', 'infomax', 'test', 'A04', 0, 5)
('MI', 'infomax', 'validation', 'A03', 0, 5)
('correntropy', 'infomax', 'validation', 'A03', 0, 5)
('coherence', 'infomax', 'validation', 'A03', 0, 5)
('MI', 'infomax', 'train', 'A02', 0, 5)
('correntropy', 'infomax', 'train', 'A02', 0, 5)
('coherence', 'infomax', 'train', 'A02', 0, 5)
('MI', 'infomax', 'train', 'A07', 0, 5)
('correntropy', 'infomax', 'train', 'A07', 0, 5)
('coherence', 'infomax', 'train', 'A07', 0, 5)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
 
Fitting ICA took 60.6s.
('MI', 'infomax', 'test', 'A04', 1, 5)
('correntropy', 'infomax', 'test', 'A04', 1, 5)
('coherence', 'infomax', 'test', 'A04', 1, 5)
('MI', 'infomax', 'validation', 'A03', 1, 5)
('correntropy', 'infomax', 'validation', 'A03', 1, 5)
('coherence', 'infomax', 'validation', 'A03', 1

('MI', 'infomax', 'train', 'A07', 2, 20)
('correntropy', 'infomax', 'train', 'A07', 2, 20)
('coherence', 'infomax', 'train', 'A07', 2, 20)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 6.5s.
('MI', 'picard', 'test', 'A04', 0, 5)
('correntropy', 'picard', 'test', 'A04', 0, 5)
('coherence', 'picard', 'test', 'A04', 0, 5)
('MI', 'picard', 'validation', 'A03', 0, 5)
('correntropy', 'picard', 'validation', 'A03', 0, 5)
('coherence', 'picard', 'validation', 'A03', 0, 5)
('MI', 'picard', 'train', 'A02', 0, 5)
('correntropy', 'picard', 'train', 'A02', 0, 5)
('coherence', 'picard', 'train', 'A02', 0, 5)
('MI', 'picard', 'train', 'A07', 0, 5)
('correntropy', 'picard', 'train', 'A07', 0, 5)
('coherence', 'picard', 'train', 'A07', 0, 5)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 6.5s.
('MI', 'picard', 'test', 'A04', 1, 5)
('c

('correntropy', 'picard', 'train', 'A02', 2, 20)
('coherence', 'picard', 'train', 'A02', 2, 20)
('MI', 'picard', 'train', 'A07', 2, 20)
('correntropy', 'picard', 'train', 'A07', 2, 20)
('coherence', 'picard', 'train', 'A07', 2, 20)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 5 components
Computing Extended Infomax ICA
Fitting ICA took 61.2s.
('MI', 'ext_infomax', 'test', 'A04', 0, 5)
('correntropy', 'ext_infomax', 'test', 'A04', 0, 5)
('coherence', 'ext_infomax', 'test', 'A04', 0, 5)
('MI', 'ext_infomax', 'validation', 'A03', 0, 5)
('correntropy', 'ext_infomax', 'validation', 'A03', 0, 5)
('coherence', 'ext_infomax', 'validation', 'A03', 0, 5)
('MI', 'ext_infomax', 'train', 'A02', 0, 5)
('correntropy', 'ext_infomax', 'train', 'A02', 0, 5)
('coherence', 'ext_infomax', 'train', 'A02', 0, 5)
('MI', 'ext_infomax', 'train', 'A07', 0, 5)
('correntropy', 'ext_infomax', 'train', 'A07', 0, 5)
('coherence', 'ext_infomax', 'train', 'A07', 

Fitting ICA took 165.4s.
('MI', 'ext_infomax', 'test', 'A04', 1, 20)
('correntropy', 'ext_infomax', 'test', 'A04', 1, 20)
('coherence', 'ext_infomax', 'test', 'A04', 1, 20)
('MI', 'ext_infomax', 'validation', 'A03', 1, 20)
('correntropy', 'ext_infomax', 'validation', 'A03', 1, 20)
('coherence', 'ext_infomax', 'validation', 'A03', 1, 20)
('MI', 'ext_infomax', 'train', 'A02', 1, 20)
('correntropy', 'ext_infomax', 'train', 'A02', 1, 20)
('coherence', 'ext_infomax', 'train', 'A02', 1, 20)
('MI', 'ext_infomax', 'train', 'A07', 1, 20)
('correntropy', 'ext_infomax', 'train', 'A07', 1, 20)
('coherence', 'ext_infomax', 'train', 'A07', 1, 20)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 20 components
Computing Extended Infomax ICA
Fitting ICA took 158.4s.
('MI', 'ext_infomax', 'test', 'A04', 2, 20)
('correntropy', 'ext_infomax', 'test', 'A04', 2, 20)
('coherence', 'ext_infomax', 'test', 'A04', 2, 20)
('MI', 'ext_infomax', 'validation', 'A0

Fitting ICA took 30.1s.
('MI', 'ext_picard', 'test', 'A04', 0, 20)
('correntropy', 'ext_picard', 'test', 'A04', 0, 20)
('coherence', 'ext_picard', 'test', 'A04', 0, 20)
('MI', 'ext_picard', 'validation', 'A03', 0, 20)
('correntropy', 'ext_picard', 'validation', 'A03', 0, 20)
('coherence', 'ext_picard', 'validation', 'A03', 0, 20)
('MI', 'ext_picard', 'train', 'A02', 0, 20)
('correntropy', 'ext_picard', 'train', 'A02', 0, 20)
('coherence', 'ext_picard', 'train', 'A02', 0, 20)
('MI', 'ext_picard', 'train', 'A07', 0, 20)
('correntropy', 'ext_picard', 'train', 'A07', 0, 20)
('coherence', 'ext_picard', 'train', 'A07', 0, 20)
Fitting ICA to data using 22 channels (please be patient, this may take a while)
Selecting by number: 20 components
Fitting ICA took 31.2s.
('MI', 'ext_picard', 'test', 'A04', 1, 20)
('correntropy', 'ext_picard', 'test', 'A04', 1, 20)
('coherence', 'ext_picard', 'test', 'A04', 1, 20)
('MI', 'ext_picard', 'validation', 'A03', 1, 20)
('correntropy', 'ext_picard', 'validat

In [8]:
import pandas as pd

df = []
cols = ["scoring", "algorithm", "dataset", "subject_id", "run", "n_components", "score_before", "score_after", "time"]

for k, v in results.items():
    df.append(list(k) + list(v.values()))
pd.DataFrame(df, columns=cols).to_csv("results.csv")

In [23]:
df = pd.DataFrame(df, columns=cols)
df.groupby(["scoring", "algorithm", "dataset", "subject_id", "n_components"]).mean().query(""" (dataset == "test") """)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,run,score_before,score_after,time
scoring,algorithm,dataset,subject_id,n_components,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MI,ext_infomax,test,A04,5,1,0.494188,0.048598,0.641394
MI,ext_infomax,test,A04,10,1,0.494188,0.033574,3.831437
MI,ext_infomax,test,A04,15,1,0.494188,0.024833,7.581667
MI,ext_infomax,test,A04,20,1,0.494188,0.023015,12.45086
MI,ext_picard,test,A04,5,1,0.494188,0.050661,0.724077
MI,ext_picard,test,A04,10,1,0.494188,0.03433,4.248268
MI,ext_picard,test,A04,15,1,0.494188,0.025495,7.508686
MI,ext_picard,test,A04,20,1,0.494188,0.023191,12.585967
MI,fastica,test,A04,5,1,0.494188,0.050186,0.719492
MI,fastica,test,A04,10,1,0.494188,0.034235,4.106989


In [10]:
10 + 20

30

In [1]:
from ica import ICA_METHODS
import numpy as np

In [3]:
for method in ICA_METHODS:
    print(method, end=",")
    x = np.random.rand(10000, 5)
    y = method(x)
    print(y.shape)
    print()

<function ica_jade at 0x00000204BF61E1F0>,(10000, 5)

<function ica_picard at 0x00000204BF61E280>,Creating RawArray with float64 data, n_channels=5, n_times=10000
    Range : 0 ... 9999 =      0.000 ...    39.996 secs
Ready.
Fitting ICA to data using 5 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 0.1s.
(10000, 5)

<function ica_ext_picard at 0x00000204BF61E310>,Creating RawArray with float64 data, n_channels=5, n_times=10000
    Range : 0 ... 9999 =      0.000 ...    39.996 secs
Ready.
Fitting ICA to data using 5 channels (please be patient, this may take a while)
Selecting by number: 5 components
Fitting ICA took 0.0s.
(10000, 5)

<function ica_infomax at 0x00000204BF61E3A0>,Creating RawArray with float64 data, n_channels=5, n_times=10000
    Range : 0 ... 9999 =      0.000 ...    39.996 secs
Ready.
Fitting ICA to data using 5 channels (please be patient, this may take a while)
Selecting by number: 5 components
 
Fitting ICA to