In [1]:
import pandas as pd
from scipy.io import loadmat
import tables
import numpy as np

import glob
import os
import random

In [2]:
"""
Author: Trevor Jordan Grant.
default4: spatial/verbal
"""
# Dictionary is labeled such that task labels in conditions files will have
# more than one multilabeling schema.

# To add more multilabeling schema - include them in the task Dictionary.
# (The sub-dictionary where the task label is the key.)

# default4 = ["VerbalWM", "SpatialWM", "VisualPerceptual", "AuditoryPerceptual"]
# every label in default 4 has discrete values of 'off', 'low', 'high'

# default3 = ["WM", "VisualPerceptual", "AuditoryPerceptual"]
# every label in default 3 has discrete values of 'off', 'low', 'high'

cog_load_label_dict = {
# Mindfulness task labels.
                       "nb": {
                              "default4": ["high", "off", "low", "off"],
                              "default3": ["high", "low", "off"],
                             },
                       "anb": {
                               "default4": ["high", "off", "off", "low"],
                               "default3": ["high", "off", "low"],
                              },
                       "ewm": {
                               "default4": ["low", "off", "high", "off"],
                               "default3": ["low", "high", "off"]
                              },
                        "cr": {
                               "default4": ["off", "off", "off", "off"],
                               "default3": ["off", "off", "off"],
                              },
                        "rt": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
                        "es": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
                       "gng": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
            "adaptive_words": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
                   "go_nogo": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
                     "nback": {
                               "default4": ["high", "off", "low", "off"],
                               "default3": ["high", "low", "off"],
                              },
                    "posner": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
                 "simple_rt": {
                               "default4": ["off", "off", "low", "off"],
                               "default3": ["off", "low", "off"],
                              },
             "visual_search": {
                               "default4": ["off", "off", "high", "off"],
                               "default3": ["off", "high", "off"],
                              },
                      }


def strings_to_vectors(string_labels, as_list=False):
    """Maps strings in dict to interger values.
    Args:
        string_labels(list): The string label value of load.
        as_list(bool): False, if True, return list instead of np.array()
    Returns:
        labels as np.array()
    """

    maps = {
            "off": 0,
            "low": 1,
            "high": 2,
           }

    if as_list:
        return [maps[label] for label in string_labels]
    return np.array([maps[label] for label in string_labels])


def return_label(task, label_type="default3", as_strings=False):
    """Returns a label from the cog_load_label_dict.
    Args:
        task(str): The task label from the coditions file.
        label_type(string): The label schema used for the model.
        as_strings(bool): False, if True, return string (in list) values instead.
    Returns:
        labels(np.array): Under defaults labels will be returned as interger
        values in a np.array().
    """
    if as_strings:
        return cog_load_label_dict[task][label_type]
    return strings_to_vectors(cog_load_label_dict[task][label_type])

In [3]:
channel_52_5x11_mat = {
           1:[0,1],  2:[0,2],  3:[0,3],  4:[0,4],  5:[0,5],  6:[0,6],  7:[0,7],  8:[0,8],  9:[0,9], 10:[0,10], 
11:[1,0], 12:[1,1], 13:[1,2], 14:[1,3], 15:[1,4], 16:[1,5], 17:[1,6], 18:[1,7], 19:[1,8], 20:[1,9], 21:[1,10], 
          22:[2,1], 23:[2,2], 24:[2,3], 25:[2,4], 26:[2,5], 27:[2,6], 28:[2,7], 29:[2,8], 30:[2,9], 31:[2,10], 
32:[3,0], 33:[3,1], 34:[3,2], 35:[3,3], 36:[3,4], 37:[3,5], 38:[3,6], 39:[3,7], 40:[3,8], 41:[3,9], 42:[3,10], 
          43:[4,1], 44:[4,2], 45:[4,3], 46:[4,4], 47:[4,5], 48:[4,6], 49:[4,7], 50:[4,8], 51:[4,9], 52:[4,10]
}

def get_52_5x11_mat(data):
    # returns a matrix of size 5x11.
    mat = np.zeros((5, 11))
    for idx, i in enumerate((data)):
        loc = channel_52_5x11_mat[idx+1]
        mat[loc[0], loc[1]] = i
    return mat

channel_52_5x22_mat = {
           1:[0,1],  2:[0,3],  3:[0,5],  4:[0,7],  5:[0,9],  6:[0,11],  7:[0,13],  8:[0,15],  9:[0,17], 10:[0,19], 
11:[1,0], 12:[1,2], 13:[1,4], 14:[1,6], 15:[1,8], 16:[1,10], 17:[1,12], 18:[1,14], 19:[1,16], 20:[1,18], 21:[1,20], 
          22:[2,1], 23:[2,3], 24:[2,5], 25:[2,7], 26:[2,9], 27:[2,11], 28:[2,13], 29:[2,15], 30:[2,17], 31:[2,19], 
32:[3,0], 33:[3,2], 34:[3,4], 35:[3,6], 36:[3,8], 37:[3,10], 38:[3,12], 39:[3,14], 40:[3,16], 41:[3,18], 42:[3,20], 
          43:[4,1], 44:[4,3], 45:[4,5], 46:[4,7], 47:[4,9], 48:[4,11], 49:[4,13], 50:[4,15], 51:[4,17], 52:[4,19]
}

def get_52_5x22_mat(data):
    # returns a matrix of size 5x11.
    mat = np.zeros((5, 22))
    for idx, i in enumerate((data)):
        loc = channel_52_5x22_mat[idx+1]
        mat[loc[0], loc[1]] = i
    return mat



In [4]:
def collapse_tasks(tasks, min_dur):
    collapsed_tasks = []
    collapsed_tasks.append(
        tasks[0]
    )
    for i in range(1, len(tasks)):
        t1 = collapsed_tasks[-1]
        t2 = tasks[i]
        if t1["class"] == t2["class"] and (t2["duration"] < min_dur and t1["duration"] < min_dur):
            
            t1["data"] = np.concatenate((t1["data"], t2["data"]), axis=0)
            t1["duration"]+=t2["duration"]
            t1["end"]=t1["onset"]+t2["duration"]
            # merge and append
        else:
            # just append
            collapsed_tasks.append(t2)
    return collapsed_tasks
            
def read_tasks(condition, data):
    # conditions, data = csv, mat files
    # tuple containing (class, onset, duration, offset, oxy_data, dxy_data)
    print(condition)
    tasks = []
    # read conditions, data
    c_data = pd.read_csv(condition)
    m_data = loadmat(data)
    # get oxy, dxy data
    oxyDaya = m_data['nirs_data'][0][0][0]
    dxyData = m_data['nirs_data'][0][0][1]
    # iterate through all the tasks here now.
    for idx, key in enumerate(list(c_data.keys())):
        start = 0
        end = 0
        class_ = None
        if 'Task' in key or 'all_benchmarks_fNIRS' in key:
            # get start and end index of the task
            if 'Task' in key:
                start = int(c_data[key][0])
                duration = int(c_data[key][1])
                class_ = c_data[key][2]
            else:
                start = int(c_data[key][2])
                duration = int(c_data[key][3]) 
                class_ = c_data[key][4]
            if class_ == "adaptive_words" or class_ == "posner":
                continue
                
            end = start + duration
            
            # visualize heatmap: 
            # sns.heatmap(get_52_mat(oxyDaya[0]))

            oxy_series = oxyDaya[start:end, :]
            dxy_series = dxyData[start:end, :]

            # a 100x5x22 list
            oxy_dxy_series_mat = np.zeros((duration,2, 5, 11))

            for ts, (oxy_slice, dxy_slice) in enumerate(zip(oxy_series, dxy_series)):
                oxy_slice = get_52_5x11_mat(oxy_slice)
                dxy_slice = get_52_5x11_mat(dxy_slice)
                
                #oxy_dxy_series_mat[ts] = np.hstack([oxy_slice, dxy_slice])
                oxy_dxy_series_mat[ts] = np.array([oxy_slice, dxy_slice])
            tasks.append(
                {
                    "class": class_,
                    "onset": start,
                    "end": end,
                    "duration": duration,
                    "data" : oxy_dxy_series_mat
                }
            )
    return tasks 

def pad_tasks(tasks):
    lengths = [len(t["data"]) for t in tasks]
    #max_len = max(lengths)
    max_len = 3000
    for t in tasks:
        padded_task = np.zeros(np.concatenate( ([max_len], t["data"].shape[1:]) ))
        padded_task[:min(t["duration"], max_len)] = t["data"][:min(max_len, t["duration"])]
        t["data"] = padded_task
    return tasks

In [5]:
conditions = sorted(glob.glob('../../../data/multilabel/mats/mindfulness/*.csv'))
data = sorted(glob.glob('../../../data/multilabel/mats/mindfulness/*.mat'))

In [6]:
task_data = []
time_series_length = 10
"""
default3 labels
[ 
    wm, 
    v, 
    a
]
"""

for idx, (cond, dat) in enumerate(zip(conditions, data)):
    participant_id = os.path.basename(dat)[0:4]
    tasks = read_tasks(cond, dat)
    for t in tasks:
        task_data.append(t)
        task_data[-1]["id"] = participant_id
        task_data[-1]["wl_label"] = return_label(task_data[-1]["class"])
    task_data = collapse_tasks(task_data, min_dur=time_series_length)

../../../data/multilabel/mats/mindfulness\2001_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2001_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2002_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2002_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2003_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2003_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2004_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2004_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2006_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2006_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2011_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2011_fNIRS_conditions_s2.csv
../../../data/multilabel/mats/mindfulness\2012_fNIRS_conditions_s1.csv
../../../data/multilabel/mats/mindfulness\2012_fNIRS_conditions_s2.csv
../../

In [7]:
task_data = pad_tasks(task_data)

#### GET wm, vl, al (off, low, high) label counts and counts for each type of task

In [9]:
labels_bin = {"wm":{0:0, 1:0, 2:0}, "vl":{0:0, 1:0, 2:0}, "al":{0:0, 1:0, 2:0}}
task_cond_bin = {i:{"ts":0, "cnt":0} for i in cog_load_label_dict}
for t in task_data:
    if not (int(t["id"])//1000 == 7):
        label = return_label(t["class"])

        task_cond_bin[t["class"]]["cnt"] += 1
        task_cond_bin[t["class"]]["ts"] = t["duration"]

        labels_bin["wm"][label[0]]+=1
        labels_bin["vl"][label[1]]+=1
        labels_bin["al"][label[2]]+=1
        
print(task_cond_bin, labels_bin)

{'gng': {'cnt': 118, 'ts': 850}, 'go_nogo': {'cnt': 0, 'ts': 0}, 'es': {'cnt': 118, 'ts': 700}, 'nback': {'cnt': 0, 'ts': 0}, 'nb': {'cnt': 118, 'ts': 900}, 'ewm': {'cnt': 118, 'ts': 2800}, 'rt': {'cnt': 118, 'ts': 600}, 'cr': {'cnt': 706, 'ts': 250}, 'simple_rt': {'cnt': 0, 'ts': 0}, 'adaptive_words': {'cnt': 0, 'ts': 0}, 'posner': {'cnt': 0, 'ts': 0}, 'anb': {'cnt': 115, 'ts': 1050}, 'visual_search': {'cnt': 0, 'ts': 0}} {'al': {0: 1296, 1: 115, 2: 0}, 'vl': {0: 821, 1: 472, 2: 118}, 'wm': {0: 1060, 1: 118, 2: 233}}


In [10]:
participant_taskdata = {}

In [11]:
for t in task_data:
    if t["id"] not in participant_taskdata:
        participant_taskdata[t["id"]] = []
    participant_taskdata[t["id"]].append(t)

In [12]:
participant_ids = list(participant_taskdata.keys())
print(participant_ids)

['2013', '8215', '2015', '8218', '8219', '8217', '2001', '8203', '8221', '8205', '2006', '8208', '2004', '8213', '2011', '2002', '2014', '8206', '8216', '8204', '8211', '8212', '2012', '8214', '2019', '2017', '8201', '8209', '2003', '8210']


In [13]:
train_ids = participant_ids[:int(0.8*len(participant_ids))]
val_ids = participant_ids[int(0.8*len(participant_ids)):]
print(train_ids)
print(val_ids)

['2013', '8215', '2015', '8218', '8219', '8217', '2001', '8203', '8221', '8205', '2006', '8208', '2004', '8213', '2011', '2002', '2014', '8206', '8216', '8204', '8211', '8212', '2012', '8214']
['2019', '2017', '8201', '8209', '2003', '8210']


##### Bin train a-nback and n-back 

In [19]:
train_labeled_task_bin = {'anb':[], 'nb':[]}
for participant_id in train_ids:
    for participant_task in participant_taskdata[participant_id]:
        if participant_task["class"] in list(train_labeled_task_bin.keys()):
            train_labeled_task_bin[participant_task["class"]].append(participant_task)

print(len(train_labeled_task_bin["anb"]), len(train_labeled_task_bin["nb"]))

95 96


In [20]:
val_labeled_task_bin = {'anb':[], 'nb':[]}
for participant_id in val_ids:
    for participant_task in participant_taskdata[participant_id]:
        if participant_task["class"] in list(val_labeled_task_bin.keys()):
            val_labeled_task_bin[participant_task["class"]].append(participant_task)

print( len(val_labeled_task_bin["anb"]), len(val_labeled_task_bin["nb"]))

20 22


In [36]:
train_pairs = {0:[], 1:[]}

In [37]:
from random import shuffle
# matching pairs
for i in train_labeled_task_bin:
    
    lab_tasks_idx = [j for j in range(len(train_labeled_task_bin[i]))]
    lab_tasks_perm = lab_tasks_idx.copy()
    
    shuffle(lab_tasks_perm)
    
    while True:
        if not np.any(lab_tasks_idx == lab_tasks_perm):
            break

    for a in lab_tasks_idx:
        for b in lab_tasks_perm:
            if a == b : continue
            train_pairs[0].append(
                (
                    train_labeled_task_bin[i][a]["data"][50:250], 
                    train_labeled_task_bin[i][b]["data"][50:250], 
                    0
                )
            )

In [38]:
# different pairs
labels = train_labeled_task_bin.keys()
label_pairs = [("anb", "nb")]

for lab1, lab2 in label_pairs:
    for task1 in train_labeled_task_bin[lab1]:
        for task2 in train_labeled_task_bin[lab2]:
            train_pairs[1].append((
                task1["data"][50:250], 
                task2["data"][50:250],
                1
            ))

In [39]:
print(len(train_pairs[0]), len(train_pairs[1]))

shuffle(train_pairs[0])
shuffle(train_pairs[1])

18050 9120


In [40]:
"""
    write all data to disk as is
"""

"""
data_list = []
for idx, data in enumerate(task_data):
    data_list.append(data)
np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness\\data", data_list)
"""
"""
np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness\\data_siamese_train", train_pairs)
"""
# save matching
for idx, data in enumerate(train_pairs[0]):
    np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness//siamese//alvl//train//0//" + str(idx), data)

# save different
for idx, data in enumerate(train_pairs[1]):
    np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness//siamese//alvl//train//1//" + str(idx), data)


##### validation set

In [41]:
val_label_examples = {"anb":["nb"], "nb":["anb"]}
val_pairs = []
for i in val_labeled_task_bin:
    for task in val_labeled_task_bin[i]:
        
        t2 = val_label_examples[i][0]
        
        val_pairs.append({
            "t1": task["data"],
            "t3": random.choice(val_labeled_task_bin[t2])["data"][50:250],
            "t4": random.choice(val_labeled_task_bin[i])["data"][50:250],
        })

In [42]:
np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness//siamese//alvl//data_siamese_val", val_pairs)

##### siamese pairs validation 

In [43]:
siamese_pairs_val = {0:[], 1:[]}
# matching pairs
for i in val_labeled_task_bin:
    
    lab_tasks_idx = [j for j in range(len(val_labeled_task_bin[i]))]
    lab_tasks_perm = lab_tasks_idx.copy()
    
    shuffle(lab_tasks_perm)
    
    while True:
        if not np.any(lab_tasks_idx == lab_tasks_perm):
            break

    for a in lab_tasks_idx:
        for b in lab_tasks_perm:
            if a == b : continue
            siamese_pairs_val[0].append(
                (
                    val_labeled_task_bin[i][a]["data"][50:250], 
                    val_labeled_task_bin[i][b]["data"][50:250], 
                    0
                )
            )

# different pairs
labels = val_labeled_task_bin.keys()
label_pairs = [("anb", "nb")]

for lab1, lab2 in label_pairs:
    for task1 in val_labeled_task_bin[lab1]:
        for task2 in val_labeled_task_bin[lab2]:
            siamese_pairs_val[1].append((
                task1["data"][50:250], 
                task2["data"][50:250],
                1
            ))

In [44]:
print(len(siamese_pairs_val[0]), len(siamese_pairs_val[1]))

shuffle(siamese_pairs_val[0])
shuffle(siamese_pairs_val[1])

842 440


In [45]:
# save matching
for idx, data in enumerate(siamese_pairs_val[0][0:10000]):
    np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness//siamese//alvl//test//0//" + str(idx), data)

# save different
for idx, data in enumerate(siamese_pairs_val[1][0:10000]):
    np.save("C://Users//dhruv//Development//git//thesis_dl-fnirs//data//multilabel//all//mindfulness//siamese//alvl//test//1//" + str(idx), data)


In [None]:
import seaborn as sns;
sns.set();
import matplotlib.pyplot as plt

In [None]:
cond_1 = 79
cond_2 = 67



oxy_1 = [i[0, 1, 1] for i in task_data[cond_1]["data"]]
dxy_1 = [i[1, 1, 1] for i in task_data[cond_1]["data"]]

oxy_2 = [i[0, 1, 1] for i in task_data[cond_2]["data"]]
dxy_2 = [i[1, 1, 1] for i in task_data[cond_2]["data"]]

print(len(oxy_1), len(oxy_2))

df = pd.DataFrame.from_dict({
    task_data[cond_1]["class"]+"-oxy": oxy_1,
    task_data[cond_2]["class"]+"-oxy": oxy_2,

})

plt.figure(figsize=(19, 9))
ax = sns.lineplot(data=df)



In [None]:
print([i["class"] for i in task_data])

In [None]:
5//2