In [4]:
import pandas as pd
from scipy.io import loadmat
import tables
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns; sns.set()
import glob

In [6]:
"""
Author: Trevor Jordan Grant.
default4: spatial/verbal
"""
# Cognitive-load levels per task and per label schema.
#   outer key : task code (looked up by return_label)
#   inner key : schema name -- "default4" lists four levels per task,
#               "default3" lists three
#   values    : one of "off" / "low" / "high" per component
cog_load_label_dict = {
    "nb": {
        "default4": ["high", "off", "low", "off"],
        "default3": ["high", "low", "off"],
        # Add new labels for n-back task here:
    },
    "anb": {
        "default4": ["high", "off", "off", "low"],
        "default3": ["high", "off", "low"],
    },
    "ewm": {
        "default4": ["low", "off", "high", "off"],
        "default3": ["low", "high", "off"],
    },
    "cr": {
        "default4": ["off", "off", "off", "off"],
        "default3": ["off", "off", "off"],
    },
    "rt": {
        "default4": ["off", "off", "low", "off"],
        "default3": ["off", "low", "off"],
    },
}


def strings_to_vectors(string_labels, as_list=False):
    """One-hot encode a sequence of load-level names.

    Each name is replaced by a 3-element vector ordered [off, low, high].

    Args:
        string_labels(list): Load-level names; each must be "off", "low"
            or "high" (any other value raises KeyError).
        as_list(bool): When True, return a plain list of lists instead of
            an np.array().
    Returns:
        np.array() with one row per label, or a list of 3-element lists
        when as_list is True.
    """
    one_hot = {
        "off": [1, 0, 0],
        "low": [0, 1, 0],
        "high": [0, 0, 1],
    }

    encoded = [one_hot[name] for name in string_labels]
    return encoded if as_list else np.array(encoded)


def return_label(task, label_type="default3", as_strings=False):
    """Look up the label for a task in cog_load_label_dict.

    Args:
        task(str): Task code from the conditions file; must be a key of
            cog_load_label_dict (KeyError otherwise).
        label_type(string): Label schema to read for that task.
        as_strings(bool): When True, return the stored list of level names
            unchanged instead of encoding it.
    Returns:
        labels(np.array): By default, the one-hot vectors of every level
        concatenated into a single flat np.array() of integers. With
        as_strings=True, the list of level names.
    """
    level_names = cog_load_label_dict[task][label_type]
    if not as_strings:
        one_hot_rows = strings_to_vectors(level_names)
        # hstack joins the per-component rows end to end into one flat vector.
        return np.hstack(one_hot_rows)
    return level_names

#return_label("rt")

In [7]:
# Maps channel number (1..52) to its [row, col] cell in a 5x11 grid.
# Channels are numbered left to right, top to bottom. Rows 0, 2 and 4 hold
# 10 channels (cols 1..10, col 0 stays empty); rows 1 and 3 hold 11
# channels (cols 0..10):
#
#   row 0:  .   1   2   3   4   5   6   7   8   9  10
#   row 1: 11  12  13  14  15  16  17  18  19  20  21
#   row 2:  .  22  23  24  25  26  27  28  29  30  31
#   row 3: 32  33  34  35  36  37  38  39  40  41  42
#   row 4:  .  43  44  45  46  47  48  49  50  51  52
channel_52_mat = dict(
    enumerate(
        (
            [row, col]
            for row in range(5)
            for col in range(11)
            if col > 0 or row % 2 == 1
        ),
        start=1,
    )
)

def get_52_mat(data):
    """Scatter per-channel values onto the 5x11 grid.

    The i-th value of `data` (0-based) is written to the cell that
    channel_52_mat assigns to channel i + 1. Cells that no channel maps to
    stay 0.

    Args:
        data: Iterable of per-channel values, in channel order. More values
            than channel_52_mat has entries raises KeyError.
    Returns:
        np.array of shape (5, 11).
    """
    grid = np.zeros((5, 11))
    for channel, value in enumerate(data, start=1):
        row, col = channel_52_mat[channel]
        grid[row, col] = value
    return grid



In [8]:
# glob returns matches in arbitrary (filesystem-dependent) order, and the
# two lists are later paired positionally with zip(). Sorting both keeps the
# pairing deterministic across machines.
# NOTE(review): this still assumes each conditions file sorts to the same
# position as its .mat recording -- confirm against the file naming scheme.
conditions = sorted(glob.glob('../../../data/multilabel/mats/*_conditions*'))
data = sorted(glob.glob('../../../data/multilabel/mats/*.mat'))

In [9]:
# Number of consecutive samples kept per task window. Tasks whose recorded
# length is shorter than this are skipped; longer ones are truncated.
time_series_length = 100

# Rebuilt from scratch on every run of this cell, so re-running is safe.
multilabel_data = []

# default3 labels
# [
#     wm_o, wm_l, wm_h,
#     v_o, v_l, v_h,
#     a_o, a_l, a_h
# ]

for cond_path, mat_path in zip(conditions, data):
    cond_table = pd.read_csv(cond_path)
    mat_contents = loadmat(mat_path)

    # First two fields of the 'nirs_data' struct, indexed as (time, channel).
    # NOTE(review): presumably oxy- and deoxy-hemoglobin signals, going by
    # the original variable names -- confirm against the recording format.
    oxy_data = mat_contents['nirs_data'][0][0][0]
    dxy_data = mat_contents['nirs_data'][0][0][1]

    # Every column whose name contains 'Task' describes one task:
    # row 0 = start index, row 1 = length, row 2 = task code.
    for key in cond_table.keys():
        if 'Task' not in key:
            continue

        task_column = cond_table[key]
        task_name = task_column[2]

        # "gng" and "es" tasks are excluded from the dataset.
        if task_name in ["gng", "es"]:
            continue

        start = int(task_column[0])
        # Keep only tasks that provide a full window.
        if int(task_column[1]) < time_series_length:
            continue
        end = start + time_series_length

        # visualize heatmap:
        # sns.heatmap(get_52_mat(oxy_data[0]))

        oxy_series = oxy_data[start:end, :]
        dxy_series = dxy_data[start:end, :]

        # (time, 5, 22): the two 5x11 grids of each time step side by side.
        # NOTE(review): if the recording ends before `end`, the slices are
        # shorter than the window and the remaining rows stay all-zero.
        window = np.zeros((time_series_length, 5, 22))
        for ts, (oxy_slice, dxy_slice) in enumerate(zip(oxy_series, dxy_series)):
            window[ts] = np.hstack([get_52_mat(oxy_slice), get_52_mat(dxy_slice)])

        multilabel_data.append([window, return_label(task_name)])

In [10]:
# Write data to disk: the first TRAIN_COUNT samples go to train/, the rest
# to val/. Each file holds a 2-element object array [window, label].

# NOTE(review): absolute, machine-specific path -- consider deriving it from
# a configurable data directory.
OUTPUT_ROOT = "C:\\Users\\dhruv\\Development\\git\\thesis_dl-fnirs\\data\\multilabel\\"
TRAIN_COUNT = 601

# Per-position sums of the label vectors (how often each one-hot slot is set).
# They must start as numeric arrays: `{}` += ndarray raises TypeError.
# (1, 9) matches the 9-entry default3 label vector and the recorded outputs.
train_class_hist = np.zeros((1, 9))
test_class_hist = np.zeros((1, 9))

# Loop variables are deliberately not called `data`, so the list of .mat
# paths defined earlier is not overwritten.
for sample_idx, (window, label_vec) in enumerate(multilabel_data[:TRAIN_COUNT]):
    # window and label_vec have different shapes, so the pair must be stored
    # as an object array; without dtype=object NumPy >= 1.24 raises
    # ValueError. Load with np.load(..., allow_pickle=True).
    np.save(OUTPUT_ROOT + "train\\" + str(sample_idx),
            np.array([window, label_vec], dtype=object))
    train_class_hist += label_vec

for sample_idx, (window, label_vec) in enumerate(multilabel_data[TRAIN_COUNT:]):
    np.save(OUTPUT_ROOT + "val\\" + str(sample_idx),
            np.array([window, label_vec], dtype=object))
    test_class_hist += label_vec

In [11]:
train_class_hist

array([[421.,  60., 120., 421., 120.,  60., 541.,  60.,   0.]])

In [12]:
test_class_hist

array([[27.,  4.,  8., 27.,  8.,  4., 35.,  4.,  0.]])