In [83]:
import pandas as pd
import numpy as np
import os

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
from matplotlib.colors import ListedColormap

import re

In [31]:
# Notebook-wide matplotlib styling, applied in a single update so the whole
# figure style can be read (and tweaked) in one place.
plt.rcParams.update({
    # dotted, slightly transparent grid on every axes
    'axes.grid': True,
    'grid.alpha': 0.8,
    'grid.linestyle': 'dotted',
    # typography
    "font.family": "Times New Roman",
    # 'figure.figsize': (4.845, 3.135),
    'figure.dpi': 300,
    'font.size': 10,
    'axes.labelsize': 11,
    'mathtext.default': 'regular',
})

## scenario 1

The training set and the test set contain the same subjects and videos.

In [174]:
path_prefix = '../'


class S1():
    """Accessor for the scenario 1 CSV files stored under ``path_prefix``.

    Attributes:
        subs: sorted subject ids found in the training physiology directory.
        vids: sorted video ids found in the training physiology directory.
        keys: signal names; presumably the columns of the physiology CSVs
            (verify against the data files).
    """

    def __init__(self):
        ids = self._recording_ids(os.path.join(path_prefix, 'data/scenario_1/train/physiology'))
        # sorted() makes the order deterministic; a bare list(set(...)) is not.
        self.subs = sorted({sub for sub, _ in ids})
        self.vids = sorted({vid for _, vid in ids})
        # 'bvp' used to be spelled ' bvp' (leading space), which cannot match
        # a column called 'bvp'.
        self.keys = ['ecg', 'bvp', 'gsr', 'rsp', 'skt', 'emg_zygo', 'emg_coru', 'emg_trap']

    @staticmethod
    def _recording_ids(directory):
        """Return the (sub, vid) pairs parsed from ``sub_<n>_vid_<m>`` names in ``directory``.

        Entries that do not follow the naming scheme (e.g. ``.ipynb_checkpoints``)
        are skipped instead of raising ``IndexError``.
        """
        found = (re.search(r'sub_(\d+)_vid_(\d+)', name) for name in os.listdir(directory))
        return [(int(m.group(1)), int(m.group(2))) for m in found if m]

    def _load(self, split: str, sub: int, vid: int):
        """Read the physiology and annotation CSVs of one recording from ``split``."""
        name = f'sub_{sub}_vid_{vid}.csv'
        base = os.path.join(path_prefix, f'data/scenario_1/{split}')
        return pd.read_csv(os.path.join(base, 'physiology', name), index_col="time"), \
            pd.read_csv(os.path.join(base, 'annotations', name), index_col="time")

    def train_data(self, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the train split."""
        return self._load('train', sub, vid)

    def test_data(self, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the test split."""
        return self._load('test', sub, vid)

In [175]:
# Build the scenario 1 accessor; its constructor lists the training physiology directory.
s1 = S1()

In [176]:
# Display the subject ids and video ids discovered for scenario 1 as arrays.
np.array(s1.subs), np.array(s1.vids)

(array([ 1,  4,  6,  7,  8,  9, 11, 12, 13, 14, 17, 18, 19, 20, 22, 26, 28,
        29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, 43, 45]),
 array([ 1,  9, 10, 11, 13, 14, 18, 20]))

In [122]:
# Load subject 1 / video 1 from the train split: X is the physiology frame, y the annotations frame.
X, y = s1.train_data(1, 1)

## scenario 2

In [177]:
path_prefix = '../'


class S2():
    """Accessor for the scenario 2 CSV files (folds 0-4) stored under ``path_prefix``.

    Attributes:
        fold: the fold indices.
        train_subs: one sorted list of training subject ids per fold.
        test_subs: one sorted list of test subject ids per fold.
        vids: sorted video ids, read from the first fold whose train split is non-empty.
        keys: signal names; presumably the columns of the physiology CSVs
            (verify against the data files).
    """

    def __init__(self):
        self.fold = [0, 1, 2, 3, 4]

        self.train_subs = []  # (5, x)
        self.test_subs = []  # (5, x)
        self.vids = []  # (x)

        for i in self.fold:
            fold_dir = os.path.join(path_prefix, f'data/scenario_2/fold_{i}')

            train_ids = self._recording_ids(os.path.join(fold_dir, 'train/physiology'))
            self.train_subs.append(sorted({sub for sub, _ in train_ids}))
            if not self.vids:
                self.vids = sorted({vid for _, vid in train_ids})

            test_ids = self._recording_ids(os.path.join(fold_dir, 'test/physiology'))
            self.test_subs.append(sorted({sub for sub, _ in test_ids}))

        # 'bvp' used to be spelled ' bvp' (leading space), which cannot match
        # a column called 'bvp'.
        self.keys = ['ecg', 'bvp', 'gsr', 'rsp', 'skt', 'emg_zygo', 'emg_coru', 'emg_trap']

    @staticmethod
    def _recording_ids(directory):
        """Return the (sub, vid) pairs parsed from ``sub_<n>_vid_<m>`` names in ``directory``.

        Entries that do not follow the naming scheme (e.g. ``.ipynb_checkpoints``)
        are skipped instead of raising ``IndexError``.
        """
        found = (re.search(r'sub_(\d+)_vid_(\d+)', name) for name in os.listdir(directory))
        return [(int(m.group(1)), int(m.group(2))) for m in found if m]

    def _load(self, fold: int, split: str, sub: int, vid: int):
        """Read the physiology and annotation CSVs of one recording from ``split`` of ``fold``."""
        name = f'sub_{sub}_vid_{vid}.csv'
        base = os.path.join(path_prefix, f'data/scenario_2/fold_{fold}/{split}')
        return pd.read_csv(os.path.join(base, 'physiology', name), index_col="time"), \
            pd.read_csv(os.path.join(base, 'annotations', name), index_col="time")

    def train_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's train split."""
        return self._load(fold, 'train', sub, vid)

    def test_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's test split."""
        return self._load(fold, 'test', sub, vid)

In [150]:
# Build the scenario 2 accessor; its constructor lists every fold's train and test physiology directory.
s2 = S2()

## scenario 3

In [158]:
path_prefix = '../'


class S3():
    """Accessor for the scenario 3 CSV files (folds 0-3) stored under ``path_prefix``.

    Attributes:
        fold: the fold indices.
        train_vids: one sorted list of training video ids per fold.
        test_vids: one sorted list of test video ids per fold.
        subs: sorted subject ids, read from the first fold whose train split is non-empty.
        keys: signal names; presumably the columns of the physiology CSVs
            (verify against the data files).
    """

    def __init__(self):
        self.fold = [0, 1, 2, 3]

        self.train_vids = []
        self.test_vids = []
        self.subs = []

        for i in self.fold:
            fold_dir = os.path.join(path_prefix, f'data/scenario_3/fold_{i}')

            train_ids = self._recording_ids(os.path.join(fold_dir, 'train/physiology'))
            self.train_vids.append(sorted({vid for _, vid in train_ids}))
            if not self.subs:
                self.subs = sorted({sub for sub, _ in train_ids})

            test_ids = self._recording_ids(os.path.join(fold_dir, 'test/physiology'))
            self.test_vids.append(sorted({vid for _, vid in test_ids}))

        # 'bvp' used to be spelled ' bvp' (leading space), which cannot match
        # a column called 'bvp'.
        self.keys = ['ecg', 'bvp', 'gsr', 'rsp', 'skt', 'emg_zygo', 'emg_coru', 'emg_trap']

    @staticmethod
    def _recording_ids(directory):
        """Return the (sub, vid) pairs parsed from ``sub_<n>_vid_<m>`` names in ``directory``.

        Entries that do not follow the naming scheme (e.g. ``.ipynb_checkpoints``)
        are skipped instead of raising ``IndexError``.
        """
        found = (re.search(r'sub_(\d+)_vid_(\d+)', name) for name in os.listdir(directory))
        return [(int(m.group(1)), int(m.group(2))) for m in found if m]

    def _load(self, fold: int, split: str, sub: int, vid: int):
        """Read the physiology and annotation CSVs of one recording from ``split`` of ``fold``."""
        name = f'sub_{sub}_vid_{vid}.csv'
        base = os.path.join(path_prefix, f'data/scenario_3/fold_{fold}/{split}')
        return pd.read_csv(os.path.join(base, 'physiology', name), index_col="time"), \
            pd.read_csv(os.path.join(base, 'annotations', name), index_col="time")

    def train_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's train split."""
        return self._load(fold, 'train', sub, vid)

    def test_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's test split."""
        return self._load(fold, 'test', sub, vid)

In [166]:
# Build the scenario 3 accessor; its constructor lists every fold's train and test physiology directory.
s3 = S3()
# Uncomment one of the lines below to inspect the per-fold video ids or the subject ids:
# s3.train_vids
# s3.test_vids
# s3.subs

## scenario 4

In [173]:
path_prefix = '../'


class S4():
    """Accessor for the scenario 4 CSV files (folds 0-1) stored under ``path_prefix``.

    Attributes:
        fold: the fold indices.
        train_vids: one sorted list of training video ids per fold.
        test_vids: one sorted list of test video ids per fold.
        subs: sorted subject ids, read from the first fold whose train split is non-empty.
        keys: signal names; presumably the columns of the physiology CSVs
            (verify against the data files).
    """

    def __init__(self):
        self.fold = [0, 1]

        self.train_vids = []
        self.test_vids = []
        self.subs = []

        for i in self.fold:
            fold_dir = os.path.join(path_prefix, f'data/scenario_4/fold_{i}')

            train_ids = self._recording_ids(os.path.join(fold_dir, 'train/physiology'))
            self.train_vids.append(sorted({vid for _, vid in train_ids}))
            if not self.subs:
                self.subs = sorted({sub for sub, _ in train_ids})

            test_ids = self._recording_ids(os.path.join(fold_dir, 'test/physiology'))
            self.test_vids.append(sorted({vid for _, vid in test_ids}))

        # 'bvp' used to be spelled ' bvp' (leading space), which cannot match
        # a column called 'bvp'.
        self.keys = ['ecg', 'bvp', 'gsr', 'rsp', 'skt', 'emg_zygo', 'emg_coru', 'emg_trap']

    @staticmethod
    def _recording_ids(directory):
        """Return the (sub, vid) pairs parsed from ``sub_<n>_vid_<m>`` names in ``directory``.

        Entries that do not follow the naming scheme (e.g. ``.ipynb_checkpoints``)
        are skipped instead of raising ``IndexError``.
        """
        found = (re.search(r'sub_(\d+)_vid_(\d+)', name) for name in os.listdir(directory))
        return [(int(m.group(1)), int(m.group(2))) for m in found if m]

    def _load(self, fold: int, split: str, sub: int, vid: int):
        """Read the physiology and annotation CSVs of one recording from ``split`` of ``fold``."""
        name = f'sub_{sub}_vid_{vid}.csv'
        base = os.path.join(path_prefix, f'data/scenario_4/fold_{fold}/{split}')
        return pd.read_csv(os.path.join(base, 'physiology', name), index_col="time"), \
            pd.read_csv(os.path.join(base, 'annotations', name), index_col="time")

    def train_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's train split."""
        return self._load(fold, 'train', sub, vid)

    def test_data(self, fold: int, sub: int, vid: int):
        """Return ``(physiology, annotations)`` DataFrames, indexed by ``time``, from the fold's test split."""
        return self._load(fold, 'test', sub, vid)

In [179]:
# Build the scenario 4 accessor, then display the per-fold train and test video ids.
s4 = S4()
s4.train_vids, s4.test_vids

([[3, 16, 19, 20], [0, 9, 12, 15]], [[0, 9, 12, 15], [3, 16, 19, 20]])

In [None]:
# Load the recording in this cell so it also runs on a fresh kernel.
# Subject 1 / video 1 mirrors the earlier `s1.train_data(1, 1)` call; change as needed.
train_physiology, train_annotations = s1.train_data(1, 1)

plt.figure(figsize=(12, 5))
# Pass the DataFrame itself (`.iloc` alone is an indexer object, not data).
# reset_index() exposes the "time" index as a regular column for seaborn.
sns.lineplot(data=train_physiology.reset_index(), x='time', y='rsp');

In [None]:
def _draw_signal_panel(ax, physiology, annotations, signal, title):
    """Plot one physiology column and shade the time span covered by the annotations."""
    ax.plot(physiology.index, physiology[signal])
    ax.axvspan(annotations.index[0], annotations.index[-1], color='green', alpha=0.3)
    ax.set_xlim(left=physiology.index[0], right=physiology.index[-1])
    ax.set_title(title)
    ax.set_ylabel("Signal value")
    ax.set_xlabel("Time")


def _draw_annotation_panel(ax, physiology, annotations, split):
    """Plot the arousal and valence traces on the time range of the physiology frame."""
    ax.plot(annotations.index, annotations['arousal'], label=f'arousal - {split}')
    ax.plot(annotations.index, annotations['valence'], label=f'valence - {split}')
    # Same x-range as the signal panel so the two panels line up.
    ax.set_xlim(left=physiology.index[0], right=physiology.index[-1])
    ax.legend()
    ax.set_ylabel("Annotation value")
    ax.set_xlabel("Time")


def plot_data_comparison(train_physiology, train_annotations, test_physiology, test_annotations,
                         signal: str = 'rsp'):
    """Show train and test recordings in a four-panel figure.

    Panels, top to bottom: training signal, training annotations, test signal,
    test annotations. In the signal panels the green band marks the span from
    the first to the last annotation timestamp.

    Args:
        train_physiology: DataFrame with a ``signal`` column; its index is used as the x axis.
        train_annotations: DataFrame with ``arousal`` and ``valence`` columns.
        test_physiology: as ``train_physiology``, for the test split.
        test_annotations: as ``train_annotations``, for the test split.
        signal: physiology column to draw; defaults to ``'rsp'``.

    Raises:
        IndexError: if any of the frames is empty (first/last index entries are read).
    """
    fig, (ax_train_sig, ax_train_ann, ax_test_sig, ax_test_ann) = plt.subplots(4, 1, figsize=(10, 18))
    fig.patch.set_facecolor('white')

    _draw_signal_panel(ax_train_sig, train_physiology, train_annotations, signal, "Training data")
    _draw_annotation_panel(ax_train_ann, train_physiology, train_annotations, 'train')
    _draw_signal_panel(ax_test_sig, test_physiology, test_annotations, signal, "Test data")
    _draw_annotation_panel(ax_test_ann, test_physiology, test_annotations, 'test')

    # The only y tick of the test-annotation panel sits at the first arousal
    # value and is labelled "NaN".
    # NOTE(review): presumably the test annotations are placeholder values - confirm.
    ax_test_ann.set_yticks([test_annotations['arousal'].iloc[0]])
    ax_test_ann.set_yticklabels(["NaN"])

    fig.tight_layout()
    plt.show()

In [None]:
# Load both splits of one recording in this cell so it also runs on a fresh kernel.
# Subject 1 / video 1 mirrors the earlier `s1.train_data(1, 1)` call; change as needed.
train_physiology, train_annotations = s1.train_data(1, 1)
test_physiology, test_annotations = s1.test_data(1, 1)
plot_data_comparison(train_physiology, train_annotations, test_physiology, test_annotations)