In [2]:
import pandas as pd
import numpy as np
import re
import os
import sys

sys.path.append(os.path.relpath("../src/"))
from dataloader import S1, S2, S3, S4
import emg
from utils import create_sliding_window_features 

1. Create sliding-window features; each input has size num_signals × window_size × 1.
2. Split the provided dataset into training (80%) and test (20%) sets for each scenario.
   Each scenario has one training set and multiple test sets: the training portions of all sub-datasets are concatenated into a single file.
3. Start with the tabular predictor.
4. Next, explore how to analyze the time-series data with AutoGluon.

In [12]:
# Scenario 1: split every (subject, video) recording 80/20 by row position.
# The first 80% of each recording's windowed rows are pooled into a single
# training file; the remaining 20% are written to one test file per recording.
path_prefix = '../data'
save_prefix = '../splitted_data/'
features = ['ecg', 'bvp', 'gsr', 'rsp', 'emg_zygo', 'emg_coru', 'emg_trap']
s1 = S1()

train_dir = os.path.join(save_prefix, 'scenario_1/train')
test_dir = os.path.join(save_prefix, 'scenario_1/test')
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folders exist before anything is written.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

train_pairs = s1.train_test_indices['train']

train_data = []
for sub, vid in train_pairs:
    physiology, annotations = s1.train_data(sub, vid, features=features)
    X, y = create_sliding_window_features(physiology, annotations, window_size=50)

    # One frame holding the window features together with their annotations.
    df = X.join(y)

    # Rows before the cut go to training, rows from the cut onwards to test.
    train_length = int(len(df) * 0.8)

    train_data.append(df[:train_length])
    df[train_length:].to_csv(os.path.join(test_dir, f'sub_{sub}_vid_{vid}.csv'), index_label='time')

# Pool the training portions of all recordings into one file.
train_data = pd.concat(train_data, axis=0)
train_data.to_csv(os.path.join(train_dir, 'train.csv'), index_label='time')

In [5]:
# Scenario 2: for every fold, split each (subject, video) recording 80/20 by
# row position. Per fold, the first 80% of each recording's windowed rows are
# pooled into one training file; the remaining 20% are written to one test
# file per recording.
path_prefix = '../data'
save_prefix = '../splitted_data/'
features = ['ecg', 'bvp', 'gsr', 'rsp', 'emg_zygo', 'emg_coru', 'emg_trap']
s2 = S2()

folds = s2.train_test_indices
for fold, train_test_pairs in enumerate(folds):
    train_pairs = train_test_pairs['train']

    train_dir = os.path.join(save_prefix, f'scenario_2/train/fold_{fold}')
    test_dir = os.path.join(save_prefix, f'scenario_2/test/fold_{fold}')
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure this fold's output folders exist before anything is written.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    train_data = []
    for sub, vid in train_pairs:
        physiology, annotations = s2.train_data(fold, sub, vid, features=features)
        X, y = create_sliding_window_features(physiology, annotations, window_size=50)

        # One frame holding the window features together with their annotations.
        df = X.join(y)

        # Rows before the cut go to training, rows from the cut onwards to test.
        train_length = int(len(df) * 0.8)

        train_data.append(df[:train_length])
        df[train_length:].to_csv(os.path.join(test_dir, f'sub_{sub}_vid_{vid}.csv'), index_label='time')

    # Pool the training portions of all recordings of this fold into one file.
    train_data = pd.concat(train_data, axis=0)
    train_data.to_csv(os.path.join(train_dir, 'train.csv'), index_label='time')

In [6]:
# Scenario 3: for every fold, split each (subject, video) recording 80/20 by
# row position. Per fold, the first 80% of each recording's windowed rows are
# pooled into one training file; the remaining 20% are written to one test
# file per recording.
path_prefix = '../data'
save_prefix = '../splitted_data/'
features = ['ecg', 'bvp', 'gsr', 'rsp', 'emg_zygo', 'emg_coru', 'emg_trap']
s3 = S3()

folds = s3.train_test_indices
for fold, train_test_pairs in enumerate(folds):
    train_pairs = train_test_pairs['train']

    train_dir = os.path.join(save_prefix, f'scenario_3/train/fold_{fold}')
    test_dir = os.path.join(save_prefix, f'scenario_3/test/fold_{fold}')
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure this fold's output folders exist before anything is written.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    train_data = []
    for sub, vid in train_pairs:
        physiology, annotations = s3.train_data(fold, sub, vid, features=features)
        X, y = create_sliding_window_features(physiology, annotations, window_size=50)

        # One frame holding the window features together with their annotations.
        df = X.join(y)

        # Rows before the cut go to training, rows from the cut onwards to test.
        train_length = int(len(df) * 0.8)

        train_data.append(df[:train_length])
        df[train_length:].to_csv(os.path.join(test_dir, f'sub_{sub}_vid_{vid}.csv'), index_label='time')

    # Pool the training portions of all recordings of this fold into one file.
    train_data = pd.concat(train_data, axis=0)
    train_data.to_csv(os.path.join(train_dir, 'train.csv'), index_label='time')

In [7]:
# Scenario 4: for every fold, split each (subject, video) recording 80/20 by
# row position. Per fold, the first 80% of each recording's windowed rows are
# pooled into one training file; the remaining 20% are written to one test
# file per recording.
path_prefix = '../data'
save_prefix = '../splitted_data/'
features = ['ecg', 'bvp', 'gsr', 'rsp', 'emg_zygo', 'emg_coru', 'emg_trap']
s4 = S4()

folds = s4.train_test_indices
for fold, train_test_pairs in enumerate(folds):
    train_pairs = train_test_pairs['train']

    train_dir = os.path.join(save_prefix, f'scenario_4/train/fold_{fold}')
    test_dir = os.path.join(save_prefix, f'scenario_4/test/fold_{fold}')
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure this fold's output folders exist before anything is written.
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    train_data = []
    for sub, vid in train_pairs:
        physiology, annotations = s4.train_data(fold, sub, vid, features=features)
        X, y = create_sliding_window_features(physiology, annotations, window_size=50)

        # One frame holding the window features together with their annotations.
        df = X.join(y)

        # Rows before the cut go to training, rows from the cut onwards to test.
        train_length = int(len(df) * 0.8)

        train_data.append(df[:train_length])
        df[train_length:].to_csv(os.path.join(test_dir, f'sub_{sub}_vid_{vid}.csv'), index_label='time')

    # Pool the training portions of all recordings of this fold into one file.
    train_data = pd.concat(train_data, axis=0)
    train_data.to_csv(os.path.join(train_dir, 'train.csv'), index_label='time')

In [8]:
from autogluon.tabular import TabularDataset, TabularPredictor

In [None]:
## Scenario 1: fit one regressor on the pooled training file and evaluate it
## on every per-recording test file, predicting arousal
label = 'arousal'
# Defined here as well so the cell does not depend on a split cell having been
# executed earlier in the same kernel session.
save_prefix = '../splitted_data/'
model_dir = 'AutogluonModels/scenario_1/arousal'
s1 = S1()

# One entry per evaluated (subject, video) pair, kept in parallel.
subjectID = []
videoID = []
root_mean_squared_error = []
mean_squared_error = []
mean_absolute_error = []
r2 = []
pearsonr = []
median_absolute_error = []

# Per-recording leaderboards; concatenated once after the loop.
leader_boards = []

# NOTE(review): the split CSVs are written with index_label='time'; if that
# column is read back as a regular column it is used as a model feature --
# confirm this is intended.
train_data = TabularDataset(os.path.join(save_prefix, 'scenario_1/train', 'train.csv'))
# The other annotation must not be available to the model as a feature.
train_data = train_data.drop(columns=['valence'])
predictor = TabularPredictor(label=label, problem_type='regression', path=model_dir, verbosity=0).fit(train_data, ag_args_fit={'num_gpus': 1})
test_pairs = s1.train_test_indices['test']

# The predictor fitted above is reused for every recording; reloading it from
# disk on each iteration was loop-invariant work.
for sub, vid in test_pairs:
    subjectID.append(sub)
    videoID.append(vid)

    test_data = TabularDataset(os.path.join(save_prefix, 'scenario_1/test', f'sub_{sub}_vid_{vid}.csv'))
    # Same columns as the training frame (label kept); used for the leaderboard.
    test_data_no_valence = test_data.drop(columns=['valence'])
    y_test = test_data[label]
    test_data_nolab = test_data_no_valence.drop(columns=[label])

    y_pred = predictor.predict(test_data_nolab)
    perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)

    root_mean_squared_error.append(perf['root_mean_squared_error'])
    mean_squared_error.append(perf['mean_squared_error'])
    mean_absolute_error.append(perf['mean_absolute_error'])
    r2.append(perf['r2'])
    pearsonr.append(perf['pearsonr'])
    median_absolute_error.append(perf['median_absolute_error'])

    board = predictor.leaderboard(test_data_no_valence, silent=True)
    # A scalar is broadcast to every row, tagging the board with its recording.
    board.insert(0, 'subjectID', sub)
    board.insert(1, 'videoID', vid)
    leader_boards.append(board)

# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty frame so the export below still works when there are no test pairs.
leader_board_dataframe = pd.concat(leader_boards) if leader_boards else pd.DataFrame()

evaluation_dataframe = pd.DataFrame({'subjectID': subjectID, 'videoID': videoID, 'root_mean_squared_error': root_mean_squared_error, 'mean_squared_error': mean_squared_error,
                                'mean_absolute_error': mean_absolute_error, 'r2': r2, 'pearsonr': pearsonr, 'median_absolute_error': median_absolute_error})

evaluation_dataframe.to_csv('AutogluonModels/scenario_1/evaluation_arousal.csv')
leader_board_dataframe.to_csv('AutogluonModels/scenario_1/leaderboard_arousal.csv')

print(evaluation_dataframe)
print(leader_board_dataframe)

In [None]:
## Train and test on one dataset, predicting valence
label = 'valence'
# Defined here as well so the cell does not depend on a split cell having been
# executed earlier in the same kernel session.
save_prefix = '../splitted_data/'
model_dir = 'AutogluonModels/scenario_1/valence'
s1 = S1()

# One entry per evaluated (subject, video) pair, kept in parallel.
subjectID = []
videoID = []
root_mean_squared_error = []
mean_squared_error = []
mean_absolute_error = []
r2 = []
pearsonr = []
median_absolute_error = []

# Per-recording leaderboards; concatenated once after the loop.
leader_boards = []

# NOTE(review): the split CSVs are written with index_label='time'; if that
# column is read back as a regular column it is used as a model feature --
# confirm this is intended.
train_data = TabularDataset(os.path.join(save_prefix, 'scenario_1/train', 'train.csv'))
# The other annotation must not be available to the model as a feature.
train_data = train_data.drop(columns=['arousal'])
predictor = TabularPredictor(label=label, problem_type='regression', path=model_dir, verbosity=0).fit(train_data, ag_args_fit={'num_gpus': 1})
test_pairs = s1.train_test_indices['test']

# The predictor fitted above is reused for every recording; reloading it from
# disk on each iteration was loop-invariant work.
for sub, vid in test_pairs:
    subjectID.append(sub)
    videoID.append(vid)

    test_data = TabularDataset(os.path.join(save_prefix, 'scenario_1/test', f'sub_{sub}_vid_{vid}.csv'))
    # Same columns as the training frame (label kept); used for the leaderboard.
    test_data_no_arousal = test_data.drop(columns=['arousal'])
    y_test = test_data[label]
    test_data_nolab = test_data_no_arousal.drop(columns=[label])

    y_pred = predictor.predict(test_data_nolab)
    perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
    print(perf)

    root_mean_squared_error.append(perf['root_mean_squared_error'])
    mean_squared_error.append(perf['mean_squared_error'])
    mean_absolute_error.append(perf['mean_absolute_error'])
    r2.append(perf['r2'])
    pearsonr.append(perf['pearsonr'])
    median_absolute_error.append(perf['median_absolute_error'])

    board = predictor.leaderboard(test_data_no_arousal, silent=True)
    # A scalar is broadcast to every row, tagging the board with its recording.
    board.insert(0, 'subjectID', sub)
    board.insert(1, 'videoID', vid)
    leader_boards.append(board)

# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty frame so the export below still works when there are no test pairs.
leader_board_dataframe = pd.concat(leader_boards) if leader_boards else pd.DataFrame()

evaluation_dataframe = pd.DataFrame({'subjectID': subjectID, 'videoID': videoID, 'root_mean_squared_error': root_mean_squared_error, 'mean_squared_error': mean_squared_error,
                                'mean_absolute_error': mean_absolute_error, 'r2': r2, 'pearsonr': pearsonr, 'median_absolute_error': median_absolute_error})

evaluation_dataframe.to_csv('AutogluonModels/scenario_1/evaluation_valence.csv')
leader_board_dataframe.to_csv('AutogluonModels/scenario_1/leaderboard_valence.csv')