# Libraries

In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import neurokit2 as nk

import sys, os
sys.path.append(os.path.relpath("../src/"))
import emg

# Functions

In [None]:
## Load data functions

def load_data_no_folds(scenario_dir_path, dataset_type):
    """Load every physiology/annotations CSV pair of one dataset split.

    Args:
        scenario_dir_path: Scenario directory that contains the split
            directories.
        dataset_type: Name of the split sub-directory to load; it must contain
            a ``physiology`` and an ``annotations`` directory.

    Returns:
        A list of ``(file_name, df_physiology, df_annotations)`` tuples, one
        per file, ordered by file name. Both frames are read with
        ``index_col="time"``.

    Raises:
        AssertionError: If the two directories do not hold the same file
            names.
    """
    split_dir = Path(scenario_dir_path, dataset_type)
    # sort both listings so that matching files end up at the same position
    physiology_files = sorted(Path(split_dir, "physiology").iterdir())
    annotation_files = sorted(Path(split_dir, "annotations").iterdir())
    # zip() stops at the shorter listing, so surplus files would otherwise be
    # dropped without any warning
    if len(physiology_files) != len(annotation_files):
        raise AssertionError(
            f"File count mismatch: {len(physiology_files)} physiology files, "
            f"{len(annotation_files)} annotation files"
        )
    # list that collects one tuple per recording
    storage_list = list()
    for physiology_file_path, annotations_file_path in zip(physiology_files, annotation_files):
        # make sure that we load corresponding physiology and annotations;
        # raised explicitly (not via `assert`) so the check survives `python -O`
        # while keeping the exception type of the original check
        if physiology_file_path.name != annotations_file_path.name:
            raise AssertionError("Order mismatch")
        df_physiology = pd.read_csv(physiology_file_path, index_col="time")
        df_annotations = pd.read_csv(annotations_file_path, index_col="time")
        storage_list.append((annotations_file_path.name, df_physiology, df_annotations))
    return storage_list

def load_data_with_folds(scenario_dir_path, dataset_type):
    """Load one dataset split from every fold directory of a scenario.

    Args:
        scenario_dir_path: Scenario directory whose sub-directories are the
            folds.
        dataset_type: Name of the split sub-directory inside each fold; it
            must contain a ``physiology`` and an ``annotations`` directory.

    Returns:
        A dict mapping each fold directory name to a list of
        ``(file_name, df_physiology, df_annotations)`` tuples ordered by file
        name. Both frames are read with ``index_col="time"``. Folds are
        inserted in sorted name order.

    Raises:
        AssertionError: If, within a fold, the two directories do not hold
            the same file names.
    """
    storage_dict = dict()
    # iterdir() yields entries in arbitrary order; sorting makes the fold order
    # of the returned dict reproducible
    for fold_dir in sorted(Path(scenario_dir_path).iterdir()):
        # plain files next to the fold directories (e.g. .DS_Store) are not
        # folds and would make the nested iterdir() calls fail
        if not fold_dir.is_dir():
            continue
        # sort both listings so that matching files end up at the same position
        physiology_files = sorted(Path(fold_dir, dataset_type, "physiology").iterdir())
        annotation_files = sorted(Path(fold_dir, dataset_type, "annotations").iterdir())
        # zip() stops at the shorter listing, so surplus files would otherwise
        # be dropped without any warning
        if len(physiology_files) != len(annotation_files):
            raise AssertionError(
                f"File count mismatch in {fold_dir.name}: "
                f"{len(physiology_files)} physiology files, "
                f"{len(annotation_files)} annotation files"
            )
        fold_items = list()
        for physiology_file_path, annotations_file_path in zip(physiology_files, annotation_files):
            # make sure that we load corresponding physiology and annotations;
            # raised explicitly (not via `assert`) so the check survives
            # `python -O` while keeping the original exception type
            if physiology_file_path.name != annotations_file_path.name:
                raise AssertionError("Order mismatch")
            df_physiology = pd.read_csv(physiology_file_path, index_col="time")
            df_annotations = pd.read_csv(annotations_file_path, index_col="time")
            fold_items.append((annotations_file_path.name, df_physiology, df_annotations))
        storage_dict[fold_dir.name] = fold_items
    return storage_dict

## Plotting functions
def plot_data(modality, annotations, physiology, test=False):
    """Plot one physiology channel above its arousal/valence annotations.

    The upper panel shows ``physiology[modality]`` with the time span covered
    by ``annotations`` shaded in green; the lower panel shows the ``arousal``
    and ``valence`` columns. Both panels share the x-range of ``physiology``.

    Args:
        modality: Column of ``physiology`` to plot.
        annotations: Frame with ``arousal`` and ``valence`` columns.
        physiology: Frame holding the signal column ``modality``.
        test: If true, title and legend refer to test data instead of
            training data.
    """
    # the legend previously said "train" even when test data was plotted
    split_name = "test" if test else "train"
    title = "Test data" if test else "Training data"
    x_left, x_right = physiology.index[0], physiology.index[-1]

    fig = plt.figure(figsize=(16, 8))
    fig.patch.set_facecolor('white')
    # physiology with the annotated range highlighted
    plt.subplot(211)
    plt.plot(physiology.index, physiology[modality])
    plt.axvspan(annotations.index[0], annotations.index[-1], color='green', alpha=0.3)
    plt.xlim(left=x_left, right=x_right)
    plt.title(title, fontsize=20)
    plt.ylabel("Signal value", fontsize=16)
    plt.xlabel("Time", fontsize=16)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    # annotations
    plt.subplot(212)
    plt.plot(annotations.index, annotations['arousal'], label=f'arousal - {split_name}')
    plt.plot(annotations.index, annotations['valence'], label=f'valence - {split_name}')
    plt.xlim(left=x_left, right=x_right)
    plt.legend(fontsize=14)
    plt.ylabel("Annotation value", fontsize=16)
    plt.xlabel("Time", fontsize=16)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.show()

# Plot from neurokit
def plot_emg(data, fs=1000):
    """Process an EMG signal with the project pipeline and plot the result.

    ``data`` is passed to ``emg.emg_process`` with ``sampling_rate=fs`` and
    ``filterCutoff=1.0``; the processed signals are then handed to
    ``nk.emg_plot``. Nothing is returned.
    """
    processed_signals, _info = emg.emg_process(data, sampling_rate=fs, filterCutoff=1.0)
    nk.emg_plot(processed_signals)

# Loading data

In [None]:
# root directory of the scenario to explore
scenario_dir = "../data/scenario_1"

# load both splits, announcing each one before its files are read
print("Loading train data")
train = load_data_no_folds(scenario_dir_path=scenario_dir, dataset_type="train")
print("Loading test data")
test = load_data_no_folds(scenario_dir_path=scenario_dir, dataset_type="test")

# Processing tests

In [None]:
# run the EMG pipeline on the `emg_coru` channel of the first training recording
first_physiology = train[0][1]
df, info = emg.emg_process(first_physiology["emg_coru"], sampling_rate=1000, threshold=5)
info

In [None]:
# overlay the processing stages stored in `df`, one line per column
for column, legend_label in (
    ("EMG_Raw", "emg_coru"),
    ("EMG_Clean", "clean"),
    ("EMG_Rms", "rms"),
    ("EMG_Onsets", "onset"),
):
    plt.plot(df[column], label=legend_label)
plt.legend()

In [None]:
# run neurokit2's `emg_analyze` on the processed frame `df`; as the last
# expression of the cell its result is displayed by the notebook
nk.emg_analyze(df, sampling_rate=1000)

In [None]:
# run the EMG pipeline on the `emg_zygo` channel of the first training recording
df, info = emg.emg_process(train[0][1].emg_zygo, threshold=4, sampling_rate=1000)
# a bare `info` in the middle of a cell is never displayed, so print it
print(info)

# overlay the processing stages stored in `df`
plt.plot(df.EMG_Raw, label='emg_zygo')
plt.plot(df.EMG_Clean, label='clean')
plt.plot(df.EMG_Rms, label='rms')
plt.plot(df.EMG_Onsets, label='onset')
plt.legend()

In [None]:
# run the EMG pipeline on the `emg_trap` channel of the first training recording
df, info = emg.emg_process(train[0][1].emg_trap, threshold=6, sampling_rate=1000)
# a bare `info` in the middle of a cell is never displayed, so print it
print(info)

# overlay the processing stages stored in `df`
plt.plot(df.EMG_Raw, label='emg_trap')
plt.plot(df.EMG_Clean, label='clean')
plt.plot(df.EMG_Rms, label='rms')
plt.plot(df.EMG_Onsets, label='onset')
plt.legend()

# Plot data and ratings

In [None]:
# for every training recording: plot the `emg_zygo` channel with its
# annotations, then the processed EMG summary
for (name, data, label) in train:
    print(name)
    plot_data('emg_zygo', label, data, test=False)
    plt.figure()
    try:
        plot_emg(data.emg_zygo)
    except Exception as err:
        # keep going with the next recording, but report why this one failed;
        # `Exception` (unlike a bare `except`) lets KeyboardInterrupt stop the loop
        print(f"Failed to plot {name}: {err!r}")
    plt.show()

In [None]:
# for every training recording: plot the `emg_coru` channel with its
# annotations, then the processed EMG summary
for (name, data, label) in train:
    print(name)
    plot_data('emg_coru', label, data, test=False)
    plt.figure()
    try:
        plot_emg(data.emg_coru)
    except Exception as err:
        # keep going with the next recording, but report why this one failed;
        # `Exception` (unlike a bare `except`) lets KeyboardInterrupt stop the loop
        print(f"Failed to plot {name}: {err!r}")
    plt.show()

In [None]:
# for every training recording: plot the `emg_trap` channel with its
# annotations, then the processed EMG summary
for (name, data, label) in train:
    print(name)
    plot_data('emg_trap', label, data, test=False)
    plt.figure()
    try:
        plot_emg(data.emg_trap)
    except Exception as err:
        # keep going with the next recording, but report why this one failed;
        # `Exception` (unlike a bare `except`) lets KeyboardInterrupt stop the loop
        print(f"Failed to plot {name}: {err!r}")
    plt.show()

# First try
This section follows the `explain_data` notebook; only the data type was changed.

In [None]:
# recording to inspect; the same file name exists in the train and test splits
file_name = "sub_1_vid_1.csv"

# read physiology and annotations of both splits, indexed by their time column
scenario_root = Path("../data/scenario_1")
train_physiology = pd.read_csv(scenario_root / "train" / "physiology" / file_name, index_col="time")
train_annotations = pd.read_csv(scenario_root / "train" / "annotations" / file_name, index_col="time")
test_physiology = pd.read_csv(scenario_root / "test" / "physiology" / file_name, index_col="time")
test_annotations = pd.read_csv(scenario_root / "test" / "annotations" / file_name, index_col="time")

In [None]:
# display the training physiology frame
train_physiology

In [None]:
# display the training annotations frame
train_annotations

In [None]:
# display the test physiology frame
test_physiology

In [None]:
# display the test annotations frame
test_annotations

In [None]:
import matplotlib.pyplot as plt

def _plot_physiology_panel(position, physiology, annotations, modality, x_range, title):
    """Draw one physiology trace and shade the time span covered by annotations."""
    plt.subplot(position)
    plt.plot(physiology.index, physiology[modality])
    plt.axvspan(annotations.index[0], annotations.index[-1], color='green', alpha=0.3)
    plt.xlim(left=x_range[0], right=x_range[1])
    plt.title(title, fontsize=30)
    plt.ylabel("Signal value", fontsize=18)
    plt.xlabel("Time", fontsize=18)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)


def _plot_annotation_panel(position, annotations, split_name, x_range, nan_tick=False):
    """Draw the arousal and valence curves of one split.

    With ``nan_tick`` the y axis gets a single tick labelled "NaN" at the
    first arousal value instead of the default ticks.
    """
    plt.subplot(position)
    plt.plot(annotations.index, annotations['arousal'], label=f'arousal - {split_name}')
    plt.plot(annotations.index, annotations['valence'], label=f'valence - {split_name}')
    plt.xlim(left=x_range[0], right=x_range[1])
    plt.xticks(fontsize=14)
    if nan_tick:
        plt.yticks([annotations['arousal'].iloc[0]], ["NaN"], fontsize=14)
    else:
        plt.yticks(fontsize=14)
    plt.legend(fontsize=14)
    plt.ylabel("Annotation value", fontsize=18)
    plt.xlabel("Time", fontsize=18)


def plot_data_comparison(train_annotations, train_physiology, test_annotations, test_physiology,
                         modality='emg_coru'):
    """Plot train and test physiology with their annotations in four stacked panels.

    Panels, top to bottom: training physiology, training annotations, test
    physiology, test annotations. In the physiology panels the time span
    covered by the annotations is shaded in green. The test annotation panel
    shows a single y tick labelled "NaN" at the first arousal value.

    Args:
        train_annotations: Frame with ``arousal`` and ``valence`` columns.
        train_physiology: Frame holding the training signal column.
        test_annotations: Frame with ``arousal`` and ``valence`` columns.
        test_physiology: Frame holding the test signal column.
        modality: Physiology column to plot; defaults to ``'emg_coru'``, the
            channel that was previously hard-coded.
    """
    fig = plt.figure(figsize=(32, 40))
    fig.patch.set_facecolor('white')
    # NOTE(review): all four panels use the test recording's time range, as
    # before; confirm this is intended when train and test differ in length.
    x_range = (test_physiology.index[0], test_physiology.index[-1])

    _plot_physiology_panel(411, train_physiology, train_annotations, modality, x_range, "Training data")
    _plot_annotation_panel(412, train_annotations, "train", x_range)
    _plot_physiology_panel(413, test_physiology, test_annotations, modality, x_range, "Test data")
    _plot_annotation_panel(414, test_annotations, "test", x_range, nan_tick=True)
    plt.show()

Below is an example of train and test data:
- In the training data, annotations start and end together with the physiological data. Every annotation point has arousal and valence values assigned.
- In the test data, annotations start 10 s after the physiology starts and end 10 s before it ends. The annotation time points are given, but their arousal and valence values have been removed. Your task is to predict the arousal and valence values at those time points.

In plots, green regions show the range of physiological signals covered by annotation points.

The above holds for all scenarios as shown below.

In [None]:
# recording to compare; the same file name exists in the train and test splits
file_name = "sub_1_vid_1.csv"

# read physiology and annotations of both splits, indexed by their time column
scenario_root = Path("../data/scenario_1")
train_physiology = pd.read_csv(scenario_root / "train" / "physiology" / file_name, index_col="time")
train_annotations = pd.read_csv(scenario_root / "train" / "annotations" / file_name, index_col="time")
test_physiology = pd.read_csv(scenario_root / "test" / "physiology" / file_name, index_col="time")
test_annotations = pd.read_csv(scenario_root / "test" / "annotations" / file_name, index_col="time")

# test annotations are NA; overwrite them with -1 (plotting only) so pyplot can draw them
test_annotations.loc[:] = -1

plot_data_comparison(train_annotations, train_physiology, test_annotations, test_physiology)

In [None]:
# run the project EMG pipeline on the `emg_coru` column of the loaded training recording
df, info = emg.emg_process(train_physiology.emg_coru, threshold=3, sampling_rate=1000)

In [None]:
# display the second value returned by `emg.emg_process`
info

In [None]:
# overlay the processing stages stored in `df` on one wide figure
plt.figure(figsize=(16, 8))
for column, legend_label in (
    ("EMG_Raw", "emg"),
    ("EMG_Clean", "clean"),
    ("EMG_Rms", "rms"),
    ("EMG_Onsets", "onset"),
):
    plt.plot(df[column], label=legend_label)
plt.legend()

In [None]:
# show the documentation of neurokit2's `emg_activation`; `help()` replaces the
# IPython-only `obj?` syntax, which is a SyntaxError outside IPython
help(nk.emg_activation)

In [None]:
# recordings to compare: one training and one test file from fold 0 of scenario 4
train_file_name = "sub_0_vid_3.csv"
test_file_name = "sub_0_vid_0.csv"

# read physiology and annotations of both splits, indexed by their time column
fold_root = Path("../data/scenario_4/fold_0")
train_physiology = pd.read_csv(fold_root / "train" / "physiology" / train_file_name, index_col="time")
train_annotations = pd.read_csv(fold_root / "train" / "annotations" / train_file_name, index_col="time")
test_physiology = pd.read_csv(fold_root / "test" / "physiology" / test_file_name, index_col="time")
test_annotations = pd.read_csv(fold_root / "test" / "annotations" / test_file_name, index_col="time")

# test annotations are NA; overwrite them with -1 (plotting only) so pyplot can draw them
test_annotations.loc[:] = -1

plot_data_comparison(train_annotations, train_physiology, test_annotations, test_physiology)