In [1]:
import os
from typing import Optional, Tuple, Union

import numpy as np
import scipy.io
from scipy.stats import mode
from sklearn.metrics import classification_report
from sklearn.svm import SVC, LinearSVC

In [2]:
# Dataset directories, relative to the notebook's working directory.
# Built with os.path.join so the separator is correct on every OS
# (a literal backslash is only a path separator on Windows).
train_path = os.path.join("CoordinateData", "train")
validation_path = os.path.join("CoordinateData", "validation")

In [3]:
def segment_data(data: np.ndarray, window_size: int = 180, overlap_ratio: float = 0.75, by_type: bool = True, min_frame: int = 12) -> np.ndarray:
    """
    Segment the input data into fixed-length, overlapping windows.

    When ``by_type`` is True the frames are first split into runs of constant
    exercise type (column index 70). Runs of at most ``min_frame`` frames are
    discarded, and consecutive kept runs of the same type are merged into one
    instance. The trailing run is always kept, regardless of its length.
    Each instance is then zero-padded at the end so that it is covered by a
    whole number of windows, and cut into windows.

    Args:
        data (np.ndarray): Input data with shape (num_samples, num_features).
        window_size (int, optional): The length of each window. Defaults to 180.
        overlap_ratio (float, optional): The ratio of overlap between consecutive windows, in [0, 1). Defaults to 0.75.
        by_type (bool, optional): Whether to segment the data by type (the 71st feature, i.e. column index 70). Defaults to True.
        min_frame (int, optional): Runs with at most this many frames are dropped. Defaults to 12.

    Returns:
        np.ndarray: The segmented data with shape (num_windows, window_size, num_features).
        Padding rows are all zeros (in every column).

    Raises:
        AssertionError: If an input constraint is violated.
    """

    # Check the input constraints
    assert data.shape[0] > 0, "data must contain at least one frame"
    assert window_size > 0, "window_size must be positive"
    assert 0 <= overlap_ratio < 1, "overlap_ratio must be in [0, 1)"
    assert 0 <= min_frame < window_size, "min_frame must be in [0, window_size)"

    dim = data.shape[1]
    instances = []

    if not by_type:
        instances.append(data)
    else:
        type_col = 70
        assert dim > type_col, "by_type=True needs the type in column index 70"

        num_data = data.shape[0]
        left = 0  # first frame of the run currently being scanned

        # Segment the data by exercise type
        for right in range(1, num_data):
            if data[right, type_col] == data[left, type_col]:
                continue

            # The run [left, right) just ended; keep it only if long enough.
            if right - left > min_frame:
                _merge_or_append(instances, data[left:right], type_col)
            left = right

        # Handle the remaining data. `instances` may still be empty here
        # (single-type recording, or every earlier run was too short), which
        # the helper handles instead of indexing instances[-1] blindly.
        _merge_or_append(instances, data[left:num_data], type_col)

    # Clamp to 1: int() truncation can yield 0 for small windows / high
    # overlap, which would break both the modulo and the range() below.
    step_size = max(1, int(window_size * (1 - overlap_ratio)))
    windows = []

    # Create windows for each instance
    for instance in instances:
        num_frames = instance.shape[0]
        if num_frames < window_size:
            # Too short for a single window: pad up to exactly one window.
            pad_size = window_size - num_frames
        else:
            # Pad so that the last window ends exactly on the last row.
            pad_size = -(num_frames - window_size) % step_size

        if pad_size:
            instance = np.vstack([instance, np.zeros((pad_size, dim))])

        for start in range(0, instance.shape[0] - window_size + 1, step_size):
            windows.append(instance[start:start + window_size])

    return np.array(windows)


def _merge_or_append(instances: list, segment: np.ndarray, type_col: int) -> None:
    """Add ``segment`` to ``instances``, merging into the last instance if both have the same type."""
    if instances and instances[-1][0, type_col] == segment[0, type_col]:
        instances[-1] = np.vstack([instances[-1], segment])
    else:
        instances.append(segment)

In [4]:
def load_data(path: str, downsampling: bool, concat: bool, seg_parameters: Optional[dict] = None) -> Tuple[Union[list, np.ndarray], Union[list, np.ndarray]]:
    """
    Load every ``.mat`` file in ``path`` and turn it into windowed features and labels.

    Each file must contain a ``'data'`` matrix. It is windowed with
    ``segment_data``; columns 0-69 of each window are the features and the
    label of a window is the most frequent value of column 72 inside it.
    Zero rows added as padding by ``segment_data`` take part in that vote
    as label 0.

    Args:
        path (str): Directory containing the ``.mat`` files.
        downsampling (bool): If True, skip files whose column 72 holds a single unique value.
        concat (bool): If True, concatenate the per-file results into two arrays.
        seg_parameters (dict, optional): Keyword arguments forwarded to ``segment_data``.
            Defaults to None (use the defaults of ``segment_data``).

    Returns:
        tuple: ``(X, y)``. With ``concat=True`` these are arrays of shape
        (num_windows, window_size, 70) and (num_windows,); otherwise they are
        lists with one such pair of arrays per selected file.

    Raises:
        ValueError: If ``concat`` is True and no file was selected.
    """
    seg_kwargs = seg_parameters or {}

    X_list = []
    y_list = []

    # Iterate through the files in the provided path. os.listdir returns
    # entries in arbitrary order, so sort to keep the output order reproducible.
    for file in sorted(os.listdir(path)):
        # Skip anything that is not a .mat file (sub-directories, OS metadata files, ...).
        if not file.lower().endswith(".mat"):
            continue

        data = scipy.io.loadmat(os.path.join(path, file))['data']

        # If downsampling is True, ignore data with only one unique value in column 72
        if downsampling and np.unique(data[:, 72]).size == 1:
            continue

        processed_data = segment_data(data, **seg_kwargs)

        # Extract feature data (columns 0 to 69) and labels (column 72)
        X_list.append(processed_data[:, :, 0:70])

        # Majority label per window, computed for all windows at once.
        # ravel() normalises the result to 1-D whatever shape mode() returns.
        y_windows = processed_data[:, :, 72]
        y_list.append(np.asarray(mode(y_windows, axis=1)[0]).ravel())

    # If concat is True, concatenate the lists into numpy arrays
    if concat:
        if not X_list:
            raise ValueError(f"No usable .mat files found in {path!r}")
        return np.concatenate(X_list, axis=0), np.concatenate(y_list, axis=0)

    return X_list, y_list

This notebook examines how well an SVM performs on this dataset and whether it is suitable for each modality. The experiments show that an SVM trained on the coordinate features detects almost no positive cases, even though its accuracy on negative cases is very high; this is likely due to the high dimensionality of the coordinate data and the scarcity of positive samples. In contrast, training the SVM on the sEMG data alone gives a reasonably accurate result (macro-averaged F1-score 0.72; the F1-score of the positive class alone is 0.48).

In [5]:
# Training set: files whose label column holds a single value are skipped
# (downsampling=True); segmentation uses the defaults of segment_data.
X, y = load_data(
    path=train_path,
    downsampling=True,
    concat=True,
)

# Validation set: every file is kept, and min_frame=0 means no run of frames
# is discarded for being too short.
X_v, y_v = load_data(
    path=validation_path,
    downsampling=False,
    concat=True,
    seg_parameters={'min_frame': 0},
)

In [6]:
# Keyword arguments shared by the SVC models below.
svm_parameters = dict(
    kernel='rbf',
    gamma='auto',
    probability=True,  # the cells below call predict_proba
    random_state=42,
    # class_weight='balanced',  # optional, currently disabled
)

In [7]:
# SVC on feature columns 0-65 only
# (presumably the coordinate features — TODO confirm against the dataset description).
svm_1 = SVC(**svm_parameters)

# Flatten each window into one row: (num_windows, window_size * num_columns).
X_svm = X[:, :, 0:66].reshape(X.shape[0], -1)

svm_1.fit(X_svm, y)

X_v_svm = X_v[:, :, 0:66].reshape(X_v.shape[0], -1)

pred = svm_1.predict_proba(X_v_svm)
# The columns of predict_proba follow svm_1.classes_, so the arg-max is a
# column index, not a label; map it back to the actual class label.
print(classification_report(y_v, svm_1.classes_[np.argmax(pred, axis=1)], zero_division=0))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97      2706
         1.0       0.00      0.00      0.00       166

    accuracy                           0.94      2872
   macro avg       0.47      0.50      0.49      2872
weighted avg       0.89      0.94      0.91      2872



In [8]:
# SVC on feature columns 66-69 only
# (presumably the sEMG channels — TODO confirm against the dataset description).
svm_2 = SVC(**svm_parameters)

# Flatten each window into one row: (num_windows, window_size * num_columns).
X_svm = X[:, :, 66:70].reshape(X.shape[0], -1)

svm_2.fit(X_svm, y)

X_v_svm = X_v[:, :, 66:70].reshape(X_v.shape[0], -1)

pred = svm_2.predict_proba(X_v_svm)
# The columns of predict_proba follow svm_2.classes_, so the arg-max is a
# column index, not a label; map it back to the actual class label.
print(classification_report(y_v, svm_2.classes_[np.argmax(pred, axis=1)], zero_division=0))

              precision    recall  f1-score   support

         0.0       0.97      0.97      0.97      2706
         1.0       0.48      0.48      0.48       166

    accuracy                           0.94      2872
   macro avg       0.72      0.72      0.72      2872
weighted avg       0.94      0.94      0.94      2872



In [9]:
# SVC on all 70 feature columns.
svm_3 = SVC(**svm_parameters)

# Flatten each window into one row: (num_windows, window_size * num_columns).
X_svm = X[:, :, 0:70].reshape(X.shape[0], -1)

svm_3.fit(X_svm, y)

X_v_svm = X_v[:, :, 0:70].reshape(X_v.shape[0], -1)

pred = svm_3.predict_proba(X_v_svm)
# The columns of predict_proba follow svm_3.classes_, so the arg-max is a
# column index, not a label; map it back to the actual class label.
print(classification_report(y_v, svm_3.classes_[np.argmax(pred, axis=1)], zero_division=0))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97      2706
         1.0       0.00      0.00      0.00       166

    accuracy                           0.94      2872
   macro avg       0.47      0.50      0.49      2872
weighted avg       0.89      0.94      0.91      2872



In [10]:
# Linear SVM on feature columns 66-69 only, for comparison with the kernel SVC.
# Each window is flattened into one row before fitting / predicting.
X_svm = X[:, :, 66:70].reshape(X.shape[0], -1)
X_v_svm = X_v[:, :, 66:70].reshape(X_v.shape[0], -1)

svm_4 = LinearSVC(random_state=42)
svm_4.fit(X_svm, y)

pred = svm_4.predict(X_v_svm)
report = classification_report(y_v, pred, zero_division=0)
print(report)

              precision    recall  f1-score   support

         0.0       0.96      0.90      0.93      2706
         1.0       0.22      0.46      0.30       166

    accuracy                           0.87      2872
   macro avg       0.59      0.68      0.61      2872
weighted avg       0.92      0.87      0.89      2872



