In [1]:
from data_utils import load_data, flatten_data
from early_model import stacked_lstm
from model_utils import model_pipeline, plot_history
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix
from scipy.stats import ttest_ind
from scipy.stats import spearmanr

In [2]:
train_participant_num = ["C56D", "C93D", "C382D", "C382N", "C544D", "C709N", "C788N", "P113D", "P113N", "P191D", "P191N", "P299D", "P299N", "P300D", "P336D", "P492D", "P492N", "P531N", "P699D", "P699N", "P890N", "P921D", "P921N"]
valid_participant_num = ["C67D", "C202D", "C202N", "C256D", "C256N", "P54D", "P54N", "P342D", "P342N", "P487D", "P487N", "P649N"]

X_train, y_train = load_data(train_participant_num, 'train', downsampling=True, angle_energy=False, augment=False)
X_valid, y_valid = load_data(valid_participant_num, 'validation')

num_classes = y_train.shape[1]

C56D is not selected to be used for training (downsampling)
C93D is not selected to be used for training (downsampling)
C382D is not selected to be used for training (downsampling)
C382N is not selected to be used for training (downsampling)
C544D is not selected to be used for training (downsampling)
C709N is not selected to be used for training (downsampling)
C788N is not selected to be used for training (downsampling)
P113D is selected to be used for training (downsampling)
P113N is selected to be used for training (downsampling)
P191D is selected to be used for training (downsampling)
P191N is selected to be used for training (downsampling)
P299D is selected to be used for training (downsampling)
P299N is selected to be used for training (downsampling)
P300D is selected to be used for training (downsampling)
P336D is selected to be used for training (downsampling)
P492D is selected to be used for training (downsampling)
P492N is selected to be used for training (downsampling)
P531N

In [7]:
X_train_XYZ = X_train[:, :, :66]  # XYZ坐标
X_train_sEMG = X_train[:, :, 66:70]  # sEMG数据

# 定义模态分组
trunk_indices = [0, 7, 8, 19, 20, 21]  # 身体躯干模态索引
upper_limb_indices = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18]  # 上肢模态索引
lower_limb_indices = [1, 2, 3, 4, 5, 6]  # 下肢模态索引

# 初始化相关性列表
correlations_trunk = []
correlations_upper_limb = []
correlations_lower_limb = []

# 分别计算每个模态组的相关性
for group, correlations in [(trunk_indices, correlations_trunk),
                            (upper_limb_indices, correlations_upper_limb),
                            (lower_limb_indices, correlations_lower_limb)]:
    group_correlations = []
    for i in group:
        # 获取每个XYZ坐标的展平后的数组
        X_flat = X_train[:, :, i].flatten()
        Y_flat = X_train[:, :, i+22].flatten()
        Z_flat = X_train[:, :, i+44].flatten()
        # 为了匹配X_flat, Y_flat, Z_flat的长度，我们需要正确地重复y_train
        y_repeated = np.repeat(y_train, X_train.shape[1])
        # 计算相关性
        coef_X, _ = spearmanr(X_flat, y_repeated[:len(X_flat)])
        coef_Y, _ = spearmanr(Y_flat, y_repeated[:len(Y_flat)])
        coef_Z, _ = spearmanr(Z_flat, y_repeated[:len(Z_flat)])
        # 计算平均相关系数
        avg_coef = np.mean([coef_X, coef_Y, coef_Z])
        group_correlations.append(avg_coef)
    # 计算并保存该模态组的平均相关性
    avg_group_correlation = np.mean(group_correlations)
    correlations.append(avg_group_correlation)

# 对sEMG数据计算相关性
correlations_sEMG = []
for i in range(4):
    sEMG_flat = X_train_sEMG[:, :, i].flatten()
    y_repeated_sEMG = np.repeat(y_train, X_train_sEMG.shape[1])
    coef_sEMG, _ = spearmanr(sEMG_flat, y_repeated_sEMG[:len(sEMG_flat)])
    correlations_sEMG.append(coef_sEMG)

correlation_sEMG = np.mean(correlations_sEMG)

all_correlations = []
all_correlations.append(correlations_trunk[0])

all_correlations.append(correlations_upper_limb[0])
all_correlations.append(correlations_lower_limb[0])
all_correlations.append(correlation_sEMG)


# 打印相关性结果
print("Correlations for Trunk:", correlations_trunk)
print("Correlations for Upper Limb:", correlations_upper_limb)
print("Correlations for Lower Limb:", correlations_lower_limb)
print("Correlations for sEMG signals:", correlation_sEMG)
print("Correlations for All:", all_correlations)

Correlations for Trunk: [-0.0005935154175248134]
Correlations for Upper Limb: [-5.168494671029889e-05]
Correlations for Lower Limb: [-0.000717485724721992]
Correlations for sEMG signals: -0.0029697894500262632
Correlations for All: [-0.0005935154175248134, -5.168494671029889e-05, -0.000717485724721992, -0.0029697894500262632]


In [17]:
# Calculate absolute values and normalize to get initial weights
abs_correlations = np.abs(all_correlations)
normalized_weights = abs_correlations / np.sum(abs_correlations)

# Clip weights exceeding the threshold and redistribute if necessary
max_threshold = 0.5
clipped_weights = np.clip(normalized_weights, None, max_threshold)

# Redistribute weights if any were clipped to the threshold
if np.any(clipped_weights == max_threshold):
    # Calculate the total weight to be redistributed among non-clipped weights
    total_redistribute_weight = 1 - np.sum(clipped_weights == max_threshold) * max_threshold
    # Calculate the sum of weights that are less than the threshold (these will be redistributed)
    sum_weights_to_redistribute = np.sum(clipped_weights[clipped_weights < max_threshold])
    # Adjust weights that are below the threshold
    for i, weight in enumerate(clipped_weights):
        if weight < max_threshold:
            clipped_weights[i] = weight / sum_weights_to_redistribute * total_redistribute_weight

print(clipped_weights)

[0.21777408 0.01896436 0.26326156 0.5       ]


In [18]:
clipped_weights

array([0.21777408, 0.01896436, 0.26326156, 0.5       ])

In [8]:
# Modality groups definition
trunk_indices = [0, 7, 8, 19, 20, 21]
upper_limb_indices = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
lower_limb_indices = [1, 2, 3, 4, 5, 6]
sEMG_indices = list(range(66, 70))

# Function to prepare modality-specific datasets
def prepare_modality_data(X, indices, is_sEMG=False):
    if is_sEMG:
        return X[:, :, indices]
    else:
        all_indices = []
        for i in indices:
            all_indices.extend([i, i+22, i+44])
        return X[:, :, all_indices]

# Define modalities
modalities = {
    "Trunk": trunk_indices,
    "Upper Limb": upper_limb_indices,
    "Lower Limb": lower_limb_indices,
    "sEMG": sEMG_indices
}

In [36]:
predictions = {}

for modality_name, indices in modalities.items():
    is_sEMG = (modality_name == "sEMG")
    X_train_modality = prepare_modality_data(X_train, indices, is_sEMG)
    X_valid_modality = prepare_modality_data(X_valid, indices, is_sEMG)

    input_shape = (X_train_modality.shape[1], X_train_modality.shape[2])
    model = stacked_lstm(input_shape, num_classes)
    # model = stacked_lstm(input_shape, 1)
    y_pred_modality, _, _ = model_pipeline(model, X_train_modality, y_train, X_valid_modality, y_valid)
    print("here")
    print(y_pred_modality)

    predictions[modality_name] = y_pred_modality




Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.0009500000451225787.
Epoch 1




Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.0009500000451225787.
Epoch 1




Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.0009500000451225787.
Epoch 1




Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/50

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/50

Epoch 3: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 3/50

Epoch 4: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 4/50

Epoch 5: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 5/50

Epoch 6: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 6/50

Epoch 7: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 7/50

Epoch 8: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 8/50

Epoch 9: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 9/50

Epoch 10: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 10/50

Epoch 11: LearningRateScheduler setting learning rate to 0.0009500000451225787.
Epoch 1

In [39]:
def new_ensemble_predictions(prediction1, prediction2, prediction3, prediction4, strategy='average', weights=None, rule=None):
    """
    Ensembles two sets of predictions using different strategies.
    Parameters:
    prediction1 (np.ndarray): The first set of predictions.
    prediction2 (np.ndarray): The second set of predictions.
    strategy (str): The ensembling strategy to use. Options are 'average', 'product', 'max', or 'rule'.
    weights (list): A list of weights to use for each set of predictions. If not given, defaults to equal weights.
    rule (function): A callable rule function to use for rule-based ensembling.

    Returns:
        np.ndarray: The ensembled predictions.
    """
    prediction1, prediction2 = np.array(prediction1), np.array(prediction2)
    if not weights:
        # If no weights are given, assume equal weights for both predictions
        weights = [0.25, 0.25, 0.25, 0.25]
    if prediction1.shape != prediction2.shape or len(weights) != 2:
        raise ValueError("Both predictions must have the same shape, and weights must have a length of 2.")
    if strategy == 'average':
        ensemble = prediction1 * weights[0] + prediction2 * weights[1] + prediction3 * weights[2] + prediction4 * weights[3]
    # elif strategy == 'product':
    #     ensemble = prediction1 * prediction2 * prediction3 * prediction4
    # elif strategy == 'max':
    #     ensemble = np.maximum(prediction1, prediction2, prediction3, prediction4)
    # elif strategy == 'rule':
    #     if not rule or not callable(rule):
    #         raise ValueError("A callable rule function is required for rule-based ensembling.")
    #     ensemble = np.array([rule(pred1, pred2) for pred1, pred2 in zip(prediction1, prediction2)])
    else:
        raise ValueError("Invalid ensembling strategy. Options are 'average', 'product', 'max', or 'rule'.")
    ensemble = np.argmax(ensemble, axis=1)
    return ensemble

In [41]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

predictions_list = [predictions[modality] for modality in modalities.keys()]

weighted_predictions = np.zeros(predictions_list[0].shape)

# Apply the clipped and redistributed weights to the predictions
for i, prediction in enumerate(predictions_list):
    # print(clipped_weights[i])
    weighted_predictions += prediction * clipped_weights[i]
    # weighted_predictions += prediction / 4

# print(weighted_predictions)


final_predictions = np.round(weighted_predictions)

# Convert y_valid to class indices if it's in one-hot encoding
y_valid_indices = np.argmax(y_valid, axis=1)

# Evaluate the combined predictions
accuracy = accuracy_score(y_valid_indices, final_predictions)
classification_report_result = classification_report(y_valid_indices, final_predictions)
confusion_matrix_result = confusion_matrix(y_valid_indices, final_predictions)

print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report_result)
print("Confusion Matrix:\n", confusion_matrix_result)

Accuracy: 0.9351690484489369
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.99      0.97      2698
           1       0.36      0.12      0.18       171

    accuracy                           0.94      2869
   macro avg       0.65      0.55      0.57      2869
weighted avg       0.91      0.94      0.92      2869

Confusion Matrix:
 [[2663   35]
 [ 151   20]]
