# Search good hyper-parameters in Normalization
- It seems finding a good value helps a lot in performance

In [3]:
import sys

from urllib3.http2.probe import acquire_and_get

sys.path.append("../../Share")
import config, utils, baseline, Trainer, Processing_same_with_MATLAB
import os
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

baseline_K_val = [1] #Train ~18 session data, test with 19~
K = baseline_K_val[0]

feature_names = ['Zero Crossing (ZC)', 'Slope Sign Changes (SSC)', 'Waveform Length (WL)', 'WAMP', 'Mean Absolute Value (MAV)', 'Mean Square (MS)', 'Root Mean Square (RMS)',
                 'v-order 3 (V3)', 'log detector (LD)', 'difference absolute standard deviation value (DASDV)', 'maximum fractal length (MFL)', 'myopulse percentage rate (MPR)',
                 'mean absolute value slope (MAVS)', 'weighted mean absolute (WMS)',
                 'Cepstrum Coefficient 1', 'Cepstrum Coefficient 2', 'Cepstrum Coefficient 3', 'Cepstrum Coefficient Average', 'DWTC1', 'DWTC2',
                 'DWTPC1', 'DWTPC2', 'DWTPC3']


feature_idx = range(0,len(feature_names))

fs = round(10e6 / 2048)  # 4883 Hz
lower_cutoff = 100
upper_cutoff = 600
filter_b, filter_a = Processing_same_with_MATLAB.cheby2(4, 30, [lower_cutoff / (fs/2), upper_cutoff / (fs/2)], btype='bandpass')

feat_mean_1ch = np.array([0.1, 0.1, 2.5, 0.0, 11.0, 229.0, 13.8, -11.0, 9.0, 3.0, 1.5, 0.0, 0.0, 2.8])
feat_std_1ch = np.array([0.02, 0.05, 0.65, 0.02, 4.43, 303.9, 6.85, 12.18, 2.87, 0.87, 0.21, 0.04, 6.68, 1.12])
#feat_mean = np.tile(feat_mean_1ch, (4, 1))
#feat_std = np.tile(feat_std_1ch, (4, 1))

feat_mean_1ch = np.array([0.1, 0.1, 2.5, 0.0, 11.0, 229.0, 13.8, -11.0, 9.0, 3.0, 1.5, 0.0, 0.0, 2.8])
feat_std_1ch = np.array([0.02, 0.05, 0.65, 0.02, 4.43, 303.9, 6.85, 12.18, 2.87, 0.87, 0.21, 0.04, 6.68, 1.12])

SUBJECT = "Carlson"
data_files = config.dataset_sub_C
default_path = config.default_path_sub_C

trainer = Trainer.TremorModelTrainer(config, subject=SUBJECT)

In [4]:
import importlib
importlib.reload(Processing_same_with_MATLAB)

#1D - one feature at a time
def Train_and_test(Normalization_TF, mean, std, num_feature_set, target_feat_idx):

    extractor = Processing_same_with_MATLAB.EMGFeatureExtractor(mean, std, filter_b, filter_a, Norm_bool=Normalization_TF, num_feature_set=num_feature_set)
    X_train_all, y_train_all, X_test_all, y_test_all, X_val_all, y_val_all = [], [], [], [], [], []
    unseen_test_result = []

    for idx, session_info in enumerate(data_files):
        print(f"Dataset {idx + 1}/{len(data_files)} - Session {session_info}\n{'='*40}")
        path = os.path.join(default_path, f'{session_info}raw/')
        features, class_labels = [], []
        for c_idx, c in enumerate(config.classes_5):
            raw_data = os.listdir(path+c)
            mat = scipy.io.loadmat(path+c+raw_data[0])
            extractor.buffer = mat['Data_ADC']
            class_labels.append(mat['Data_Cls'].reshape(-1))

            #### features_per_cls = extractor.extract_features(num_feature_set=num_feature_set)  ### 이건 2D input
            features_per_cls = extractor.extract_one_feature_at_a_time(target_feature_idx=target_feat_idx)  ###1D input

            #여기서 Normalization_TF
            #features = (features - self.feat_mean[:, :, np.newaxis]) / self.feat_std[:, :, np.newaxis]
            features_per_cls = extractor.Normalization(features_per_cls, mean, std, target_feat_idx)

            features_per_cls = np.transpose(features_per_cls, (1, 0))  # shape: (1729, 4, 14)
            features.append(features_per_cls)
            #print(features_per_cls.shape, mat['Data_Cls'].reshape(-1).shape)

        X = np.concatenate(features, axis=0)
        y = np.concatenate(class_labels, axis=0)
        if X.shape[0] != y.shape[-1]:
            print(f"Incorrect shape between features and Class: {X.shape} and {y.shape}, {session_info}")
            break

        if idx < K:
            X_train, y_train, X_val, y_val = utils.split_data(X, y, ratio=0.8)
            X_train_all.append(X_train)
            y_train_all.append(y_train)
            X_val_all.append(X_train)
            y_val_all.append(y_train)

        elif idx == K:
            X_train, y_train, X_test, y_test,  = utils.split_data(X, y, ratio=0.8)
            X_train_all.append(X_train)
            y_train_all.append(y_train)
            X_val_all.append(X_test)
            y_val_all.append(y_test)

            X_train_stacked = np.concatenate(X_train_all, axis=0)
            y_train_stacked = np.concatenate(y_train_all, axis=0)
            print(f"\t Training {K}: ", X_train_stacked.shape, y_train_stacked.shape)
            acc, pre_trained_CNN = trainer.train_multiple_dataset_1D(X_train, y_train, X_test, y_test)
            print(f"\t Accuracy on test dataset {idx+1}: {acc:.4f}%")

        else:
            X_test, y_test, _, _ = utils.split_data(X, y, ratio=1)
            X_test_all.append(X_test)
            y_test_all.append(y_test)
            X_test_stacked = np.concatenate(X_test_all, axis=0)
            y_test_stacked = np.concatenate(y_test_all, axis=0)

            X = np.expand_dims(X, axis=-1)
            acc = pre_trained_CNN.evaluate(X_test, y_test, verbose=0)[1]*100
            print(f"\t Accuracy on unseen dataset {idx+1}: {acc:.4f}%")
            unseen_test_result.append(acc)

    return unseen_test_result, X_train_stacked, y_train_stacked, X_test_stacked, y_test_stacked

In [26]:
Normalization_TF = True
feature_idx = 2  # WL
mean_list = [1.5, 2, 2.5]
std_list = [0.8, 0.9, 1, 1.2]  #std_list = [0.5, 0.65, 0.8]
All_acc, All_param = [], []

for M in mean_list:
    for S in std_list:
        test_acc, X_train_stacked, y_train_stacked, X_test_stacked, y_test_stacked = Train_and_test(
            Normalization_TF, M, S, num_feature_set=14, target_feat_idx=feature_idx
        )
        All_acc.append(np.mean(test_acc))
        All_param.append([M, S])

# Find best param
best_idx = np.argmax(All_acc)
best_param = All_param[best_idx]
best_acc = All_acc[best_idx]

Dataset 1/8 - Session Exp_2025-06-30-v1/E9AD0E7DCC2B/
Dataset 2/8 - Session Exp_2025-06-30-v2/E9AD0E7DCC2B/
	 Training 1:  (13641, 4, 1) (13641,)
Start Training (total epochs: 50)...
Finish Training! (Model is NOT saved)

Maximum training accuracy : 86.8%
Maximum validation accuracy : 89.38%
Accuracy of test dataset using model V0: 89.2035%
	 Accuracy on test dataset 2: 89.3805%
Dataset 3/8 - Session Exp_2025-07-09-v1/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 3: 85.3149%
Dataset 4/8 - Session Exp_2025-07-09-v2/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 4: 84.6279%
Dataset 5/8 - Session Exp_2025-07-10-v1/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 5: 82.6651%
Dataset 6/8 - Session Exp_2025-07-10-v2/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 6: 83.3816%
Dataset 7/8 - Session Exp_2025-07-11-v1/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 7: 83.6595%
Dataset 8/8 - Session Exp_2025-07-11-v2/E9AD0E7DCC2B/
	 Accuracy on unseen dataset 8: 84.1089%
Dataset 1/8 - Session Exp_2025-06-30-v1/E9AD0E7D

In [25]:
print(f"Best Accuracy: {best_acc:.4f} with mean={best_param[0]}, std={best_param[1]}")

df = pd.DataFrame(All_param, columns=["Mean", "Std"])
df["Accuracy"] = All_acc
print(df.pivot(index="Mean", columns="Std", values="Accuracy"))

Best Accuracy: 85.3437 with mean=2, std=0.8
Std        0.50       0.65       0.80
Mean                                 
1.0   83.590881  82.850123  82.722917
2.0   83.426270  83.555695  85.343668
2.5   83.059593  83.107557  85.115243
3.0   83.107278  83.140134  84.022954
4.0   83.022306  84.392695  84.339980


In [28]:
print(f"Best Accuracy: {best_acc:.4f} with mean={best_param[0]}, std={best_param[1]}")

df2 = pd.DataFrame(All_param, columns=["Mean", "Std"])
df2["Accuracy"] = All_acc
print(df2.pivot(index="Mean", columns="Std", values="Accuracy"))

Best Accuracy: 84.6127 with mean=2.5, std=1.2
Std         0.8        0.9        1.0        1.2
Mean                                            
1.5   83.959653  84.241997  82.556731  83.372279
2.0   82.394231  83.637125  82.425355  82.434742
2.5   83.675526  82.573859  82.399942  84.612737


In [10]:
test_acc #Without Norm

[83.41259956359863,
 84.05981659889221,
 78.71378660202026,
 81.50550127029419,
 82.3972225189209,
 82.63145685195923]

In [18]:
test_acc #With Norm

[85.39612293243408,
 84.01345014572144,
 81.91193342208862,
 83.25420022010803,
 84.81760025024414,
 84.60912108421326]

In [None]:
이제 좋은 param 찾기

In [None]:
근데 다 같이 했을때 Normalization이 중요한거고, 다 같이 안하면 크게 상관없는 거 아닌가요
- 아니지, 일단 좋은 param을 찾고 그거를 합치면 더 좋아질듯