In [1]:
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from utility_SVM import read_all_test_data_from_path, extract_selected_feature, prepare_sliding_window, FaultDetectReg
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, make_scorer

def remove_outliers(df: pd.DataFrame):
    df['temperature'] = df['temperature'].where(df['temperature'] <= 100, np.nan)
    df['temperature'] = df['temperature'].where(df['temperature'] >= 0, np.nan)
    df['temperature'] = df['temperature'].ffill()
    df['temperature'] = df['temperature'] - df['temperature'].iloc[0]

    df['voltage'] = df['voltage'].where(df['voltage'] >= 6000, np.nan)
    df['voltage'] = df['voltage'].where(df['voltage'] <= 9000, np.nan)
    df['voltage'] = df['voltage'].ffill()
    df['voltage'] = df['voltage'] - df['voltage'].iloc[0]

    df['position'] = df['position'].where(df['position'] >= 0, np.nan)
    df['position'] = df['position'].where(df['position'] <= 1000, np.nan)
    df['position'] = df['position'].ffill()
    df['position'] = df['position'] - df['position'].iloc[0]

base_dictionary = '../../dataset/training_data/'
df_data = read_all_test_data_from_path(base_dictionary, remove_outliers, is_plot=False)

normal_test_id = [
    '20240105_164214', '20240105_165300', '20240105_165972', 
    '20240320_152031', '20240320_153841', '20240320_155664', 
    '20240321_122650', '20240325_135213', '20240325_152902', 
    '20240426_141190', '20240426_141532', '20240426_141602', 
    '20240426_141726', '20240426_141938', '20240426_141980', 
    '20240503_164435'
]

df_tr = df_data[df_data['test_condition'].isin(normal_test_id)]

feature_list_all = [
    'data_motor_1_position', 'data_motor_1_temperature', 'data_motor_1_voltage',
    'data_motor_2_position', 'data_motor_2_temperature', 'data_motor_2_voltage',
    'data_motor_3_position', 'data_motor_3_temperature', 'data_motor_3_voltage',
    'data_motor_4_position', 'data_motor_4_temperature', 'data_motor_4_voltage',
    'data_motor_5_position', 'data_motor_5_temperature', 'data_motor_5_voltage',
    'data_motor_6_position', 'data_motor_6_temperature', 'data_motor_6_voltage'
]

x_tr_org, y_temp_tr_org = extract_selected_feature(df_data=df_tr, feature_list=feature_list_all, motor_idx=6, mdl_type='reg')

window_size = 10
sample_step = 1
prediction_lead_time = 1 
threshold = .9
abnormal_limit = 3

x_tr, y_temp_tr = prepare_sliding_window(df_x=x_tr_org, y=y_temp_tr_org, window_size=window_size, sample_step=sample_step, prediction_lead_time=prediction_lead_time, mdl_type='reg')

# 确保标签是二分类的
y_temp_tr_binary = y_temp_tr.apply(lambda x: 1 if x == 'fault' else 0)

steps = [
    ('standardizer', StandardScaler()),  # Step 1: StandardScaler 归一化
    ('mdl', SVC(class_weight='balanced'))  # Step 2: SVM kernel default rbf
]

pipeline = Pipeline(steps)

param_grid = {
    'mdl__C': [0.1, 1, 10, 100],
    'mdl__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'mdl__gamma': ['scale', 'auto', 0.01, 0.1, 1, 10]
}

f1_scorer = make_scorer(f1_score, average='binary')

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring=f1_scorer)

mdl = grid_search.fit(x_tr, y_temp_tr_binary)

test_id = [
    '20240325_155003', '20240425_093699', '20240425_094425', 
    '20240426_140055', '20240503_163963', '20240503_164675', 
    '20240503_165189'
]
df_test = df_data[df_data['test_condition'].isin(test_id)]

detector_reg = FaultDetectReg(
    reg_mdl=mdl, threshold=threshold, abnormal_limit=abnormal_limit, 
    window_size=window_size, sample_step=sample_step, pred_lead_time=prediction_lead_time
)

_, y_label_test_org = extract_selected_feature(df_data=df_test, feature_list=feature_list_all, motor_idx=6, mdl_type='clf')
x_test_org, y_temp_test_org = extract_selected_feature(df_data=df_test, feature_list=feature_list_all, motor_idx=6, mdl_type='reg')

# 确保测试数据中的标签也是二分类的
y_temp_test_org_binary = y_temp_test_org.apply(lambda x: 1 if x == 'fault' else 0)

motor_idx = 6
print(f'Model for motor {motor_idx}:')

n_fold = 7
df_perf = detector_reg.run_cross_val(df_x=x_test_org, y_label=y_label_test_org, y_response=y_temp_test_org_binary, n_fold=n_fold)

print(df_perf)
print('\n')
print('Mean performance metric and standard error:')
for name, metric, error in zip(df_perf.columns, df_perf.mean(), df_perf.std()):
    print(f'{name}: {metric:.4f} +- {error:.4f}')
print('\n')


ValueError: 
All the 480 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
480 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/svm/_base.py", line 199, in fit
    y = self._validate_targets(y)
  File "/opt/anaconda3/envs/digital_twin/lib/python3.9/site-packages/sklearn/svm/_base.py", line 743, in _validate_targets
    raise ValueError(
ValueError: The number of classes has to be greater than one; got 1 class
