In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.metrics import accuracy_score, f1_score as f1_score_rep
import numpy as np
import pandas as pd
from time import time
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [7]:
import numpy as np

x_test = np.load('Wustl_iiot/x_test.npy')
x_train = np.load('Wustl_iiot/x_train.npy')
x_val = np.load('Wustl_iiot/x_val.npy')
y_test = np.load('Wustl_iiot/y_test.npy')
y_train = np.load('Wustl_iiot/y_train.npy')
y_val = np.load('Wustl_iiot/y_val.npy')

In [8]:



# 定义参数
lr_params = {
    'C':[ 0.01, 0.1, 1, 10, 100 ],  # 正则化强度
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],  # 正则化类型
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']  # 求解器
}

# 确保结果目录存在
# result_dir = Path('/content/drive/MyDrive/Data/results')
# if not result_dir.exists():
#     result_dir.mkdir()

def tune_with_halving_grid_search_lr(x_train, y_train, param_grid):
    lr = LogisticRegression()

    start = time()
    halving_lr_results = HalvingGridSearchCV(
        lr,
        param_grid,
        cv=5,
        factor=2,
        min_resources='exhaust',
        scoring='accuracy'
    ).fit(x_train, y_train)

    duration = time() - start

    results = pd.DataFrame(halving_lr_results.cv_results_)
    # results.to_csv(result_dir / 'halving_lr_results.csv')

    # 格式化和排序结果
    results = results.loc[:, ('iter', 'rank_test_score', 'mean_test_score', 'params')]
    results.sort_values(by=['iter', 'rank_test_score'], ascending=[False, True], inplace=True)

    return results, duration

# 使用验证数据集进行参数调整
halving_results, halving_duration = tune_with_halving_grid_search_lr(x_val, y_val, lr_params)

print(halving_results.head())

# 获取最佳参数和分数
best_score = halving_results['mean_test_score'].iloc[0]
best_params = halving_results['params'].iloc[0]



     iter  rank_test_score  mean_test_score  \
200     6                1         0.999966   
199     6                2         0.999933   
195     5                3         0.999933   
198     5                4         0.999933   
196     5                5         0.999925   

                                                params  
200      {'C': 10, 'penalty': 'l2', 'solver': 'lbfgs'}  
199   {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}  
195      {'C': 10, 'penalty': 'l2', 'solver': 'lbfgs'}  
198   {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}  
196  {'C': 10, 'penalty': 'l2', 'solver': 'newton-cg'}  


In [9]:
# 使用最佳参数训练逻辑回归模型
# best_lr = LogisticRegression(C=10000, penalty='l2', solver='lbfgs')
# best_lr.fit(x_train, y_train)

# # 在测试集上评估
# accuracy = accuracy_score(y_test, best_lr.predict(x_test))
# micro_f1 = f1_score_rep(y_test, best_lr.predict(x_test), average="micro")
# macro_f1 = f1_score_rep(y_test, best_lr.predict(x_test), average="macro")

# # 打印结果
# # print(f'Best score for HalvingGridSearchCV is {best_score:.3f}, took {halving_duration:.2f} seconds')
# # print(f'Params: {best_params}')
# print(f'Corresponding test accuracy: {accuracy * 100:.2f}%')
# print("Micro F1 Score: ", micro_f1)
# print("Macro F1 Score: ", macro_f1)

In [10]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.datasets import load_digits

best_lr = LogisticRegression(**best_params)
best_lr.fit(x_train, y_train)

y_pred = best_lr.predict(x_test)

conf_matrix = confusion_matrix(y_test, y_pred)
# tn, fp, fn, tp = conf_matrix.ravel()
cm = conf_matrix
fp = cm.sum(axis=0) - np.diag(cm)
fn = cm.sum(axis=1) - np.diag(cm)
tp = np.diag(cm)
tn = cm.sum() - (fp + fn + tp)


fpr = fp / (fp + tn)
tpr = tp / (tp + fn)
fnr = fn / (fn + tp)
tnr = tn / (tn + fp)
precision = precision_score(y_test, y_pred, average='macro')
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
auc = roc_auc_score(y_test, best_lr.predict_proba(x_test), multi_class='ovr')

print(f"FPR: {fpr}")
print(f"TPR: {tpr}")
print(f"FNR: {fnr}")
print(f"TNR: {tnr}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
print(f"F1-score: {f1}")
print(f"AUC: {auc}")

FPR: [8.37833196e-06 1.25680244e-05 0.00000000e+00 0.00000000e+00
 0.00000000e+00]
TPR: [0.97619048 0.96153846 1.         0.99878641 1.        ]
FNR: [0.02380952 0.03846154 0.         0.00121359 0.        ]
TNR: [0.99999162 0.99998743 1.         1.         1.        ]
Precision: 0.9793769197016235
Accuracy: 0.9999790578547704
F1-score: 0.9832959340384606
AUC: 0.9999993974354677
