In [4]:
!pip3 


Usage:   
  pip3 <command> [options]

Commands:
  install                     Install packages.
  lock                        Generate a lock file.
  download                    Download packages.
  uninstall                   Uninstall packages.
  freeze                      Output installed packages in requirements format.
  inspect                     Inspect the python environment.
  list                        List installed packages.
  show                        Show information about installed packages.
  check                       Verify installed packages have compatible dependencies.
  config                      Manage local and global configuration.
  search                      Search PyPI for packages.
  cache                       Inspect and manage pip's wheel cache.
  index                       Inspect information available from package indexes.
  wheel                       Build wheels from your requirements.
  hash                        Compute hashes of packag

In [6]:
!pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.7.1-cp310-cp310-win_amd64.whl.metadata (11 kB)
Collecting scipy>=1.8.0 (from scikit-learn)
  Downloading scipy-1.15.3-cp310-cp310-win_amd64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.1-cp310-cp310-win_amd64.whl (8.9 MB)
   ---------------------------------------- 0.0/8.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.9 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.9 MB ? eta -:--:--
   --- ------------------------------------ 0.8/8.9 MB 2.4 MB/s eta 0:00:04
   ---------- ----------------------------- 2.4/8.9 MB 4.6 MB/s eta 0:00:02
   --------------------------- ------------ 6.0/8.9 MB 8.6 MB/s eta 0:00:01
   ---------------------------------------- 8.9/8.9 MB 10

In [None]:
# 

import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from scikeras.wrappers import KerasClassifier
import numpy as np
# 1. 数据生成
# 生成一个示例分类数据集
X, y = make_classification(
    n_samples=100,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    random_state=42
)

# 将数据划分为训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 数据标准化 (对于神经网络通常是推荐的)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)





# 2. 模型定义 (使用 KerasClassifier 模拟带有优化器的模型)
def create_model(optimizer='adam', learning_rate=0.001):
    """
    创建一个简单的Keras分类模型。
    学习率作为参数传递给优化器。
    """
    model = keras.Sequential([
        keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation='sigmoid') # 二分类问题
    ])

    if optimizer == 'adam':
        opt = keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        raise ValueError("Unsupported optimizer")

    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model









# 包装Keras模型为Scikit-learn兼容的分类器
# 这里设置一个默认的学习率，但GridSearchCV会覆盖它
clf = KerasClassifier(
    model=create_model,
    optimizer='adam', # 默认使用Adam优化器
    learning_rate=0.0001, # 默认学习率，将被GridSearchCV覆盖
    epochs=2, # 为了快速运行，这里只设置少量epoch
    batch_size=32,
    verbose=0 # 在KerasClassifier内部训练时关闭详细输出，让GridSearchCV的verbose控制
)
print(clf)






# 3. GridSearchCV 设置
# 注意：train_test_split(X_train.indices_, ...) 这种用法通常用于
# scikit-learn的pipeline中，当X_train不是一个简单的numpy数组时。
# 对于普通的numpy数组，X_train.indices_ 是不存在的。
# 这里我们假设X_train是一个简单的numpy数组，
# 并且为了模拟原代码的行为，我们将手动创建一个split，
# 即将X_train内部再分割一次作为cv的验证集。

# 假设X_train是一个常规的numpy数组，我们需要模拟一个cv_split。
# Grid Search期望的cv是一个迭代器，每次迭代返回(train_idx, val_idx)
# 由于原始代码使用了 X_train.indices_ 并且 test_size=0.2, shuffle=False
# 这暗示了某种预定义的分割或者特定数据集的特性。
# 为了使代码可运行，我们模拟一个单次的 train/validation split。




# 获取X_train的索引
indices = list(range(len(X_train)))
# 进行一次训练-验证分割
train_idx, val_idx = train_test_split(indices, test_size=0.2, shuffle=False, random_state=42)

# 将分割结果包装成元组列表，以符合GridSearchCV的cv参数期望
train_val_split = [(train_idx, val_idx)]

param_grid = {
    "optimizer__learning_rate": np.logspace(np.log10(0.0001), np.log10(0.01), num=10)
, # KerasClassifier的参数名是'learning_rate'
    # 如果你还想调优Keras模型的其他参数，例如epochs或batch_size，可以在这里添加：
    "epochs": np.linalg[5, 10],
    "batch_size": [16, 32],
}







# By setting n_jobs=-1, grid search is performed
# with all the processors, in this case the output of the training
# process is not printed sequentially
search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=train_val_split, # 使用我们手动创建的交叉验证分割
    return_train_score=True,
    scoring="accuracy",
    refit=True,
    verbose=1, # 显示GridSearchCV的进度
    error_score="raise",
    n_jobs=1, # 为了演示输出，这里设置为1，方便查看每个fit的进度
)






# 4. 执行网格搜索
print("开始执行GridSearchCV...")
search.fit(X_train, y_train)
print("GridSearchCV执行完毕。")






# 5. 结果分析与输出
search_results = pd.DataFrame(search.cv_results_)
print("\nGridSearchCV 搜索结果:")










# 打印所有参数组合的平均测试分数和排名
print(search_results[[
    "param_optimizer__learning_rate", # 或者 "param_optimizer__lr" 如果你的KerasClassifier允许
    "param_epochs",     # <-- 应该这样修改
    "param_batch_size", # <-- 应该这样修改
    "mean_test_score",
    "std_test_score",
    "mean_train_score",
    "rank_test_score",

]].sort_values(by="rank_test_score"))


best_run = search_results[search_results["rank_test_score"] == 1].squeeze()
best_parameters = best_run["params"]

print(f"\n最佳参数组合: {best_parameters}")
print(f"最佳验证集准确率: {best_run['mean_test_score']}")
print(f"对应的训练集准确率: {best_run['mean_train_score']}")


# 使用最佳模型在测试集上进行预测
print("\n使用最佳模型在测试集上进行评估...")
best_estimator = search.best_estimator_
test_accuracy = best_estimator.score(X_test, y_test)
print(f"最佳模型在独立测试集上的准确率: {test_accuracy:.4f}")








KerasClassifier(
	model=<function create_model at 0x00000140C3706710>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=adam
	loss=None
	metrics=None
	batch_size=32
	validation_batch_size=None
	verbose=0
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=2
	learning_rate=0.0001
	class_weight=None
)
开始执行GridSearchCV...
Fitting 1 folds for each of 40 candidates, totalling 40 fits


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

GridSearchCV执行完毕。

GridSearchCV 搜索结果:
    param_optimizer__learning_rate  param_epochs  param_batch_size  \
24                        0.000774             5                32   
28                        0.005995             5                32   
15                        0.001292            10                16   
22                        0.000278             5                32   
29                        0.010000             5                32   
31                        0.000167            10                32   
4                         0.000774             5                16   
6                         0.002154             5                16   
18                        0.005995            10                16   
20                        0.000100             5                32   
17                        0.003594            10                16   
7                         0.003594             5                16   
12                        0.000278            10    