In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# 1.载入数据

In [6]:
def load_data(clean_csv_folder, noisy_csv_folder):
    """Load paired clean/noisy CSV files and stack them into two arrays.

    Every file in ``clean_csv_folder`` whose name ends with ``.csv`` is read
    together with the file of the same name in ``noisy_csv_folder``. Files are
    processed in sorted filename order, so the row order of the result does
    not depend on the arbitrary ordering returned by ``os.listdir``.

    Args:
        clean_csv_folder: Directory holding the clean CSV files.
        noisy_csv_folder: Directory holding one same-named CSV file for each
            CSV file in ``clean_csv_folder``.

    Returns:
        A tuple ``(clean_values, noisy_values)`` of NumPy arrays obtained by
        concatenating the rows of all clean files and all noisy files,
        respectively, in the same file order.

    Raises:
        ValueError: If ``clean_csv_folder`` contains no ``.csv`` files, or if
            a clean/noisy pair does not have the same number of rows.
        FileNotFoundError: Propagated from ``os.listdir`` / ``pandas.read_csv``
            when a folder or a noisy counterpart file is missing.
    """
    # Sort so that the stacked row order (and therefore any seeded split
    # performed later) is reproducible across machines and file systems.
    csv_names = sorted(
        name for name in os.listdir(clean_csv_folder) if name.endswith('.csv')
    )
    if not csv_names:
        raise ValueError(f"No .csv files found in {clean_csv_folder!r}")

    clean_data_frames = []
    noisy_data_frames = []

    for filename in csv_names:
        clean_df = pd.read_csv(os.path.join(clean_csv_folder, filename))
        noisy_df = pd.read_csv(os.path.join(noisy_csv_folder, filename))

        # Rows are paired positionally after concatenation; a length mismatch
        # in any single file would silently shift every following pair.
        if len(clean_df) != len(noisy_df):
            raise ValueError(
                f"Row count mismatch for {filename!r}: "
                f"clean has {len(clean_df)} rows, noisy has {len(noisy_df)} rows"
            )

        clean_data_frames.append(clean_df)
        noisy_data_frames.append(noisy_df)

    # Merge the per-file frames into a single frame per side.
    clean_data = pd.concat(clean_data_frames, ignore_index=True)
    noisy_data = pd.concat(noisy_data_frames, ignore_index=True)

    return clean_data.values, noisy_data.values

# Machine-specific input folders -- update both paths before running.
# Each clean CSV is expected to have a same-named file in the noisy folder.
clean_csv_folder = 'E:\\wavelet\\wavelet coefficient\\source\\test_source_小波系数能量自适应阈值降维'
noisy_csv_folder = 'E:\\wavelet\\wavelet coefficient\\Gauss\\test_Gauss_小波系数能量自适应阈值降维'

# Read every CSV pair and keep the stacked values as arrays.
clean_data, noisy_data = load_data(
    clean_csv_folder=clean_csv_folder,
    noisy_csv_folder=noisy_csv_folder,
)

# 2.划分数据集

In [7]:
# Noisy values are the model inputs and clean values the targets;
# 20% of the rows are held out, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    noisy_data,
    clean_data,
    test_size=0.2,
    random_state=42,
)

# 3.设置网格搜索参数

In [8]:
# Candidate hyperparameters: 3 x 4 x 3 = 36 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 4, 6],
)

# Base estimator; the fixed seed keeps the forest construction repeatable.
rf = RandomForestRegressor(random_state=42)

# Exhaustive search with 3-fold cross-validation, ranked by negative MSE.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
)

# Run the search on the training portion only.
grid_search.fit(X_train, y_train)

# Report the winning combination.
print(f"Best parameters found: {grid_search.best_params_}")

# Predict the held-out inputs with the best estimator found by the search.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   1.9s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   1.9s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   1.9s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   3.9s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   3.7s
[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time=   3.8s
[CV] END max_depth=None, min_samples_split=2, n_estimators=300; total time=   5.9s
[CV] END max_depth=None, min_samples_split=2, n_estimators=300; total time=   5.7s
[CV] END max_depth=None, min_samples_split=2, n_estimators=300; total time=   5.7s
[CV] END max_depth=None, min_samples_split=4, n_estimators=100; total time=   1.6s
[CV] END max_depth=None, min_samples_split=4, n_estimators=100; total time=   1.5s
[CV] END max_depth=None, 

# 4.模型评估

In [9]:
# Compare the held-out predictions against the clean targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

Mean Squared Error: 2187.5200373534135
