In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from autogluon.tabular import TabularPredictor
from autogluon.core.models import AbstractModel


In [None]:

class CustomRandomForestModel(AbstractModel):
    """Scikit-learn random-forest regressor exposed as an AutoGluon custom model.

    The wrapped estimator is created once in ``__init__`` with a fixed
    configuration (100 trees, ``random_state=0``) and stored on ``self.model``.
    """

    def __init__(self, **kwargs):
        """Initialise the AutoGluon base model and create the unfitted forest.

        Args:
            **kwargs: Forwarded unchanged to ``AbstractModel.__init__``.
        """
        super().__init__(**kwargs)
        self.model = RandomForestRegressor(n_estimators=100, random_state=0)

    def _fit(self, X, y, **kwargs):
        """Train the wrapped forest on the preprocessed features.

        Args:
            X: Training features handed over by AutoGluon.
            y: Training target values.
            **kwargs: Extra fit arguments supplied by AutoGluon; unused here.
        """
        # AutoGluon runs `preprocess` on the data at inference time, so the
        # estimator has to be trained on the same transformed view; otherwise
        # the columns seen in fit and predict can diverge.
        X = self.preprocess(X, is_train=True)
        self.model.fit(X, y)

    def _predict(self, X, **kwargs):
        """Return the forest's predictions for ``X``.

        Args:
            X: Features to predict on.
            **kwargs: Forwarded to ``self.preprocess``.

        Returns:
            The output of ``RandomForestRegressor.predict`` on the
            preprocessed features.
        """
        # NOTE(review): AbstractModel's documented inference hook is
        # `_predict_proba`; confirm whether `_predict` is ever invoked by the
        # framework or only by direct callers.
        X = self.preprocess(X, **kwargs)
        return self.model.predict(X)

def prepare_data(seed=42, n_samples=1000, n_features=5):
    """Create a synthetic regression dataset and split it into train/test frames.

    Features and target are independent uniform draws from ``[0, 1)``, so the
    dataset carries no learnable signal; it only serves as a runnable example.

    Args:
        seed: Seed for the NumPy random generator. The default makes the
            generated data identical across runs; pass ``None`` to draw fresh
            data on every call.
        n_samples: Number of rows to generate.
        n_features: Number of feature columns, named ``feature_0`` ...
            ``feature_{n_features - 1}``.

    Returns:
        The ``[train_df, test_df]`` pair produced by ``train_test_split`` with
        an 80/20 split; each frame holds the feature columns plus ``target``.
    """
    # A dedicated generator keeps the data reproducible without touching the
    # global NumPy random state.
    rng = np.random.default_rng(seed)
    features = rng.random((n_samples, n_features))
    target = rng.random(n_samples)

    df = pd.DataFrame(features, columns=[f'feature_{i}' for i in range(n_features)])
    df['target'] = target
    return train_test_split(df, test_size=0.2, random_state=42)

def train_autogluon_with_custom_rf(time_limit=300, presets='best_quality', num_stack_levels=2):
    """Train an AutoGluon predictor whose only base model is the custom forest.

    Generates the example dataset, fits a ``TabularPredictor`` on the training
    split, then prints the test-set score, the feature importance table and
    the model leaderboard.

    Args:
        time_limit: Training time budget in seconds passed to ``fit``.
        presets: AutoGluon preset name passed to ``fit``.
        num_stack_levels: Number of stacking levels passed to ``fit``.

    Returns:
        The fitted ``TabularPredictor``.
    """
    train_data, test_data = prepare_data()

    # Configure the AutoGluon predictor.
    predictor = TabularPredictor(
        label='target',
        problem_type='regression',
        eval_metric='mean_squared_error'
    )

    # Custom models are registered by using the model CLASS as the key and a
    # dict of hyperparameters as the value; a string key such as 'custom_rf'
    # is not a model type AutoGluon knows about.
    custom_hyperparameters = {
        CustomRandomForestModel: {}
    }

    # Train the model.
    predictor.fit(
        train_data=train_data,
        time_limit=time_limit,
        presets=presets,
        hyperparameters=custom_hyperparameters,  # only our custom model
        num_stack_levels=num_stack_levels,
    )

    # Evaluate on the held-out split.
    performance = predictor.evaluate(test_data)
    print(f"Test performance: {performance}")

    # Feature importance computed on the held-out split.
    model_importance = predictor.feature_importance(data=test_data)
    print("Feature importance:")
    print(model_importance)

    # Leaderboard shows every trained model, including the stacked layers.
    leaderboard = predictor.leaderboard(test_data)
    print("Model leaderboard:")
    print(leaderboard)

    return predictor



In [None]:

# Run the whole pipeline once (data generation, training, evaluation printouts)
# and keep the returned predictor in a notebook-level variable.
trained_predictor = train_autogluon_with_custom_rf()