In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold, GridSearchCV
from PreproccesingAlatt import X, y

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
base_models = [
    ('random_forest', RandomForestRegressor(random_state=0)),
    ('gradient_boosting', GradientBoostingRegressor(random_state=0))
]

meta_model = LinearRegression()

rf_param_grid = {
    'n_estimators': [50, 100, 200],    
    'max_depth': [None, 10, 20, 30],   
    'min_samples_split': [2, 5, 10],    
    'min_samples_leaf': [1, 2, 4],      
    'max_features': ['auto', 'sqrt', 'log2']  
}

gb_param_grid = {
    'n_estimators': [50, 100, 200],    
    'learning_rate': [0.01, 0.1, 0.2], 
    'max_depth': [3, 5, 7],             
    'min_samples_split': [2, 5, 10],    
    'min_samples_leaf': [1, 2, 4],      
}

meta_param_grid = {
    'fit_intercept': [True, False],  
    'positive': [True, False],        
}

In [None]:
train_meta_features = np.zeros((X_train.shape[0], len(base_models)))

In [None]:
best_base_models = {}
for i, (name, model) in enumerate(base_models):
    grid_search = GridSearchCV(estimator=model, param_grid=rf_param_grid if name == 'random_forest' else gb_param_grid,
                               cv=3, scoring='r2', n_jobs=-1)
    grid_search.fit(X_train, y_train)
    best_base_models[name] = grid_search.best_estimator_
    print("Best parameters for {}:".format(name), grid_search.best_params_)

    train_meta_features[:, i] = grid_search.best_estimator_.predict(X_train)

  self.best_estimator_.fit(X, y, **fit_params)
  warn(


Best parameters for random_forest: {'max_depth': 10, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}


  y = column_or_1d(y, warn=True)


Best parameters for gradient_boosting: {'learning_rate': 0.1, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 200}


In [None]:
meta_grid_search = GridSearchCV(estimator=meta_model, param_grid=meta_param_grid,
                                cv=3, scoring='r2', n_jobs=-1)
meta_grid_search.fit(train_meta_features, y_train)
best_meta_model = meta_grid_search.best_estimator_
print("Best parameters for Meta-model (Linear Regression):", meta_grid_search.best_params_)

Best parameters for Meta-model (Linear Regression): {'fit_intercept': False, 'positive': True}


In [None]:
X_test = X_test.values
test_meta_features = np.column_stack(tuple(model.predict(X_test) for model in best_base_models.values()))
meta_predictions = best_meta_model.predict(test_meta_features)



In [None]:
r2 = r2_score(y_test, meta_predictions)
print("R^2 Score after Hyperparameter Tuning:", r2)
print("mse:",mean_squared_error(y_test, meta_predictions))

R^2 Score after Hyperparameter Tuning: 0.9970546074894654
mse: 0.003588069222793649


In [2]:
print("mae:", mean_absolute_error(y_test, meta_predictions))

NameError: name 'mean_absolute_error' is not defined

In [None]:
import pickle

In [None]:
with open("stackedAlat.pkl","wb") as f:
  pickle.dump(best_meta_model, f)

In [None]:
with open("base_modelsAlat.pkl","wb") as f:
  pickle.dump(best_base_models,f)