In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt

from sklearn import model_selection
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics

# Classifiers
from sklearn.svm import SVR
# from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestRegressor
# from mlxtend.Regressor import StackingCVRegressor #
from mlxtend.regressor import StackingCVRegressor
import xgboost as xgb

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)

# Used to ignore warnings generated from StackingCVClassifier
import warnings
warnings.simplefilter('ignore')

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

rf = RandomForestRegressor(random_state = 1000, n_jobs = -1)
lr = LinearRegression(n_jobs = -1)
df = pd.read_csv('datasets_52721_99691_student-mat.csv')

X = df.drop('G3', axis=1)
y = df[['G3']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_train.head()

n_estimators = [10,20,50]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [5,15,20]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [1,3, 5]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 3, 5]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
params = {
               'stackingcvregressor__randomforestregressor__n_estimators': n_estimators,  #randomforestregressor__bootstrap
               'stackingcvregressor__randomforestregressor__max_features': max_features,
               'stackingcvregressor__randomforestregressor__max_depth': max_depth,
               'stackingcvregressor__randomforestregressor__min_samples_split': min_samples_split,
               'stackingcvregressor__randomforestregressor__min_samples_leaf': min_samples_leaf,
               'stackingcvregressor__randomforestregressor__bootstrap': bootstrap,
#               "xgbclassifier__n_estimators": [10, 50, 100, 500],
#                 "xgbclassifier__learning_rate": [0.1, 0.5, 1],
         }


from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

num_features = X_train.select_dtypes(include=['int64', 'float64']).columns
cat_features = X_train.select_dtypes(include=['object']).columns

preprocess = make_column_transformer(
    (StandardScaler(), num_features),(OneHotEncoder(handle_unknown='ignore'),cat_features))

stacking_model = StackingCVRegressor(regressors=(rf,lr), 
                            meta_regressor=lr,
                            random_state=42)

pipe_stack = Pipeline([('preprocessor', preprocess),('stackingcvregressor',stacking_model)])

grid = GridSearchCV(estimator=pipe_stack, 
                    param_grid=params, 
                    cv=3,
                    refit=True,n_jobs = -1, verbose = 5 )
grid.fit(X_train, y_train)
print ("score = %3.2f" %(grid.score(X_test,y_test)))
print (grid.best_params_)

Fitting 3 folds for each of 432 candidates, totalling 1296 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.6s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   20.0s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   58.7s
[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 874 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 1144 tasks      | elapsed:  8.0min
[Parallel(n_jobs=-1)]: Done 1296 out of 1296 | elapsed:  9.0min finished


score = 0.73
{'stackingcvregressor__randomforestregressor__bootstrap': False, 'stackingcvregressor__randomforestregressor__max_depth': 5, 'stackingcvregressor__randomforestregressor__max_features': 'auto', 'stackingcvregressor__randomforestregressor__min_samples_leaf': 3, 'stackingcvregressor__randomforestregressor__min_samples_split': 3, 'stackingcvregressor__randomforestregressor__n_estimators': 20}
