In [17]:
%run Global.ipynb
%run Helper_Functions.ipynb

In [7]:
train_df_ohe_selected = pd.read_csv(DATASET_DIR + "/train_df_ohe_selected.csv", keep_default_na=False)

X = train_df_ohe_selected.iloc[:,:-1] # (transformed) attributes
y = train_df_ohe_selected.iloc[:,-1]  # ground truth labels

In [5]:
# run in terminal: python -m pip install mlxtend
# https://rasbt.github.io/mlxtend/user_guide/classifier/StackingCVClassifier/#example-3-stacked-cv-classification-and-gridsearch
import mlxtend
from mlxtend.classifier import StackingCVClassifier

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn import svm

# gaussian NB
gnb = GaussianNB()

# linear svm
linearSVM = svm.LinearSVC(random_state=30027, C=1)

# logr
logr = LogisticRegression(solver='sag', max_iter=100)

# decision tree
dt = DecisionTreeClassifier(max_depth = 200, random_state = 30027)

# knn
knn = KNeighborsClassifier(n_neighbors=1)


In [19]:
sclf = StackingCVClassifier(classifiers=[gnb, linearSVM, logr, dt, knn], 
                            meta_classifier=LogisticRegression(),
                            cv = 2,
                            random_state=30027)

In [20]:
sclf.get_params()

{'classifiers': [GaussianNB(),
  LinearSVC(C=1, random_state=30027),
  LogisticRegression(solver='sag'),
  DecisionTreeClassifier(max_depth=200, random_state=30027),
  KNeighborsClassifier(n_neighbors=1)],
 'cv': 2,
 'drop_proba_col': None,
 'meta_classifier__C': 1.0,
 'meta_classifier__class_weight': None,
 'meta_classifier__dual': False,
 'meta_classifier__fit_intercept': True,
 'meta_classifier__intercept_scaling': 1,
 'meta_classifier__l1_ratio': None,
 'meta_classifier__max_iter': 100,
 'meta_classifier__multi_class': 'auto',
 'meta_classifier__n_jobs': None,
 'meta_classifier__penalty': 'l2',
 'meta_classifier__random_state': None,
 'meta_classifier__solver': 'lbfgs',
 'meta_classifier__tol': 0.0001,
 'meta_classifier__verbose': 0,
 'meta_classifier__warm_start': False,
 'meta_classifier': LogisticRegression(),
 'n_jobs': None,
 'pre_dispatch': '2*n_jobs',
 'random_state': 30027,
 'shuffle': True,
 'store_train_meta_features': False,
 'stratify': True,
 'use_clones': True,
 'use_

In [None]:
param_grid = {'linearsvc__C': range(1,11,5), 
              'decisiontreeclassifier__max_depth': range(200, 401, 50), 
              'kneighborsclassifier__n_neighbors': range(1,6)}

grid = tune_hyperparameter(sclf, param_grid, X, y, cv=2)

Fitting 2 folds for each of 50 candidates, totalling 100 fits
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=1, linearsvc__C=1; total time= 1.1min
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=1, linearsvc__C=1; total time=  56.3s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=1, linearsvc__C=6; total time=  44.2s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=1, linearsvc__C=6; total time=  46.0s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=2, linearsvc__C=1; total time=  43.8s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=2, linearsvc__C=1; total time=  45.3s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=2, linearsvc__C=6; total time=  50.3s
[CV] END decisiontreeclassifier__max_depth=200, kneighborsclassifier__n_neighbors=2, linearsvc__C=

[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=3, linearsvc__C=1; total time=  41.0s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=3, linearsvc__C=1; total time=  42.5s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=3, linearsvc__C=6; total time=  45.2s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=3, linearsvc__C=6; total time=  45.6s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=4, linearsvc__C=1; total time=  43.0s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=4, linearsvc__C=1; total time=  48.0s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=4, linearsvc__C=6; total time=  44.9s
[CV] END decisiontreeclassifier__max_depth=350, kneighborsclassifier__n_neighbors=4, linearsvc__C=6; total time=  43.5s
[CV] END decisiontreeclassifier__max_dep

In [23]:
grid