In [11]:
import pandas as pd
from sklearn.grid_search import GridSearchCV

class EstimatorSelectionHelper:
    def __init__(self, models, params):
        if not set(models.keys()).issubset(set(params.keys())):
            missing_params = list(set(models.keys()) - set(params.keys()))
            raise ValueError("Some estimators are missing parameters: %s" % missing_params)
        self.models = models
        self.params = params
        self.keys = models.keys()
        self.grid_searches = {}

    def fit(self, X, y, cv=3, n_jobs=1, verbose=1, scoring=None, refit=False):
        for key in self.keys:
            print("Running GridSearchCV for %s." % key)
            model = self.models[key]
            params = self.params[key]
            gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, 
                              verbose=verbose, scoring=scoring, refit=refit)
            gs.fit(X,y)
            self.grid_searches[key] = gs    

    def score_summary(self, sort_by='mean_score'):
        def row(key, scores, params):
            mean = sum(scores) / float(len(scores))
            std = sum((x-mean)**2 for x in scores)
            d = {
                 'estimator': key,
                 'min_score': min(scores),
                 'max_score': max(scores),
                 'mean_score': mean,
                 'std_score': std,
            }
            return pd.Series(dict(params.items() + d.items()))

        rows = [row(k, gsc.cv_validation_scores, gsc.parameters) 
                     for k in self.keys
                     for gsc in self.grid_searches[k].grid_scores_]
        df = pd.concat(rows, axis=1).T.sort([sort_by], ascending=False)

        columns = ['estimator', 'min_score', 'mean_score', 'max_score', 'std_score']
        columns = columns + [c for c in df.columns if c not in columns]

        return df[columns]

In [20]:
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler, Normalizer,\
StandardScaler
from sklearn.pipeline import Pipeline, FeatureUnion

iris = datasets.load_iris()
X_iris = iris.data
y_iris = iris.target

from sklearn.ensemble import (ExtraTreesClassifier, RandomForestClassifier, 
                              AdaBoostClassifier, GradientBoostingClassifier)
from sklearn.svm import SVC

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('ExtraTreesClassifier', ExtraTreesClassifier())
])

models1 = { 
    'ExtraTreesClassifier': pipeline,
    'RandomForestClassifier': RandomForestClassifier(),
    'AdaBoostClassifier': AdaBoostClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'SVC': SVC()
}

params1 = { 
    'ExtraTreesClassifier': { 'n_estimators': [16, 32] },
    'RandomForestClassifier': { 'n_estimators': [16, 32] },
    'AdaBoostClassifier':  { 'n_estimators': [16, 32] },
    'GradientBoostingClassifier': { 'n_estimators': [16, 32], 'learning_rate': [0.8, 1.0] },
    'SVC': [
        {'kernel': ['linear'], 'C': [1, 10]},
        {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.001, 0.0001]},
    ]
}

In [22]:
pipeline.get_params().keys()

['ExtraTreesClassifier__max_features',
 'scaler',
 'ExtraTreesClassifier__min_samples_leaf',
 'ExtraTreesClassifier__max_depth',
 'ExtraTreesClassifier',
 'scaler__with_mean',
 'ExtraTreesClassifier__bootstrap',
 'ExtraTreesClassifier__class_weight',
 'ExtraTreesClassifier__min_weight_fraction_leaf',
 'ExtraTreesClassifier__verbose',
 'ExtraTreesClassifier__criterion',
 'ExtraTreesClassifier__min_samples_split',
 'scaler__with_std',
 'ExtraTreesClassifier__min_impurity_split',
 'ExtraTreesClassifier__random_state',
 'scaler__copy',
 'ExtraTreesClassifier__oob_score',
 'ExtraTreesClassifier__n_estimators',
 'steps',
 'ExtraTreesClassifier__warm_start',
 'ExtraTreesClassifier__n_jobs',
 'ExtraTreesClassifier__max_leaf_nodes']

In [21]:
#%%time
helper1 = EstimatorSelectionHelper(models1, params1)
helper1.fit(X_iris, y_iris, scoring='accuracy', n_jobs=-1)
helper1.score_summary()

Running GridSearchCV for SVC.
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Running GridSearchCV for AdaBoostClassifier.
Fitting 3 folds for each of 2 candidates, totalling 6 fits


[Parallel(n_jobs=-1)]: Done  18 out of  18 | elapsed:    0.1s finished


Running GridSearchCV for GradientBoostingClassifier.
Fitting 3 folds for each of 4 candidates, totalling 12 fits


[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Done   5 out of  12 | elapsed:    0.2s remaining:    0.3s


Running GridSearchCV for ExtraTreesClassifier.
Fitting 3 folds for each of 2 candidates, totalling 6 fits


[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:    0.5s finished


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/usr/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    157     pkg_name = mod_name.rpartition('.')[0]
    158     main_globals = sys.modules["__main__"].__dict__
    159     if alter_argv:
    160         sys.argv[0] = fname
    161     return _run_code(code, main_globals, None,
--> 162                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    163 
    164 def run_module(mod_name, init_globals=None,
    165                run_name=None, alter_sys=False):
    166     """Execute a module's code without importing it

...........................................................................
/usr/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f67569648b0, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f67569648b0, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    591         
    592         If a global instance already exists, this reinitializes and starts it
    593         """
    594         app = cls.instance(**kwargs)
    595         app.initialize(argv)
--> 596         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    597 
    598 #-----------------------------------------------------------------------------
    599 # utility functions, for convenience
    600 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    437         
    438         if self.poller is not None:
    439             self.poller.start()
    440         self.kernel.start()
    441         try:
--> 442             ioloop.IOLoop.instance().start()
    443         except KeyboardInterrupt:
    444             pass
    445 
    446 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    878                 self._events.update(event_pairs)
    879                 while self._events:
    880                     fd, events = self._events.popitem()
    881                     try:
    882                         fd_obj, handler_func = self._handlers[fd]
--> 883                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    884                     except (OSError, IOError) as e:
    885                         if errno_from_exception(e) == errno.EPIPE:
    886                             # Happens when the client closes the connection
    887                             pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "#%%time\nhelper1 = EstimatorSelectionHelper(model...ng='accuracy', n_jobs=-1)\nhelper1.score_summary()", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2016-12-20T23:14:52.790501', 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'session': '9C4FB20070734C9C86343199F4060DB0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['9C4FB20070734C9C86343199F4060DB0']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "#%%time\nhelper1 = EstimatorSelectionHelper(model...ng='accuracy', n_jobs=-1)\nhelper1.score_summary()", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2016-12-20T23:14:52.790501', 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'session': '9C4FB20070734C9C86343199F4060DB0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['9C4FB20070734C9C86343199F4060DB0'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "#%%time\nhelper1 = EstimatorSelectionHelper(model...ng='accuracy', n_jobs=-1)\nhelper1.score_summary()", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': '2016-12-20T23:14:52.790501', 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'session': '9C4FB20070734C9C86343199F4060DB0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '1F441C15321241CB8DAFED033D0DD4C9', 'msg_type': 'execute_request', 'parent_header': {}})
    386         if not silent:
    387             self.execution_count += 1
    388             self._publish_execute_input(code, parent, self.execution_count)
    389 
    390         reply_content = self.do_execute(code, silent, store_history,
--> 391                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    392 
    393         # Flush output before sending the reply.
    394         sys.stdout.flush()
    395         sys.stderr.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u"#%%time\nhelper1 = EstimatorSelectionHelper(mo...='accuracy', n_jobs=-1)\nhelper1.score_summary()", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    194 
    195         reply_content = {}
    196         # FIXME: the shell calls the exception handler itself.
    197         shell._reply_content = None
    198         try:
--> 199             shell.run_cell(code, store_history=store_history, silent=silent)
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u"#%%time\nhelper1 = EstimatorSelectionHelper(mo...='accuracy', n_jobs=-1)\nhelper1.score_summary()"
        store_history = True
        silent = False
    200         except:
    201             status = u'error'
    202             # FIXME: this code right now isn't being used yet by default,
    203             # because the run_cell() call above directly fires off exception

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u"#%%time\nhelper1 = EstimatorSelectionHelper(mo...='accuracy', n_jobs=-1)\nhelper1.score_summary()", store_history=True, silent=False, shell_futures=True)
   2718                 self.displayhook.exec_result = result
   2719 
   2720                 # Execute the user code
   2721                 interactivity = "none" if silent else self.ast_node_interactivity
   2722                 self.run_ast_nodes(code_ast.body, cell_name,
-> 2723                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2724 
   2725                 # Reset this so later displayed values do not modify the
   2726                 # ExecutionResult
   2727                 self.displayhook.exec_result = None

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-21-8ba2ef6d15ee>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2820 
   2821         try:
   2822             for i, node in enumerate(to_run_exec):
   2823                 mod = ast.Module([node])
   2824                 code = compiler(mod, cell_name, "exec")
-> 2825                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f67178dc4b0, file "<ipython-input-21-8ba2ef6d15ee>", line 3>
        result = <IPython.core.interactiveshell.ExecutionResult object>
   2826                     return True
   2827 
   2828             for i, node in enumerate(to_run_interactive):
   2829                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f67178dc4b0, file "<ipython-input-21-8ba2ef6d15ee>", line 3>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2880         outflag = 1  # happens in more places, so it's easier as default
   2881         try:
   2882             try:
   2883                 self.hooks.pre_run_code_hook()
   2884                 #rprint('Running code', repr(code_obj)) # dbg
-> 2885                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f67178dc4b0, file "<ipython-input-21-8ba2ef6d15ee>", line 3>
        self.user_global_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'EstimatorSelectionHelper': <class __main__.EstimatorSelectionHelper>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'FeatureUnion': <class 'sklearn.pipeline.FeatureUnion'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...=-1)\nhelper1.score_summary(sort_by='min_score')", u"helper1 = EstimatorSelectionHelper(models1, pa...coring='f1', n_jobs=-1)\nhelper1.score_summary()", u"helper1 = EstimatorSelectionHelper(models1, pa...='accurate', n_jobs=-1)\nhelper1.score_summary()", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'get_ipython().run_cell_magic(u\'time\', u\'\',...curacy\', n_jobs=-1)\\nhelper1.score_summary()")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...curacy\', n_jobs=-1)\\nhelper1.score_summary()")', u"#%%time\nhelper1 = EstimatorSelectionHelper(mo...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Normalizer': <class 'sklearn.preprocessing.data.Normalizer'>, 'Out': {13:                      estimator min_score mean_sc...  10  0.0001     rbf           NaN          NaN  , 16:                      estimator min_score mean_sc...  10  0.0001     rbf           NaN          NaN  }, ...}
        self.user_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'EstimatorSelectionHelper': <class __main__.EstimatorSelectionHelper>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'FeatureUnion': <class 'sklearn.pipeline.FeatureUnion'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...=-1)\nhelper1.score_summary(sort_by='min_score')", u"helper1 = EstimatorSelectionHelper(models1, pa...coring='f1', n_jobs=-1)\nhelper1.score_summary()", u"helper1 = EstimatorSelectionHelper(models1, pa...='accurate', n_jobs=-1)\nhelper1.score_summary()", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'import pandas as pd\nfrom sklearn.grid_search ... c not in columns]\n\n        return df[columns]', u"from sklearn import datasets\n\niris = dataset...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"helper1 = EstimatorSelectionHelper(models1, pa...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u'get_ipython().run_cell_magic(u\'time\', u\'\',...curacy\', n_jobs=-1)\\nhelper1.score_summary()")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...curacy\', n_jobs=-1)\\nhelper1.score_summary()")', u"#%%time\nhelper1 = EstimatorSelectionHelper(mo...='accuracy', n_jobs=-1)\nhelper1.score_summary()", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", u"from sklearn import datasets\nfrom sklearn.pre...': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", ...], 'MinMaxScaler': <class 'sklearn.preprocessing.data.MinMaxScaler'>, 'Normalizer': <class 'sklearn.preprocessing.data.Normalizer'>, 'Out': {13:                      estimator min_score mean_sc...  10  0.0001     rbf           NaN          NaN  , 16:                      estimator min_score mean_sc...  10  0.0001     rbf           NaN          NaN  }, ...}
   2886             finally:
   2887                 # Reset our crash handler in place
   2888                 sys.excepthook = old_excepthook
   2889         except SystemExit as e:

...........................................................................
/home/guess/Desktop/core/Proyectos/ProyectoIGI/PyMach/Pruebas/Raspberri/Tx_0x01/<ipython-input-21-8ba2ef6d15ee> in <module>()
      1 
      2 
----> 3 
      4 #%%time
      5 helper1 = EstimatorSelectionHelper(models1, params1)
      6 helper1.fit(X_iris, y_iris, scoring='accuracy', n_jobs=-1)
      7 helper1.score_summary()
      8 
      9 
     10 

...........................................................................
/home/guess/Desktop/core/Proyectos/ProyectoIGI/PyMach/Pruebas/Raspberri/Tx_0x01/<ipython-input-11-3140d1c32f6c> in fit(self=<__main__.EstimatorSelectionHelper instance>, X=array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), cv=3, n_jobs=-1, verbose=1, scoring='accuracy', refit=False)
     16             print("Running GridSearchCV for %s." % key)
     17             model = self.models[key]
     18             params = self.params[key]
     19             gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, 
     20                               verbose=verbose, scoring=scoring, refit=refit)
---> 21             gs.fit(X,y)
     22             self.grid_searches[key] = gs    
     23 
     24     def score_summary(self, sort_by='mean_score'):
     25         def row(key, scores, params):

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=3, error_score='raise',
       e...      refit=False, scoring='accuracy', verbose=1), X=array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))
    808         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    809             Target relative to X for classification or regression;
    810             None for unsupervised learning.
    811 
    812         """
--> 813         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...     refit=False, scoring='accuracy', verbose=1)>
        X = array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]])
        y = array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
        self.param_grid = {'n_estimators': [16, 32]}
    814 
    815 
    816 class RandomizedSearchCV(BaseSearchCV):
    817     """Randomized search on hyper parameters.

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=3, error_score='raise',
       e...      refit=False, scoring='accuracy', verbose=1), X=array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    556         )(
    557             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    558                                     train, test, self.verbose, parameters,
    559                                     self.fit_params, return_parameters=True,
    560                                     error_score=self.error_score)
--> 561                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    562                 for train, test in cv)
    563 
    564         # Out is a list of triplet: score, estimator, n_test_samples
    565         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    763             if pre_dispatch == "all" or n_jobs == 1:
    764                 # The iterable was consumed all at once by the above for loop.
    765                 # No need to wait for async callbacks to trigger to
    766                 # consumption.
    767                 self._iterating = False
--> 768             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    769             # Make sure that we get a last message telling us we are done
    770             elapsed_time = time.time() - self._start_time
    771             self._print('Done %3i out of %3i | elapsed: %s finished',
    772                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Tue Dec 20 23:14:54 2016
PID: 11335                                    Python 2.7.6: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), make_scorer(accuracy_score), array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), 1, {'n_estimators': 16}, {})
        kwargs = {'error_score': 'raise', 'return_parameters': True}
        self.items = [(<function _fit_and_score>, (Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), make_scorer(accuracy_score), array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), 1, {'n_estimators': 16}, {}), {'error_score': 'raise', 'return_parameters': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.py in _fit_and_score(estimator=Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), X=array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  ...4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), scorer=make_scorer(accuracy_score), train=array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), verbose=1, parameters={'n_estimators': 16}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1597     fit_params = fit_params if fit_params is not None else {}
   1598     fit_params = dict([(k, _index_param_value(X, v, train))
   1599                       for k, v in fit_params.items()])
   1600 
   1601     if parameters is not None:
-> 1602         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(st...None,
           verbose=0, warm_start=False))])>
        parameters = {'n_estimators': 16}
   1603 
   1604     start_time = time.time()
   1605 
   1606     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/pipeline.py in set_params(self=Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), **kwargs={'n_estimators': 16})
    175 
    176         Returns
    177         -------
    178         self
    179         """
--> 180         self._set_params('steps', **kwargs)
        self._set_params = <bound method Pipeline._set_params of Pipeline(s...None,
           verbose=0, warm_start=False))])>
        kwargs = {'n_estimators': 16}
    181         return self
    182 
    183     def _validate_steps(self):
    184         names, estimators = zip(*self.steps)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/pipeline.py in _set_params(self=Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), steps_attr='steps', **params={'n_estimators': 16})
     64         step_names, _ = zip(*getattr(self, steps_attr))
     65         for name in list(six.iterkeys(params)):
     66             if '__' not in name and name in step_names:
     67                 self._replace_step(steps_attr, name, params.pop(name))
     68         # 3. Step parameters and other initilisation arguments
---> 69         super(_BasePipeline, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(st...None,
           verbose=0, warm_start=False))])>
        params = {'n_estimators': 16}
     70         return self
     71 
     72     def _validate_names(self, names):
     73         if len(set(names)) != len(names):

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/base.py in set_params(self=Pipeline(steps=[('scaler', StandardScaler(copy=T...=None,
           verbose=0, warm_start=False))]), **params={'n_estimators': 16})
    286                 # simple objects case
    287                 if key not in valid_params:
    288                     raise ValueError('Invalid parameter %s for estimator %s. '
    289                                      'Check the list of available parameters '
    290                                      'with `estimator.get_params().keys()`.' %
--> 291                                      (key, self.__class__.__name__))
        key = 'n_estimators'
        self.__class__.__name__ = 'Pipeline'
    292                 setattr(self, key, value)
    293         return self
    294 
    295     def __repr__(self):

ValueError: Invalid parameter n_estimators for estimator Pipeline. Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

In [None]:
parameters = {
    '__max_df': (0.5, 0.75, 1.0),
    #'vect__max_features': (None, 5000, 10000, 50000),
    'vect__ngram_range': ((1, 1), (1, 2)),  # unigrams or bigrams
    #'tfidf__use_idf': (True, False),
    #'tfidf__norm': ('l1', 'l2'),
            'clf__alpha': (0.00001, 0.000001),
            'clf__penalty': ('l2', 'elasticnet'),
            #'clf__n_iter': (10, 50, 80),
        }

# Improve


In [1]:
%%time
import define
import analyze
import prepare
import feature_selection
import evaluate

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.svm import SVC
from sklearn import cross_validation
import pandas as pd

#name = "datasets/iris.csv"
name = "datasets/LocalizationNew.csv"
#name = "datasets/LocalizationOld.csv"
#name = "datasets/seguridad.csv"
#name = "datasets/breast-cancer-wisconsin.csv"
#name = "breast-cancer-wisconsin.csv"
#name = "inputBus.csv"
# className = "Ruta"
#className = "CATEGORY"
#className = "class"
className = "position"

#STEP 0: Define workflow parameters
definer = define.Define(nameData=name, className=className).pipeline()

#STEP 1: Analyze data by ploting it
#analyze.Analyze(definer).pipeline()

#STEP 2: Prepare data by scaling, normalizing, etc. 
preparer = prepare.Prepare(definer).pipeline()

#STEP 3: Feature selection
featurer = feature_selection.FeatureSelection(definer).pipeline()

#STEP4: Evalute the algorithms by using the pipelines
evaluator = evaluate.Evaluate(definer, preparer, featurer).pipeline()



                        Model      Mean       STD
7          AdaBoostClassifier  0.798338  0.048969
9  GradientBoostingClassifier  0.742983  0.060308
3        KNeighborsClassifier  0.742780  0.072794
6        ExtraTreesClassifier  0.736634  0.116844
8      RandomForestClassifier  0.709850  0.117016
1                         SVC  0.683069  0.056990
4      DecisionTreeClassifier  0.675291  0.067551
2                  GaussianNB  0.594325  0.035160
5          LogisticRegression  0.545059  0.049457
0                         LDA  0.527599  0.053464
CPU times: user 18min 21s, sys: 6.67 s, total: 18min 28s
Wall time: 18min 30s


In [14]:
pip = evaluator.pipelines[6][1]
pip

Pipeline(steps=[('preparer', FeatureUnion(n_jobs=1,
       transformer_list=[('scaler', StandardScaler(copy=True, with_mean=True, with_std=True))],
       transformer_weights=None)), ('featurer', FeatureUnion(n_jobs=1,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=2, ran...timators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False))])

In [15]:
pip.get_params()

{'ExtraTreesClassifier': ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
 'ExtraTreesClassifier__bootstrap': False,
 'ExtraTreesClassifier__class_weight': None,
 'ExtraTreesClassifier__criterion': 'gini',
 'ExtraTreesClassifier__max_depth': None,
 'ExtraTreesClassifier__max_features': 'auto',
 'ExtraTreesClassifier__max_leaf_nodes': None,
 'ExtraTreesClassifier__min_impurity_split': 1e-07,
 'ExtraTreesClassifier__min_samples_leaf': 1,
 'ExtraTreesClassifier__min_samples_split': 2,
 'ExtraTreesClassifier__min_weight_fraction_leaf': 0.0,
 'ExtraTreesClassifier__n_estimators': 10,
 'ExtraTreesClassifier__n_jobs': 1,
 'ExtraTreesClassifier__oob_score': False,
 

13:40

In [17]:
%%time
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
param_GBC = { 
    'featurer__extraTC__n_estimators':  [10, 16, 32],
    'featurer__extraTC__criterion': ['gini','entropy'],
    'featurer__extraTC__n_jobs': [-1],
    'featurer__pca__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
    'featurer__pca__whiten': [True],
    'GradientBoostingClassifier__n_estimators': [100, 150, 200],
    'GradientBoostingClassifier__learning_rate': [0.1, 0.2, 0.4, 0.8, 1.0]    
}
param_AdaBoost = { 
    'featurer__extraTC__n_estimators':  [10, 16, 32],
    'featurer__extraTC__criterion': ['gini','entropy'],
    'featurer__extraTC__n_jobs': [-1],
    'featurer__pca__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
    'featurer__pca__whiten': [True],
    'AdaBoostClassifier__base_estimator__criterion': ['gini','entropy'],
    'AdaBoostClassifier__learning_rate': [0.1, 0.2, 0.4, 0.8, 1.0],
    'AdaBoostClassifier__n_estimators': [50, 100, 150, 200]
}
param_ExtraTrees = { 
    'ExtraTreesClassifier__bootstrap': [True],
    'ExtraTreesClassifier__class_weight': [None],
    'ExtraTreesClassifier__criterion': ['gini'],
    'ExtraTreesClassifier__max_depth': [None],
    'ExtraTreesClassifier__max_features': [None],
    'ExtraTreesClassifier__max_leaf_nodes': [None],
    'ExtraTreesClassifier__min_impurity_split': [1e-07],
    'ExtraTreesClassifier__min_samples_leaf': [1],
    'ExtraTreesClassifier__min_samples_split': [2],
    'ExtraTreesClassifier__min_weight_fraction_leaf': [0.0],
    'ExtraTreesClassifier__n_estimators': [905],
    'ExtraTreesClassifier__n_jobs': [1],
    'ExtraTreesClassifier__oob_score': [False],
    'ExtraTreesClassifier__random_state': [2],
    'ExtraTreesClassifier__verbose': [False],
    'ExtraTreesClassifier__warm_start': [False]
}

gsearch = GridSearchCV(estimator=pip, param_grid=param_ExtraTrees)
gsearch.fit(definer.X, definer.y)
print(gsearch.best_score_)
improvedModel = gsearch.best_estimator_
#print(improvedModel.get_params())

0.612920404736
CPU times: user 1min 43s, sys: 5.82 s, total: 1min 49s
Wall time: 4min 2s


In [18]:
pd.DataFrame(gsearch.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_ExtraTreesClassifier__bootstrap,param_ExtraTreesClassifier__class_weight,param_ExtraTreesClassifier__criterion,param_ExtraTreesClassifier__max_depth,param_ExtraTreesClassifier__max_features,param_ExtraTreesClassifier__max_leaf_nodes,...,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,36.242698,3.648133,0.61292,0.997931,True,,gini,,,,...,0.598674,0.997849,0.702311,0.997788,0.537752,0.998157,13.367403,0.078531,0.06792,0.000162


In [19]:
gsearch.best_params_

{'ExtraTreesClassifier__bootstrap': True,
 'ExtraTreesClassifier__class_weight': None,
 'ExtraTreesClassifier__criterion': 'gini',
 'ExtraTreesClassifier__max_depth': None,
 'ExtraTreesClassifier__max_features': None,
 'ExtraTreesClassifier__max_leaf_nodes': None,
 'ExtraTreesClassifier__min_impurity_split': 1e-07,
 'ExtraTreesClassifier__min_samples_leaf': 1,
 'ExtraTreesClassifier__min_samples_split': 2,
 'ExtraTreesClassifier__min_weight_fraction_leaf': 0.0,
 'ExtraTreesClassifier__n_estimators': 905,
 'ExtraTreesClassifier__n_jobs': 1,
 'ExtraTreesClassifier__oob_score': False,
 'ExtraTreesClassifier__random_state': 2,
 'ExtraTreesClassifier__verbose': False,
 'ExtraTreesClassifier__warm_start': False}

In [20]:
gsearch.cv_results_['params'][gsearch.best_index_]

{'ExtraTreesClassifier__bootstrap': True,
 'ExtraTreesClassifier__class_weight': None,
 'ExtraTreesClassifier__criterion': 'gini',
 'ExtraTreesClassifier__max_depth': None,
 'ExtraTreesClassifier__max_features': None,
 'ExtraTreesClassifier__max_leaf_nodes': None,
 'ExtraTreesClassifier__min_impurity_split': 1e-07,
 'ExtraTreesClassifier__min_samples_leaf': 1,
 'ExtraTreesClassifier__min_samples_split': 2,
 'ExtraTreesClassifier__min_weight_fraction_leaf': 0.0,
 'ExtraTreesClassifier__n_estimators': 905,
 'ExtraTreesClassifier__n_jobs': 1,
 'ExtraTreesClassifier__oob_score': False,
 'ExtraTreesClassifier__random_state': 2,
 'ExtraTreesClassifier__verbose': False,
 'ExtraTreesClassifier__warm_start': False}

# New Improve module

In [1]:
import define
import analyze
import prepare
import feature_selection
import evaluate

# from sklearn import datasets
# from sklearn.preprocessing import MinMaxScaler, Normalizer,\
# StandardScaler
# from sklearn.pipeline import Pipeline, FeatureUnion

# iris = datasets.load_iris()
# X_iris = iris.data
# y_iris = iris.target
data_name = "iris.csv"
class_name = "class"
definer = define.Define(
        data_name=data_name,
        header=None,
        class_name=class_name).pipeline()

preparer = prepare.Prepare(definer).pipeline()
featurer = feature_selection.FeatureSelection(definer).pipeline()
evaluator = evaluate.Evaluate(definer, preparer, featurer).buildPipelines()

# def gradientboosting_param(self):
#     parameters = { 
#         'featurer__extraTC__n_estimators':  [10, 16, 32],
#         'featurer__extraTC__criterion': ['gini','entropy'],
#         'featurer__extraTC__n_jobs': [-1],
#         'featurer__pca__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
#         'featurer__pca__whiten': [True],
#         'GradientBoostingClassifier__n_estimators': [100, 150, 200],
#         'GradientBoostingClassifier__learning_rate': [0.1, 0.2, 0.4, 0.8, 1.0]    
#     }

#     return parameters

# # def adaboost_param(self):
# #     parameters = { 
# #         'featurer__extraTC__n_estimators':  [10, 16, 32],
# #         'featurer__extraTC__criterion': ['gini','entropy'],
# #         'featurer__extraTC__n_jobs': [-1],
# #         'featurer__pca__svd_solver': ['auto', 'full', 'arpack', 'randomized'],
# #         'featurer__pca__whiten': [True],
# #         'AdaBoostClassifier__base_estimator__criterion': ['gini','entropy'],
# #         'AdaBoostClassifier__learning_rate': [0.1, 0.2, 0.4, 0.8, 1.0],
# #         'AdaBoostClassifier__n_estimators': [50, 100, 150, 200]
# #     }

# #     return parameters

# def improve(self):
#     pipeline = evaluator

#     parameters = self.gradientboosting_param()

#     grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)

#     print("Performing grid search...")
#     print("pipeline:", [name for name, _ in pipeline.steps])
#     print("parameters:")
#     print(parameters)
#     t0 = time()
#     grid_search.fit(self.definer.X, self.definer.y)
#     print("done in %0.3fs" % (time() - t0))
#     print()

#     print("Best score: %0.3f" % grid_search.best_score_)

ImportError: No module named pkg_resources

In [None]:
improve()